/* Global common subexpression elimination/Partial redundancy elimination
   and global constant/copy propagation for GNU compiler.
   Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* TODO
   - reordering of memory allocation and freeing to be more space efficient
   - do rough calc of how many regs are needed in each block, and a rough
     calc of how many regs are available in each class and use that to
     throttle back the code in cases where RTX_COST is minimal.
   - a store to the same address as a load does not kill the load if the
     source of the store is also the destination of the load.  Handling this
     allows more load motion, particularly out of loops.
   - ability to realloc sbitmap vectors would allow one initial computation
     of reg_set_in_block with only subsequent additions, rather than
     recomputing it for each pass

*/

/* References searched while implementing this.

   Compilers: Principles, Techniques, and Tools
   Aho, Sethi, Ullman
   Addison-Wesley, 1988

   Global Optimization by Suppression of Partial Redundancies
   E. Morel, C. Renvoise
   Communications of the ACM, Vol. 22, Num. 2, Feb. 1979

   A Portable Machine-Independent Global Optimizer - Design and Measurements
   Frederick Chow
   Stanford Ph.D. thesis, Dec. 1983

   A Fast Algorithm for Code Movement Optimization
   D.M. Dhamdhere
   SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988

   A Solution to a Problem with Morel and Renvoise's
   Global Optimization by Suppression of Partial Redundancies
   K-H Drechsler, M.P. Stadel
   ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988

   Practical Adaptation of the Global Optimization
   Algorithm of Morel and Renvoise
   D.M. Dhamdhere
   ACM TOPLAS, Vol. 13, Num. 2, Apr. 1991

   Efficiently Computing Static Single Assignment Form and the Control
   Dependence Graph
   R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
   ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991

   Lazy Code Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
   Time for Reducible Flow Control
   Thomas Ball
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   An Efficient Representation for Sparse Sets
   Preston Briggs, Linda Torczon
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
   K-H Drechsler, M.P. Stadel
   ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993

   Partial Dead Code Elimination
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Effective Partial Redundancy Elimination
   P. Briggs, K.D. Cooper
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   The Program Structure Tree: Computing Control Regions in Linear Time
   R. Johnson, D. Pearson, K. Pingali
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Optimal Code Motion: Theory and Practice
   J. Knoop, O. Ruthing, B. Steffen
   ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994

   The power of assignment motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Global code motion / global value numbering
   C. Click
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Value Driven Redundancy Elimination
   L.T. Simpson
   Rice University Ph.D. thesis, Apr. 1996

   Value Numbering
   L.T. Simpson
   Massively Scalar Compiler Project, Rice University, Sep. 1996

   High Performance Compilers for Parallel Computing
   Michael Wolfe
   Addison-Wesley, 1996

   Advanced Compiler Design and Implementation
   Steven Muchnick
   Morgan Kaufmann, 1997

   Building an Optimizing Compiler
   Robert Morgan
   Digital Press, 1998

   People wishing to speed up the code here should read:
     Elimination Algorithms for Data Flow Analysis
     B.G. Ryder, M.C. Paull
     ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986

     How to Analyze Large Programs Efficiently and Informatively
     D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
     ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   People wishing to do something different can find various possibilities
   in the above papers and elsewhere.
*/

#include "config.h"
#include "system.h"
#include "toplev.h"

#include "rtl.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "real.h"
#include "insn-config.h"
#include "recog.h"
#include "basic-block.h"
#include "output.h"
#include "function.h"
#include "expr.h"
#include "ggc.h"
#include "params.h"
#include "obstack.h"
#include "df.h"

#define obstack_chunk_alloc gmalloc
#define obstack_chunk_free free

/* Propagate flow information through back edges and thus enable PRE's
   moving loop invariant calculations out of loops.

   Originally this tended to create worse overall code, but several
   improvements during the development of PRE seem to have made following
   back edges generally a win.

   Note much of the loop invariant code motion done here would normally
   be done by loop.c, which has more heuristics for when to move invariants
   out of loops.  At some point we might need to move some of those
   heuristics into gcse.c.  */
#define FOLLOW_BACK_EDGES 1

/* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
   are a superset of those done by GCSE.

   We perform the following steps:

   1) Compute basic block information.

   2) Compute table of places where registers are set.

   3) Perform copy/constant propagation.

   4) Perform global cse.

   5) Perform another pass of copy/constant propagation.

   Two passes of copy/constant propagation are done because the first one
   enables more GCSE and the second one helps to clean up the copies that
   GCSE creates.  This is needed more for PRE than for Classic because Classic
   GCSE will try to use an existing register containing the common
   subexpression rather than create a new one.  This is harder to do for PRE
   because of the code motion (which Classic GCSE doesn't do).

   Expressions we are interested in GCSE-ing are of the form
   (set (pseudo-reg) (expression)).
   Function want_to_gcse_p says what these are.

   PRE handles moving invariant expressions out of loops (by treating them as
   partially redundant).

   Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
   assignment) based GVN (global value numbering).  L. T. Simpson's paper
   (Rice University) on value numbering is a useful reference for this.

   **********************

   We used to support multiple passes but there are diminishing returns in
   doing so.  The first pass usually makes 90% of the changes that are doable.
   A second pass can make a few more changes made possible by the first pass.
   Experiments show any further passes don't make enough changes to justify
   the expense.

   A study of spec92 using an unlimited number of passes:
   [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
   [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
   [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1

   It was found doing copy propagation between each pass enables further
   substitutions.

   PRE is quite expensive in complicated functions because the DFA can take
   a while to converge.  Hence we only perform one pass.  The parameter
   max-gcse-passes can be modified if one wants to experiment.

   **********************

   The steps for PRE are:

   1) Build the hash table of expressions we wish to GCSE (expr_hash_table).

   2) Perform the data flow analysis for PRE.

   3) Delete the redundant instructions.

   4) Insert the required copies [if any] that make the partially
      redundant instructions fully redundant.

   5) For other reaching expressions, insert an instruction to copy the value
      to a newly created pseudo that will reach the redundant instruction.

   The deletion is done first so that when we do insertions we
   know which pseudo reg to use.

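   As an illustrative sketch (not from the original commentary), PRE
   rewrites

       if (cond)
         x = a + b;
       y = a + b;         <- partially redundant: a+b reaches here
                             only along the "cond" path

   as

       if (cond)
         { x = a + b; t = x; }
       else
         t = a + b;       <- inserted copy on the other path
       y = t;             <- now fully redundant; replaced by the
                             reaching register t
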
   Various papers have argued that PRE DFA is expensive (O(n^2)) and others
   argue it is not.  The number of iterations for the algorithm to converge
   is typically 2-4 so I don't view it as that expensive (relatively speaking).

   PRE GCSE depends heavily on the second CSE pass to clean up the copies
   we create.  To make an expression reach the place where it's redundant,
   the result of the expression is copied to a new register, and the redundant
   expression is deleted by replacing it with this new register.  Classic GCSE
   doesn't have this problem as much as it computes the reaching defs of
   each register in each block and thus can try to use an existing register.

   **********************

   A fair bit of simplicity is created by creating small functions for simple
   tasks, even when the function is only called in one place.  This may
   measurably slow things down [or may not] by creating more function call
   overhead than is necessary.  The source is laid out so that it's trivial
   to make the affected functions inline so that one can measure what speed
   up, if any, can be achieved, and maybe later when things settle things can
   be rearranged.

   Help stamp out big monolithic functions!  */
\f
/* GCSE global vars.  */

/* -dG dump file.  */
static FILE *gcse_file;

/* Note whether or not we should run jump optimization after gcse.  We
   want to do this for two cases.

    * If we changed any jumps via cprop.

    * If we added any labels via edge splitting.  */

static int run_jump_opt_after_gcse;

/* Bitmaps are normally not included in debugging dumps.
   However it's useful to be able to print them from GDB.
   We could create special functions for this, but it's simpler to
   just allow passing stderr to the dump_foo fns.  Since stderr can
   be a macro, we store a copy here.  */
static FILE *debug_stderr;

/* An obstack for our working variables.  */
static struct obstack gcse_obstack;

/* Non-zero for each mode that supports (set (reg) (reg)).
   This is trivially true for integer and floating point values.
   It may or may not be true for condition codes.  */
static char can_copy_p[(int) NUM_MACHINE_MODES];

/* Non-zero if can_copy_p has been initialized.  */
static int can_copy_init_p;

/* Dataflow analyzer.  */
struct df *df_analyzer;


struct reg_use {rtx reg_rtx; };

/* Hash table of expressions.  */

struct expr
{
  /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
  rtx expr;
  /* Index in the available expression bitmaps.  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block, the operands are not modified in the basic block prior
     to the occurrence and the output is not used between the start of
     the block and the occurrence.  */
  struct occr *antic_occr;
  /* List of available occurrences in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.  */
  rtx reaching_reg;
};

/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once, the
   last appearance is used [or the first, for anticipatable expressions].  */

struct occr
{
  /* Next occurrence of this expression.  */
  struct occr *next;
  /* The insn that computes the expression.  */
  rtx insn;
  /* Non-zero if this [anticipatable] occurrence has been deleted.  */
  char deleted_p;
  /* Non-zero if this [available] occurrence has been copied to
     reaching_reg.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};

/* Expression and copy propagation hash tables.
   Each hash table is an array of buckets.
   ??? It is known that if it were an array of entries, structure elements
   `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
   not clear whether in the final analysis a sufficient amount of memory would
   be saved as the size of the available expression bitmaps would be larger
   [one could build a mapping table without holes afterwards though].
   Someday I'll perform the computation and figure it out.  */

/* Total size of the expression hash table, in elements.  */
static unsigned int expr_hash_table_size;

/* The table itself.
   This is an array of `expr_hash_table_size' elements.  */
static struct expr **expr_hash_table;

/* Total size of the copy propagation hash table, in elements.  */
static unsigned int set_hash_table_size;

/* The table itself.
   This is an array of `set_hash_table_size' elements.  */
static struct expr **set_hash_table;

/* Mapping of uids to cuids.
   Only real insns get cuids.  */
static int *uid_cuid;

/* Highest UID in UID_CUID.  */
static int max_uid;

/* Get the cuid of an insn.  */
#ifdef ENABLE_CHECKING
#define INSN_CUID(INSN) (INSN_UID (INSN) > max_uid ? (abort (), 0) : uid_cuid[INSN_UID (INSN)])
#else
#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
#endif

/* Number of cuids.  */
static int max_cuid;

/* Mapping of cuids to insns.  */
static rtx *cuid_insn;

/* Get insn from cuid.  */
#define CUID_INSN(CUID) (cuid_insn[CUID])
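
/* Illustrative example (not part of the original commentary): given the
   insn stream  insn UID 5, note UID 6, insn UID 9,  alloc_gcse_mem sets
   uid_cuid[5] = 0, uid_cuid[6] = 1 and uid_cuid[9] = 1 (notes share the
   cuid of the following real insn), and cuid_insn maps CUIDs 0 and 1
   back to the two real insns.  */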

/* Maximum register number in function prior to doing gcse + 1.
   Registers created during this pass have regno >= max_gcse_regno.
   This is named with "gcse" to not collide with global of same name.  */
static unsigned int max_gcse_regno;

/* Maximum number of cse-able expressions found.  */
static int n_exprs;

/* Maximum number of assignments for copy propagation found.  */
static int n_sets;

/* Table of registers that are modified.

   For each register, each element is a list of places where the pseudo-reg
   is set.

   For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
   requires knowledge of which blocks kill which regs [and thus could use
   a bitmap instead of the lists `reg_set_table' uses].

   `reg_set_table' could be turned into an array of bitmaps (num-bbs x
   num-regs) [however perhaps it may be useful to keep the data as is].  One
   advantage of recording things this way is that `reg_set_table' is fairly
   sparse with respect to pseudo regs but for hard regs could be fairly dense
   [relatively speaking].  And recording sets of pseudo-regs in lists speeds
   up functions like compute_transp since in the case of pseudo-regs we only
   need to iterate over the number of times a pseudo-reg is set, not over the
   number of basic blocks [clearly there is a bit of a slow down in the cases
   where a pseudo is set more than once in a block, however it is believed
   that the net effect is to speed things up].  This isn't done for hard-regs
   because recording call-clobbered hard-regs in `reg_set_table' at each
   function call can consume a fair bit of memory, and iterating over
   hard-regs stored this way in compute_transp will be more expensive.  */

typedef struct reg_set
{
  /* The next setting of this register.  */
  struct reg_set *next;
  /* The insn where it was set.  */
  rtx insn;
} reg_set;

static reg_set **reg_set_table;

/* Size of `reg_set_table'.
   The table starts out at max_gcse_regno + slop, and is enlarged as
   necessary.  */
static int reg_set_table_size;

/* Amount to grow `reg_set_table' by when it's full.  */
#define REG_SET_TABLE_SLOP 100
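
/* For illustration only (not from the original commentary): after
   compute_sets runs, a pseudo-reg R set first by insn I1 and later by
   insn I7 has reg_set_table[R] = { insn = I7 } -> { insn = I1 } -> NULL.
   The list is most-recent-first because record_one_set prepends each new
   entry to the head of the list.  */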

/* This is a list of expressions which are MEMs and will be used by load
   or store motion.
   Load motion tracks MEMs which aren't killed by anything except
   themselves (i.e., loads and stores to a single location).
   We can then allow movement of these MEM refs with a little special
   allowance (all stores copy the same value to the reaching reg used
   for the loads).  This means all values used to store into memory must
   have no side effects so we can re-issue the setter value.
   Store Motion uses this structure as an expression table to track stores
   which look interesting, and might be moveable towards the exit block.  */

struct ls_expr
{
  struct expr * expr;           /* Gcse expression reference for LM.  */
  rtx pattern;                  /* Pattern of this mem.  */
  rtx loads;                    /* INSN list for where load appears.  */
  rtx stores;                   /* INSN list for where store appears.  */
  struct ls_expr * next;        /* Next in the list.  */
  int invalid;                  /* Invalid for some reason.  */
  int index;                    /* If it maps to a bitmap index.  */
  int hash_index;               /* Index when in a hash table.  */
  rtx reaching_reg;             /* Register to use when re-writing.  */
};

/* Head of the list of load/store memory refs.  */
static struct ls_expr * pre_ldst_mems = NULL;
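
/* Illustrative example (an assumption for exposition, not from the
   original comments): a load such as
       (set (reg 120) (mem (symbol_ref "x")))
   can be tracked here when nothing else clobbers that location; every
   store to "x" must then copy its value through the reaching register
   so the stores can be re-issued after the loads are moved.  */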

/* Bitmap containing one bit for each register in the program.
   Used when performing GCSE to track which registers have been set since
   the start of the basic block.  */
static sbitmap reg_set_bitmap;

/* For each block, a bitmap of registers set in the block.
   This is used by expr_killed_p and compute_transp.
   It is computed during hash table computation and not by compute_sets
   as it includes registers added since the last pass (or between cprop and
   gcse) and it's currently not easy to realloc sbitmap vectors.  */
static sbitmap *reg_set_in_block;

/* Array, indexed by basic block number, of lists of insns which modify
   memory within the block.  */
static rtx * modify_mem_list;

/* This array parallels modify_mem_list, but is kept canonicalized.  */
static rtx * canon_modify_mem_list;

/* Various variables for statistics gathering.  */

/* Memory used in a pass.
   This isn't intended to be absolutely precise.  Its intent is only
   to keep an eye on memory usage.  */
static int bytes_used;

/* GCSE substitutions made.  */
static int gcse_subst_count;
/* Number of copy instructions created.  */
static int gcse_create_count;
/* Number of constants propagated.  */
static int const_prop_count;
/* Number of copies propagated.  */
static int copy_prop_count;
\f
/* These variables are used by classic GCSE.
   Normally they'd be defined a bit later, but `rd_gen' needs to
   be declared sooner.  */

/* Each block has a bitmap of each type.
   The length of each block's bitmap is:

       max_cuid  - for reaching definitions
       n_exprs   - for available expressions

   Thus we view the bitmaps as 2 dimensional arrays, i.e.:
       rd_kill[block_num][cuid_num]
       ae_kill[block_num][expr_num]  */

/* For reaching defs.  */
static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;

/* For available exprs.  */
static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;

/* Objects of this type are passed around by the null-pointer check
   removal routines.  */
struct null_pointer_info
{
  /* The basic block being processed.  */
  int current_block;
  /* The first register to be handled in this pass.  */
  unsigned int min_reg;
  /* One greater than the last register to be handled in this pass.  */
  unsigned int max_reg;
  sbitmap *nonnull_local;
  sbitmap *nonnull_killed;
};
\f
static void compute_can_copy PARAMS ((void));
static char *gmalloc PARAMS ((unsigned int));
static char *grealloc PARAMS ((char *, unsigned int));
static char *gcse_alloc PARAMS ((unsigned long));
static void alloc_gcse_mem PARAMS ((rtx));
static void free_gcse_mem PARAMS ((void));
static void alloc_reg_set_mem PARAMS ((int));
static void free_reg_set_mem PARAMS ((void));
static int get_bitmap_width PARAMS ((int, int, int));
static void record_one_set PARAMS ((int, rtx));
static void record_set_info PARAMS ((rtx, rtx, void *));
static void compute_sets PARAMS ((rtx));
static void hash_scan_insn PARAMS ((rtx, int, int));
static void hash_scan_set PARAMS ((rtx, rtx, int));
static void hash_scan_clobber PARAMS ((rtx, rtx));
static void hash_scan_call PARAMS ((rtx, rtx));
static int want_to_gcse_p PARAMS ((rtx));
static int oprs_unchanged_p PARAMS ((rtx, rtx, int));
static int oprs_anticipatable_p PARAMS ((rtx, rtx));
static int oprs_available_p PARAMS ((rtx, rtx));
static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
					  int, int));
static void insert_set_in_table PARAMS ((rtx, rtx));
static unsigned int hash_expr PARAMS ((rtx, enum machine_mode, int *, int));
static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
static unsigned int hash_string_1 PARAMS ((const char *));
static unsigned int hash_set PARAMS ((int, int));
static int expr_equiv_p PARAMS ((rtx, rtx));
static void record_last_reg_set_info PARAMS ((rtx, int));
static void record_last_mem_set_info PARAMS ((rtx));
static void record_last_set_info PARAMS ((rtx, rtx, void *));
static void compute_hash_table PARAMS ((int));
static void alloc_set_hash_table PARAMS ((int));
static void free_set_hash_table PARAMS ((void));
static void compute_set_hash_table PARAMS ((void));
static void alloc_expr_hash_table PARAMS ((unsigned int));
static void free_expr_hash_table PARAMS ((void));
static void compute_expr_hash_table PARAMS ((void));
static void dump_hash_table PARAMS ((FILE *, const char *, struct expr **,
				     int, int));
static struct expr *lookup_expr PARAMS ((rtx));
static struct expr *lookup_set PARAMS ((unsigned int, rtx));
static struct expr *next_set PARAMS ((unsigned int, struct expr *));
static void reset_opr_set_tables PARAMS ((void));
static int oprs_not_set_p PARAMS ((rtx, rtx));
static void mark_call PARAMS ((rtx));
static void mark_set PARAMS ((rtx, rtx));
static void mark_clobber PARAMS ((rtx, rtx));
static void mark_oprs_set PARAMS ((rtx));
static void alloc_cprop_mem PARAMS ((int, int));
static void free_cprop_mem PARAMS ((void));
static void compute_transp PARAMS ((rtx, int, sbitmap *, int));
static void compute_transpout PARAMS ((void));
static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
					      int));
static void compute_cprop_data PARAMS ((void));
static void find_used_regs PARAMS ((rtx *, void *));
static int try_replace_reg PARAMS ((rtx, rtx, rtx));
static struct expr *find_avail_set PARAMS ((int, rtx));
static int cprop_jump PARAMS ((basic_block, rtx, rtx, rtx));
#ifdef HAVE_cc0
static int cprop_cc0_jump PARAMS ((basic_block, rtx, struct reg_use *, rtx));
#endif
static void mems_conflict_for_gcse_p PARAMS ((rtx, rtx, void *));
static int load_killed_in_block_p PARAMS ((basic_block, int, rtx, int));
static void canon_list_insert PARAMS ((rtx, rtx, void *));
static int cprop_insn PARAMS ((basic_block, rtx, int));
static int cprop PARAMS ((int));
static int one_cprop_pass PARAMS ((int, int));
static void alloc_pre_mem PARAMS ((int, int));
static void free_pre_mem PARAMS ((void));
static void compute_pre_data PARAMS ((void));
static int pre_expr_reaches_here_p PARAMS ((basic_block, struct expr *,
					    basic_block));
static void insert_insn_end_bb PARAMS ((struct expr *, basic_block, int));
static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
static void pre_insert_copies PARAMS ((void));
static int pre_delete PARAMS ((void));
static int pre_gcse PARAMS ((void));
static int one_pre_gcse_pass PARAMS ((int));
static void add_label_notes PARAMS ((rtx, rtx));
static void alloc_code_hoist_mem PARAMS ((int, int));
static void free_code_hoist_mem PARAMS ((void));
static void compute_code_hoist_vbeinout PARAMS ((void));
static void compute_code_hoist_data PARAMS ((void));
static int hoist_expr_reaches_here_p PARAMS ((basic_block, int, basic_block,
					      char *));
static void hoist_code PARAMS ((void));
static int one_code_hoisting_pass PARAMS ((void));
static void alloc_rd_mem PARAMS ((int, int));
static void free_rd_mem PARAMS ((void));
static void handle_rd_kill_set PARAMS ((rtx, int, basic_block));
static void compute_kill_rd PARAMS ((void));
static void compute_rd PARAMS ((void));
static void alloc_avail_expr_mem PARAMS ((int, int));
static void free_avail_expr_mem PARAMS ((void));
static void compute_ae_gen PARAMS ((void));
static int expr_killed_p PARAMS ((rtx, basic_block));
static void compute_ae_kill PARAMS ((sbitmap *, sbitmap *));
static int expr_reaches_here_p PARAMS ((struct occr *, struct expr *,
					basic_block, int));
static rtx computing_insn PARAMS ((struct expr *, rtx));
static int def_reaches_here_p PARAMS ((rtx, rtx));
static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
static int handle_avail_expr PARAMS ((rtx, struct expr *));
static int classic_gcse PARAMS ((void));
static int one_classic_gcse_pass PARAMS ((int));
static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
static void delete_null_pointer_checks_1 PARAMS ((varray_type *, unsigned int *,
						  sbitmap *, sbitmap *,
						  struct null_pointer_info *));
static rtx process_insert_insn PARAMS ((struct expr *));
static int pre_edge_insert PARAMS ((struct edge_list *, struct expr **));
static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
					     basic_block, int, char *));
static int pre_expr_reaches_here_p_work PARAMS ((basic_block, struct expr *,
						 basic_block, char *));
static struct ls_expr * ldst_entry PARAMS ((rtx));
static void free_ldst_entry PARAMS ((struct ls_expr *));
static void free_ldst_mems PARAMS ((void));
static void print_ldst_list PARAMS ((FILE *));
static struct ls_expr * find_rtx_in_ldst PARAMS ((rtx));
static int enumerate_ldsts PARAMS ((void));
static inline struct ls_expr * first_ls_expr PARAMS ((void));
static inline struct ls_expr * next_ls_expr PARAMS ((struct ls_expr *));
static int simple_mem PARAMS ((rtx));
static void invalidate_any_buried_refs PARAMS ((rtx));
static void compute_ld_motion_mems PARAMS ((void));
static void trim_ld_motion_mems PARAMS ((void));
static void update_ld_motion_stores PARAMS ((struct expr *));
static int store_ops_ok PARAMS ((rtx, basic_block, rtx, int));
static void find_moveable_store PARAMS ((rtx));
static int compute_store_table PARAMS ((void));
static int load_kills_store PARAMS ((rtx, rtx));
static int find_loads PARAMS ((rtx, rtx));
static int store_killed_in_insn PARAMS ((rtx, rtx));
static int store_killed_after PARAMS ((rtx, rtx, basic_block, int));
static int store_killed_before PARAMS ((rtx, rtx, basic_block));
static void build_store_vectors PARAMS ((void));
static void insert_insn_start_bb PARAMS ((rtx, basic_block));
static int insert_store PARAMS ((struct ls_expr *, edge));
static void replace_store_insn PARAMS ((rtx, rtx, basic_block));
static void delete_store PARAMS ((struct ls_expr *,
				  basic_block));
static void free_store_memory PARAMS ((void));
static void store_motion PARAMS ((void));
\f
/* Entry point for global common subexpression elimination.
   F is the first instruction in the function.  */

int
gcse_main (f, file)
     rtx f;
     FILE *file;
{
  int changed, pass;
  /* Bytes used at start of pass.  */
  int initial_bytes_used;
  /* Maximum number of bytes used by a pass.  */
  int max_pass_bytes;
  /* Point to release obstack data from for each pass.  */
  char *gcse_obstack_bottom;

  /* Insertion of instructions on edges can create new basic blocks; we
     need the original basic block count so that we can properly deallocate
     arrays sized on the number of basic blocks originally in the cfg.  */
  int orig_bb_count;

  /* We do not construct an accurate cfg in functions which call
     setjmp, so just punt to be safe.  */
  if (current_function_calls_setjmp)
    return 0;

  /* Assume that we do not need to run jump optimizations after gcse.  */
  run_jump_opt_after_gcse = 0;

  /* For calling dump_foo fns from gdb.  */
  debug_stderr = stderr;
  gcse_file = file;

  /* Identify the basic block information for this function, including
     successors and predecessors.  */
  max_gcse_regno = max_reg_num ();

  if (file)
    dump_flow_info (file);

  orig_bb_count = n_basic_blocks;

  /* Return if there's nothing to do.  */
  if (n_basic_blocks <= 1)
    return 0;

  /* Trying to perform global optimizations on flow graphs which have
     a high connectivity will take a long time and is unlikely to be
     particularly useful.

     In normal circumstances a cfg should have about twice as many edges
     as blocks.  But we do not want to punish small functions which have
     a couple switch statements.  So we require a relatively large number
     of basic blocks and the ratio of edges to blocks to be high.  */
  if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
    {
      if (warn_disabled_optimization)
	warning ("GCSE disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
		 n_basic_blocks, n_edges / n_basic_blocks);
      return 0;
    }

  /* If allocating memory for the cprop bitmap would take up too much
     storage it's better just to disable the optimization.  */
  if ((n_basic_blocks
       * SBITMAP_SET_SIZE (max_gcse_regno)
       * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
    {
      if (warn_disabled_optimization)
	warning ("GCSE disabled: %d basic blocks and %d registers",
		 n_basic_blocks, max_gcse_regno);

      return 0;
    }

  /* See what modes support reg/reg copy operations.  */
  if (! can_copy_init_p)
    {
      compute_can_copy ();
      can_copy_init_p = 1;
    }

  gcc_obstack_init (&gcse_obstack);
  bytes_used = 0;

  /* We need alias analysis.  */
  init_alias_analysis ();

  /* Record where pseudo-registers are set.  This data is kept accurate
     during each pass.  ??? We could also record hard-reg information here
     [since it's unchanging], however it is currently done during hash table
     computation.

     It may be tempting to compute MEM set information here too, but MEM sets
     will be subject to code motion one day and thus we need to compute
     information about memory sets when we build the hash tables.  */

  alloc_reg_set_mem (max_gcse_regno);
  compute_sets (f);

  pass = 0;
  initial_bytes_used = bytes_used;
  max_pass_bytes = 0;
  gcse_obstack_bottom = gcse_alloc (1);
  changed = 1;
  while (changed && pass < MAX_GCSE_PASSES)
    {
      changed = 0;
      if (file)
	fprintf (file, "GCSE pass %d\n\n", pass + 1);

      /* Initialize bytes_used to the space for the pred/succ lists,
	 and the reg_set_table data.  */
      bytes_used = initial_bytes_used;

      /* Each pass may create new registers, so recalculate each time.  */
      max_gcse_regno = max_reg_num ();

      alloc_gcse_mem (f);

      /* Don't allow constant propagation to modify jumps
	 during this pass.  */
      changed = one_cprop_pass (pass + 1, 0);

      if (optimize_size)
	changed |= one_classic_gcse_pass (pass + 1);
      else
	{
	  changed |= one_pre_gcse_pass (pass + 1);

	  /* We may have just created new basic blocks.  Release and
	     recompute various things which are sized on the number of
	     basic blocks.  */
	  if (changed)
	    {
	      int i;

	      for (i = 0; i < orig_bb_count; i++)
		{
		  if (modify_mem_list[i])
		    free_INSN_LIST_list (modify_mem_list + i);
		  if (canon_modify_mem_list[i])
		    free_INSN_LIST_list (canon_modify_mem_list + i);
		}
	      modify_mem_list
		= (rtx *) gmalloc (n_basic_blocks * sizeof (rtx *));
	      canon_modify_mem_list
		= (rtx *) gmalloc (n_basic_blocks * sizeof (rtx *));
	      memset ((char *) modify_mem_list, 0,
		      n_basic_blocks * sizeof (rtx *));
	      memset ((char *) canon_modify_mem_list, 0,
		      n_basic_blocks * sizeof (rtx *));
	      orig_bb_count = n_basic_blocks;
	    }

	  free_reg_set_mem ();
	  alloc_reg_set_mem (max_reg_num ());
	  compute_sets (f);
	  run_jump_opt_after_gcse = 1;
	}

      if (max_pass_bytes < bytes_used)
	max_pass_bytes = bytes_used;

      /* Free up memory, then reallocate for code hoisting.  We can
	 not re-use the existing allocated memory because the tables
	 will not have info for the insns or registers created by
	 partial redundancy elimination.  */
      free_gcse_mem ();

      /* It does not make sense to run code hoisting unless we are optimizing
	 for code size -- it rarely makes programs faster, and can make
	 them bigger if we did partial redundancy elimination (when optimizing
	 for space, we use a classic gcse algorithm instead of partial
	 redundancy algorithms).  */
      if (optimize_size)
	{
	  max_gcse_regno = max_reg_num ();
	  alloc_gcse_mem (f);
	  changed |= one_code_hoisting_pass ();
	  free_gcse_mem ();

	  if (max_pass_bytes < bytes_used)
	    max_pass_bytes = bytes_used;
	}

      if (file)
	{
	  fprintf (file, "\n");
	  fflush (file);
	}

      obstack_free (&gcse_obstack, gcse_obstack_bottom);
      pass++;
    }

  /* Do one last pass of copy propagation, including cprop into
     conditional jumps.  */

  max_gcse_regno = max_reg_num ();
  alloc_gcse_mem (f);

  /* This time, go ahead and allow cprop to alter jumps.  */
  one_cprop_pass (pass + 1, 1);
  free_gcse_mem ();

  if (file)
    {
      fprintf (file, "GCSE of %s: %d basic blocks, ",
	       current_function_name, n_basic_blocks);
      fprintf (file, "%d pass%s, %d bytes\n\n",
	       pass, pass > 1 ? "es" : "", max_pass_bytes);
    }

  obstack_free (&gcse_obstack, NULL);
  free_reg_set_mem ();

  /* We are finished with alias analysis.  */
  end_alias_analysis ();

  allocate_reg_info (max_reg_num (), FALSE, FALSE);

  if (!optimize_size && flag_gcse_sm)
    store_motion ();

  return run_jump_opt_after_gcse;
}
\f
/* Misc. utilities.  */

/* Compute which modes support reg/reg copy operations.  */

static void
compute_can_copy ()
{
  int i;
#ifndef AVOID_CCMODE_COPIES
  rtx reg, insn;
#endif
  memset (can_copy_p, 0, NUM_MACHINE_MODES);

  start_sequence ();
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    if (GET_MODE_CLASS (i) == MODE_CC)
      {
#ifdef AVOID_CCMODE_COPIES
	can_copy_p[i] = 0;
#else
	reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
	insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
	if (recog (PATTERN (insn), insn, NULL) >= 0)
	  can_copy_p[i] = 1;
#endif
      }
    else
      can_copy_p[i] = 1;

  end_sequence ();
}
\f
/* Cover function to xmalloc to record bytes allocated.  */

static char *
gmalloc (size)
     unsigned int size;
{
  bytes_used += size;
  return xmalloc (size);
}

/* Cover function to xrealloc.
   We don't record the additional size since we don't know it.
   It won't affect memory usage stats much anyway.  */

static char *
grealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  return xrealloc (ptr, size);
}

/* Cover function to obstack_alloc.
   We don't need to record the bytes allocated here since
   obstack_chunk_alloc is set to gmalloc.  */

static char *
gcse_alloc (size)
     unsigned long size;
{
  return (char *) obstack_alloc (&gcse_obstack, size);
}

/* Allocate memory for the cuid mapping array,
   and reg/memory set tracking tables.

   This is called at the start of each pass.  */

static void
alloc_gcse_mem (f)
     rtx f;
{
  int i, n;
  rtx insn;

  /* Find the largest UID and create a mapping from UIDs to CUIDs.
     CUIDs are like UIDs except they increase monotonically, have no gaps,
     and only apply to real insns.  */

  max_uid = get_max_uid ();
  n = (max_uid + 1) * sizeof (int);
  uid_cuid = (int *) gmalloc (n);
  memset ((char *) uid_cuid, 0, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
	uid_cuid[INSN_UID (insn)] = i++;
      else
	uid_cuid[INSN_UID (insn)] = i;
    }

  /* Create a table mapping cuids to insns.  */

  max_cuid = i;
  n = (max_cuid + 1) * sizeof (rtx);
  cuid_insn = (rtx *) gmalloc (n);
  memset ((char *) cuid_insn, 0, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      CUID_INSN (i++) = insn;

  /* Allocate vars to track sets of regs.  */
  reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);

  /* Allocate vars to track sets of regs, memory per block.  */
  reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
						       max_gcse_regno);

  /* Allocate array to keep a list of insns which modify memory in each
     basic block.  */
  modify_mem_list = (rtx *) gmalloc (n_basic_blocks * sizeof (rtx *));
  canon_modify_mem_list = (rtx *) gmalloc (n_basic_blocks * sizeof (rtx *));
  memset ((char *) modify_mem_list, 0, n_basic_blocks * sizeof (rtx *));
  memset ((char *) canon_modify_mem_list, 0, n_basic_blocks * sizeof (rtx *));
}

/* Free memory allocated by alloc_gcse_mem.  */

static void
free_gcse_mem ()
{
  free (uid_cuid);
  free (cuid_insn);

  free (reg_set_bitmap);

  sbitmap_vector_free (reg_set_in_block);

  /* Re-cache any INSN_LIST nodes we have allocated.  */
  {
    int i;

    for (i = 0; i < n_basic_blocks; i++)
      {
	if (modify_mem_list[i])
	  free_INSN_LIST_list (modify_mem_list + i);
	if (canon_modify_mem_list[i])
	  free_INSN_LIST_list (canon_modify_mem_list + i);
      }

    free (modify_mem_list);
    free (canon_modify_mem_list);
    modify_mem_list = 0;
    canon_modify_mem_list = 0;
  }
}

/* Many of the global optimization algorithms work by solving dataflow
   equations for various expressions.  Initially, some local value is
   computed for each expression in each block.  Then, the values across the
   various blocks are combined (by following flow graph edges) to arrive at
   global values.  Conceptually, each set of equations is independent.  We
   may therefore solve all the equations in parallel, solve them one at a
   time, or pick any intermediate approach.

   When you're going to need N two-dimensional bitmaps, each X (say, the
   number of blocks) by Y (say, the number of expressions), call this
   function.  It's not important what X and Y represent; only that Y
   correspond to the things that can be done in parallel.  This function will
   return an appropriate chunking factor C; you should solve C sets of
   equations in parallel.  By going through this function, we can easily
   trade space against time; by solving fewer equations in parallel we use
   less space.  */

static int
get_bitmap_width (n, x, y)
     int n;
     int x;
     int y;
{
  /* It's not really worth figuring out *exactly* how much memory will
     be used by a particular choice.  The important thing is to get
     something approximately right.  */
  size_t max_bitmap_memory = 10 * 1024 * 1024;

  /* The number of bytes we'd use for a single column of minimum
     width.  */
  size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);

  /* Often, it's reasonable just to solve all the equations in
     parallel.  */
  if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
    return y;

  /* Otherwise, pick the largest width we can, without going over the
     limit.  */
  return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
			     / column_size);
}
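
/* Worked example (illustrative only; assumes an 8-byte SBITMAP_ELT_TYPE):
   with n = 2 bitmaps, x = 1000 blocks, and y = 5000 expressions, a single
   column costs 2 * 1000 * 8 = 16000 bytes, and solving everything in
   parallel costs 16000 * SBITMAP_SET_SIZE (5000) bytes -- about 1.3MB
   with 64-bit elements, well under the 10MB cap, so get_bitmap_width
   simply returns y.  */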
\f
/* Compute the local properties of each recorded expression.

   Local properties are those that are defined by the block, irrespective of
   other blocks.

   An expression is transparent in a block if its operands are not modified
   in the block.

   An expression is computed (locally available) in a block if it is computed
   at least once and the expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and the expression would contain the same value if the
   computation was moved to the beginning of the block.

   We call this routine for cprop, pre and code hoisting.  They all compute
   basically the same information and thus can easily share this code.

   TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
   properties.  If NULL, then it is not necessary to compute or record that
   particular property.

   SETP controls which hash table to look at.  If zero, this routine looks at
   the expr hash table; if nonzero this routine looks at the set hash table.
   Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
   ABSALTERED.  */

static void
compute_local_properties (transp, comp, antloc, setp)
     sbitmap *transp;
     sbitmap *comp;
     sbitmap *antloc;
     int setp;
{
  unsigned int i, hash_table_size;
  struct expr **hash_table;

  /* Initialize any bitmaps that were passed in.  */
  if (transp)
    {
      if (setp)
	sbitmap_vector_zero (transp, n_basic_blocks);
      else
	sbitmap_vector_ones (transp, n_basic_blocks);
    }

  if (comp)
    sbitmap_vector_zero (comp, n_basic_blocks);
  if (antloc)
    sbitmap_vector_zero (antloc, n_basic_blocks);

  /* We use the same code for cprop, pre and hoisting.  For cprop
     we care about the set hash table, for pre and hoisting we
     care about the expr hash table.  */
  hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
  hash_table = setp ? set_hash_table : expr_hash_table;

  for (i = 0; i < hash_table_size; i++)
    {
      struct expr *expr;

      for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
	{
	  int indx = expr->bitmap_index;
	  struct occr *occr;

	  /* The expression is transparent in this block if it is not killed.
	     We start by assuming all are transparent [none are killed], and
	     then reset the bits for those that are.  */
	  if (transp)
	    compute_transp (expr->expr, indx, transp, setp);

	  /* The occurrences recorded in antic_occr are exactly those that
	     we want to set to non-zero in ANTLOC.  */
	  if (antloc)
	    for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	      {
		SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->deleted_p = 0;
	      }

	  /* The occurrences recorded in avail_occr are exactly those that
	     we want to set to non-zero in COMP.  */
	  if (comp)
	    for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
	      {
		SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->copied_p = 0;
	      }

	  /* While we're scanning the table, this is a good place to
	     initialize this.  */
	  expr->reaching_reg = 0;
	}
    }
}
\f
/* Register set information.

   `reg_set_table' records where each register is set or otherwise
   modified.  */

static struct obstack reg_set_obstack;

static void
alloc_reg_set_mem (n_regs)
     int n_regs;
{
  unsigned int n;

  reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
  n = reg_set_table_size * sizeof (struct reg_set *);
  reg_set_table = (struct reg_set **) gmalloc (n);
  memset ((char *) reg_set_table, 0, n);

  gcc_obstack_init (&reg_set_obstack);
}

static void
free_reg_set_mem ()
{
  free (reg_set_table);
  obstack_free (&reg_set_obstack, NULL);
}

/* Record REGNO in the reg_set table.  */

static void
record_one_set (regno, insn)
     int regno;
     rtx insn;
{
  /* Allocate a new reg_set element and link it onto the list.  */
  struct reg_set *new_reg_info;

  /* If the table isn't big enough, enlarge it.  */
  if (regno >= reg_set_table_size)
    {
      int new_size = regno + REG_SET_TABLE_SLOP;

      reg_set_table
	= (struct reg_set **) grealloc ((char *) reg_set_table,
					new_size * sizeof (struct reg_set *));
      memset ((char *) (reg_set_table + reg_set_table_size), 0,
	      (new_size - reg_set_table_size) * sizeof (struct reg_set *));
      reg_set_table_size = new_size;
    }

  new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
						   sizeof (struct reg_set));
  bytes_used += sizeof (struct reg_set);
  new_reg_info->insn = insn;
  new_reg_info->next = reg_set_table[regno];
  reg_set_table[regno] = new_reg_info;
}

/* Called from compute_sets via note_stores to handle one SET or CLOBBER in
   an insn.  The DATA is really the instruction in which the SET is
   occurring.  */

static void
record_set_info (dest, setter, data)
     rtx dest, setter ATTRIBUTE_UNUSED;
     void *data;
{
  rtx record_set_insn = (rtx) data;

  if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
    record_one_set (REGNO (dest), record_set_insn);
}

/* Scan the function and record each set of each pseudo-register.

   This is called once, at the start of the gcse pass.  See the comments for
   `reg_set_table' for further documentation.  */

static void
compute_sets (f)
     rtx f;
{
  rtx insn;

  for (insn = f; insn != 0; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      note_stores (PATTERN (insn), record_set_info, insn);
}
\f
/* Hash table support.  */

/* For each register, the cuid of the first/last insn in the block to set it,
   or -1 if not set.  */
#define NEVER_SET -1
static int *reg_first_set;
static int *reg_last_set;

/* See whether X, the source of a set, is something we want to consider for
   GCSE.  */

static int
want_to_gcse_p (x)
     rtx x;
{
  static rtx test_insn = 0;
  int num_clobbers = 0;
  int icode;

  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
    case CONST_INT:
    case CONST_DOUBLE:
    case CALL:
      return 0;

    default:
      break;
    }

  /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
  if (general_operand (x, GET_MODE (x)))
    return 1;
  else if (GET_MODE (x) == VOIDmode)
    return 0;

  /* Otherwise, check if we can make a valid insn from it.  First initialize
     our test insn if we haven't already.  */
  if (test_insn == 0)
    {
      test_insn
	= make_insn_raw (gen_rtx_SET (VOIDmode,
				      gen_rtx_REG (word_mode,
						   FIRST_PSEUDO_REGISTER * 2),
				      const0_rtx));
      NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
      ggc_add_rtx_root (&test_insn, 1);
    }

  /* Now make an insn like the one we would make when GCSE'ing and see if
     valid.  */
  PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
  SET_SRC (PATTERN (test_insn)) = x;
  return ((icode = recog (PATTERN (test_insn), test_insn, &num_clobbers)) >= 0
	  && (num_clobbers == 0 || ! added_clobbers_hard_reg_p (icode)));
}

/* Return non-zero if the operands of expression X are unchanged from the
   start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
   or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */

static int
oprs_unchanged_p (x, insn, avail_p)
     rtx x, insn;
     int avail_p;
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      if (avail_p)
	return (reg_last_set[REGNO (x)] == NEVER_SET
		|| reg_last_set[REGNO (x)] < INSN_CUID (insn));
      else
	return (reg_first_set[REGNO (x)] == NEVER_SET
		|| reg_first_set[REGNO (x)] >= INSN_CUID (insn));

    case MEM:
      if (load_killed_in_block_p (BLOCK_FOR_INSN (insn), INSN_CUID (insn),
				  x, avail_p))
	return 0;
      else
	return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PRE_MODIFY:
    case POST_MODIFY:
      return 0;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call needed at this
	     level, change it into iteration.  This function is called enough
	     to be worth it.  */
	  if (i == 0)
	    return oprs_unchanged_p (XEXP (x, i), insn, avail_p);

	  else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
	    return 0;
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
	    return 0;
    }

  return 1;
}
1440
a13d4ebf
AM
1441/* Used for communication between mems_conflict_for_gcse_p and
1442 load_killed_in_block_p. Nonzero if mems_conflict_for_gcse_p finds a
1443 conflict between two memory references. */
1444static int gcse_mems_conflict_p;
1445
1446/* Used for communication between mems_conflict_for_gcse_p and
1447 load_killed_in_block_p. A memory reference for a load instruction,
1448 mems_conflict_for_gcse_p will see if a memory store conflicts with
1449 this memory load. */
1450static rtx gcse_mem_operand;
1451
1452/* DEST is the output of an instruction. If it is a memory reference, and
1453 possibly conflicts with the load found in gcse_mem_operand, then set
1454 gcse_mems_conflict_p to a nonzero value. */
1455
1456static void
1457mems_conflict_for_gcse_p (dest, setter, data)
1458 rtx dest, setter ATTRIBUTE_UNUSED;
1459 void *data ATTRIBUTE_UNUSED;
1460{
1461 while (GET_CODE (dest) == SUBREG
1462 || GET_CODE (dest) == ZERO_EXTRACT
1463 || GET_CODE (dest) == SIGN_EXTRACT
1464 || GET_CODE (dest) == STRICT_LOW_PART)
1465 dest = XEXP (dest, 0);
1466
1467 /* If DEST is not a MEM, then it will not conflict with the load. Note
1468 that function calls are assumed to clobber memory, but are handled
1469 elsewhere. */
1470 if (GET_CODE (dest) != MEM)
1471 return;
a13d4ebf
AM
1472 /* If we are setting a MEM in our list of specially recognized MEMs,
1473 don't mark as killed this time. */
1474
1475 if (dest == gcse_mem_operand && pre_ldst_mems != NULL)
1476 {
1477 if (!find_rtx_in_ldst (dest))
1478 gcse_mems_conflict_p = 1;
1479 return;
1480 }
a13d4ebf
AM
1481 if (true_dependence (dest, GET_MODE (dest), gcse_mem_operand,
1482 rtx_addr_varies_p))
1483 gcse_mems_conflict_p = 1;
1484}
1485
1486/* Return nonzero if the expression in X (a memory reference) is killed
1487 in block BB before or after the insn with the CUID in UID_LIMIT.
1488 AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
1489 before UID_LIMIT.
1490
1491 To check the entire block, set UID_LIMIT to max_uid + 1 and
1492 AVAIL_P to 0. */
1493
1494static int
1495load_killed_in_block_p (bb, uid_limit, x, avail_p)
e2d2ed72 1496 basic_block bb;
a13d4ebf
AM
1497 int uid_limit;
1498 rtx x;
1499 int avail_p;
1500{
e2d2ed72 1501 rtx list_entry = modify_mem_list[bb->index];
a13d4ebf
AM
1502 while (list_entry)
1503 {
1504 rtx setter;
1505 /* Ignore entries in the list that do not apply. */
1506 if ((avail_p
1507 && INSN_CUID (XEXP (list_entry, 0)) < uid_limit)
1508 || (! avail_p
1509 && INSN_CUID (XEXP (list_entry, 0)) > uid_limit))
1510 {
1511 list_entry = XEXP (list_entry, 1);
1512 continue;
1513 }
1514
1515 setter = XEXP (list_entry, 0);
1516
1517 /* If SETTER is a call everything is clobbered. Note that calls
1518 to pure functions are never put on the list, so we need not
1519 worry about them. */
1520 if (GET_CODE (setter) == CALL_INSN)
1521 return 1;
1522
1523 /* SETTER must be an INSN of some kind that sets memory. Call
1524 note_stores to examine each hunk of memory that is modified.
1525
1526 The note_stores interface is pretty limited, so we have to
1527 communicate via global variables. Yuk. */
1528 gcse_mem_operand = x;
1529 gcse_mems_conflict_p = 0;
1530 note_stores (PATTERN (setter), mems_conflict_for_gcse_p, NULL);
1531 if (gcse_mems_conflict_p)
1532 return 1;
1533 list_entry = XEXP (list_entry, 1);
1534 }
1535 return 0;
1536}
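/* Illustrative usage sketch, not part of the original sources.  Two call
   patterns recur elsewhere in this file:

     load_killed_in_block_p (BLOCK_FOR_INSN (insn), INSN_CUID (insn), x, 1)

   asks whether the load X is killed after INSN (the availability check in
   oprs_unchanged_p above), while

     load_killed_in_block_p (bb, get_max_uid () + 1, x, 0)

   checks the entire block BB, as expr_killed_p does further below.  */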
1537
7506f491
DE
1538/* Return non-zero if the operands of expression X are unchanged from
1539 the start of INSN's basic block up to but not including INSN. */
1540
1541static int
1542oprs_anticipatable_p (x, insn)
1543 rtx x, insn;
1544{
1545 return oprs_unchanged_p (x, insn, 0);
1546}
1547
1548/* Return non-zero if the operands of expression X are unchanged from
1549 INSN to the end of INSN's basic block. */
1550
1551static int
1552oprs_available_p (x, insn)
1553 rtx x, insn;
1554{
1555 return oprs_unchanged_p (x, insn, 1);
1556}
1557
1558/* Hash expression X.
c4c81601
RK
1559
1560 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1561 indicating if a volatile operand is found or if the expression contains
1562 something we don't want to insert in the table.
7506f491
DE
1563
1564 ??? One might want to merge this with canon_hash. Later. */
1565
1566static unsigned int
1567hash_expr (x, mode, do_not_record_p, hash_table_size)
1568 rtx x;
1569 enum machine_mode mode;
1570 int *do_not_record_p;
1571 int hash_table_size;
1572{
1573 unsigned int hash;
1574
1575 *do_not_record_p = 0;
1576
1577 hash = hash_expr_1 (x, mode, do_not_record_p);
1578 return hash % hash_table_size;
1579}
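/* Illustrative caller sketch, not part of the original sources; compare
   lookup_expr further below:

     int do_not_record_p;
     unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
				    expr_hash_table_size);
     if (! do_not_record_p)
       ... walk the expr_hash_table[hash] chain with expr_equiv_p ...  */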
172890a2 1580
6462bb43 1581/* Hash a string. Just add its bytes up. */
172890a2 1582
6462bb43
AO
1583static inline unsigned
1584hash_string_1 (ps)
1585 const char *ps;
1586{
1587 unsigned hash = 0;
1588 const unsigned char *p = (const unsigned char *)ps;
1589
1590 if (p)
1591 while (*p)
1592 hash += *p++;
1593
1594 return hash;
1595}
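/* Worked example, for illustration only: hash_string_1 ("gcc") sums the
   bytes 'g' + 'c' + 'c' = 103 + 99 + 99 = 301.  A plain byte sum is weak
   as hashes go, but here it only feeds the accumulator in hash_expr_1
   below.  */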
7506f491
DE
1596
1597/* Subroutine of hash_expr to do the actual work. */
1598
1599static unsigned int
1600hash_expr_1 (x, mode, do_not_record_p)
1601 rtx x;
1602 enum machine_mode mode;
1603 int *do_not_record_p;
1604{
1605 int i, j;
1606 unsigned hash = 0;
1607 enum rtx_code code;
6f7d635c 1608 const char *fmt;
7506f491 1609
c4c81601
RK
1610 /* Used to turn recursion into iteration. We can't rely on GCC's
 1611 tail-recursion elimination since we need to keep accumulating values
1612 in HASH. */
7506f491
DE
1613
1614 if (x == 0)
1615 return hash;
1616
c4c81601 1617 repeat:
7506f491
DE
1618 code = GET_CODE (x);
1619 switch (code)
1620 {
1621 case REG:
c4c81601
RK
1622 hash += ((unsigned int) REG << 7) + REGNO (x);
1623 return hash;
7506f491
DE
1624
1625 case CONST_INT:
c4c81601
RK
1626 hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
1627 + (unsigned int) INTVAL (x));
1628 return hash;
7506f491
DE
1629
1630 case CONST_DOUBLE:
1631 /* This is like the general case, except that it only counts
1632 the integers representing the constant. */
c4c81601 1633 hash += (unsigned int) code + (unsigned int) GET_MODE (x);
7506f491
DE
1634 if (GET_MODE (x) != VOIDmode)
1635 for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
c4c81601 1636 hash += (unsigned int) XWINT (x, i);
7506f491 1637 else
c4c81601
RK
1638 hash += ((unsigned int) CONST_DOUBLE_LOW (x)
1639 + (unsigned int) CONST_DOUBLE_HIGH (x));
7506f491
DE
1640 return hash;
1641
1642 /* Assume there is only one rtx object for any given label. */
1643 case LABEL_REF:
1644 /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1645 differences and differences between each stage's debugging dumps. */
c4c81601
RK
1646 hash += (((unsigned int) LABEL_REF << 7)
1647 + CODE_LABEL_NUMBER (XEXP (x, 0)));
7506f491
DE
1648 return hash;
1649
1650 case SYMBOL_REF:
1651 {
1652 /* Don't hash on the symbol's address to avoid bootstrap differences.
1653 Different hash values may cause expressions to be recorded in
1654 different orders and thus different registers to be used in the
1655 final assembler. This also avoids differences in the dump files
1656 between various stages. */
1657 unsigned int h = 0;
3cce094d 1658 const unsigned char *p = (const unsigned char *) XSTR (x, 0);
c4c81601 1659
7506f491
DE
1660 while (*p)
1661 h += (h << 7) + *p++; /* ??? revisit */
c4c81601
RK
1662
1663 hash += ((unsigned int) SYMBOL_REF << 7) + h;
7506f491
DE
1664 return hash;
1665 }
1666
1667 case MEM:
1668 if (MEM_VOLATILE_P (x))
1669 {
1670 *do_not_record_p = 1;
1671 return 0;
1672 }
c4c81601
RK
1673
1674 hash += (unsigned int) MEM;
297c3335 1675 hash += MEM_ALIAS_SET (x);
7506f491
DE
1676 x = XEXP (x, 0);
1677 goto repeat;
1678
1679 case PRE_DEC:
1680 case PRE_INC:
1681 case POST_DEC:
1682 case POST_INC:
1683 case PC:
1684 case CC0:
1685 case CALL:
1686 case UNSPEC_VOLATILE:
1687 *do_not_record_p = 1;
1688 return 0;
1689
1690 case ASM_OPERANDS:
1691 if (MEM_VOLATILE_P (x))
1692 {
1693 *do_not_record_p = 1;
1694 return 0;
1695 }
6462bb43
AO
1696 else
1697 {
1698 /* We don't want to take the filename and line into account. */
1699 hash += (unsigned) code + (unsigned) GET_MODE (x)
1700 + hash_string_1 (ASM_OPERANDS_TEMPLATE (x))
1701 + hash_string_1 (ASM_OPERANDS_OUTPUT_CONSTRAINT (x))
1702 + (unsigned) ASM_OPERANDS_OUTPUT_IDX (x);
1703
1704 if (ASM_OPERANDS_INPUT_LENGTH (x))
1705 {
1706 for (i = 1; i < ASM_OPERANDS_INPUT_LENGTH (x); i++)
1707 {
1708 hash += (hash_expr_1 (ASM_OPERANDS_INPUT (x, i),
1709 GET_MODE (ASM_OPERANDS_INPUT (x, i)),
1710 do_not_record_p)
1711 + hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT
1712 (x, i)));
1713 }
1714
1715 hash += hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT (x, 0));
1716 x = ASM_OPERANDS_INPUT (x, 0);
1717 mode = GET_MODE (x);
1718 goto repeat;
1719 }
1720 return hash;
1721 }
7506f491
DE
1722
1723 default:
1724 break;
1725 }
1726
7506f491 1727 hash += (unsigned) code + (unsigned) GET_MODE (x);
c4c81601 1728 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
7506f491
DE
1729 {
1730 if (fmt[i] == 'e')
1731 {
7506f491
DE
1732 /* If we are about to do the last recursive call
1733 needed at this level, change it into iteration.
1734 This function is called enough to be worth it. */
1735 if (i == 0)
1736 {
c4c81601 1737 x = XEXP (x, i);
7506f491
DE
1738 goto repeat;
1739 }
c4c81601
RK
1740
1741 hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
7506f491
DE
1742 if (*do_not_record_p)
1743 return 0;
1744 }
c4c81601 1745
7506f491
DE
1746 else if (fmt[i] == 'E')
1747 for (j = 0; j < XVECLEN (x, i); j++)
1748 {
1749 hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1750 if (*do_not_record_p)
1751 return 0;
1752 }
c4c81601 1753
7506f491 1754 else if (fmt[i] == 's')
6462bb43 1755 hash += hash_string_1 (XSTR (x, i));
7506f491 1756 else if (fmt[i] == 'i')
c4c81601 1757 hash += (unsigned int) XINT (x, i);
adfcce61 1758 else if (fmt[i] == 't') ; /* Deliberately empty: 't' fields do not affect the hash. */
7506f491
DE
1759 else
1760 abort ();
1761 }
1762
1763 return hash;
1764}
1765
1766/* Hash a set of register REGNO.
1767
c4c81601
RK
1768 Sets are hashed on the register that is set. This simplifies the PRE copy
1769 propagation code.
7506f491
DE
1770
1771 ??? May need to make things more elaborate. Later, as necessary. */
1772
1773static unsigned int
1774hash_set (regno, hash_table_size)
1775 int regno;
1776 int hash_table_size;
1777{
1778 unsigned int hash;
1779
1780 hash = regno;
1781 return hash % hash_table_size;
1782}
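/* Worked example, for illustration only: with a 31-bucket table,
   hash_set (100, 31) == 100 % 31 == 7, so every SET of pseudo 100 hashes
   to the same chain, which is the property the PRE copy propagation code
   depends on.  */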
1783
1784/* Return non-zero if exp1 is equivalent to exp2.
1785 ??? Borrowed from cse.c. Might want to remerge with cse.c. Later. */
1786
1787static int
1788expr_equiv_p (x, y)
1789 rtx x, y;
1790{
1791 register int i, j;
1792 register enum rtx_code code;
6f7d635c 1793 register const char *fmt;
7506f491
DE
1794
1795 if (x == y)
1796 return 1;
c4c81601 1797
7506f491
DE
1798 if (x == 0 || y == 0)
1799 return x == y;
1800
1801 code = GET_CODE (x);
1802 if (code != GET_CODE (y))
1803 return 0;
1804
1805 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent. */
1806 if (GET_MODE (x) != GET_MODE (y))
1807 return 0;
1808
1809 switch (code)
1810 {
1811 case PC:
1812 case CC0:
1813 return x == y;
1814
1815 case CONST_INT:
1816 return INTVAL (x) == INTVAL (y);
1817
1818 case LABEL_REF:
1819 return XEXP (x, 0) == XEXP (y, 0);
1820
1821 case SYMBOL_REF:
1822 return XSTR (x, 0) == XSTR (y, 0);
1823
1824 case REG:
1825 return REGNO (x) == REGNO (y);
1826
297c3335
RH
1827 case MEM:
1828 /* Can't merge two expressions in different alias sets, since we can
1829 decide that the expression is transparent in a block when it isn't,
1830 due to it being set with the different alias set. */
1831 if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1832 return 0;
1833 break;
1834
7506f491
DE
1835 /* For commutative operations, check both orders. */
1836 case PLUS:
1837 case MULT:
1838 case AND:
1839 case IOR:
1840 case XOR:
1841 case NE:
1842 case EQ:
1843 return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1844 && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1845 || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1846 && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1847
6462bb43
AO
1848 case ASM_OPERANDS:
1849 /* We don't use the generic code below because we want to
1850 disregard filename and line numbers. */
1851
1852 /* A volatile asm isn't equivalent to any other. */
1853 if (MEM_VOLATILE_P (x) || MEM_VOLATILE_P (y))
1854 return 0;
1855
1856 if (GET_MODE (x) != GET_MODE (y)
1857 || strcmp (ASM_OPERANDS_TEMPLATE (x), ASM_OPERANDS_TEMPLATE (y))
1858 || strcmp (ASM_OPERANDS_OUTPUT_CONSTRAINT (x),
1859 ASM_OPERANDS_OUTPUT_CONSTRAINT (y))
1860 || ASM_OPERANDS_OUTPUT_IDX (x) != ASM_OPERANDS_OUTPUT_IDX (y)
1861 || ASM_OPERANDS_INPUT_LENGTH (x) != ASM_OPERANDS_INPUT_LENGTH (y))
1862 return 0;
1863
1864 if (ASM_OPERANDS_INPUT_LENGTH (x))
1865 {
1866 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
1867 if (! expr_equiv_p (ASM_OPERANDS_INPUT (x, i),
1868 ASM_OPERANDS_INPUT (y, i))
1869 || strcmp (ASM_OPERANDS_INPUT_CONSTRAINT (x, i),
1870 ASM_OPERANDS_INPUT_CONSTRAINT (y, i)))
1871 return 0;
1872 }
1873
1874 return 1;
1875
7506f491
DE
1876 default:
1877 break;
1878 }
1879
1880 /* Compare the elements. If any pair of corresponding elements
1881 fail to match, return 0 for the whole thing. */
1882
1883 fmt = GET_RTX_FORMAT (code);
1884 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1885 {
1886 switch (fmt[i])
1887 {
1888 case 'e':
1889 if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1890 return 0;
1891 break;
1892
1893 case 'E':
1894 if (XVECLEN (x, i) != XVECLEN (y, i))
1895 return 0;
1896 for (j = 0; j < XVECLEN (x, i); j++)
1897 if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1898 return 0;
1899 break;
1900
1901 case 's':
1902 if (strcmp (XSTR (x, i), XSTR (y, i)))
1903 return 0;
1904 break;
1905
1906 case 'i':
1907 if (XINT (x, i) != XINT (y, i))
1908 return 0;
1909 break;
1910
1911 case 'w':
1912 if (XWINT (x, i) != XWINT (y, i))
1913 return 0;
1914 break;
1915
1916 case '0':
adfcce61 1917 case 't':
7506f491 1918 break;
adfcce61 1919
7506f491
DE
1920 default:
1921 abort ();
1922 }
1923 }
1924
1925 return 1;
1926}
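/* Example, for illustration only: since PLUS is commutative above,
   (plus:SI (reg:SI 100) (reg:SI 101)) is equivalent to
   (plus:SI (reg:SI 101) (reg:SI 100)); it is not equivalent to the same
   sum in HImode, because modes are compared before the switch.  */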
1927
1928/* Insert expression X in INSN in the hash table.
1929 If it is already present, record it as the last occurrence in INSN's
1930 basic block.
1931
1932 MODE is the mode of the value X is being stored into.
1933 It is only used if X is a CONST_INT.
1934
1935 ANTIC_P is non-zero if X is an anticipatable expression.
1936 AVAIL_P is non-zero if X is an available expression. */
1937
1938static void
1939insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1940 rtx x;
1941 enum machine_mode mode;
1942 rtx insn;
1943 int antic_p, avail_p;
1944{
1945 int found, do_not_record_p;
1946 unsigned int hash;
1947 struct expr *cur_expr, *last_expr = NULL;
1948 struct occr *antic_occr, *avail_occr;
1949 struct occr *last_occr = NULL;
1950
1951 hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1952
1953 /* Do not insert expression in table if it contains volatile operands,
1954 or if hash_expr determines the expression is something we don't want
1955 to or can't handle. */
1956 if (do_not_record_p)
1957 return;
1958
1959 cur_expr = expr_hash_table[hash];
1960 found = 0;
1961
c4c81601 1962 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
7506f491
DE
1963 {
1964 /* If the expression isn't found, save a pointer to the end of
1965 the list. */
1966 last_expr = cur_expr;
1967 cur_expr = cur_expr->next_same_hash;
1968 }
1969
1970 if (! found)
1971 {
1972 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1973 bytes_used += sizeof (struct expr);
1974 if (expr_hash_table[hash] == NULL)
c4c81601
RK
1975 /* This is the first pattern that hashed to this index. */
1976 expr_hash_table[hash] = cur_expr;
7506f491 1977 else
c4c81601
RK
1978 /* Add EXPR to end of this hash chain. */
1979 last_expr->next_same_hash = cur_expr;
1980
7506f491
DE
1981 /* Set the fields of the expr element. */
1982 cur_expr->expr = x;
1983 cur_expr->bitmap_index = n_exprs++;
1984 cur_expr->next_same_hash = NULL;
1985 cur_expr->antic_occr = NULL;
1986 cur_expr->avail_occr = NULL;
1987 }
1988
1989 /* Now record the occurrence(s). */
7506f491
DE
1990 if (antic_p)
1991 {
1992 antic_occr = cur_expr->antic_occr;
1993
1994 /* Search for another occurrence in the same basic block. */
1995 while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1996 {
1997 /* If an occurrence isn't found, save a pointer to the end of
1998 the list. */
1999 last_occr = antic_occr;
2000 antic_occr = antic_occr->next;
2001 }
2002
2003 if (antic_occr)
c4c81601
RK
2004 /* Found another instance of the expression in the same basic block.
2005 Prefer the currently recorded one. We want the first one in the
2006 block and the block is scanned from start to end. */
2007 ; /* nothing to do */
7506f491
DE
2008 else
2009 {
2010 /* First occurrence of this expression in this basic block. */
2011 antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
2012 bytes_used += sizeof (struct occr);
2013 /* First occurrence of this expression in any block? */
2014 if (cur_expr->antic_occr == NULL)
2015 cur_expr->antic_occr = antic_occr;
2016 else
2017 last_occr->next = antic_occr;
c4c81601 2018
7506f491
DE
2019 antic_occr->insn = insn;
2020 antic_occr->next = NULL;
2021 }
2022 }
2023
2024 if (avail_p)
2025 {
2026 avail_occr = cur_expr->avail_occr;
2027
2028 /* Search for another occurrence in the same basic block. */
2029 while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
2030 {
2031 /* If an occurrence isn't found, save a pointer to the end of
2032 the list. */
2033 last_occr = avail_occr;
2034 avail_occr = avail_occr->next;
2035 }
2036
2037 if (avail_occr)
c4c81601
RK
2038 /* Found another instance of the expression in the same basic block.
2039 Prefer this occurrence to the currently recorded one. We want
2040 the last one in the block and the block is scanned from start
2041 to end. */
2042 avail_occr->insn = insn;
7506f491
DE
2043 else
2044 {
2045 /* First occurrence of this expression in this basic block. */
2046 avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
2047 bytes_used += sizeof (struct occr);
c4c81601 2048
7506f491
DE
2049 /* First occurrence of this expression in any block? */
2050 if (cur_expr->avail_occr == NULL)
2051 cur_expr->avail_occr = avail_occr;
2052 else
2053 last_occr->next = avail_occr;
c4c81601 2054
7506f491
DE
2055 avail_occr->insn = insn;
2056 avail_occr->next = NULL;
2057 }
2058 }
2059}
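/* Illustrative example, not from the original sources: if a block computes
   the same expression at insns I1 and then I2, the antic_occr list keeps
   I1 (the first occurrence in the block) while the avail_occr list ends up
   with I2 (the last), matching the preferences noted in the code above.  */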
2060
2061/* Insert pattern X in INSN in the hash table.
2062 X is a SET of a reg to either another reg or a constant.
2063 If it is already present, record it as the last occurrence in INSN's
2064 basic block. */
2065
2066static void
2067insert_set_in_table (x, insn)
2068 rtx x;
2069 rtx insn;
2070{
2071 int found;
2072 unsigned int hash;
2073 struct expr *cur_expr, *last_expr = NULL;
2074 struct occr *cur_occr, *last_occr = NULL;
2075
2076 if (GET_CODE (x) != SET
2077 || GET_CODE (SET_DEST (x)) != REG)
2078 abort ();
2079
2080 hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
2081
2082 cur_expr = set_hash_table[hash];
2083 found = 0;
2084
c4c81601 2085 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
7506f491
DE
2086 {
2087 /* If the expression isn't found, save a pointer to the end of
2088 the list. */
2089 last_expr = cur_expr;
2090 cur_expr = cur_expr->next_same_hash;
2091 }
2092
2093 if (! found)
2094 {
2095 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
2096 bytes_used += sizeof (struct expr);
2097 if (set_hash_table[hash] == NULL)
c4c81601
RK
2098 /* This is the first pattern that hashed to this index. */
2099 set_hash_table[hash] = cur_expr;
7506f491 2100 else
c4c81601
RK
2101 /* Add EXPR to end of this hash chain. */
2102 last_expr->next_same_hash = cur_expr;
2103
7506f491
DE
2104 /* Set the fields of the expr element.
2105 We must copy X because it can be modified when copy propagation is
2106 performed on its operands. */
7506f491
DE
2107 cur_expr->expr = copy_rtx (x);
2108 cur_expr->bitmap_index = n_sets++;
2109 cur_expr->next_same_hash = NULL;
2110 cur_expr->antic_occr = NULL;
2111 cur_expr->avail_occr = NULL;
2112 }
2113
2114 /* Now record the occurrence. */
7506f491
DE
2115 cur_occr = cur_expr->avail_occr;
2116
2117 /* Search for another occurrence in the same basic block. */
2118 while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
2119 {
2120 /* If an occurrence isn't found, save a pointer to the end of
2121 the list. */
2122 last_occr = cur_occr;
2123 cur_occr = cur_occr->next;
2124 }
2125
2126 if (cur_occr)
c4c81601
RK
2127 /* Found another instance of the expression in the same basic block.
2128 Prefer this occurrence to the currently recorded one. We want the
2129 last one in the block and the block is scanned from start to end. */
2130 cur_occr->insn = insn;
7506f491
DE
2131 else
2132 {
2133 /* First occurrence of this expression in this basic block. */
2134 cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
2135 bytes_used += sizeof (struct occr);
c4c81601 2136
7506f491
DE
2137 /* First occurrence of this expression in any block? */
2138 if (cur_expr->avail_occr == NULL)
2139 cur_expr->avail_occr = cur_occr;
2140 else
2141 last_occr->next = cur_occr;
c4c81601 2142
7506f491
DE
2143 cur_occr->insn = insn;
2144 cur_occr->next = NULL;
2145 }
2146}
2147
c4c81601
RK
2148/* Scan pattern PAT of INSN and add an entry to the hash table. If SET_P is
2149 non-zero, this is for the assignment hash table, otherwise it is for the
2150 expression hash table. */
7506f491
DE
2151
2152static void
2153hash_scan_set (pat, insn, set_p)
2154 rtx pat, insn;
2155 int set_p;
2156{
2157 rtx src = SET_SRC (pat);
2158 rtx dest = SET_DEST (pat);
172890a2 2159 rtx note;
7506f491
DE
2160
2161 if (GET_CODE (src) == CALL)
2162 hash_scan_call (src, insn);
2163
172890a2 2164 else if (GET_CODE (dest) == REG)
7506f491 2165 {
172890a2 2166 unsigned int regno = REGNO (dest);
7506f491
DE
2167 rtx tmp;
2168
172890a2
RK
2169 /* If this is a single set and we are doing constant propagation,
2170 see if a REG_NOTE shows this equivalent to a constant. */
2171 if (set_p && (note = find_reg_equal_equiv_note (insn)) != 0
2172 && CONSTANT_P (XEXP (note, 0)))
2173 src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src);
2174
7506f491
DE
2175 /* Only record sets of pseudo-regs in the hash table. */
2176 if (! set_p
2177 && regno >= FIRST_PSEUDO_REGISTER
2178 /* Don't GCSE something if we can't do a reg/reg copy. */
2179 && can_copy_p [GET_MODE (dest)]
2180 /* Is SET_SRC something we want to gcse? */
172890a2
RK
2181 && want_to_gcse_p (src)
2182 /* Don't CSE a nop. */
43e72072
JJ
2183 && ! set_noop_p (pat)
 2184 /* Don't GCSE if it has an attached REG_EQUIV note.
 2185 At this point only function parameters should have
 2186 REG_EQUIV notes, and if the argument slot is used somewhere
 2187 explicitly, it means the address of the parameter has been taken,
 2188 so we should not extend the lifetime of the pseudo. */
2189 && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
2190 || GET_CODE (XEXP (note, 0)) != MEM))
7506f491
DE
2191 {
2192 /* An expression is not anticipatable if its operands are
52d76e11
RK
2193 modified before this insn or if this is not the only SET in
2194 this insn. */
2195 int antic_p = oprs_anticipatable_p (src, insn) && single_set (insn);
7506f491
DE
2196 /* An expression is not available if its operands are
2197 subsequently modified, including this insn. */
2198 int avail_p = oprs_available_p (src, insn);
c4c81601 2199
7506f491
DE
2200 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
2201 }
c4c81601 2202
7506f491
DE
2203 /* Record sets for constant/copy propagation. */
2204 else if (set_p
2205 && regno >= FIRST_PSEUDO_REGISTER
2206 && ((GET_CODE (src) == REG
2207 && REGNO (src) >= FIRST_PSEUDO_REGISTER
172890a2
RK
2208 && can_copy_p [GET_MODE (dest)]
2209 && REGNO (src) != regno)
e78d9500 2210 || GET_CODE (src) == CONST_INT
05f6f07c 2211 || GET_CODE (src) == SYMBOL_REF
e78d9500 2212 || GET_CODE (src) == CONST_DOUBLE)
7506f491
DE
2213 /* A copy is not available if its src or dest is subsequently
2214 modified. Here we want to search from INSN+1 on, but
2215 oprs_available_p searches from INSN on. */
2216 && (insn == BLOCK_END (BLOCK_NUM (insn))
2217 || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
2218 && oprs_available_p (pat, tmp))))
2219 insert_set_in_table (pat, insn);
2220 }
7506f491
DE
2221}
2222
2223static void
2224hash_scan_clobber (x, insn)
50b2596f 2225 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
7506f491
DE
2226{
2227 /* Currently nothing to do. */
2228}
2229
2230static void
2231hash_scan_call (x, insn)
50b2596f 2232 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
7506f491
DE
2233{
2234 /* Currently nothing to do. */
2235}
2236
2237/* Process INSN and add hash table entries as appropriate.
2238
2239 Only available expressions that set a single pseudo-reg are recorded.
2240
2241 Single sets in a PARALLEL could be handled, but it's an extra complication
2242 that isn't dealt with right now. The trick is handling the CLOBBERs that
2243 are also in the PARALLEL. Later.
2244
2245 If SET_P is non-zero, this is for the assignment hash table,
ed79bb3d
R
2246 otherwise it is for the expression hash table.
2247 If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
2248 not record any expressions. */
7506f491
DE
2249
2250static void
ed79bb3d 2251hash_scan_insn (insn, set_p, in_libcall_block)
7506f491
DE
2252 rtx insn;
2253 int set_p;
48e87cef 2254 int in_libcall_block;
7506f491
DE
2255{
2256 rtx pat = PATTERN (insn);
c4c81601 2257 int i;
7506f491 2258
172890a2
RK
2259 if (in_libcall_block)
2260 return;
2261
7506f491
DE
2262 /* Pick out the sets of INSN and for other forms of instructions record
2263 what's been modified. */
2264
172890a2
RK
2265 if (GET_CODE (pat) == SET)
2266 hash_scan_set (pat, insn, set_p);
7506f491 2267 else if (GET_CODE (pat) == PARALLEL)
c4c81601
RK
2268 for (i = 0; i < XVECLEN (pat, 0); i++)
2269 {
2270 rtx x = XVECEXP (pat, 0, i);
7506f491 2271
c4c81601 2272 if (GET_CODE (x) == SET)
172890a2 2273 hash_scan_set (x, insn, set_p);
c4c81601
RK
2274 else if (GET_CODE (x) == CLOBBER)
2275 hash_scan_clobber (x, insn);
2276 else if (GET_CODE (x) == CALL)
2277 hash_scan_call (x, insn);
2278 }
7506f491 2279
7506f491
DE
2280 else if (GET_CODE (pat) == CLOBBER)
2281 hash_scan_clobber (pat, insn);
2282 else if (GET_CODE (pat) == CALL)
2283 hash_scan_call (pat, insn);
2284}
2285
2286static void
2287dump_hash_table (file, name, table, table_size, total_size)
2288 FILE *file;
dff01034 2289 const char *name;
7506f491
DE
2290 struct expr **table;
2291 int table_size, total_size;
2292{
2293 int i;
2294 /* Flattened out table, so it's printed in proper order. */
4da896b2
MM
2295 struct expr **flat_table;
2296 unsigned int *hash_val;
c4c81601 2297 struct expr *expr;
4da896b2
MM
2298
2299 flat_table
2300 = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
2301 hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
7506f491 2302
7506f491 2303 for (i = 0; i < table_size; i++)
c4c81601
RK
2304 for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2305 {
2306 flat_table[expr->bitmap_index] = expr;
2307 hash_val[expr->bitmap_index] = i;
2308 }
7506f491
DE
2309
2310 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2311 name, table_size, total_size);
2312
2313 for (i = 0; i < total_size; i++)
21318741
RK
2314 if (flat_table[i] != 0)
2315 {
a0ac9e5a 2316 expr = flat_table[i];
21318741
RK
2317 fprintf (file, "Index %d (hash value %d)\n ",
2318 expr->bitmap_index, hash_val[i]);
a0ac9e5a 2319 print_rtl (file, expr->expr);
21318741
RK
2320 fprintf (file, "\n");
2321 }
7506f491
DE
2322
2323 fprintf (file, "\n");
4da896b2 2324
4da896b2
MM
2325 free (flat_table);
2326 free (hash_val);
7506f491
DE
2327}
2328
2329/* Record register first/last/block set information for REGNO in INSN.
c4c81601 2330
7506f491
DE
2331 reg_first_set records the first place in the block where the register
2332 is set and is used to compute "anticipatability".
c4c81601 2333
7506f491
DE
2334 reg_last_set records the last place in the block where the register
2335 is set and is used to compute "availability".
c4c81601 2336
7506f491
DE
2337 reg_set_in_block records whether the register is set in the block
2338 and is used to compute "transparency". */
2339
2340static void
2341record_last_reg_set_info (insn, regno)
2342 rtx insn;
2343 int regno;
2344{
b86ba9c8 2345 if (reg_first_set[regno] == NEVER_SET)
7506f491 2346 reg_first_set[regno] = INSN_CUID (insn);
c4c81601 2347
7506f491
DE
2348 reg_last_set[regno] = INSN_CUID (insn);
2349 SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2350}
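/* Illustrative example, not from the original sources: if pseudo 100 is
   set at CUIDs 10 and 20 inside a block, then reg_first_set[100] == 10
   and reg_last_set[100] == 20.  By oprs_unchanged_p above, an expression
   using reg 100 is anticipatable only at insns with CUID <= 10 and
   available only at insns with CUID > 20.  */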
2351
a13d4ebf
AM
2352
2353/* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
2354 Note we store a pair of elements in the list, so they have to be
2355 taken off pairwise. */
2356
2357static void
2358canon_list_insert (dest, unused1, v_insn)
2359 rtx dest ATTRIBUTE_UNUSED;
2360 rtx unused1 ATTRIBUTE_UNUSED;
2361 void * v_insn;
2362{
2363 rtx dest_addr, insn;
2364
2365 while (GET_CODE (dest) == SUBREG
2366 || GET_CODE (dest) == ZERO_EXTRACT
2367 || GET_CODE (dest) == SIGN_EXTRACT
2368 || GET_CODE (dest) == STRICT_LOW_PART)
2369 dest = XEXP (dest, 0);
2370
2371 /* If DEST is not a MEM, then it will not conflict with a load. Note
2372 that function calls are assumed to clobber memory, but are handled
2373 elsewhere. */
2374
2375 if (GET_CODE (dest) != MEM)
2376 return;
2377
2378 dest_addr = get_addr (XEXP (dest, 0));
2379 dest_addr = canon_rtx (dest_addr);
2380 insn = (rtx) v_insn;
2381
2382 canon_modify_mem_list[BLOCK_NUM (insn)] =
2383 alloc_INSN_LIST (dest_addr, canon_modify_mem_list[BLOCK_NUM (insn)]);
2384 canon_modify_mem_list[BLOCK_NUM (insn)] =
2385 alloc_INSN_LIST (dest, canon_modify_mem_list[BLOCK_NUM (insn)]);
2386}
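/* Illustrative note, not from the original sources: after this runs for a
   store, canon_modify_mem_list[bb] starts with the pair [DEST,
   canonicalized DEST address], so consumers pop entries two at a time.
   CALL_INSNs are the exception: record_last_mem_set_info below enters
   them as single elements, and traversals stop at them.  */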
2387
a13d4ebf
AM
2388/* Record memory modification information for INSN. We do not actually care
2389 about the memory location(s) that are set, or even how they are set (consider
2390 a CALL_INSN). We merely need to record which insns modify memory. */
7506f491
DE
2391
2392static void
2393record_last_mem_set_info (insn)
2394 rtx insn;
2395{
ccef9ef5
DB
2396 /* load_killed_in_block_p will handle the case of calls clobbering
2397 everything. */
a13d4ebf
AM
2398 modify_mem_list[BLOCK_NUM (insn)] =
2399 alloc_INSN_LIST (insn, modify_mem_list[BLOCK_NUM (insn)]);
2400
2401 if (GET_CODE (insn) == CALL_INSN)
2402 {
 2403 /* Note that traversals of this list (other than for freeing)
2404 will break after encountering a CALL_INSN. So, there's no
2405 need to insert a pair of items, as canon_list_insert does. */
2406 canon_modify_mem_list[BLOCK_NUM (insn)] =
2407 alloc_INSN_LIST (insn, canon_modify_mem_list[BLOCK_NUM (insn)]);
2408 }
2409 else
 2410 note_stores (PATTERN (insn), canon_list_insert, (void *) insn);
7506f491
DE
2411}
2412
7506f491 2413/* Called from compute_hash_table via note_stores to handle one
84832317
MM
2414 SET or CLOBBER in an insn. DATA is really the instruction in which
2415 the SET is taking place. */
7506f491
DE
2416
2417static void
84832317 2418record_last_set_info (dest, setter, data)
50b2596f 2419 rtx dest, setter ATTRIBUTE_UNUSED;
84832317 2420 void *data;
7506f491 2421{
84832317
MM
2422 rtx last_set_insn = (rtx) data;
2423
7506f491
DE
2424 if (GET_CODE (dest) == SUBREG)
2425 dest = SUBREG_REG (dest);
2426
2427 if (GET_CODE (dest) == REG)
2428 record_last_reg_set_info (last_set_insn, REGNO (dest));
2429 else if (GET_CODE (dest) == MEM
2430 /* Ignore pushes, they clobber nothing. */
2431 && ! push_operand (dest, GET_MODE (dest)))
2432 record_last_mem_set_info (last_set_insn);
2433}
2434
2435/* Top level function to create an expression or assignment hash table.
2436
2437 Expression entries are placed in the hash table if
2438 - they are of the form (set (pseudo-reg) src),
2439 - src is something we want to perform GCSE on,
2440 - none of the operands are subsequently modified in the block
2441
2442 Assignment entries are placed in the hash table if
2443 - they are of the form (set (pseudo-reg) src),
2444 - src is something we want to perform const/copy propagation on,
2445 - none of the operands or target are subsequently modified in the block
c4c81601 2446
7506f491
DE
2447 Currently src must be a pseudo-reg or a const_int.
2448
2449 F is the first insn.
2450 SET_P is non-zero for computing the assignment hash table. */
2451
2452static void
b5ce41ff 2453compute_hash_table (set_p)
7506f491
DE
2454 int set_p;
2455{
2456 int bb;
2457
2458 /* While we compute the hash table we also compute a bit array of which
2459 registers are set in which blocks.
7506f491
DE
2460 ??? This isn't needed during const/copy propagation, but it's cheap to
2461 compute. Later. */
2462 sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
7506f491 2463
a13d4ebf
AM
 2464 /* Re-cache any INSN_LIST nodes we have allocated. */
2465 {
2466 int i;
2467 for (i = 0; i < n_basic_blocks; i++)
2468 {
2469 if (modify_mem_list[i])
2470 free_INSN_LIST_list (modify_mem_list + i);
2471 if (canon_modify_mem_list[i])
2472 free_INSN_LIST_list (canon_modify_mem_list + i);
2473 }
2474 }
7506f491
DE
2475 /* Some working arrays used to track first and last set in each block. */
2476 /* ??? One could use alloca here, but at some size a threshold is crossed
2477 beyond which one should use malloc. Are we at that threshold here? */
2478 reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2479 reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2480
2481 for (bb = 0; bb < n_basic_blocks; bb++)
2482 {
2483 rtx insn;
770ae6cc 2484 unsigned int regno;
ed79bb3d 2485 int in_libcall_block;
770ae6cc 2486 unsigned int i;
7506f491
DE
2487
2488 /* First pass over the instructions records information used to
2489 determine when registers and memory are first and last set.
ccef9ef5 2490 ??? hard-reg reg_set_in_block computation
7506f491
DE
2491 could be moved to compute_sets since they currently don't change. */
2492
b86ba9c8
GK
2493 for (i = 0; i < max_gcse_regno; i++)
2494 reg_first_set[i] = reg_last_set[i] = NEVER_SET;
770ae6cc 2495
7506f491 2496
3b413743
RH
2497 for (insn = BLOCK_HEAD (bb);
2498 insn && insn != NEXT_INSN (BLOCK_END (bb));
7506f491
DE
2499 insn = NEXT_INSN (insn))
2500 {
2501#ifdef NON_SAVING_SETJMP
2502 if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2503 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2504 {
2505 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2506 record_last_reg_set_info (insn, regno);
2507 continue;
2508 }
2509#endif
2510
2c3c49de 2511 if (! INSN_P (insn))
7506f491
DE
2512 continue;
2513
2514 if (GET_CODE (insn) == CALL_INSN)
2515 {
2516 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4e2db584 2517 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
7506f491 2518 record_last_reg_set_info (insn, regno);
c4c81601 2519
7506f491
DE
2520 if (! CONST_CALL_P (insn))
2521 record_last_mem_set_info (insn);
2522 }
2523
84832317 2524 note_stores (PATTERN (insn), record_last_set_info, insn);
7506f491
DE
2525 }
2526
2527 /* The next pass builds the hash table. */
2528
3b413743
RH
2529 for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2530 insn && insn != NEXT_INSN (BLOCK_END (bb));
7506f491 2531 insn = NEXT_INSN (insn))
2c3c49de 2532 if (INSN_P (insn))
c4c81601
RK
2533 {
2534 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2535 in_libcall_block = 1;
2536 else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2537 in_libcall_block = 0;
2538 hash_scan_insn (insn, set_p, in_libcall_block);
7506f491
DE
2539 }
2540 }
2541
2542 free (reg_first_set);
2543 free (reg_last_set);
c4c81601 2544
7506f491
DE
2545 /* Catch bugs early. */
2546 reg_first_set = reg_last_set = 0;
2547}
2548
2549/* Allocate space for the set hash table.
2550 N_INSNS is the number of instructions in the function.
2551 It is used to determine the number of buckets to use. */
2552
2553static void
2554alloc_set_hash_table (n_insns)
2555 int n_insns;
2556{
2557 int n;
2558
2559 set_hash_table_size = n_insns / 4;
2560 if (set_hash_table_size < 11)
2561 set_hash_table_size = 11;
c4c81601 2562
7506f491
DE
2563 /* Attempt to maintain efficient use of hash table.
2564 Making it an odd number is simplest for now.
2565 ??? Later take some measurements. */
2566 set_hash_table_size |= 1;
2567 n = set_hash_table_size * sizeof (struct expr *);
2568 set_hash_table = (struct expr **) gmalloc (n);
2569}
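/* Sizing example, for illustration only: with 100 insns the table gets
   100/4 = 25 buckets (25 is already odd); with 40 insns, 40/4 = 10 falls
   below the floor of 11, and 11 | 1 is still 11.  */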
2570
2571/* Free things allocated by alloc_set_hash_table. */
2572
2573static void
2574free_set_hash_table ()
2575{
2576 free (set_hash_table);
2577}
2578
2579/* Compute the hash table for doing copy/const propagation. */
2580
2581static void
b5ce41ff 2582compute_set_hash_table ()
7506f491
DE
2583{
2584 /* Initialize count of number of entries in hash table. */
2585 n_sets = 0;
961192e1 2586 memset ((char *) set_hash_table, 0,
c4c81601 2587 set_hash_table_size * sizeof (struct expr *));
7506f491 2588
b5ce41ff 2589 compute_hash_table (1);
7506f491
DE
2590}
2591
2592/* Allocate space for the expression hash table.
2593 N_INSNS is the number of instructions in the function.
2594 It is used to determine the number of buckets to use. */
2595
2596static void
2597alloc_expr_hash_table (n_insns)
2e653e39 2598 unsigned int n_insns;
7506f491
DE
2599{
2600 int n;
2601
2602 expr_hash_table_size = n_insns / 2;
2603 /* Make sure the amount is usable. */
2604 if (expr_hash_table_size < 11)
2605 expr_hash_table_size = 11;
c4c81601 2606
7506f491
DE
2607 /* Attempt to maintain efficient use of hash table.
2608 Making it an odd number is simplest for now.
2609 ??? Later take some measurements. */
2610 expr_hash_table_size |= 1;
2611 n = expr_hash_table_size * sizeof (struct expr *);
2612 expr_hash_table = (struct expr **) gmalloc (n);
2613}
2614
2615/* Free things allocated by alloc_expr_hash_table. */
2616
2617static void
2618free_expr_hash_table ()
2619{
2620 free (expr_hash_table);
2621}
2622
2623/* Compute the hash table for doing GCSE. */
2624
2625static void
b5ce41ff 2626compute_expr_hash_table ()
7506f491
DE
2627{
2628 /* Initialize count of number of entries in hash table. */
2629 n_exprs = 0;
961192e1 2630 memset ((char *) expr_hash_table, 0,
c4c81601 2631 expr_hash_table_size * sizeof (struct expr *));
7506f491 2632
b5ce41ff 2633 compute_hash_table (0);
7506f491
DE
2634}
2635\f
2636/* Expression tracking support. */
2637
2638/* Lookup pattern PAT in the expression table.
2639 The result is a pointer to the table entry, or NULL if not found. */
2640
2641static struct expr *
2642lookup_expr (pat)
2643 rtx pat;
2644{
2645 int do_not_record_p;
2646 unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2647 expr_hash_table_size);
2648 struct expr *expr;
2649
2650 if (do_not_record_p)
2651 return NULL;
2652
2653 expr = expr_hash_table[hash];
2654
2655 while (expr && ! expr_equiv_p (expr->expr, pat))
2656 expr = expr->next_same_hash;
2657
2658 return expr;
2659}
2660
c4c81601
RK
2661/* Lookup REGNO in the set table. If PAT is non-NULL look for the entry that
2662 matches it, otherwise return the first entry for REGNO. The result is a
2663 pointer to the table entry, or NULL if not found. */
7506f491
DE
2664
2665static struct expr *
2666lookup_set (regno, pat)
770ae6cc 2667 unsigned int regno;
7506f491
DE
2668 rtx pat;
2669{
2670 unsigned int hash = hash_set (regno, set_hash_table_size);
2671 struct expr *expr;
2672
2673 expr = set_hash_table[hash];
2674
2675 if (pat)
2676 {
2677 while (expr && ! expr_equiv_p (expr->expr, pat))
2678 expr = expr->next_same_hash;
2679 }
2680 else
2681 {
2682 while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2683 expr = expr->next_same_hash;
2684 }
2685
2686 return expr;
2687}
2688
2689/* Return the next entry for REGNO in list EXPR. */
2690
2691static struct expr *
2692next_set (regno, expr)
770ae6cc 2693 unsigned int regno;
7506f491
DE
2694 struct expr *expr;
2695{
2696 do
2697 expr = expr->next_same_hash;
2698 while (expr && REGNO (SET_DEST (expr->expr)) != regno);
c4c81601 2699
7506f491
DE
2700 return expr;
2701}
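/* Illustrative iteration sketch, not part of the original sources:

     struct expr *set;
     for (set = lookup_set (regno, NULL_RTX); set != NULL;
	  set = next_set (regno, set))
       ... visit each recorded SET of pseudo REGNO ...  */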
2702
2703/* Reset tables used to keep track of what's still available [since the
2704 start of the block]. */
2705
2706static void
2707reset_opr_set_tables ()
2708{
2709 /* Maintain a bitmap of which regs have been set since beginning of
2710 the block. */
2711 sbitmap_zero (reg_set_bitmap);
c4c81601 2712
7506f491
DE
2713 /* Also keep a record of the last instruction to modify memory.
 2714 For now this is very trivial; we only record whether any memory
2715 location has been modified. */
a13d4ebf
AM
2716 {
2717 int i;
2718
 2719 /* Re-cache any INSN_LIST nodes we have allocated. */
2720 for (i = 0; i < n_basic_blocks; i++)
2721 {
2722 if (modify_mem_list[i])
2723 free_INSN_LIST_list (modify_mem_list + i);
2724 if (canon_modify_mem_list[i])
2725 free_INSN_LIST_list (canon_modify_mem_list + i);
2726 }
2727 }
7506f491
DE
2728}
2729
2730/* Return non-zero if the operands of X are not set before INSN in
2731 INSN's basic block. */
2732
2733static int
2734oprs_not_set_p (x, insn)
2735 rtx x, insn;
2736{
c4c81601 2737 int i, j;
7506f491 2738 enum rtx_code code;
6f7d635c 2739 const char *fmt;
7506f491 2740
7506f491
DE
2741 if (x == 0)
2742 return 1;
2743
2744 code = GET_CODE (x);
2745 switch (code)
2746 {
2747 case PC:
2748 case CC0:
2749 case CONST:
2750 case CONST_INT:
2751 case CONST_DOUBLE:
2752 case SYMBOL_REF:
2753 case LABEL_REF:
2754 case ADDR_VEC:
2755 case ADDR_DIFF_VEC:
2756 return 1;
2757
2758 case MEM:
e2d2ed72
AM
2759 if (load_killed_in_block_p (BLOCK_FOR_INSN (insn),
2760 INSN_CUID (insn), x, 0))
a13d4ebf 2761 return 0;
c4c81601
RK
2762 else
2763 return oprs_not_set_p (XEXP (x, 0), insn);
7506f491
DE
2764
2765 case REG:
2766 return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2767
2768 default:
2769 break;
2770 }
2771
c4c81601 2772 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
7506f491
DE
2773 {
2774 if (fmt[i] == 'e')
2775 {
7506f491
DE
2776 /* If we are about to do the last recursive call
2777 needed at this level, change it into iteration.
2778 This function is called enough to be worth it. */
2779 if (i == 0)
c4c81601
RK
2780 return oprs_not_set_p (XEXP (x, i), insn);
2781
2782 if (! oprs_not_set_p (XEXP (x, i), insn))
7506f491
DE
2783 return 0;
2784 }
2785 else if (fmt[i] == 'E')
c4c81601
RK
2786 for (j = 0; j < XVECLEN (x, i); j++)
2787 if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2788 return 0;
7506f491
DE
2789 }
2790
2791 return 1;
2792}
2793
2794/* Mark things set by a CALL. */
2795
2796static void
b5ce41ff
JL
2797mark_call (insn)
2798 rtx insn;
7506f491 2799{
a13d4ebf
AM
2800 if (! CONST_CALL_P (insn))
2801 record_last_mem_set_info (insn);
7506f491
DE
2802}
2803
2804/* Mark things set by a SET. */
2805
2806static void
2807mark_set (pat, insn)
2808 rtx pat, insn;
2809{
2810 rtx dest = SET_DEST (pat);
2811
2812 while (GET_CODE (dest) == SUBREG
2813 || GET_CODE (dest) == ZERO_EXTRACT
2814 || GET_CODE (dest) == SIGN_EXTRACT
2815 || GET_CODE (dest) == STRICT_LOW_PART)
2816 dest = XEXP (dest, 0);
2817
a13d4ebf
AM
2818 if (GET_CODE (dest) == REG)
2819 SET_BIT (reg_set_bitmap, REGNO (dest));
2820 else if (GET_CODE (dest) == MEM)
2821 record_last_mem_set_info (insn);
2822
7506f491 2823 if (GET_CODE (SET_SRC (pat)) == CALL)
b5ce41ff 2824 mark_call (insn);
7506f491
DE
2825}
2826
2827/* Record things set by a CLOBBER. */
2828
2829static void
2830mark_clobber (pat, insn)
2831 rtx pat, insn;
2832{
2833 rtx clob = XEXP (pat, 0);
2834
2835 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2836 clob = XEXP (clob, 0);
2837
a13d4ebf
AM
2838 if (GET_CODE (clob) == REG)
2839 SET_BIT (reg_set_bitmap, REGNO (clob));
2840 else
2841 record_last_mem_set_info (insn);
7506f491
DE
2842}
2843
2844/* Record things set by INSN.
2845 This data is used by oprs_not_set_p. */
2846
2847static void
2848mark_oprs_set (insn)
2849 rtx insn;
2850{
2851 rtx pat = PATTERN (insn);
c4c81601 2852 int i;
7506f491
DE
2853
2854 if (GET_CODE (pat) == SET)
2855 mark_set (pat, insn);
2856 else if (GET_CODE (pat) == PARALLEL)
c4c81601
RK
2857 for (i = 0; i < XVECLEN (pat, 0); i++)
2858 {
2859 rtx x = XVECEXP (pat, 0, i);
2860
2861 if (GET_CODE (x) == SET)
2862 mark_set (x, insn);
2863 else if (GET_CODE (x) == CLOBBER)
2864 mark_clobber (x, insn);
2865 else if (GET_CODE (x) == CALL)
2866 mark_call (insn);
2867 }
7506f491 2868
7506f491
DE
2869 else if (GET_CODE (pat) == CLOBBER)
2870 mark_clobber (pat, insn);
2871 else if (GET_CODE (pat) == CALL)
b5ce41ff 2872 mark_call (insn);
7506f491 2873}
b5ce41ff 2874
7506f491
DE
2875\f
2876/* Classic GCSE reaching definition support. */
2877
2878/* Allocate reaching def variables. */
2879
2880static void
2881alloc_rd_mem (n_blocks, n_insns)
2882 int n_blocks, n_insns;
2883{
2884 rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2885 sbitmap_vector_zero (rd_kill, n_basic_blocks);
2886
2887 rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2888 sbitmap_vector_zero (rd_gen, n_basic_blocks);
2889
2890 reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2891 sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2892
2893 rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2894 sbitmap_vector_zero (rd_out, n_basic_blocks);
2895}
2896
2897/* Free reaching def variables. */
2898
2899static void
2900free_rd_mem ()
2901{
5a660bff
DB
2902 sbitmap_vector_free (rd_kill);
2903 sbitmap_vector_free (rd_gen);
2904 sbitmap_vector_free (reaching_defs);
2905 sbitmap_vector_free (rd_out);
7506f491
DE
2906}
2907
c4c81601 2908/* Add INSN to the kills of BB. REGNO, set in BB, is killed by INSN. */
7506f491
DE
2909
2910static void
2911handle_rd_kill_set (insn, regno, bb)
2912 rtx insn;
e2d2ed72
AM
2913 int regno;
2914 basic_block bb;
7506f491 2915{
c4c81601 2916 struct reg_set *this_reg;
7506f491 2917
c4c81601
RK
2918 for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg ->next)
2919 if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
e2d2ed72 2920 SET_BIT (rd_kill[bb->index], INSN_CUID (this_reg->insn));
7506f491
DE
2921}
2922
7506f491
DE
2923/* Compute the set of kill's for reaching definitions. */
2924
2925static void
2926compute_kill_rd ()
2927{
c4c81601 2928 int bb, cuid;
172890a2
RK
2929 unsigned int regno;
2930 int i;
7506f491
DE
2931
2932 /* For each block
 2933 For each set bit in `gen' of the block (i.e. each insn which
ac7c5af5
JL
2934 generates a definition in the block)
2935 Call the reg set by the insn corresponding to that bit regx
2936 Look at the linked list starting at reg_set_table[regx]
2937 For each setting of regx in the linked list, which is not in
2938 this block
c4c81601 2939 Set the bit in `kill' corresponding to that insn. */
7506f491 2940 for (bb = 0; bb < n_basic_blocks; bb++)
c4c81601
RK
2941 for (cuid = 0; cuid < max_cuid; cuid++)
2942 if (TEST_BIT (rd_gen[bb], cuid))
7506f491 2943 {
c4c81601
RK
2944 rtx insn = CUID_INSN (cuid);
2945 rtx pat = PATTERN (insn);
7506f491 2946
c4c81601
RK
2947 if (GET_CODE (insn) == CALL_INSN)
2948 {
2949 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4e2db584
RH
2950 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
2951 handle_rd_kill_set (insn, regno, BASIC_BLOCK (bb));
c4c81601 2952 }
7506f491 2953
c4c81601
RK
2954 if (GET_CODE (pat) == PARALLEL)
2955 {
2956 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
7506f491 2957 {
c4c81601 2958 enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
7506f491 2959
c4c81601
RK
2960 if ((code == SET || code == CLOBBER)
2961 && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2962 handle_rd_kill_set (insn,
2963 REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
e2d2ed72 2964 BASIC_BLOCK (bb));
ac7c5af5 2965 }
ac7c5af5 2966 }
c4c81601
RK
2967 else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
2968 /* Each setting of this register outside of this block
2969 must be marked in the set of kills in this block. */
e2d2ed72 2970 handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), BASIC_BLOCK (bb));
7506f491 2971 }
7506f491
DE
2972}
2973
2974/* Compute the reaching definitions as in
2975 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2976 Chapter 10. It is the same algorithm as used for computing available
2977 expressions but applied to the gens and kills of reaching definitions. */
2978
2979static void
2980compute_rd ()
2981{
2982 int bb, changed, passes;
2983
2984 for (bb = 0; bb < n_basic_blocks; bb++)
2985 sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2986
2987 passes = 0;
2988 changed = 1;
2989 while (changed)
2990 {
2991 changed = 0;
2992 for (bb = 0; bb < n_basic_blocks; bb++)
ac7c5af5 2993 {
36349f8b 2994 sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
7506f491
DE
2995 changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2996 reaching_defs[bb], rd_kill[bb]);
ac7c5af5 2997 }
7506f491
DE
2998 passes++;
2999 }
3000
3001 if (gcse_file)
3002 fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
3003}
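/* Restated as dataflow equations (illustrative only), the loop above
   iterates

     reaching_defs[bb] = UNION over preds p of rd_out[p]
     rd_out[bb]        = rd_gen[bb] | (reaching_defs[bb] & ~rd_kill[bb])

   until no rd_out[bb] changes, i.e. the available-expressions scheme
   applied to the gens and kills of definitions.  */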
3004\f
3005/* Classic GCSE available expression support. */
3006
3007/* Allocate memory for available expression computation. */
3008
3009static void
3010alloc_avail_expr_mem (n_blocks, n_exprs)
3011 int n_blocks, n_exprs;
3012{
3013 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
3014 sbitmap_vector_zero (ae_kill, n_basic_blocks);
3015
3016 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
3017 sbitmap_vector_zero (ae_gen, n_basic_blocks);
3018
3019 ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
3020 sbitmap_vector_zero (ae_in, n_basic_blocks);
3021
3022 ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
3023 sbitmap_vector_zero (ae_out, n_basic_blocks);
7506f491
DE
3024}
3025
3026static void
3027free_avail_expr_mem ()
3028{
5a660bff
DB
3029 sbitmap_vector_free (ae_kill);
3030 sbitmap_vector_free (ae_gen);
3031 sbitmap_vector_free (ae_in);
3032 sbitmap_vector_free (ae_out);
7506f491
DE
3033}
3034
3035/* Compute the set of available expressions generated in each basic block. */
3036
3037static void
3038compute_ae_gen ()
3039{
2e653e39 3040 unsigned int i;
c4c81601
RK
3041 struct expr *expr;
3042 struct occr *occr;
7506f491
DE
3043
3044 /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
3045 This is all we have to do because an expression is not recorded if it
3046 is not available, and the only expressions we want to work with are the
3047 ones that are recorded. */
7506f491 3048 for (i = 0; i < expr_hash_table_size; i++)
c4c81601
RK
3049 for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash)
3050 for (occr = expr->avail_occr; occr != 0; occr = occr->next)
3051 SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
7506f491
DE
3052}
3053
3054/* Return non-zero if expression X is killed in BB. */
3055
3056static int
3057expr_killed_p (x, bb)
3058 rtx x;
e2d2ed72 3059 basic_block bb;
7506f491 3060{
c4c81601 3061 int i, j;
7506f491 3062 enum rtx_code code;
6f7d635c 3063 const char *fmt;
7506f491 3064
7506f491
DE
3065 if (x == 0)
3066 return 1;
3067
3068 code = GET_CODE (x);
3069 switch (code)
3070 {
3071 case REG:
e2d2ed72 3072 return TEST_BIT (reg_set_in_block[bb->index], REGNO (x));
7506f491
DE
3073
3074 case MEM:
a13d4ebf
AM
3075 if (load_killed_in_block_p (bb, get_max_uid () + 1, x, 0))
3076 return 1;
c4c81601
RK
3077 else
3078 return expr_killed_p (XEXP (x, 0), bb);
7506f491
DE
3079
3080 case PC:
3081 case CC0: /*FIXME*/
3082 case CONST:
3083 case CONST_INT:
3084 case CONST_DOUBLE:
3085 case SYMBOL_REF:
3086 case LABEL_REF:
3087 case ADDR_VEC:
3088 case ADDR_DIFF_VEC:
3089 return 0;
3090
3091 default:
3092 break;
3093 }
3094
c4c81601 3095 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
7506f491
DE
3096 {
3097 if (fmt[i] == 'e')
3098 {
7506f491
DE
3099 /* If we are about to do the last recursive call
3100 needed at this level, change it into iteration.
3101 This function is called enough to be worth it. */
3102 if (i == 0)
c4c81601
RK
3103 return expr_killed_p (XEXP (x, i), bb);
3104 else if (expr_killed_p (XEXP (x, i), bb))
7506f491
DE
3105 return 1;
3106 }
3107 else if (fmt[i] == 'E')
c4c81601
RK
3108 for (j = 0; j < XVECLEN (x, i); j++)
3109 if (expr_killed_p (XVECEXP (x, i, j), bb))
3110 return 1;
7506f491
DE
3111 }
3112
3113 return 0;
3114}
3115
3116/* Compute the set of available expressions killed in each basic block. */
3117
3118static void
a42cd965
AM
3119compute_ae_kill (ae_gen, ae_kill)
3120 sbitmap *ae_gen, *ae_kill;
7506f491 3121{
2e653e39
RK
3122 int bb;
3123 unsigned int i;
c4c81601 3124 struct expr *expr;
7506f491
DE
3125
3126 for (bb = 0; bb < n_basic_blocks; bb++)
c4c81601
RK
3127 for (i = 0; i < expr_hash_table_size; i++)
3128 for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
7506f491 3129 {
c4c81601
RK
3130 /* Skip EXPR if generated in this block. */
3131 if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
3132 continue;
7506f491 3133
e2d2ed72 3134 if (expr_killed_p (expr->expr, BASIC_BLOCK (bb)))
c4c81601 3135 SET_BIT (ae_kill[bb], expr->bitmap_index);
7506f491 3136 }
7506f491 3137}
7506f491
DE
3138\f
3139/* Actually perform the Classic GCSE optimizations. */
3140
3141/* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
3142
3143 CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
3144 as a positive reach. We want to do this when there are two computations
3145 of the expression in the block.
3146
3147 VISITED is a pointer to a working buffer for tracking which BB's have
3148 been visited. It is NULL for the top-level call.
3149
3150 We treat reaching expressions that go through blocks containing the same
3151 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
3152 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
3153 2 as not reaching. The intent is to improve the probability of finding
3154 only one reaching expression and to reduce register lifetimes by picking
3155 the closest such expression. */
3156
3157static int
283a2545 3158expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
7506f491
DE
3159 struct occr *occr;
3160 struct expr *expr;
e2d2ed72 3161 basic_block bb;
7506f491
DE
3162 int check_self_loop;
3163 char *visited;
3164{
36349f8b 3165 edge pred;
7506f491 3166
e2d2ed72 3167 for (pred = bb->pred; pred != NULL; pred = pred->pred_next)
7506f491 3168 {
e2d2ed72 3169 basic_block pred_bb = pred->src;
7506f491 3170
e2d2ed72 3171 if (visited[pred_bb->index])
c4c81601 3172 /* This predecessor has already been visited. Nothing to do. */
7506f491 3173 ;
7506f491 3174 else if (pred_bb == bb)
ac7c5af5 3175 {
7506f491
DE
3176 /* BB loops on itself. */
3177 if (check_self_loop
e2d2ed72
AM
3178 && TEST_BIT (ae_gen[pred_bb->index], expr->bitmap_index)
3179 && BLOCK_NUM (occr->insn) == pred_bb->index)
7506f491 3180 return 1;
c4c81601 3181
e2d2ed72 3182 visited[pred_bb->index] = 1;
ac7c5af5 3183 }
c4c81601 3184
7506f491 3185 /* Ignore this predecessor if it kills the expression. */
e2d2ed72
AM
3186 else if (TEST_BIT (ae_kill[pred_bb->index], expr->bitmap_index))
3187 visited[pred_bb->index] = 1;
c4c81601 3188
7506f491 3189 /* Does this predecessor generate this expression? */
e2d2ed72 3190 else if (TEST_BIT (ae_gen[pred_bb->index], expr->bitmap_index))
7506f491
DE
3191 {
3192 /* Is this the occurrence we're looking for?
3193 Note that there's only one generating occurrence per block
3194 so we just need to check the block number. */
e2d2ed72 3195 if (BLOCK_NUM (occr->insn) == pred_bb->index)
7506f491 3196 return 1;
c4c81601 3197
e2d2ed72 3198 visited[pred_bb->index] = 1;
7506f491 3199 }
c4c81601 3200
7506f491
DE
3201 /* Neither gen nor kill. */
3202 else
ac7c5af5 3203 {
e2d2ed72 3204 visited[pred_bb->index] = 1;
283a2545
RL
3205 if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
3206 visited))
c4c81601 3207
7506f491 3208 return 1;
ac7c5af5 3209 }
7506f491
DE
3210 }
3211
3212 /* All paths have been checked. */
3213 return 0;
3214}
3215
283a2545
RL
3216/* This wrapper for expr_reaches_here_p_work() is to ensure that any
3217 memory allocated for that function is returned. */
3218
3219static int
3220expr_reaches_here_p (occr, expr, bb, check_self_loop)
3221 struct occr *occr;
3222 struct expr *expr;
e2d2ed72 3223 basic_block bb;
283a2545
RL
3224 int check_self_loop;
3225{
3226 int rval;
c4c81601 3227 char *visited = (char *) xcalloc (n_basic_blocks, 1);
283a2545 3228
c4c81601 3229 rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);
283a2545
RL
3230
3231 free (visited);
c4c81601 3232 return rval;
283a2545
RL
3233}

/* Return the instruction that computes EXPR that reaches INSN's basic block.
   If there is more than one such instruction, return NULL.

   Called only by handle_avail_expr.  */

static rtx
computing_insn (expr, insn)
     struct expr *expr;
     rtx insn;
{
  basic_block bb = BLOCK_FOR_INSN (insn);

  if (expr->avail_occr->next == NULL)
    {
      if (BLOCK_FOR_INSN (expr->avail_occr->insn) == bb)
	/* The available expression is actually itself
	   (i.e. a loop in the flow graph) so do nothing.  */
	return NULL;

      /* (FIXME) Handle the case where we found a pattern that was
	 created by a substitution that took place.  */
      return expr->avail_occr->insn;
    }
  else
    {
      /* Pattern is computed more than once.
	 Search backwards from this insn to see how many of these
	 computations actually reach this insn.  */
      struct occr *occr;
      rtx insn_computes_expr = NULL;
      int can_reach = 0;

      for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
	{
	  if (BLOCK_FOR_INSN (occr->insn) == bb)
	    {
	      /* The expression is generated in this block.
		 The only time we care about this is when the expression
		 is generated later in the block [and thus there's a loop].
		 We let the normal cse pass handle the other cases.  */
	      if (INSN_CUID (insn) < INSN_CUID (occr->insn)
		  && expr_reaches_here_p (occr, expr, bb, 1))
		{
		  can_reach++;
		  if (can_reach > 1)
		    return NULL;

		  insn_computes_expr = occr->insn;
		}
	    }
	  else if (expr_reaches_here_p (occr, expr, bb, 0))
	    {
	      can_reach++;
	      if (can_reach > 1)
		return NULL;

	      insn_computes_expr = occr->insn;
	    }
	}

      if (insn_computes_expr == NULL)
	abort ();

      return insn_computes_expr;
    }
}

/* Return non-zero if the definition in DEF_INSN can reach INSN.
   Only called by can_disregard_other_sets.  */

static int
def_reaches_here_p (insn, def_insn)
     rtx insn, def_insn;
{
  rtx reg;

  if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
    return 1;

  if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
    {
      if (INSN_CUID (def_insn) < INSN_CUID (insn))
	{
	  if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
	    return 1;
	  else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
	    reg = XEXP (PATTERN (def_insn), 0);
	  else if (GET_CODE (PATTERN (def_insn)) == SET)
	    reg = SET_DEST (PATTERN (def_insn));
	  else
	    abort ();

	  return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
	}
      else
	return 0;
    }

  return 0;
}

/* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.  The
   value returned is the number of definitions that reach INSN.  Returning a
   value of zero means that [maybe] more than one definition reaches INSN and
   the caller can't perform whatever optimization it is trying.  I.e. it is
   always safe to return zero.  */

static int
can_disregard_other_sets (addr_this_reg, insn, for_combine)
     struct reg_set **addr_this_reg;
     rtx insn;
     int for_combine;
{
  int number_of_reaching_defs = 0;
  struct reg_set *this_reg;

  for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
    if (def_reaches_here_p (insn, this_reg->insn))
      {
	number_of_reaching_defs++;
	/* Ignore parallels for now.  */
	if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
	  return 0;

	if (!for_combine
	    && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
		|| ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
				  SET_SRC (PATTERN (insn)))))
	  /* A setting of the reg to a different value reaches INSN.  */
	  return 0;

	if (number_of_reaching_defs > 1)
	  {
	    /* If the value this register is being set to is equal to the
	       previous value it was set to, and this setting reaches the
	       insn we are trying to do the substitution on, then we are
	       ok.  */
	    if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
	      return 0;
	    else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
				    SET_SRC (PATTERN (insn))))
	      return 0;
	  }

	*addr_this_reg = this_reg;
      }

  return number_of_reaching_defs;
}

/* Expression computed by insn is available and the substitution is legal,
   so try to perform the substitution.

   The result is non-zero if any changes were made.  */

static int
handle_avail_expr (insn, expr)
     rtx insn;
     struct expr *expr;
{
  rtx pat, insn_computes_expr, expr_set;
  rtx to;
  struct reg_set *this_reg;
  int found_setting, use_src;
  int changed = 0;

  /* We only handle the case where one computation of the expression
     reaches this instruction.  */
  insn_computes_expr = computing_insn (expr, insn);
  if (insn_computes_expr == NULL)
    return 0;
  expr_set = single_set (insn_computes_expr);
  if (!expr_set)
    abort ();

  found_setting = 0;
  use_src = 0;

  /* At this point we know only one computation of EXPR outside of this
     block reaches this insn.  Now try to find a register that the
     expression is computed into.  */
  if (GET_CODE (SET_SRC (expr_set)) == REG)
    {
      /* This is the case when the available expression that reaches
	 here has already been handled as an available expression.  */
      unsigned int regnum_for_replacing
	= REGNO (SET_SRC (expr_set));

      /* If the register was created by GCSE we can't use `reg_set_table',
	 however we know it's set only once.  */
      if (regnum_for_replacing >= max_gcse_regno
	  /* If the register the expression is computed into is set only once,
	     or only one set reaches this insn, we can use it.  */
	  || (((this_reg = reg_set_table[regnum_for_replacing]),
	       this_reg->next == NULL)
	      || can_disregard_other_sets (&this_reg, insn, 0)))
	{
	  use_src = 1;
	  found_setting = 1;
	}
    }

  if (!found_setting)
    {
      unsigned int regnum_for_replacing
	= REGNO (SET_DEST (expr_set));

      /* This shouldn't happen.  */
      if (regnum_for_replacing >= max_gcse_regno)
	abort ();

      this_reg = reg_set_table[regnum_for_replacing];

      /* If the register the expression is computed into is set only once,
	 or only one set reaches this insn, use it.  */
      if (this_reg->next == NULL
	  || can_disregard_other_sets (&this_reg, insn, 0))
	found_setting = 1;
    }

  if (found_setting)
    {
      pat = PATTERN (insn);
      if (use_src)
	to = SET_SRC (expr_set);
      else
	to = SET_DEST (expr_set);
      changed = validate_change (insn, &SET_SRC (pat), to, 0);

      /* We should be able to ignore the return code from validate_change but
	 to play it safe we check.  */
      if (changed)
	{
	  gcse_subst_count++;
	  if (gcse_file != NULL)
	    {
	      fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
		       INSN_UID (insn));
	      fprintf (gcse_file, " reg %d %s insn %d\n",
		       REGNO (to), use_src ? "from" : "set in",
		       INSN_UID (insn_computes_expr));
	    }
	}
    }

  /* The register that the expr is computed into is set more than once.  */
  else if (1 /*expensive_op(this_pattern->op) && do_expensive_gcse)*/)
    {
      /* Insert an insn after INSN_COMPUTES_EXPR that copies the register
	 set there into a new pseudo register (call it REGN), then replace
	 the use in INSN with REGN.  */
      rtx new_insn;

      to = gen_reg_rtx (GET_MODE (SET_DEST (expr_set)));

      /* Generate the new insn.  */
      /* ??? If the change fails, we return 0, even though we created
	 an insn.  I think this is ok.  */
      new_insn
	= emit_insn_after (gen_rtx_SET (VOIDmode, to,
					SET_DEST (expr_set)),
			   insn_computes_expr);

      /* Keep block number table up to date.  */
      set_block_for_new_insns (new_insn, BLOCK_FOR_INSN (insn_computes_expr));

      /* Keep register set table up to date.  */
      record_one_set (REGNO (to), new_insn);

      gcse_create_count++;
      if (gcse_file != NULL)
	{
	  fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
		   INSN_UID (NEXT_INSN (insn_computes_expr)),
		   REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
	  fprintf (gcse_file, ", computed in insn %d,\n",
		   INSN_UID (insn_computes_expr));
	  fprintf (gcse_file, " into newly allocated reg %d\n",
		   REGNO (to));
	}

      pat = PATTERN (insn);

      /* Do register replacement for INSN.  */
      changed = validate_change (insn, &SET_SRC (pat),
				 SET_DEST (PATTERN
					   (NEXT_INSN (insn_computes_expr))),
				 0);

      /* We should be able to ignore the return code from validate_change but
	 to play it safe we check.  */
      if (changed)
	{
	  gcse_subst_count++;
	  if (gcse_file != NULL)
	    {
	      fprintf (gcse_file,
		       "GCSE: Replacing the source in insn %d with reg %d ",
		       INSN_UID (insn),
		       REGNO (SET_DEST (PATTERN (NEXT_INSN
						 (insn_computes_expr)))));
	      fprintf (gcse_file, "set in insn %d\n",
		       INSN_UID (insn_computes_expr));
	    }
	}
    }

  return changed;
}
3545
c4c81601
RK
3546/* Perform classic GCSE. This is called by one_classic_gcse_pass after all
3547 the dataflow analysis has been done.
7506f491
DE
3548
3549 The result is non-zero if a change was made. */
3550
3551static int
3552classic_gcse ()
3553{
3554 int bb, changed;
3555 rtx insn;
3556
3557 /* Note we start at block 1. */
3558
3559 changed = 0;
3560 for (bb = 1; bb < n_basic_blocks; bb++)
3561 {
3562 /* Reset tables used to keep track of what's still valid [since the
3563 start of the block]. */
3564 reset_opr_set_tables ();
3565
3b413743
RH
3566 for (insn = BLOCK_HEAD (bb);
3567 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
7506f491
DE
3568 insn = NEXT_INSN (insn))
3569 {
3570 /* Is insn of form (set (pseudo-reg) ...)? */
7506f491
DE
3571 if (GET_CODE (insn) == INSN
3572 && GET_CODE (PATTERN (insn)) == SET
3573 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3574 && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3575 {
3576 rtx pat = PATTERN (insn);
3577 rtx src = SET_SRC (pat);
3578 struct expr *expr;
3579
3580 if (want_to_gcse_p (src)
3581 /* Is the expression recorded? */
3582 && ((expr = lookup_expr (src)) != NULL)
3583 /* Is the expression available [at the start of the
3584 block]? */
3585 && TEST_BIT (ae_in[bb], expr->bitmap_index)
3586 /* Are the operands unchanged since the start of the
3587 block? */
3588 && oprs_not_set_p (src, insn))
3589 changed |= handle_avail_expr (insn, expr);
3590 }
3591
3592 /* Keep track of everything modified by this insn. */
3593 /* ??? Need to be careful w.r.t. mods done to INSN. */
2c3c49de 3594 if (INSN_P (insn))
7506f491 3595 mark_oprs_set (insn);
ac7c5af5 3596 }
7506f491
DE
3597 }
3598
3599 return changed;
3600}

/* Top level routine to perform one classic GCSE pass.

   Return non-zero if a change was made.  */

static int
one_classic_gcse_pass (pass)
     int pass;
{
  int changed = 0;

  gcse_subst_count = 0;
  gcse_create_count = 0;

  alloc_expr_hash_table (max_cuid);
  alloc_rd_mem (n_basic_blocks, max_cuid);
  compute_expr_hash_table ();
  if (gcse_file)
    dump_hash_table (gcse_file, "Expression", expr_hash_table,
		     expr_hash_table_size, n_exprs);

  if (n_exprs > 0)
    {
      compute_kill_rd ();
      compute_rd ();
      alloc_avail_expr_mem (n_basic_blocks, n_exprs);
      compute_ae_gen ();
      compute_ae_kill (ae_gen, ae_kill);
      compute_available (ae_gen, ae_kill, ae_out, ae_in);
      changed = classic_gcse ();
      free_avail_expr_mem ();
    }

  free_rd_mem ();
  free_expr_hash_table ();

  if (gcse_file)
    {
      fprintf (gcse_file, "\n");
      fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
	       current_function_name, pass, bytes_used, gcse_subst_count);
      fprintf (gcse_file, "%d insns created\n", gcse_create_count);
    }

  return changed;
}

/* Compute copy/constant propagation working variables.  */

/* Local properties of assignments.  */
static sbitmap *cprop_pavloc;
static sbitmap *cprop_absaltered;

/* Global properties of assignments (computed from the local properties).  */
static sbitmap *cprop_avin;
static sbitmap *cprop_avout;

/* Allocate vars used for copy/const propagation.  N_BLOCKS is the number of
   basic blocks.  N_SETS is the number of sets.  */

static void
alloc_cprop_mem (n_blocks, n_sets)
     int n_blocks, n_sets;
{
  cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
  cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);

  cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
  cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
}

/* Free vars used by copy/const propagation.  */

static void
free_cprop_mem ()
{
  sbitmap_vector_free (cprop_pavloc);
  sbitmap_vector_free (cprop_absaltered);
  sbitmap_vector_free (cprop_avin);
  sbitmap_vector_free (cprop_avout);
}

/* For each block, compute whether X is transparent.  X is either an
   expression or an assignment [though we don't care which, for this context
   an assignment is treated as an expression].  For each block where an
   element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
   bit in BMAP.  */

static void
compute_transp (x, indx, bmap, set_p)
     rtx x;
     int indx;
     sbitmap *bmap;
     int set_p;
{
  int bb, i, j;
  enum rtx_code code;
  reg_set *r;
  const char *fmt;

  /* repeat is used to turn tail-recursion into iteration since GCC
     can't do it when there's no return value.  */
 repeat:

  if (x == 0)
    return;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      if (set_p)
	{
	  if (REGNO (x) < FIRST_PSEUDO_REGISTER)
	    {
	      for (bb = 0; bb < n_basic_blocks; bb++)
		if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
		  SET_BIT (bmap[bb], indx);
	    }
	  else
	    {
	      for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
		SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
	    }
	}
      else
	{
	  if (REGNO (x) < FIRST_PSEUDO_REGISTER)
	    {
	      for (bb = 0; bb < n_basic_blocks; bb++)
		if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
		  RESET_BIT (bmap[bb], indx);
	    }
	  else
	    {
	      for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
		RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
	    }
	}

      return;

    case MEM:
      for (bb = 0; bb < n_basic_blocks; bb++)
	{
	  rtx list_entry = canon_modify_mem_list[bb];

	  while (list_entry)
	    {
	      rtx dest, dest_addr;

	      if (GET_CODE (XEXP (list_entry, 0)) == CALL_INSN)
		{
		  if (set_p)
		    SET_BIT (bmap[bb], indx);
		  else
		    RESET_BIT (bmap[bb], indx);
		  break;
		}
	      /* LIST_ENTRY must be an INSN of some kind that sets memory.
		 Examine each hunk of memory that is modified.  */

	      dest = XEXP (list_entry, 0);
	      list_entry = XEXP (list_entry, 1);
	      dest_addr = XEXP (list_entry, 0);

	      if (canon_true_dependence (dest, GET_MODE (dest), dest_addr,
					 x, rtx_addr_varies_p))
		{
		  if (set_p)
		    SET_BIT (bmap[bb], indx);
		  else
		    RESET_BIT (bmap[bb], indx);
		  break;
		}
	      list_entry = XEXP (list_entry, 1);
	    }
	}

      x = XEXP (x, 0);
      goto repeat;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return;

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call
	     needed at this level, change it into iteration.
	     This function is called enough to be worth it.  */
	  if (i == 0)
	    {
	      x = XEXP (x, i);
	      goto repeat;
	    }

	  compute_transp (XEXP (x, i), indx, bmap, set_p);
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
    }
}
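
/* Transparency in a picture (illustrative, not from a real dump): for
   X = (plus (reg 1) (reg 2)) with INDX = 3, every block containing a
   set of reg 1 or reg 2 has bit 3 changed in its BMAP row; for
   X = (mem (reg 5)), every block with a store that may alias that
   address (or containing a CALL_INSN) has bit 3 changed, and the
   address (reg 5) is then scanned like any other operand.  */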

/* Top level routine to do the dataflow analysis needed by copy/const
   propagation.  */

static void
compute_cprop_data ()
{
  compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
  compute_available (cprop_pavloc, cprop_absaltered,
		     cprop_avout, cprop_avin);
}

/* Copy/constant propagation.  */

/* Maximum number of register uses in an insn that we handle.  */
#define MAX_USES 8

/* Table of uses found in an insn.
   Allocated statically to avoid alloc/free complexity and overhead.  */
static struct reg_use reg_use_table[MAX_USES];

/* Index into `reg_use_table' while building it.  */
static int reg_use_count;

/* Set up a list of register numbers used in INSN.  The found uses are stored
   in `reg_use_table'.  `reg_use_count' is initialized to zero before entry,
   and contains the number of uses in the table upon exit.

   ??? If a register appears multiple times we will record it multiple times.
   This doesn't hurt anything but it will slow things down.  */

static void
find_used_regs (xptr, data)
     rtx *xptr;
     void *data ATTRIBUTE_UNUSED;
{
  int i, j;
  enum rtx_code code;
  const char *fmt;
  rtx x = *xptr;

  /* repeat is used to turn tail-recursion into iteration since GCC
     can't do it when there's no return value.  */
 repeat:
  if (x == 0)
    return;

  code = GET_CODE (x);
  if (REG_P (x))
    {
      if (reg_use_count == MAX_USES)
	return;

      reg_use_table[reg_use_count].reg_rtx = x;
      reg_use_count++;
    }

  /* Recursively scan the operands of this expression.  */

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call
	     needed at this level, change it into iteration.
	     This function is called enough to be worth it.  */
	  if (i == 0)
	    {
	      x = XEXP (x, 0);
	      goto repeat;
	    }

	  find_used_regs (&XEXP (x, i), data);
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  find_used_regs (&XVECEXP (x, i, j), data);
    }
}
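
/* For instance (hypothetical): calling note_uses on
   (set (reg 1) (plus (reg 2) (reg 3))) invokes find_used_regs on the
   use positions only, so reg_use_table collects reg 2 and reg 3 while
   the SET_DEST reg 1 is not recorded.  */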

/* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
   Returns non-zero if successful.  */

static int
try_replace_reg (from, to, insn)
     rtx from, to, insn;
{
  rtx note = find_reg_equal_equiv_note (insn);
  rtx src = 0;
  int success = 0;
  rtx set = single_set (insn);

  success = validate_replace_src (from, to, insn);

  /* If above failed and this is a single set, try to simplify the source of
     the set given our substitution.  We could perhaps try this for multiple
     SETs, but it probably won't buy us anything.  */
  if (!success && set != 0)
    {
      src = simplify_replace_rtx (SET_SRC (set), from, to);

      if (!rtx_equal_p (src, SET_SRC (set))
	  && validate_change (insn, &SET_SRC (set), src, 0))
	success = 1;
    }

  /* If we've failed to do replacement, have a single SET, and don't already
     have a note, add a REG_EQUAL note to not lose information.  */
  if (!success && note == 0 && set != 0)
    note = REG_NOTES (insn)
      = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));

  /* If there is already a NOTE, update the expression in it with our
     replacement.  */
  else if (note != 0)
    XEXP (note, 0) = simplify_replace_rtx (XEXP (note, 0), from, to);

  /* REG_EQUAL may get simplified into a plain register.  We don't allow
     that; remove any such note.  This ought not to happen, because previous
     code ought to synthesize a reg-reg move, but be on the safe side.  */
  if (note && REG_P (XEXP (note, 0)))
    remove_note (insn, note);

  return success;
}
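
/* A sketch (hypothetical): substituting (const_int 4) for reg 50 in

     (set (reg 60) (plus (reg 50) (const_int 1)))

   first tries the literal replacement; if the target rejects
   (plus (const_int 4) (const_int 1)), simplify_replace_rtx folds the
   source to (const_int 5) and that is validated instead.  If even that
   fails, a REG_EQUAL note recording the folded source is attached so
   the information is not lost.  */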

/* Find a set of REGNOs that are available on entry to INSN's block.  Returns
   NULL if no such set is found.  */

static struct expr *
find_avail_set (regno, insn)
     int regno;
     rtx insn;
{
  /* SET1 contains the last set found that can be returned to the caller for
     use in a substitution.  */
  struct expr *set1 = 0;

  /* Loops are not possible here.  To get a loop we would need two sets
     available at the start of the block containing INSN.  I.e. we would
     need two sets like this available at the start of the block:

       (set (reg X) (reg Y))
       (set (reg Y) (reg X))

     This cannot happen since the set of (reg Y) would have killed the
     set of (reg X) making it unavailable at the start of this block.  */
  while (1)
    {
      rtx src;
      struct expr *set = lookup_set (regno, NULL_RTX);

      /* Find a set that is available at the start of the block
	 which contains INSN.  */
      while (set)
	{
	  if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
	    break;
	  set = next_set (regno, set);
	}

      /* If no available set was found we've reached the end of the
	 (possibly empty) copy chain.  */
      if (set == 0)
	break;

      if (GET_CODE (set->expr) != SET)
	abort ();

      src = SET_SRC (set->expr);

      /* We know the set is available.
	 Now check that SRC is ANTLOC (i.e. none of the source operands
	 have changed since the start of the block).

	 If the source operand changed, we may still use it for the next
	 iteration of this loop, but we may not use it for substitutions.  */

      if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
	set1 = set;

      /* If the source of the set is anything except a register, then
	 we have reached the end of the copy chain.  */
      if (GET_CODE (src) != REG)
	break;

      /* Follow the copy chain, i.e. start another iteration of the loop
	 and see if we have an available copy into SRC.  */
      regno = REGNO (src);
    }

  /* SET1 holds the last set that was available and anticipatable at
     INSN.  */
  return set1;
}
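
/* An illustrative copy chain (hypothetical RTL):

     (set (reg 2) (const_int 7))	; available at block entry
     (set (reg 1) (reg 2))		; available copy
     ...
     insn: ... (reg 1) ...

   find_avail_set (1, insn) finds the copy into reg 1, follows the
   chain to reg 2, and returns the constant set, so the caller ends up
   propagating (const_int 7) rather than merely the copy.  */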

/* Subroutine of cprop_insn that tries to propagate constants into
   JUMP_INSNS.  INSN must be a conditional jump.  FROM is what we will try to
   replace, SRC is the constant we will try to substitute for it.  Returns
   nonzero if a change was made.  We know INSN has just a SET.  */

static int
cprop_jump (bb, insn, from, src)
     rtx insn;
     rtx from;
     rtx src;
     basic_block bb;
{
  rtx set = PATTERN (insn);
  rtx new = simplify_replace_rtx (SET_SRC (set), from, src);

  /* If no simplification can be made, then try the next
     register.  */
  if (rtx_equal_p (new, SET_SRC (set)))
    return 0;

  /* If this is now a no-op leave it that way, but update LABEL_NUSES if
     necessary.  */
  if (new == pc_rtx)
    {
      SET_SRC (set) = new;

      if (JUMP_LABEL (insn) != 0)
	--LABEL_NUSES (JUMP_LABEL (insn));
    }

  /* Otherwise, this must be a valid instruction.  */
  else if (! validate_change (insn, &SET_SRC (set), new, 0))
    return 0;

  /* If this has turned into an unconditional jump,
     then put a barrier after it so that the unreachable
     code will be deleted.  */
  if (GET_CODE (SET_SRC (set)) == LABEL_REF)
    emit_barrier_after (insn);

  run_jump_opt_after_gcse = 1;

  const_prop_count++;
  if (gcse_file != NULL)
    {
      fprintf (gcse_file,
	       "CONST-PROP: Replacing reg %d in insn %d with constant ",
	       REGNO (from), INSN_UID (insn));
      print_rtl (gcse_file, src);
      fprintf (gcse_file, "\n");
    }
  purge_dead_edges (bb);

  return 1;
}
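
/* For example (a sketch): with (const_int 0) known for reg 50,

     (set (pc) (if_then_else (eq (reg 50) (const_int 0))
			     (label_ref 23) (pc)))

   simplifies to (set (pc) (label_ref 23)), an unconditional jump, and
   a barrier is emitted after it; had the condition folded the other
   way, the result would be pc_rtx and the jump becomes a no-op.  */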

#ifdef HAVE_cc0

/* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
   for machines that have CC0.  INSN is a single set that stores into CC0;
   the insn following it is a conditional jump.  REG_USED is the use we will
   try to replace, SRC is the constant we will try to substitute for it.
   Returns nonzero if a change was made.  */

static int
cprop_cc0_jump (bb, insn, reg_used, src)
     basic_block bb;
     rtx insn;
     struct reg_use *reg_used;
     rtx src;
{
  /* First substitute in the SET_SRC of INSN, then substitute that for
     CC0 in JUMP.  */
  rtx jump = NEXT_INSN (insn);
  rtx new_src = simplify_replace_rtx (SET_SRC (PATTERN (insn)),
				      reg_used->reg_rtx, src);

  if (! cprop_jump (bb, jump, cc0_rtx, new_src))
    return 0;

  /* If we succeeded, delete the cc0 setter.  */
  PUT_CODE (insn, NOTE);
  NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
  NOTE_SOURCE_FILE (insn) = 0;

  return 1;
}
#endif
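
/* An illustrative cc0 pair (hypothetical):

     (set (cc0) (compare (reg 50) (const_int 0)))
     (set (pc) (if_then_else (lt (cc0) (const_int 0)) ...))

   With a constant known for reg 50, the setter's SET_SRC is folded
   first, cprop_jump is then asked to substitute that result for cc0 in
   the jump, and on success the now-dead setter is turned into a
   NOTE_INSN_DELETED.  */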

/* Perform constant and copy propagation on INSN.
   The result is non-zero if a change was made.  */

static int
cprop_insn (bb, insn, alter_jumps)
     basic_block bb;
     rtx insn;
     int alter_jumps;
{
  struct reg_use *reg_used;
  int changed = 0;
  rtx note;

  if (!INSN_P (insn))
    return 0;

  reg_use_count = 0;
  note_uses (&PATTERN (insn), find_used_regs, NULL);

  note = find_reg_equal_equiv_note (insn);

  /* We may win even when propagating constants into notes.  */
  if (note)
    find_used_regs (&XEXP (note, 0), NULL);

  for (reg_used = &reg_use_table[0]; reg_use_count > 0;
       reg_used++, reg_use_count--)
    {
      unsigned int regno = REGNO (reg_used->reg_rtx);
      rtx pat, src;
      struct expr *set;

      /* Ignore registers created by GCSE.
	 We do this because ...  */
      if (regno >= max_gcse_regno)
	continue;

      /* If the register has already been set in this block, there's
	 nothing we can do.  */
      if (! oprs_not_set_p (reg_used->reg_rtx, insn))
	continue;

      /* Find an assignment that sets reg_used and is available
	 at the start of the block.  */
      set = find_avail_set (regno, insn);
      if (! set)
	continue;

      pat = set->expr;
      /* ??? We might be able to handle PARALLELs.  Later.  */
      if (GET_CODE (pat) != SET)
	abort ();

      src = SET_SRC (pat);

      /* Constant propagation.  */
      if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
	  || GET_CODE (src) == SYMBOL_REF)
	{
	  /* Handle normal insns first.  */
	  if (GET_CODE (insn) == INSN
	      && try_replace_reg (reg_used->reg_rtx, src, insn))
	    {
	      changed = 1;
	      const_prop_count++;
	      if (gcse_file != NULL)
		{
		  fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
			   regno);
		  fprintf (gcse_file, "insn %d with constant ",
			   INSN_UID (insn));
		  print_rtl (gcse_file, src);
		  fprintf (gcse_file, "\n");
		}

	      /* The original insn setting reg_used may or may not now be
		 deletable.  We leave the deletion to flow.  */
	    }

	  /* Try to propagate a CONST_INT into a conditional jump.
	     We're pretty specific about what we will handle in this
	     code; we can extend it as necessary over time.

	     Right now the insn in question must look like
	       (set (pc) (if_then_else ...))  */
	  else if (alter_jumps
		   && GET_CODE (insn) == JUMP_INSN
		   && condjump_p (insn)
		   && ! simplejump_p (insn))
	    changed |= cprop_jump (bb, insn, reg_used->reg_rtx, src);

#ifdef HAVE_cc0
	  /* Similar code for machines that use a pair of CC0 setter and
	     conditional jump insn.  */
	  else if (alter_jumps
		   && GET_CODE (PATTERN (insn)) == SET
		   && SET_DEST (PATTERN (insn)) == cc0_rtx
		   && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
		   && condjump_p (NEXT_INSN (insn))
		   && ! simplejump_p (NEXT_INSN (insn))
		   && cprop_cc0_jump (bb, insn, reg_used, src))
	    {
	      changed = 1;
	      break;
	    }
#endif
	}
      else if (GET_CODE (src) == REG
	       && REGNO (src) >= FIRST_PSEUDO_REGISTER
	       && REGNO (src) != regno)
	{
	  if (try_replace_reg (reg_used->reg_rtx, src, insn))
	    {
	      changed = 1;
	      copy_prop_count++;
	      if (gcse_file != NULL)
		{
		  fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
			   regno, INSN_UID (insn));
		  fprintf (gcse_file, " with reg %d\n", REGNO (src));
		}

	      /* The original insn setting reg_used may or may not now be
		 deletable.  We leave the deletion to flow.  */
	      /* FIXME: If it turns out that the insn isn't deletable,
		 then we may have unnecessarily extended register lifetimes
		 and made things worse.  */
	    }
	}
    }

  return changed;
}

/* Forward propagate copies.  This includes copies and constants.  Return
   non-zero if a change was made.  */

static int
cprop (alter_jumps)
     int alter_jumps;
{
  int bb, changed;
  rtx insn;

  /* Note we start at block 1.  */

  changed = 0;
  for (bb = 1; bb < n_basic_blocks; bb++)
    {
      /* Reset tables used to keep track of what's still valid [since the
	 start of the block].  */
      reset_opr_set_tables ();

      for (insn = BLOCK_HEAD (bb);
	   insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
	   insn = NEXT_INSN (insn))
	if (INSN_P (insn))
	  {
	    changed |= cprop_insn (BASIC_BLOCK (bb), insn, alter_jumps);

	    /* Keep track of everything modified by this insn.  */
	    /* ??? Need to be careful w.r.t. mods done to INSN.  Don't
	       call mark_oprs_set if we turned the insn into a NOTE.  */
	    if (GET_CODE (insn) != NOTE)
	      mark_oprs_set (insn);
	  }
    }

  if (gcse_file != NULL)
    fprintf (gcse_file, "\n");

  return changed;
}

/* Perform one copy/constant propagation pass.
   F is the first insn in the function.
   PASS is the pass count.  */

static int
one_cprop_pass (pass, alter_jumps)
     int pass;
     int alter_jumps;
{
  int changed = 0;

  const_prop_count = 0;
  copy_prop_count = 0;

  alloc_set_hash_table (max_cuid);
  compute_set_hash_table ();
  if (gcse_file)
    dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
		     n_sets);
  if (n_sets > 0)
    {
      alloc_cprop_mem (n_basic_blocks, n_sets);
      compute_cprop_data ();
      changed = cprop (alter_jumps);
      free_cprop_mem ();
    }

  free_set_hash_table ();

  if (gcse_file)
    {
      fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
	       current_function_name, pass, bytes_used);
      fprintf (gcse_file, "%d const props, %d copy props\n\n",
	       const_prop_count, copy_prop_count);
    }

  return changed;
}
a65f3558 4318/* Compute PRE+LCM working variables. */
7506f491
DE
4319
4320/* Local properties of expressions. */
4321/* Nonzero for expressions that are transparent in the block. */
a65f3558 4322static sbitmap *transp;
7506f491 4323
5c35539b
RH
4324/* Nonzero for expressions that are transparent at the end of the block.
4325 This is only zero for expressions killed by abnormal critical edge
4326 created by a calls. */
a65f3558 4327static sbitmap *transpout;
5c35539b 4328
a65f3558
JL
4329/* Nonzero for expressions that are computed (available) in the block. */
4330static sbitmap *comp;
7506f491 4331
a65f3558
JL
4332/* Nonzero for expressions that are locally anticipatable in the block. */
4333static sbitmap *antloc;
7506f491 4334
a65f3558
JL
4335/* Nonzero for expressions where this block is an optimal computation
4336 point. */
4337static sbitmap *pre_optimal;
5c35539b 4338
a65f3558
JL
4339/* Nonzero for expressions which are redundant in a particular block. */
4340static sbitmap *pre_redundant;
7506f491 4341
a42cd965
AM
4342/* Nonzero for expressions which should be inserted on a specific edge. */
4343static sbitmap *pre_insert_map;
4344
4345/* Nonzero for expressions which should be deleted in a specific block. */
4346static sbitmap *pre_delete_map;
4347
4348/* Contains the edge_list returned by pre_edge_lcm. */
4349static struct edge_list *edge_list;
4350
a65f3558
JL
4351/* Redundant insns. */
4352static sbitmap pre_redundant_insns;
7506f491 4353
a65f3558 4354/* Allocate vars used for PRE analysis. */
7506f491
DE
4355
4356static void
a65f3558
JL
4357alloc_pre_mem (n_blocks, n_exprs)
4358 int n_blocks, n_exprs;
7506f491 4359{
a65f3558
JL
4360 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4361 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4362 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5faf03ae 4363
a42cd965
AM
4364 pre_optimal = NULL;
4365 pre_redundant = NULL;
4366 pre_insert_map = NULL;
4367 pre_delete_map = NULL;
4368 ae_in = NULL;
4369 ae_out = NULL;
a42cd965 4370 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
c4c81601 4371
a42cd965 4372 /* pre_insert and pre_delete are allocated later. */
7506f491
DE
4373}
4374
a65f3558 4375/* Free vars used for PRE analysis. */
7506f491
DE
4376
4377static void
a65f3558 4378free_pre_mem ()
7506f491 4379{
5a660bff
DB
4380 sbitmap_vector_free (transp);
4381 sbitmap_vector_free (comp);
bd3675fc
JL
4382
4383 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
7506f491 4384
a42cd965 4385 if (pre_optimal)
5a660bff 4386 sbitmap_vector_free (pre_optimal);
a42cd965 4387 if (pre_redundant)
5a660bff 4388 sbitmap_vector_free (pre_redundant);
a42cd965 4389 if (pre_insert_map)
5a660bff 4390 sbitmap_vector_free (pre_insert_map);
a42cd965 4391 if (pre_delete_map)
5a660bff 4392 sbitmap_vector_free (pre_delete_map);
a42cd965 4393 if (ae_in)
5a660bff 4394 sbitmap_vector_free (ae_in);
a42cd965 4395 if (ae_out)
5a660bff 4396 sbitmap_vector_free (ae_out);
a42cd965 4397
bd3675fc 4398 transp = comp = NULL;
a42cd965 4399 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
55d3f917 4400 ae_in = ae_out = NULL;
7506f491
DE
4401}

/* Top level routine to do the dataflow analysis needed by PRE.  */

static void
compute_pre_data ()
{
  sbitmap trapping_expr;
  int i;
  unsigned int ui;

  compute_local_properties (transp, comp, antloc, 0);
  sbitmap_vector_zero (ae_kill, n_basic_blocks);

  /* Collect expressions which might trap.  */
  trapping_expr = sbitmap_alloc (n_exprs);
  sbitmap_zero (trapping_expr);
  for (ui = 0; ui < expr_hash_table_size; ui++)
    {
      struct expr *e;
      for (e = expr_hash_table[ui]; e != NULL; e = e->next_same_hash)
	if (may_trap_p (e->expr))
	  SET_BIT (trapping_expr, e->bitmap_index);
    }

  /* Compute ae_kill for each basic block using:

     ~(TRANSP | COMP)

     This is significantly faster than compute_ae_kill.  */

  for (i = 0; i < n_basic_blocks; i++)
    {
      edge e;

      /* If the current block is the destination of an abnormal edge, we
	 kill all trapping expressions because we won't be able to properly
	 place the instruction on the edge.  So make them neither
	 anticipatable nor transparent.  This is fairly conservative.  */
      for (e = BASIC_BLOCK (i)->pred; e ; e = e->pred_next)
	if (e->flags & EDGE_ABNORMAL)
	  {
	    sbitmap_difference (antloc[i], antloc[i], trapping_expr);
	    sbitmap_difference (transp[i], transp[i], trapping_expr);
	    break;
	  }

      sbitmap_a_or_b (ae_kill[i], transp[i], comp[i]);
      sbitmap_not (ae_kill[i], ae_kill[i]);
    }

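  /* A worked instance of the formula above (illustrative): if
     expression 3 is neither transparent in block 7 (one of its
     operands is modified there) nor computed in block 7, then
     transp[7] | comp[7] has bit 3 clear, so ~(transp[7] | comp[7])
     has bit 3 set, i.e. block 7 kills the expression.  */
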
  edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
			    ae_kill, &pre_insert_map, &pre_delete_map);
  sbitmap_vector_free (antloc);
  antloc = NULL;
  sbitmap_vector_free (ae_kill);
  ae_kill = NULL;
  free (trapping_expr);
}

/* PRE utilities */

/* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
   block BB.

   VISITED is a pointer to a working buffer for tracking which BB's have
   been visited.  It is NULL for the top-level call.

   We treat reaching expressions that go through blocks containing the same
   reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
   2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
   2 as not reaching.  The intent is to improve the probability of finding
   only one reaching expression and to reduce register lifetimes by picking
   the closest such expression.  */

static int
pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
     basic_block occr_bb;
     struct expr *expr;
     basic_block bb;
     char *visited;
{
  edge pred;

  for (pred = bb->pred; pred != NULL; pred = pred->pred_next)
    {
      basic_block pred_bb = pred->src;

      if (pred->src == ENTRY_BLOCK_PTR
	  /* Has this predecessor already been visited?  */
	  || visited[pred_bb->index])
	;/* Nothing to do.  */

      /* Does this predecessor generate this expression?  */
      else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
	{
	  /* Is this the occurrence we're looking for?
	     Note that there's only one generating occurrence per block
	     so we just need to check the block number.  */
	  if (occr_bb == pred_bb)
	    return 1;

	  visited[pred_bb->index] = 1;
	}
      /* Ignore this predecessor if it kills the expression.  */
      else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
	visited[pred_bb->index] = 1;

      /* Neither gen nor kill.  */
      else
	{
	  visited[pred_bb->index] = 1;
	  if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
	    return 1;
	}
    }

  /* All paths have been checked.  */
  return 0;
}

/* The wrapper for pre_expr_reaches_here_p_work that ensures that any
   memory allocated for that function is returned.  */

static int
pre_expr_reaches_here_p (occr_bb, expr, bb)
     basic_block occr_bb;
     struct expr *expr;
     basic_block bb;
{
  int rval;
  char *visited = (char *) xcalloc (n_basic_blocks, 1);

  rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);

  free (visited);
  return rval;
}

/* Given an expr, generate RTL which we can insert at the end of a BB,
   or on an edge.  Set the block number of any insns generated to
   the value of BB.  */

static rtx
process_insert_insn (expr)
     struct expr *expr;
{
  rtx reg = expr->reaching_reg;
  rtx exp = copy_rtx (expr->expr);
  rtx pat;

  start_sequence ();

  /* If the expression is something that's an operand, like a constant,
     just copy it to a register.  */
  if (general_operand (exp, GET_MODE (reg)))
    emit_move_insn (reg, exp);

  /* Otherwise, make a new insn to compute this expression and make sure the
     insn will be recognized (this also adds any needed CLOBBERs).  Copy the
     expression to make sure we don't have any sharing issues.  */
  else if (insn_invalid_p (emit_insn (gen_rtx_SET (VOIDmode, reg, exp))))
    abort ();

  pat = gen_sequence ();
  end_sequence ();

  return pat;
}
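
/* A sketch of the two cases (hypothetical): for EXPR = (const_int 42)
   the sequence is a single move, (set (reg 110) (const_int 42)); for
   EXPR = (plus (reg 1) (reg 2)) a SET is generated and re-recognized,
   which may grow target-required CLOBBERs (e.g. a condition-code
   clobber on some machines) before the sequence is returned.  */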

/* Add EXPR to the end of basic block BB.

   This is used by both PRE and code hoisting.

   For PRE, we want to verify that the expr is either transparent
   or locally anticipatable in the target block.  This check makes
   no sense for code hoisting.  */

static void
insert_insn_end_bb (expr, bb, pre)
     struct expr *expr;
     basic_block bb;
     int pre;
{
  rtx insn = bb->end;
  rtx new_insn;
  rtx reg = expr->reaching_reg;
  int regno = REGNO (reg);
  rtx pat;
  int i;

  pat = process_insert_insn (expr);

  /* If the last insn is a jump, insert EXPR in front [taking care to
     handle cc0, etc. properly].  */

  if (GET_CODE (insn) == JUMP_INSN)
    {
#ifdef HAVE_cc0
      rtx note;
#endif

      /* If this is a jump table, then we can't insert stuff here.  Since
	 we know the previous real insn must be the tablejump, we insert
	 the new instruction just before the tablejump.  */
      if (GET_CODE (PATTERN (insn)) == ADDR_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
	insn = prev_real_insn (insn);

#ifdef HAVE_cc0
      /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
	 if cc0 isn't set.  */
      note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
      if (note)
	insn = XEXP (note, 0);
      else
	{
	  rtx maybe_cc0_setter = prev_nonnote_insn (insn);
	  if (maybe_cc0_setter
	      && INSN_P (maybe_cc0_setter)
	      && sets_cc0_p (PATTERN (maybe_cc0_setter)))
	    insn = maybe_cc0_setter;
	}
#endif
      /* FIXME: What if something in cc0/jump uses value set in new insn?  */
      new_insn = emit_block_insn_before (pat, insn, bb);
    }

  /* Likewise if the last insn is a call, as will happen in the presence
     of exception handling.  */
  else if (GET_CODE (insn) == CALL_INSN)
    {
      /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
	 we search backward and place the instructions before the first
	 parameter is loaded.  Do this for everyone for consistency and a
	 presumption that we'll get better code elsewhere as well.

	 It should always be the case that we can put these instructions
	 anywhere in the basic block when performing PRE optimizations.
	 Check this.  */

      if (pre
	  && !TEST_BIT (antloc[bb->index], expr->bitmap_index)
	  && !TEST_BIT (transp[bb->index], expr->bitmap_index))
	abort ();

      /* Since different machines initialize their parameter registers
	 in different orders, assume nothing.  Collect the set of all
	 parameter registers.  */
      insn = find_first_parameter_load (insn, bb->head);

      /* If we found all the parameter loads, then we want to insert
	 before the first parameter load.

	 If we did not find all the parameter loads, then we might have
	 stopped on the head of the block, which could be a CODE_LABEL.
	 If we inserted before the CODE_LABEL, then we would be putting
	 the insn in the wrong basic block.  In that case, put the insn
	 after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
      while (GET_CODE (insn) == CODE_LABEL
	     || NOTE_INSN_BASIC_BLOCK_P (insn))
	insn = NEXT_INSN (insn);

      new_insn = emit_block_insn_before (pat, insn, bb);
    }
  else
    {
      new_insn = emit_insn_after (pat, insn);
      bb->end = new_insn;
    }

  /* Keep block number table up to date.
     Note, PAT could be a multiple insn sequence, we have to make
     sure that each insn in the sequence is handled.  */
  if (GET_CODE (pat) == SEQUENCE)
    {
      for (i = 0; i < XVECLEN (pat, 0); i++)
	{
	  rtx insn = XVECEXP (pat, 0, i);

	  set_block_for_insn (insn, bb);
	  if (INSN_P (insn))
	    add_label_notes (PATTERN (insn), new_insn);

	  note_stores (PATTERN (insn), record_set_info, insn);
	}
    }
  else
    {
      add_label_notes (SET_SRC (pat), new_insn);
      set_block_for_new_insns (new_insn, bb);

      /* Keep register set table up to date.  */
      record_one_set (regno, new_insn);
    }

  gcse_create_count++;

  if (gcse_file)
    {
      fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
	       bb->index, INSN_UID (new_insn));
      fprintf (gcse_file, "copying expression %d to reg %d\n",
	       expr->bitmap_index, regno);
    }
}
4708
a42cd965
AM
4709/* Insert partially redundant expressions on edges in the CFG to make
4710 the expressions fully redundant. */
7506f491 4711
a42cd965
AM
4712static int
4713pre_edge_insert (edge_list, index_map)
4714 struct edge_list *edge_list;
7506f491
DE
4715 struct expr **index_map;
4716{
c4c81601 4717 int e, i, j, num_edges, set_size, did_insert = 0;
a65f3558
JL
4718 sbitmap *inserted;
4719
a42cd965
AM
4720 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4721 if it reaches any of the deleted expressions. */
7506f491 4722
a42cd965
AM
4723 set_size = pre_insert_map[0]->size;
4724 num_edges = NUM_EDGES (edge_list);
4725 inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4726 sbitmap_vector_zero (inserted, num_edges);
7506f491 4727
a42cd965 4728 for (e = 0; e < num_edges; e++)
7506f491
DE
4729 {
4730 int indx;
e2d2ed72 4731 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
a65f3558 4732
a65f3558 4733 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
7506f491 4734 {
a42cd965 4735 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
7506f491 4736
a65f3558 4737 for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
c4c81601
RK
4738 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4739 {
4740 struct expr *expr = index_map[j];
4741 struct occr *occr;
a65f3558 4742
c4c81601
RK
4743 /* Now look at each deleted occurence of this expression. */
4744 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4745 {
4746 if (! occr->deleted_p)
4747 continue;
4748
4749 /* Insert this expression on this edge if if it would
4750 reach the deleted occurence in BB. */
4751 if (!TEST_BIT (inserted[e], j))
4752 {
4753 rtx insn;
4754 edge eg = INDEX_EDGE (edge_list, e);
4755
4756 /* We can't insert anything on an abnormal and
4757 critical edge, so we insert the insn at the end of
4758 the previous block. There are several alternatives
4759 detailed in Morgans book P277 (sec 10.5) for
4760 handling this situation. This one is easiest for
4761 now. */
4762
4763 if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4764 insert_insn_end_bb (index_map[j], bb, 0);
4765 else
4766 {
4767 insn = process_insert_insn (index_map[j]);
4768 insert_insn_on_edge (insn, eg);
4769 }
4770
4771 if (gcse_file)
4772 {
4773 fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
e2d2ed72 4774 bb->index,
c4c81601
RK
4775 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
4776 fprintf (gcse_file, "copy expression %d\n",
4777 expr->bitmap_index);
4778 }
4779
a13d4ebf 4780 update_ld_motion_stores (expr);
c4c81601
RK
4781 SET_BIT (inserted[e], j);
4782 did_insert = 1;
4783 gcse_create_count++;
4784 }
4785 }
4786 }
7506f491
DE
4787 }
4788 }
5faf03ae 4789
5a660bff 4790 sbitmap_vector_free (inserted);
a42cd965 4791 return did_insert;
7506f491
DE
4792}

/* Copy the result of INSN to REG.  INDX is the expression number.  */

static void
pre_insert_copy_insn (expr, insn)
     struct expr *expr;
     rtx insn;
{
  rtx reg = expr->reaching_reg;
  int regno = REGNO (reg);
  int indx = expr->bitmap_index;
  rtx set = single_set (insn);
  rtx new_insn;
  basic_block bb = BLOCK_FOR_INSN (insn);

  if (!set)
    abort ();

  new_insn = emit_insn_after (gen_move_insn (reg, SET_DEST (set)), insn);

  /* Keep block number table up to date.  */
  set_block_for_new_insns (new_insn, bb);

  /* Keep register set table up to date.  */
  record_one_set (regno, new_insn);
  if (insn == bb->end)
    bb->end = new_insn;

  gcse_create_count++;

  if (gcse_file)
    fprintf (gcse_file,
	     "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
	     BLOCK_NUM (insn), INSN_UID (new_insn), indx,
	     INSN_UID (insn), regno);
  update_ld_motion_stores (expr);
}

/* Copy available expressions that reach the redundant expression
   to `reaching_reg'.  */

static void
pre_insert_copies ()
{
  unsigned int i;
  struct expr *expr;
  struct occr *occr;
  struct occr *avail;

  /* For each available expression in the table, copy the result to
     `reaching_reg' if the expression reaches a deleted one.

     ??? The current algorithm is rather brute force.
     Need to do some profiling.  */

  for (i = 0; i < expr_hash_table_size; i++)
    for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
      {
	/* If the basic block isn't reachable, PPOUT will be TRUE.  However,
	   we don't want to insert a copy here because the expression may not
	   really be redundant.  So only insert an insn if the expression was
	   deleted.  This test also avoids further processing if the
	   expression wasn't deleted anywhere.  */
	if (expr->reaching_reg == NULL)
	  continue;

	for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	  {
	    if (! occr->deleted_p)
	      continue;

	    for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
	      {
		rtx insn = avail->insn;

		/* No need to handle this one if handled already.  */
		if (avail->copied_p)
		  continue;

		/* Don't handle this one if it's a redundant one.  */
		if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
		  continue;

		/* Or if the expression doesn't reach the deleted one.  */
		if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
					       expr,
					       BLOCK_FOR_INSN (occr->insn)))
		  continue;

		/* Copy the result of avail to reaching_reg.  */
		pre_insert_copy_insn (expr, insn);
		avail->copied_p = 1;
	      }
	  }
      }
}

/* Delete redundant computations.
   Deletion is done by changing the insn to copy the `reaching_reg' of
   the expression into the result of the SET.  It is left to later passes
   (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.

   Returns non-zero if a change is made.  */

static int
pre_delete ()
{
  unsigned int i;
  int changed;
  struct expr *expr;
  struct occr *occr;

  changed = 0;
  for (i = 0; i < expr_hash_table_size; i++)
    for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
      {
	int indx = expr->bitmap_index;

	/* We only need to search antic_occr since we require
	   ANTLOC != 0.  */

	for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	  {
	    rtx insn = occr->insn;
	    rtx set;
	    basic_block bb = BLOCK_FOR_INSN (insn);

	    if (TEST_BIT (pre_delete_map[bb->index], indx))
	      {
		set = single_set (insn);
		if (! set)
		  abort ();

		/* Create a pseudo-reg to store the result of reaching
		   expressions into.  Get the mode for the new pseudo from
		   the mode of the original destination pseudo.  */
		if (expr->reaching_reg == NULL)
		  expr->reaching_reg
		    = gen_reg_rtx (GET_MODE (SET_DEST (set)));

		/* In theory this should never fail since we're creating
		   a reg->reg copy.

		   However, on the x86 some of the movXX patterns actually
		   contain clobbers of scratch regs.  This may cause the
		   insn created by validate_change to not match any pattern
		   and thus cause validate_change to fail.  */
		if (validate_change (insn, &SET_SRC (set),
				     expr->reaching_reg, 0))
		  {
		    occr->deleted_p = 1;
		    SET_BIT (pre_redundant_insns, INSN_CUID (insn));
		    changed = 1;
		    gcse_subst_count++;
		  }

		if (gcse_file)
		  {
		    fprintf (gcse_file,
			     "PRE: redundant insn %d (expression %d) in ",
			     INSN_UID (insn), indx);
		    fprintf (gcse_file, "bb %d, reaching reg is %d\n",
			     bb->index, REGNO (expr->reaching_reg));
		  }
	      }
	  }
      }

  return changed;
}
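
/* A sketch of the rewrite performed above (hypothetical): a redundant

     (set (reg 5) (plus (reg 1) (reg 2)))

   in a block whose pre_delete_map bit is set becomes

     (set (reg 5) (reg 110))

   where reg 110 is the expression's reaching_reg; pre_edge_insert and
   pre_insert_copies then ensure that reg 110 holds the value of
   (plus (reg 1) (reg 2)) on every path into the block.  */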

/* Perform GCSE optimizations using PRE.
   This is called by one_pre_gcse_pass after all the dataflow analysis
   has been done.

   This is based on the original Morel-Renvoise paper, Fred Chow's thesis,
   and lazy code motion from Knoop, Ruthing and Steffen as described in
   Advanced Compiler Design and Implementation.

   ??? A new pseudo reg is created to hold the reaching expression.  The nice
   thing about the classical approach is that it would try to use an existing
   reg.  If the register can't be adequately optimized [i.e. we introduce
   reload problems], one could add a pass here to propagate the new register
   through the block.

   ??? We don't handle single sets in PARALLELs because we're [currently] not
   able to copy the rest of the parallel when we insert copies to create full
   redundancies from partial redundancies.  However, there's no reason why we
   can't handle PARALLELs in the cases where there are no partial
   redundancies.  */

static int
pre_gcse ()
{
  unsigned int i;
  int did_insert, changed;
  struct expr **index_map;
  struct expr *expr;

  /* Compute a mapping from expression number (`bitmap_index') to
     hash table entry.  */

  index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
  for (i = 0; i < expr_hash_table_size; i++)
    for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
      index_map[expr->bitmap_index] = expr;

  /* Reset bitmap used to track which insns are redundant.  */
  pre_redundant_insns = sbitmap_alloc (max_cuid);
  sbitmap_zero (pre_redundant_insns);

  /* Delete the redundant insns first so that
     - we know what register to use for the new insns and for the other
       ones with reaching expressions
     - we know which insns are redundant when we go to create copies  */

  changed = pre_delete ();

  did_insert = pre_edge_insert (edge_list, index_map);

  /* In other places with reaching expressions, copy the expression to the
     specially allocated pseudo-reg that reaches the redundant expr.  */
  pre_insert_copies ();
  if (did_insert)
    {
      commit_edge_insertions ();
      changed = 1;
    }

  free (index_map);
  free (pre_redundant_insns);
  return changed;
}
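
/* A note on the ordering above (my summary, not from the original
   comments): pre_edge_insert only queues the new insns on the edges
   in EDGE_LIST; commit_edge_insertions is what actually emits them,
   splitting an edge into a new basic block when the insn cannot live
   at either end of it.  That is also why CHANGED is set whenever
   DID_INSERT is, even if pre_delete itself made no substitutions.  */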

/* Top level routine to perform one PRE GCSE pass.

   Return non-zero if a change was made.  */

static int
one_pre_gcse_pass (pass)
     int pass;
{
  int changed = 0;

  gcse_subst_count = 0;
  gcse_create_count = 0;

  alloc_expr_hash_table (max_cuid);
  add_noreturn_fake_exit_edges ();
  if (flag_gcse_lm)
    compute_ld_motion_mems ();

  compute_expr_hash_table ();
  trim_ld_motion_mems ();
  if (gcse_file)
    dump_hash_table (gcse_file, "Expression", expr_hash_table,
                     expr_hash_table_size, n_exprs);

  if (n_exprs > 0)
    {
      alloc_pre_mem (n_basic_blocks, n_exprs);
      compute_pre_data ();
      changed |= pre_gcse ();
      free_edge_list (edge_list);
      free_pre_mem ();
    }

  free_ldst_mems ();
  remove_fake_edges ();
  free_expr_hash_table ();

  if (gcse_file)
    {
      fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
               current_function_name, pass, bytes_used);
      fprintf (gcse_file, "%d substs, %d insns created\n",
               gcse_subst_count, gcse_create_count);
    }

  return changed;
}

/* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
   If notes are added to an insn which references a CODE_LABEL, the
   LABEL_NUSES count is incremented.  We have to add REG_LABEL notes,
   because the following loop optimization pass requires them.  */

/* ??? This is very similar to the loop.c add_label_notes function.  We
   could probably share code here.  */

/* ??? If there was a jump optimization pass after gcse and before loop,
   then we would not need to do this here, because jump would add the
   necessary REG_LABEL notes.  */

static void
add_label_notes (x, insn)
     rtx x;
     rtx insn;
{
  enum rtx_code code = GET_CODE (x);
  int i, j;
  const char *fmt;

  if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
    {
      /* This code used to ignore labels that referred to dispatch tables to
         avoid flow generating (slightly) worse code.

         We no longer ignore such label references (see LABEL_REF handling in
         mark_jump_label for additional information).  */

      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
                                            REG_NOTES (insn));
      if (LABEL_P (XEXP (x, 0)))
        LABEL_NUSES (XEXP (x, 0))++;
      return;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
        add_label_notes (XEXP (x, i), insn);
      else if (fmt[i] == 'E')
        for (j = XVECLEN (x, i) - 1; j >= 0; j--)
          add_label_notes (XVECEXP (x, i, j), insn);
    }
}

/* Compute transparent outgoing information for each block.

   An expression is transparent to an edge unless it is killed by
   the edge itself.  This can only happen with abnormal control flow,
   when the edge is traversed through a call.  This happens with
   non-local labels and exceptions.

   This would not be necessary if we split the edge.  While this is
   normally impossible for abnormal critical edges, with some effort
   it should be possible with exception handling, since we still have
   control over which handler should be invoked.  But due to increased
   EH table sizes, this may not be worthwhile.  */

static void
compute_transpout ()
{
  int bb;
  unsigned int i;
  struct expr *expr;

  sbitmap_vector_ones (transpout, n_basic_blocks);

  for (bb = 0; bb < n_basic_blocks; ++bb)
    {
      /* Note that flow inserted a nop at the end of basic blocks that
         end in call instructions for reasons other than abnormal
         control flow.  */
      if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
        continue;

      for (i = 0; i < expr_hash_table_size; i++)
        for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
          if (GET_CODE (expr->expr) == MEM)
            {
              if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
                  && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
                continue;

              /* ??? Optimally, we would use interprocedural alias
                 analysis to determine if this mem is actually killed
                 by this call.  */
              RESET_BIT (transpout[bb], expr->bitmap_index);
            }
    }
}
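
/* My reading of why only MEMs are reset above (not an authoritative
   claim): an expression built from pseudo registers can only change
   when some insn inside a block sets one of its operands, which
   ordinary transparency already models; a MEM, however, can be
   overwritten by whatever an abnormal call edge transfers control to,
   so unless its address provably refers to constant-pool data it must
   be treated as killed on the outgoing edges of a block ending in a
   call.  */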

/* Removal of useless null pointer checks */

/* Called via note_stores.  X is set by SETTER.  If X is a register we must
   invalidate nonnull_local and set nonnull_killed.  DATA is really a
   `null_pointer_info *'.

   We ignore hard registers.  */

static void
invalidate_nonnull_info (x, setter, data)
     rtx x;
     rtx setter ATTRIBUTE_UNUSED;
     void *data;
{
  unsigned int regno;
  struct null_pointer_info *npi = (struct null_pointer_info *) data;

  while (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  /* Ignore anything that is not a register or is a hard register.  */
  if (GET_CODE (x) != REG
      || REGNO (x) < npi->min_reg
      || REGNO (x) >= npi->max_reg)
    return;

  regno = REGNO (x) - npi->min_reg;

  RESET_BIT (npi->nonnull_local[npi->current_block], regno);
  SET_BIT (npi->nonnull_killed[npi->current_block], regno);
}
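
/* For example (hypothetical numbers): with npi->min_reg == 64, a set
   of (reg 70) in the block being scanned clears bit 6 of that block's
   nonnull_local and sets bit 6 of its nonnull_killed; the bitmaps are
   indexed by REGNO relative to the current register window, not by
   absolute register number.  */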

/* Do null-pointer check elimination for the registers indicated in
   NPI.  NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
   they are not our responsibility to free.  */

static void
delete_null_pointer_checks_1 (delete_list, block_reg, nonnull_avin,
                              nonnull_avout, npi)
     varray_type *delete_list;
     unsigned int *block_reg;
     sbitmap *nonnull_avin;
     sbitmap *nonnull_avout;
     struct null_pointer_info *npi;
{
  int bb;
  int current_block;
  sbitmap *nonnull_local = npi->nonnull_local;
  sbitmap *nonnull_killed = npi->nonnull_killed;

  /* Compute local properties, nonnull and killed.  A register will have
     the nonnull property if at the end of the current block its value is
     known to be nonnull.  The killed property indicates that somewhere in
     the block any information we had about the register is killed.

     Note that a register can have both properties in a single block.  That
     indicates that it's killed, then later in the block a new value is
     computed.  */
  sbitmap_vector_zero (nonnull_local, n_basic_blocks);
  sbitmap_vector_zero (nonnull_killed, n_basic_blocks);

  for (current_block = 0; current_block < n_basic_blocks; current_block++)
    {
      rtx insn, stop_insn;

      /* Set the current block for invalidate_nonnull_info.  */
      npi->current_block = current_block;

      /* Scan each insn in the basic block looking for memory references and
         register sets.  */
      stop_insn = NEXT_INSN (BLOCK_END (current_block));
      for (insn = BLOCK_HEAD (current_block);
           insn != stop_insn;
           insn = NEXT_INSN (insn))
        {
          rtx set;
          rtx reg;

          /* Ignore anything that is not a normal insn.  */
          if (! INSN_P (insn))
            continue;

          /* Basically ignore anything that is not a simple SET.  We do have
             to make sure to invalidate nonnull_local and set nonnull_killed
             for such insns though.  */
          set = single_set (insn);
          if (!set)
            {
              note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
              continue;
            }

          /* See if we've got a usable memory load.  We handle it first
             in case it uses its address register as a dest (which kills
             the nonnull property).  */
          if (GET_CODE (SET_SRC (set)) == MEM
              && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
              && REGNO (reg) >= npi->min_reg
              && REGNO (reg) < npi->max_reg)
            SET_BIT (nonnull_local[current_block],
                     REGNO (reg) - npi->min_reg);

          /* Now invalidate stuff clobbered by this insn.  */
          note_stores (PATTERN (insn), invalidate_nonnull_info, npi);

          /* And handle stores; we do these last since any sets in INSN can
             not kill the nonnull property if it is derived from a MEM
             appearing in a SET_DEST.  */
          if (GET_CODE (SET_DEST (set)) == MEM
              && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
              && REGNO (reg) >= npi->min_reg
              && REGNO (reg) < npi->max_reg)
            SET_BIT (nonnull_local[current_block],
                     REGNO (reg) - npi->min_reg);
        }
    }

  /* Now compute global properties based on the local properties.  This
     is a classic global availability algorithm.  */
  compute_available (nonnull_local, nonnull_killed,
                     nonnull_avout, nonnull_avin);

  /* Now look at each bb and see if it ends with a compare of a value
     against zero.  */
  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      rtx last_insn = BLOCK_END (bb);
      rtx condition, earliest;
      int compare_and_branch;

      /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
         since BLOCK_REG[BB] is zero if this block did not end with a
         comparison against zero, this condition works.  */
      if (block_reg[bb] < npi->min_reg
          || block_reg[bb] >= npi->max_reg)
        continue;

      /* LAST_INSN is a conditional jump.  Get its condition.  */
      condition = get_condition (last_insn, &earliest);

      /* If we can't determine the condition then skip.  */
      if (! condition)
        continue;

      /* Is the register known to have a nonzero value?  */
      if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
        continue;

      /* Try to compute whether the compare/branch at the end of the block
         is one or two instructions.  */
      if (earliest == last_insn)
        compare_and_branch = 1;
      else if (earliest == prev_nonnote_insn (last_insn))
        compare_and_branch = 2;
      else
        continue;

      /* We know the register in this comparison is nonnull at exit from
         this block.  We can optimize this comparison.  */
      if (GET_CODE (condition) == NE)
        {
          rtx new_jump;

          new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
                                            last_insn);
          JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
          LABEL_NUSES (JUMP_LABEL (new_jump))++;
          emit_barrier_after (new_jump);
        }

      if (!*delete_list)
        VARRAY_RTX_INIT (*delete_list, 10, "delete_list");

      VARRAY_PUSH_RTX (*delete_list, last_insn);
      if (compare_and_branch == 2)
        VARRAY_PUSH_RTX (*delete_list, earliest);

      /* Don't check this block again.  (Note that BLOCK_END is
         invalid here; we deleted the last instruction in the
         block.)  */
      block_reg[bb] = 0;
    }
}
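
/* A hypothetical source-level example of what gets removed (names
   made up for illustration):

       p->x = 0;      <- (mem (reg p)) proves `p' nonnull here
       ...            <- nothing assigns to `p'
       if (p == 0)    <- queued for deletion; `p' cannot be zero
         abort ();

   The store through `p' sets the nonnull_local bit for p's register,
   compute_available propagates it forward, and an EQ/NE test against
   zero at the end of a block whose nonnull_avout bit is set is then
   resolved at compile time.  */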

/* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
   at compile time.

   This is conceptually similar to global constant/copy propagation and
   classic global CSE (it even uses the same dataflow equations as cprop).

   If a register is used as a memory address with the form (mem (reg)), then
   we know that REG can not be zero at that point in the program.  Any
   instruction which sets REG "kills" this property.

   So, if every path leading to a conditional branch has an available memory
   reference of that form, then we know the register can not have the value
   zero at the conditional branch.

   So we merely need to compute the local properties and propagate that data
   around the cfg, then optimize where possible.

   We run this pass two times.  Once before CSE, then again after CSE.  This
   has proven to be the most profitable approach.  It is rare for new
   optimization opportunities of this nature to appear after the first CSE
   pass.

   This could probably be integrated with global cprop with a little work.  */

void
delete_null_pointer_checks (f)
     rtx f ATTRIBUTE_UNUSED;
{
  sbitmap *nonnull_avin, *nonnull_avout;
  unsigned int *block_reg;
  varray_type delete_list = NULL;
  int bb;
  int reg;
  int regs_per_pass;
  int max_reg;
  unsigned int i;
  struct null_pointer_info npi;

  /* If we have only a single block, then there's nothing to do.  */
  if (n_basic_blocks <= 1)
    return;

  /* Trying to perform global optimizations on flow graphs which have
     a high connectivity will take a long time and is unlikely to be
     particularly useful.

     In normal circumstances a cfg should have about twice as many edges
     as blocks.  But we do not want to punish small functions which have
     a couple switch statements.  So we require a relatively large number
     of basic blocks and the ratio of edges to blocks to be high.  */
  if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
    return;

  /* We need four bitmaps, each with a bit for each register in each
     basic block.  */
  max_reg = max_reg_num ();
  regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);

  /* Allocate bitmaps to hold local and global properties.  */
  npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
  npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
  nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
  nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);

  /* Go through the basic blocks, seeing whether or not each block
     ends with a conditional branch whose condition is a comparison
     against zero.  Record the register compared in BLOCK_REG.  */
  block_reg = (unsigned int *) xcalloc (n_basic_blocks, sizeof (int));
  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      rtx last_insn = BLOCK_END (bb);
      rtx condition, earliest, reg;

      /* We only want conditional branches.  */
      if (GET_CODE (last_insn) != JUMP_INSN
          || !any_condjump_p (last_insn)
          || !onlyjump_p (last_insn))
        continue;

      /* LAST_INSN is a conditional jump.  Get its condition.  */
      condition = get_condition (last_insn, &earliest);

      /* If we were unable to get the condition, or it is not an equality
         comparison against zero then there's nothing we can do.  */
      if (!condition
          || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
          || GET_CODE (XEXP (condition, 1)) != CONST_INT
          || (XEXP (condition, 1)
              != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
        continue;

      /* We must be checking a register against zero.  */
      reg = XEXP (condition, 0);
      if (GET_CODE (reg) != REG)
        continue;

      block_reg[bb] = REGNO (reg);
    }

  /* Go through the algorithm for each block of registers.  */
  for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
    {
      npi.min_reg = reg;
      npi.max_reg = MIN (reg + regs_per_pass, max_reg);
      delete_null_pointer_checks_1 (&delete_list, block_reg, nonnull_avin,
                                    nonnull_avout, &npi);
    }

  /* Now delete the instructions all at once.  This breaks the CFG.  */
  if (delete_list)
    {
      for (i = 0; i < VARRAY_ACTIVE_SIZE (delete_list); i++)
        delete_insn (VARRAY_RTX (delete_list, i));
      VARRAY_FREE (delete_list);
    }

  /* Free the table of registers compared at the end of every block.  */
  free (block_reg);

  /* Free bitmaps.  */
  sbitmap_vector_free (npi.nonnull_local);
  sbitmap_vector_free (npi.nonnull_killed);
  sbitmap_vector_free (nonnull_avin);
  sbitmap_vector_free (nonnull_avout);
}
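
/* An illustration of the register windowing above, with made-up
   numbers: if max_reg is 10000 and get_bitmap_width allots room for
   2048 registers' worth of bitmaps, delete_null_pointer_checks_1 runs
   five times, first on registers [FIRST_PSEUDO_REGISTER,
   FIRST_PSEUDO_REGISTER + 2048), then on the next 2048, and so on,
   reusing the same four bitmap vectors each pass instead of
   allocating n_basic_blocks * max_reg bits at once.  */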

/* Code Hoisting variables and subroutines.  */

/* Very busy expressions.  */
static sbitmap *hoist_vbein;
static sbitmap *hoist_vbeout;

/* Hoistable expressions.  */
static sbitmap *hoist_exprs;

/* Dominator bitmaps.  */
static sbitmap *dominators;

/* ??? We could compute post dominators and run this algorithm in
   reverse to perform tail merging, doing so would probably be
   more effective than the tail merging code in jump.c.

   It's unclear if tail merging could be run in parallel with
   code hoisting.  It would be nice.  */

/* Allocate vars used for code hoisting analysis.  */

static void
alloc_code_hoist_mem (n_blocks, n_exprs)
     int n_blocks, n_exprs;
{
  antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
  transp = sbitmap_vector_alloc (n_blocks, n_exprs);
  comp = sbitmap_vector_alloc (n_blocks, n_exprs);

  hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
  hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
  hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
  transpout = sbitmap_vector_alloc (n_blocks, n_exprs);

  dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
}

/* Free vars used for code hoisting analysis.  */

static void
free_code_hoist_mem ()
{
  sbitmap_vector_free (antloc);
  sbitmap_vector_free (transp);
  sbitmap_vector_free (comp);

  sbitmap_vector_free (hoist_vbein);
  sbitmap_vector_free (hoist_vbeout);
  sbitmap_vector_free (hoist_exprs);
  sbitmap_vector_free (transpout);

  sbitmap_vector_free (dominators);
}

/* Compute the very busy expressions at entry/exit from each block.

   An expression is very busy if all paths from a given point
   compute the expression.  */

static void
compute_code_hoist_vbeinout ()
{
  int bb, changed, passes;

  sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
  sbitmap_vector_zero (hoist_vbein, n_basic_blocks);

  passes = 0;
  changed = 1;

  while (changed)
    {
      changed = 0;

      /* We scan the blocks in the reverse order to speed up
         the convergence.  */
      for (bb = n_basic_blocks - 1; bb >= 0; bb--)
        {
          changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
                                           hoist_vbeout[bb], transp[bb]);
          if (bb != n_basic_blocks - 1)
            sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
        }

      passes++;
    }

  if (gcse_file)
    fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
}
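
/* For reference, the two steps in the loop above implement the usual
   very-busy-expression equations, written informally:

     hoist_vbein[bb]  = antloc[bb] | (hoist_vbeout[bb] & transp[bb])
     hoist_vbeout[bb] = intersection of hoist_vbein over bb's successors

   sbitmap_a_or_b_and_c performs the first step and
   sbitmap_intersection_of_succs the second; the last block's vbeout
   is left at zero rather than intersected with the exit block.  */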

/* Top level routine to do the dataflow analysis needed by code hoisting.  */

static void
compute_code_hoist_data ()
{
  compute_local_properties (transp, comp, antloc, 0);
  compute_transpout ();
  compute_code_hoist_vbeinout ();
  calculate_dominance_info (NULL, dominators, CDI_DOMINATORS);
  if (gcse_file)
    fprintf (gcse_file, "\n");
}

/* Determine if the expression identified by EXPR_INDEX would
   reach BB unimpaired if it was placed at the end of EXPR_BB.

   It's unclear exactly what Muchnick meant by "unimpaired".  It seems
   to me that the expression must either be computed or transparent in
   *every* block in the path(s) from EXPR_BB to BB.  Any other definition
   would allow the expression to be hoisted out of loops, even if
   the expression wasn't a loop invariant.

   Contrast this to reachability for PRE where an expression is
   considered reachable if *any* path reaches instead of *all*
   paths.  */

static int
hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
     basic_block expr_bb;
     int expr_index;
     basic_block bb;
     char *visited;
{
  edge pred;
  int visited_allocated_locally = 0;

  if (visited == NULL)
    {
      visited_allocated_locally = 1;
      visited = xcalloc (n_basic_blocks, 1);
    }

  for (pred = bb->pred; pred != NULL; pred = pred->pred_next)
    {
      basic_block pred_bb = pred->src;

      if (pred->src == ENTRY_BLOCK_PTR)
        break;
      else if (visited[pred_bb->index])
        continue;

      /* Does this predecessor generate this expression?  */
      else if (TEST_BIT (comp[pred_bb->index], expr_index))
        break;
      else if (! TEST_BIT (transp[pred_bb->index], expr_index))
        break;

      /* Not killed.  */
      else
        {
          visited[pred_bb->index] = 1;
          if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
                                           pred_bb, visited))
            break;
        }
    }
  if (visited_allocated_locally)
    free (visited);

  return (pred == NULL);
}

/* Actually perform code hoisting.  */

static void
hoist_code ()
{
  int bb, dominated;
  unsigned int i;
  struct expr **index_map;
  struct expr *expr;

  sbitmap_vector_zero (hoist_exprs, n_basic_blocks);

  /* Compute a mapping from expression number (`bitmap_index') to
     hash table entry.  */

  index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
  for (i = 0; i < expr_hash_table_size; i++)
    for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
      index_map[expr->bitmap_index] = expr;

  /* Walk over each basic block looking for potentially hoistable
     expressions, nothing gets hoisted from the entry block.  */
  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      int found = 0;
      int insn_inserted_p;

      /* Examine each expression that is very busy at the exit of this
         block.  These are the potentially hoistable expressions.  */
      for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
        {
          int hoistable = 0;

          if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
            {
              /* We've found a potentially hoistable expression, now
                 we look at every block BB dominates to see if it
                 computes the expression.  */
              for (dominated = 0; dominated < n_basic_blocks; dominated++)
                {
                  /* Ignore self dominance.  */
                  if (bb == dominated
                      || ! TEST_BIT (dominators[dominated], bb))
                    continue;

                  /* We've found a dominated block, now see if it computes
                     the busy expression and whether or not moving that
                     expression to the "beginning" of that block is safe.  */
                  if (!TEST_BIT (antloc[dominated], i))
                    continue;

                  /* Note if the expression would reach the dominated block
                     unimpaired if it was placed at the end of BB.

                     Keep track of how many times this expression is hoistable
                     from a dominated block into BB.  */
                  if (hoist_expr_reaches_here_p (BASIC_BLOCK (bb), i,
                                                 BASIC_BLOCK (dominated), NULL))
                    hoistable++;
                }

              /* If we found more than one hoistable occurrence of this
                 expression, then note it in the bitmap of expressions to
                 hoist.  It makes no sense to hoist things which are computed
                 in only one BB, and doing so tends to pessimize register
                 allocation.  One could increase this value to try harder
                 to avoid any possible code expansion due to register
                 allocation issues; however experiments have shown that
                 the vast majority of hoistable expressions are only movable
                 from two successors, so raising this threshold is likely
                 to nullify any benefit we get from code hoisting.  */
              if (hoistable > 1)
                {
                  SET_BIT (hoist_exprs[bb], i);
                  found = 1;
                }
            }
        }

      /* If we found nothing to hoist, then quit now.  */
      if (! found)
        continue;

      /* Loop over all the hoistable expressions.  */
      for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
        {
          /* We want to insert the expression into BB only once, so
             note when we've inserted it.  */
          insn_inserted_p = 0;

          /* These tests should be the same as the tests above.  */
          if (TEST_BIT (hoist_vbeout[bb], i))
            {
              /* We've found a potentially hoistable expression, now
                 we look at every block BB dominates to see if it
                 computes the expression.  */
              for (dominated = 0; dominated < n_basic_blocks; dominated++)
                {
                  /* Ignore self dominance.  */
                  if (bb == dominated
                      || ! TEST_BIT (dominators[dominated], bb))
                    continue;

                  /* We've found a dominated block, now see if it computes
                     the busy expression and whether or not moving that
                     expression to the "beginning" of that block is safe.  */
                  if (!TEST_BIT (antloc[dominated], i))
                    continue;

                  /* The expression is computed in the dominated block and
                     it would be safe to compute it at the start of the
                     dominated block.  Now we have to determine if the
                     expression would reach the dominated block if it was
                     placed at the end of BB.  */
                  if (hoist_expr_reaches_here_p (BASIC_BLOCK (bb), i,
                                                 BASIC_BLOCK (dominated), NULL))
                    {
                      struct expr *expr = index_map[i];
                      struct occr *occr = expr->antic_occr;
                      rtx insn;
                      rtx set;

                      /* Find the right occurrence of this expression.  */
                      while (occr && BLOCK_NUM (occr->insn) != dominated)
                        occr = occr->next;

                      /* Should never happen.  */
                      if (!occr)
                        abort ();

                      insn = occr->insn;

                      set = single_set (insn);
                      if (! set)
                        abort ();

                      /* Create a pseudo-reg to store the result of reaching
                         expressions into.  Get the mode for the new pseudo
                         from the mode of the original destination pseudo.  */
                      if (expr->reaching_reg == NULL)
                        expr->reaching_reg
                          = gen_reg_rtx (GET_MODE (SET_DEST (set)));

                      /* In theory this should never fail since we're creating
                         a reg->reg copy.

                         However, on the x86 some of the movXX patterns
                         actually contain clobbers of scratch regs.  This may
                         cause the insn created by validate_change to not
                         match any pattern and thus cause validate_change to
                         fail.  */
                      if (validate_change (insn, &SET_SRC (set),
                                           expr->reaching_reg, 0))
                        {
                          occr->deleted_p = 1;
                          if (!insn_inserted_p)
                            {
                              insert_insn_end_bb (index_map[i],
                                                  BASIC_BLOCK (bb), 0);
                              insn_inserted_p = 1;
                            }
                        }
                    }
                }
            }
        }
    }

  free (index_map);
}

/* Top level routine to perform one code hoisting (aka unification) pass.

   Return non-zero if a change was made.  */

static int
one_code_hoisting_pass ()
{
  int changed = 0;

  alloc_expr_hash_table (max_cuid);
  compute_expr_hash_table ();
  if (gcse_file)
    dump_hash_table (gcse_file, "Code Hoisting Expressions", expr_hash_table,
                     expr_hash_table_size, n_exprs);

  if (n_exprs > 0)
    {
      alloc_code_hoist_mem (n_basic_blocks, n_exprs);
      compute_code_hoist_data ();
      hoist_code ();
      free_code_hoist_mem ();
    }

  free_expr_hash_table ();

  return changed;
}

/* Here we provide the things required to do store motion towards
   the exit.  In order for this to be effective, gcse also needed to
   be taught how to move a load when it is killed only by a store to
   itself.

            int i;
            float a[10];

            void foo(float scale)
            {
              for (i=0; i<10; i++)
                a[i] *= scale;
            }

   'i' is both loaded and stored to in the loop.  Normally, gcse cannot move
   the load out since it's live around the loop, and stored at the bottom
   of the loop.

   The 'Load Motion' referred to and implemented in this file is
   an enhancement to gcse which when using edge based lcm, recognizes
   this situation and allows gcse to move the load out of the loop.

   Once gcse has hoisted the load, store motion can then push this
   load towards the exit, and we end up with no loads or stores of 'i'
   in the loop.  */

/* This will search the ldst list for a matching expression.  If it
   doesn't find one, we create one and initialize it.  */

static struct ls_expr *
ldst_entry (x)
     rtx x;
{
  struct ls_expr * ptr;

  for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
    if (expr_equiv_p (ptr->pattern, x))
      break;

  if (!ptr)
    {
      ptr = (struct ls_expr *) xmalloc (sizeof (struct ls_expr));

      ptr->next = pre_ldst_mems;
      ptr->expr = NULL;
      ptr->pattern = x;
      ptr->loads = NULL_RTX;
      ptr->stores = NULL_RTX;
      ptr->reaching_reg = NULL_RTX;
      ptr->invalid = 0;
      ptr->index = 0;
      ptr->hash_index = 0;
      pre_ldst_mems = ptr;
    }

  return ptr;
}

/* Free up an individual ldst entry.  */

static void
free_ldst_entry (ptr)
     struct ls_expr * ptr;
{
  free_INSN_LIST_list (&ptr->stores);
  free_INSN_LIST_list (&ptr->loads);
  free (ptr);
}

/* Free up all memory associated with the ldst list.  */

static void
free_ldst_mems ()
{
  while (pre_ldst_mems)
    {
      struct ls_expr * tmp = pre_ldst_mems;

      pre_ldst_mems = pre_ldst_mems->next;

      free_ldst_entry (tmp);
    }

  pre_ldst_mems = NULL;
}

/* Dump debugging info about the ldst list.  */

static void
print_ldst_list (file)
     FILE * file;
{
  struct ls_expr * ptr;

  fprintf (file, "LDST list: \n");

  for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
    {
      fprintf (file, " Pattern (%3d): ", ptr->index);

      print_rtl (file, ptr->pattern);

      fprintf (file, "\n Loads : ");

      if (ptr->loads)
        print_rtl (file, ptr->loads);
      else
        fprintf (file, "(nil)");

      fprintf (file, "\n Stores : ");

      if (ptr->stores)
        print_rtl (file, ptr->stores);
      else
        fprintf (file, "(nil)");

      fprintf (file, "\n\n");
    }

  fprintf (file, "\n");
}

/* Return the ldst list entry for X if X is in the list of load/store
   only expressions and has not been invalidated, else NULL.  */

static struct ls_expr *
find_rtx_in_ldst (x)
     rtx x;
{
  struct ls_expr * ptr;

  for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
    if (expr_equiv_p (ptr->pattern, x) && ! ptr->invalid)
      return ptr;

  return NULL;
}

/* Assign each element of the list of mems a monotonically increasing value.  */

static int
enumerate_ldsts ()
{
  struct ls_expr * ptr;
  int n = 0;

  for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
    ptr->index = n++;

  return n;
}

/* Return first item in the list.  */

static inline struct ls_expr *
first_ls_expr ()
{
  return pre_ldst_mems;
}

/* Return the next item in the list after the specified one.  */

static inline struct ls_expr *
next_ls_expr (ptr)
     struct ls_expr * ptr;
{
  return ptr->next;
}

/* Load Motion for loads which only kill themselves.  */

/* Return true if x is a simple MEM operation, with no registers or
   side effects.  These are the types of loads we consider for the
   ld_motion list, otherwise we let the usual aliasing take care of it.  */

static int
simple_mem (x)
     rtx x;
{
  if (GET_CODE (x) != MEM)
    return 0;

  if (MEM_VOLATILE_P (x))
    return 0;

  if (GET_MODE (x) == BLKmode)
    return 0;

#if 0
  /* See comment in find_moveable_store.  */
  if (!rtx_addr_varies_p (XEXP (x, 0), 0))
    return 1;
#endif

  return 0;
}
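
/* Note that with the rtx_addr_varies_p test #if 0'd out above, every
   path through simple_mem ends in `return 0', so no loads are entered
   into the ld_motion list from here.  The #if 0 in
   find_moveable_store, by contrast, disables a rejection test, so it
   widens the set of store candidates.  */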

/* Make sure there isn't a buried reference in this pattern anywhere.
   If there is, invalidate the entry for it since we're not capable
   of fixing it up just yet.  We have to be sure we know about ALL
   loads since the aliasing code will allow all entries in the
   ld_motion list to not alias each other.  If we miss a load, we will
   get the wrong value since gcse might common it and we won't know to
   fix it up.  */

static void
invalidate_any_buried_refs (x)
     rtx x;
{
  const char * fmt;
  int i, j;
  struct ls_expr * ptr;

  /* Invalidate it in the list.  */
  if (GET_CODE (x) == MEM && simple_mem (x))
    {
      ptr = ldst_entry (x);
      ptr->invalid = 1;
    }

  /* Recursively process the insn.  */
  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
        invalidate_any_buried_refs (XEXP (x, i));
      else if (fmt[i] == 'E')
        for (j = XVECLEN (x, i) - 1; j >= 0; j--)
          invalidate_any_buried_refs (XVECEXP (x, i, j));
    }
}

/* Find all the 'simple' MEMs which are used in LOADs and STORES.  Simple
   being defined as MEM loads and stores to symbols, with no
   side effects and no registers in the expression.  If there are any
   uses/defs which don't match these criteria, the entry is invalidated
   and trimmed out later.  */

static void
compute_ld_motion_mems ()
{
  struct ls_expr * ptr;
  int bb;
  rtx insn;

  pre_ldst_mems = NULL;

  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      for (insn = BLOCK_HEAD (bb);
           insn && insn != NEXT_INSN (BLOCK_END (bb));
           insn = NEXT_INSN (insn))
        {
          if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
            {
              if (GET_CODE (PATTERN (insn)) == SET)
                {
                  rtx src = SET_SRC (PATTERN (insn));
                  rtx dest = SET_DEST (PATTERN (insn));

                  /* Check for a simple LOAD...  */
                  if (GET_CODE (src) == MEM && simple_mem (src))
                    {
                      ptr = ldst_entry (src);
                      if (GET_CODE (dest) == REG)
                        ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
                      else
                        ptr->invalid = 1;
                    }
                  else
                    {
                      /* Make sure there isn't a buried load somewhere.  */
                      invalidate_any_buried_refs (src);
                    }

                  /* Check for stores.  Don't worry about aliased ones, they
                     will block any movement we might do later.  We only care
                     about this exact pattern since those are the only
                     circumstances that we will ignore the aliasing info.  */
                  if (GET_CODE (dest) == MEM && simple_mem (dest))
                    {
                      ptr = ldst_entry (dest);

                      if (GET_CODE (src) != MEM
                          && GET_CODE (src) != ASM_OPERANDS)
                        ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
                      else
                        ptr->invalid = 1;
                    }
                }
              else
                invalidate_any_buried_refs (PATTERN (insn));
            }
        }
    }
}

/* Remove any references that have been either invalidated or are not in the
   expression list for pre gcse.  */

static void
trim_ld_motion_mems ()
{
  struct ls_expr * last = NULL;
  struct ls_expr * ptr = first_ls_expr ();

  while (ptr != NULL)
    {
      int del = ptr->invalid;
      struct expr * expr = NULL;

      /* Delete if entry has been made invalid.  */
      if (!del)
        {
          unsigned int i;

          del = 1;
          /* Delete if we cannot find this mem in the expression list.  */
          for (i = 0; i < expr_hash_table_size && del; i++)
            {
              for (expr = expr_hash_table[i];
                   expr != NULL;
                   expr = expr->next_same_hash)
                if (expr_equiv_p (expr->expr, ptr->pattern))
                  {
                    del = 0;
                    break;
                  }
            }
        }

      if (del)
        {
          if (last != NULL)
            {
              last->next = ptr->next;
              free_ldst_entry (ptr);
              ptr = last->next;
            }
          else
            {
              pre_ldst_mems = pre_ldst_mems->next;
              free_ldst_entry (ptr);
              ptr = pre_ldst_mems;
            }
        }
      else
        {
          /* Set the expression field if we are keeping it.  */
          last = ptr;
          ptr->expr = expr;
          ptr = ptr->next;
        }
    }

  /* Show the world what we've found.  */
  if (gcse_file && pre_ldst_mems != NULL)
    print_ldst_list (gcse_file);
}

/* This routine will take an expression which we are replacing with
   a reaching register, and update any stores that are needed if
   that expression is in the ld_motion list.  Stores are updated by
   copying their SRC to the reaching register, and then storing
   the reaching register into the store location.  This keeps the
   correct value in the reaching register for the loads.  */

static void
update_ld_motion_stores (expr)
     struct expr * expr;
{
  struct ls_expr * mem_ptr;

  if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
    {
      /* We could try to find just the REACHED stores, but it shouldn't
         matter to set the reaching reg everywhere...  some might be
         dead and should be eliminated later.  */

      /* We replace  SET mem = expr  with
           SET reg = expr
           SET mem = reg , where reg is the
           reaching reg used in the load.  */
      rtx list = mem_ptr->stores;

      for ( ; list != NULL_RTX; list = XEXP (list, 1))
        {
          rtx insn = XEXP (list, 0);
          rtx pat = PATTERN (insn);
          rtx src = SET_SRC (pat);
          rtx reg = expr->reaching_reg;
          rtx copy, new;

          /* If we've already copied it, continue.  */
          if (expr->reaching_reg == src)
            continue;

          if (gcse_file)
            {
              fprintf (gcse_file, "PRE: store updated with reaching reg ");
              print_rtl (gcse_file, expr->reaching_reg);
              fprintf (gcse_file, ":\n ");
              print_inline_rtx (gcse_file, insn, 8);
              fprintf (gcse_file, "\n");
            }

          copy = gen_move_insn (reg, SET_SRC (pat));
          new = emit_insn_before (copy, insn);
          record_one_set (REGNO (reg), new);
          set_block_for_new_insns (new, BLOCK_FOR_INSN (insn));
          SET_SRC (pat) = reg;

          /* Un-recognize this pattern since it's probably different now.  */
          INSN_CODE (insn) = -1;
          gcse_create_count++;
        }
    }
}

/* Store motion code.  */

/* Used in computing the reverse edge graph bit vectors.  */
static sbitmap * st_antloc;

/* Global holding the number of store expressions we are dealing with.  */
static int num_stores;

/* Mark in USED which registers are used by the mem X.  Returns nonzero
   if a previously unmarked register was seen.  */

static int
mark_mem_regs (x, used)
     rtx x;
     sbitmap used;
{
  register const char *fmt;
  int i, j;
  int marked = 0;

  if (GET_CODE (x) == REG)
    {
      if (!TEST_BIT (used, REGNO (x)))
        {
          SET_BIT (used, REGNO (x));
          return 1;
        }
      return 0;
    }

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
        marked |= mark_mem_regs (XEXP (x, i), used);
      else if (fmt[i] == 'E')
        for (j = XVECLEN (x, i) - 1; j >= 0; j--)
          marked |= mark_mem_regs (XVECEXP (x, i, j), used);
    }

  return marked;
}

/* Return non-zero if the register operands of expression X are still
   valid, i.e. not killed, before INSN (if BEFORE is nonzero) or after
   INSN, within basic block BB.  */

static int
store_ops_ok (x, bb, insn, before)
     rtx x;
     basic_block bb;
     rtx insn;
     int before;
{
  int i;
  enum rtx_code code;
  const char * fmt;

  /* Repeat is used to turn tail-recursion into iteration.  */
 repeat:

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      {
        /* Okay, since the reg def chains are ordered by bb/insn
           (since that's how it calculates them, and even if it didn't,
           we could just sort them), we just walk until we find a def
           in our BB, then walk until we find a def after/before our
           insn, and if we find a reg def after/before our insn, in the
           same bb, we return the appropriate value.  If there is no
           such def, to prevent walking *every* reg def, we stop once
           we are out of our BB again.  */
        struct df_link *currref;
        bool thereyet = FALSE;
        for (currref = df_analyzer->regs[REGNO (x)].defs;
             currref;
             currref = currref->next)
          {
            if (! (DF_REF_BB (currref->ref) == bb))
              {
                if (!thereyet)
                  continue;
                else
                  return 1;
              }
            if (before)
              {
                if (INSN_UID (DF_REF_INSN (currref->ref)) >= INSN_UID (insn))
                  continue;
              }
            else
              {
                if (INSN_UID (DF_REF_INSN (currref->ref)) < INSN_UID (insn))
                  continue;
              }
            thereyet = TRUE;
            if (DF_REF_TYPE (currref->ref) == DF_REF_REG_DEF)
              return 0;
          }
        return 1;
      }

    case MEM:
      x = XEXP (x, 0);
      goto repeat;

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
      return 0;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    default:
      break;
    }

  i = GET_RTX_LENGTH (code) - 1;
  fmt = GET_RTX_FORMAT (code);

  for (; i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          rtx tem = XEXP (x, i);

          /* If we are about to do the last recursive call
             needed at this level, change it into iteration.
             This function is called enough to be worth it.  */
          if (i == 0)
            {
              x = tem;
              goto repeat;
            }

          if (! store_ops_ok (tem, bb, insn, before))
            return 0;
        }
      else if (fmt[i] == 'E')
        {
          int j;

          for (j = 0; j < XVECLEN (x, i); j++)
            {
              if (! store_ops_ok (XVECEXP (x, i, j), bb, insn, before))
                return 0;
            }
        }
    }

  return 1;
}

/* Determine whether insn is MEM store pattern that we will consider
   moving.  We'll consider moving pretty much anything that we can
   move safely.  */

static void
find_moveable_store (insn)
     rtx insn;
{
  struct ls_expr * ptr;
  rtx dest = PATTERN (insn);

  /* If it's not a set, it's not a mem store we want to consider.
     Also, if it's an ASM, we certainly don't want to try to touch
     it.  */
  if (GET_CODE (dest) != SET
      || GET_CODE (SET_SRC (dest)) == ASM_OPERANDS)
    return;

  dest = SET_DEST (dest);

  if (GET_CODE (dest) != MEM || MEM_VOLATILE_P (dest)
      || GET_MODE (dest) == BLKmode)
    return;

#if 0
  /* ??? Is this conservative, or just correct?  We get more
     *candidates* without it, but I don't think we ever remove any
     stores where the address did vary.  */
  if (rtx_addr_varies_p (XEXP (dest, 0), 0))
    return;
#endif

  ptr = ldst_entry (dest);
  ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
}
6459
adfcce61
DB
6460/* Perform store motion.
6461 Store motion is modeled as a lazy code motion problem, like PRE is
6462 above. The main diffence is that we want to move stores down as far
6463 as possible, so we have LCM work on the reverse flowgraph. */
a13d4ebf
AM
6464
6465static int
6466compute_store_table ()
6467{
6468 int bb, ret;
a13d4ebf 6469 rtx insn, pat;
a13d4ebf
AM
6470 max_gcse_regno = max_reg_num ();
6471
a13d4ebf 6472 pre_ldst_mems = 0;
a13d4ebf
AM
6473 /* Find all the stores we care about. */
6474 for (bb = 0; bb < n_basic_blocks; bb++)
6475 {
a13d4ebf
AM
6476 for (insn = BLOCK_END (bb);
6477 insn && insn != PREV_INSN (BLOCK_HEAD (bb));
6478 insn = PREV_INSN (insn))
6479 {
adfcce61
DB
6480 /* Ignore anything that is not a normal insn. */
6481 if (!INSN_P (insn))
a13d4ebf
AM
6482 continue;
6483
a13d4ebf 6484 pat = PATTERN (insn);
a13d4ebf
AM
6485 /* Now that we've marked regs, look for stores. */
6486 if (GET_CODE (pat) == SET)
6487 find_moveable_store (insn);
6488 }
6489 }
6490
6491 ret = enumerate_ldsts ();
6492
6493 if (gcse_file)
6494 {
6495 fprintf (gcse_file, "Store Motion Expressions.\n");
6496 print_ldst_list (gcse_file);
6497 }
6498
6499 return ret;
6500}
6501
adfcce61
DB
6502/* Check to see if the load X is aliased with STORE_PATTERN.
6503 If it is, it means that load kills the store.*/
a13d4ebf
AM
6504
6505static int
6506load_kills_store (x, store_pattern)
6507 rtx x, store_pattern;
6508{
6509 if (true_dependence (x, GET_MODE (x), store_pattern, rtx_addr_varies_p))
6510 return 1;
6511 return 0;
6512}
6513
adfcce61
DB
6514/* Go through the entire insn X, looking for any loads which might
6515 alias, and therefore, kill, STORE_PATTERN. Return 1 if found. */
a13d4ebf
AM
6516
6517static int
6518find_loads (x, store_pattern)
6519 rtx x, store_pattern;
6520{
6521 const char * fmt;
6522 int i,j;
6523 int ret = 0;
6524
6525 if (GET_CODE (x) == SET)
6526 x = SET_SRC (x);
6527
6528 if (GET_CODE (x) == MEM)
6529 {
6530 if (load_kills_store (x, store_pattern))
6531 return 1;
6532 }
6533
6534 /* Recursively process the insn. */
6535 fmt = GET_RTX_FORMAT (GET_CODE (x));
6536
6537 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0 && !ret; i--)
6538 {
6539 if (fmt[i] == 'e')
6540 ret |= find_loads (XEXP (x, i), store_pattern);
6541 else if (fmt[i] == 'E')
6542 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6543 ret |= find_loads (XVECEXP (x, i, j), store_pattern);
6544 }
6545 return ret;
6546}

/* Check if INSN kills the store pattern X (is aliased with it).
   Return 1 if it does.  */

static int
store_killed_in_insn (x, insn)
     rtx x, insn;
{
  if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
    return 0;

  if (GET_CODE (insn) == CALL_INSN)
    {
      if (CONST_CALL_P (insn))
        return 0;
      else
        return 1;
    }

  if (GET_CODE (PATTERN (insn)) == SET)
    {
      rtx pat = PATTERN (insn);
      /* Check for memory stores to aliased objects.  */
      if (GET_CODE (SET_DEST (pat)) == MEM && !expr_equiv_p (SET_DEST (pat), x))
        {
          if (find_loads (SET_DEST (pat), x))
            return 1;
        }
      return find_loads (SET_SRC (pat), x);
    }
  else
    return find_loads (PATTERN (insn), x);
}

/* Returns 1 if the expression X is loaded or clobbered on or after INSN
   within basic block BB.  If TESTOPS is nonzero, first check whether the
   register operands of X's address are killed after INSN in BB.  */

static int
store_killed_after (x, insn, bb, testops)
     rtx x, insn;
     basic_block bb;
     int testops;
{
  rtx last = bb->end;

  if (insn == last)
    return 0;

  if (testops)
    /* Check if the register operands of the store are OK in this block.  */
    if (!store_ops_ok (XEXP (x, 0), bb, insn, 0))
      return 1;

  for ( ;
       insn && insn != NEXT_INSN (last);
       insn = NEXT_INSN (insn))
    if (store_killed_in_insn (x, insn))
      return 1;

  return 0;
}

/* Returns 1 if the expression X is loaded or clobbered before INSN
   within basic block BB.  */

static int
store_killed_before (x, insn, bb)
     rtx x, insn;
     basic_block bb;
{
  rtx first = bb->head;

  if (insn == first)
    return store_killed_in_insn (x, insn);

  /* Check if the register operands of the store are OK in this block.  */
  if (!store_ops_ok (XEXP (x, 0), bb, insn, 1))
    return 1;

  for (insn = PREV_INSN (insn);
       insn && insn != PREV_INSN (first);
       insn = PREV_INSN (insn))
    if (store_killed_in_insn (x, insn))
      return 1;

  return 0;
}

#define ANTIC_STORE_LIST(x) ((x)->loads)
#define AVAIL_STORE_LIST(x) ((x)->stores)
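
/* For store motion the `loads' and `stores' fields of an ls_expr are
   recycled through the macros above: ANTIC_STORE_LIST collects the
   stores that are anticipatable (not killed before their insn within
   its block) and AVAIL_STORE_LIST the stores that are available (not
   killed after it).  */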

/* Given the table of available store insns at the end of blocks,
   determine which ones are not killed by aliasing, and generate
   the appropriate vectors for gen and killed.  */

static void
build_store_vectors ()
{
  basic_block bb;
  int i, j;
  rtx insn, st;
  struct ls_expr * ptr;
  sbitmap tested, *result;
  sbitmap used;

  /* Build the gen_vector.  This is any store in the table which is not killed
     by aliasing later in its block.  */
  ae_gen = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, num_stores);
  sbitmap_vector_zero (ae_gen, n_basic_blocks);

  st_antloc = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, num_stores);
  sbitmap_vector_zero (st_antloc, n_basic_blocks);

  /* Note: In case someone needs something to optimize about store
     motion, here's the next place to look.  We currently test one more
     basic block per store than necessary (at least).  Since we know, at
     the end of this for loop, whether a store was killed in one of the
     basic blocks (we know both whether it's killed before, and killed
     after, the insn in the bb it resides in, so unless the insn
     consists of multiple stores/loads, we know whether it was killed
     in the entire bb), we could avoid testing it for kill and transp in
     the next for loop.  */
  for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
    {
      /* Put all the stores into either the antic list, or the avail list,
         or both.  */
      rtx store_list = ptr->stores;
      ptr->stores = NULL_RTX;

      for (st = store_list; st != NULL; st = XEXP (st, 1))
        {
          insn = XEXP (st, 0);
          bb = BLOCK_FOR_INSN (insn);
          if (!store_killed_after (ptr->pattern, insn, bb, 1))
            {
              /* If we've already seen an available expression in this block,
                 we can delete the one we saw already (it occurs earlier in
                 the block) and replace it with this one.  We'll copy the
                 old SRC expression to an unused register in case there
                 are any side effects.  */
              if (TEST_BIT (ae_gen[bb->index], ptr->index))
                {
                  /* Find previous store.  */
                  rtx st;
                  for (st = AVAIL_STORE_LIST (ptr); st; st = XEXP (st, 1))
                    if (BLOCK_FOR_INSN (XEXP (st, 0)) == bb)
                      break;
                  if (st)
                    {
                      rtx r = gen_reg_rtx (GET_MODE (ptr->pattern));
                      if (gcse_file)
                        fprintf (gcse_file, "Removing redundant store:\n");
                      replace_store_insn (r, XEXP (st, 0), bb);
                      XEXP (st, 0) = insn;
                      continue;
                    }
                }
              SET_BIT (ae_gen[bb->index], ptr->index);
              AVAIL_STORE_LIST (ptr) = alloc_INSN_LIST (insn,
                                                        AVAIL_STORE_LIST (ptr));
            }

          if (!store_killed_before (ptr->pattern, insn, bb))
            {
              SET_BIT (st_antloc[BLOCK_NUM (insn)], ptr->index);
              ANTIC_STORE_LIST (ptr) = alloc_INSN_LIST (insn,
                                                        ANTIC_STORE_LIST (ptr));
            }
        }

      /* Free the original list of store insns.  */
      free_INSN_LIST_list (&store_list);
    }

  ae_kill = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, num_stores);
  sbitmap_vector_zero (ae_kill, n_basic_blocks);

  transp = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, num_stores);
  sbitmap_vector_ones (transp, n_basic_blocks);

  tested = sbitmap_alloc (max_gcse_regno);
  sbitmap_zero (tested);
  result = sbitmap_vector_alloc (n_basic_blocks, max_gcse_regno);
  sbitmap_vector_zero (result, n_basic_blocks);
  used = sbitmap_alloc (max_gcse_regno);
  sbitmap_zero (used);

  /* This whole big nasty thing computes kill and transparent.
     It's done in this nasty way because profiling showed store motion
     taking twice as long as GCSE, with the cause being 1 million calls
     to store_ops_ok taking 30% of the entire runtime of the
     compiler.

     Since most store expressions use the same registers, there's no
     point in checking them 8 million times for the same basic blocks.  If
     they weren't okay in a BB the last time we checked, they won't be
     okay now.  Since we check all the bb's on each iteration, we don't
     need a vector for which registers we've tested, just the results.

     We then proceed to use the results of what store_ops_ok was for a
     given reg and bb, and if the results were a kill, we don't even need
     to check if the store was killed in the basic block; it'll be
     in the kill set because its regs changed between here and there.

     If the whole store had no registers, we just skip store_ops_ok
     anyway (since it's checking reg operands), and proceed to see if
     it's okay in each bb, setting the appropriate bits.

     With this in place, we now take almost no time at all to perform
     store motion.  (It's not on the first page of the profile; it
     takes less than a second.)  */

  for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
    {
      /* Make sure we don't have a load-only expr, which we never seem
         to, but I don't think there's actually a guarantee.  */
      if (ptr->stores != NULL)
        {
          /* First mark the regs used by the mem.  */
          mark_mem_regs (ptr->pattern, used);
          /* Now see if it had any regs.  */
          if (!(sbitmap_first_set_bit (used) == -1))
            {
              /* For each register, see if we've tested it.  */
              EXECUTE_IF_SET_IN_SBITMAP (used, 0, i,
                {
                  if (TEST_BIT (tested, i))
                    {
                      /* Already tested the register, so check the
                         result, and if we had an okay result, check the
                         store itself.  */
                      for (j = 0; j < n_basic_blocks; j++)
                        {
                          if (!TEST_BIT (result[j], i)
                              || store_killed_after (ptr->pattern, BLOCK_HEAD (j),
                                                     BASIC_BLOCK (j), FALSE))
                            {
                              SET_BIT (ae_kill[j], ptr->index);
                              if (!TEST_BIT (ae_gen[j], ptr->index)
                                  || !TEST_BIT (st_antloc[j], ptr->index))
                                RESET_BIT (transp[j], ptr->index);
                            }
                        }
                    }
                  else
                    {
                      /* We haven't tested it yet, so mark it tested,
                         and perform the tests.  */
                      SET_BIT (tested, i);
                      /* Check if it's okay in each BB.  */
                      for (j = 0; j < n_basic_blocks; j++)
                        {
                          if (store_ops_ok (XEXP (ptr->pattern, 0),
                                            BASIC_BLOCK (j), BLOCK_HEAD (j), 0))
                            {
                              SET_BIT (result[j], i);
                            }
                          else
                            {
                              /* It's not okay, so it's killed and maybe
                                 not transparent.  */
                              SET_BIT (ae_kill[j], ptr->index);
                              if (!TEST_BIT (ae_gen[j], ptr->index)
                                  || !TEST_BIT (st_antloc[j], ptr->index))
                                {
                                  RESET_BIT (transp[j], ptr->index);
                                }
                              continue;
                            }
                          /* The ops were okay, so check the store
                             itself.  */
                          if (store_killed_after (ptr->pattern, BLOCK_HEAD (j),
                                                  BASIC_BLOCK (j), FALSE))
                            {
                              SET_BIT (ae_kill[j], ptr->index);
                              if (!TEST_BIT (ae_gen[j], ptr->index)
                                  || !TEST_BIT (st_antloc[j], ptr->index))
                                {
                                  RESET_BIT (transp[j], ptr->index);
                                }
                            }
                        }
                    }
                });
              /* Reset the used list.  */
              sbitmap_zero (used);
            }
          /* If it had no registers, we come here, and do the
             appropriate testing.  */
          else
            {
              for (j = 0; j < n_basic_blocks; j++)
                {
                  if (store_killed_after (ptr->pattern, BLOCK_HEAD (j),
                                          BASIC_BLOCK (j), FALSE))
                    {
                      SET_BIT (ae_kill[j], ptr->index);
                      if (!TEST_BIT (ae_gen[j], ptr->index)
                          || !TEST_BIT (st_antloc[j], ptr->index))
                        {
                          RESET_BIT (transp[j], ptr->index);
                        }
                    }
                }
            }
        }
    }
  sbitmap_free (tested);
  sbitmap_free (used);
  sbitmap_vector_free (result);
}

/* Insert an instruction at the beginning of a basic block, and update
   the BLOCK_HEAD if needed.  */

static void
insert_insn_start_bb (insn, bb)
     rtx insn;
     basic_block bb;
{
  /* Insert at start of successor block.  */
  rtx prev = PREV_INSN (bb->head);
  rtx before = bb->head;

  while (before != 0)
    {
      if (GET_CODE (before) != CODE_LABEL
	  && (GET_CODE (before) != NOTE
	      || NOTE_LINE_NUMBER (before) != NOTE_INSN_BASIC_BLOCK))
	break;
      prev = before;
      if (prev == bb->end)
	break;
      before = NEXT_INSN (before);
    }
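  /* The loop above stops at the first real insn: given a block that
     begins CODE_LABEL -> NOTE_INSN_BASIC_BLOCK -> insn, PREV ends up
     at the note, so the store is emitted after the label and note but
     before any real insn (or at the very end of a block that contains
     nothing else).  */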

  insn = emit_insn_after (insn, prev);

  if (prev == bb->end)
    bb->end = insn;

  set_block_for_new_insns (insn, bb);

  if (gcse_file)
    {
      fprintf (gcse_file, "STORE_MOTION insert store at start of BB %d:\n",
	       bb->index);
      print_inline_rtx (gcse_file, insn, 6);
      fprintf (gcse_file, "\n");
    }
}

/* This routine will insert a store on an edge.  EXPR is the ldst entry for
   the memory reference, and E is the edge to insert it on.  Returns non-zero
   if an edge insertion was performed.  */

static int
insert_store (expr, e)
     struct ls_expr * expr;
     edge e;
{
  rtx reg, insn;
  basic_block bb;
  edge tmp;

  /* We did all the deletions before this insert, so if we didn't delete a
     store, then we haven't set the reaching reg yet either.  */
  if (expr->reaching_reg == NULL_RTX)
    return 0;

  reg = expr->reaching_reg;
  insn = gen_move_insn (expr->pattern, reg);

  /* If we are inserting this expression on ALL predecessor edges of a BB,
     insert it at the start of the BB, and reset the insert bits on the other
     edges so we don't try to insert it on the other edges.  */
  bb = e->dest;
  for (tmp = e->dest->pred; tmp ; tmp = tmp->pred_next)
    {
      int index = EDGE_INDEX (edge_list, tmp->src, tmp->dest);
      if (index == EDGE_INDEX_NO_EDGE)
	abort ();
      if (! TEST_BIT (pre_insert_map[index], expr->index))
	break;
    }
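  /* In pictures: if every edge into BB wants the store,

	 e1 \
	     BB		=>  one copy at the head of BB
	 e2 /

     we emit a single copy in BB itself rather than one copy per
     incoming edge.  */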

  /* If tmp is NULL, we found an insertion on every edge; blank the
     insertion vector for these edges, and insert at the start of the BB.  */
  if (!tmp && bb != EXIT_BLOCK_PTR)
    {
      for (tmp = e->dest->pred; tmp ; tmp = tmp->pred_next)
	{
	  int index = EDGE_INDEX (edge_list, tmp->src, tmp->dest);
	  RESET_BIT (pre_insert_map[index], expr->index);
	}
      insert_insn_start_bb (insn, bb);
      return 0;
    }

  /* We can't insert on this edge, so we'll insert at the head of the
     successor block.  See Morgan, sec 10.5.  */
  if ((e->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
    {
      insert_insn_start_bb (insn, bb);
      return 0;
    }

  insert_insn_on_edge (insn, e);

  if (gcse_file)
    {
      fprintf (gcse_file, "STORE_MOTION insert insn on edge (%d, %d):\n",
	       e->src->index, e->dest->index);
      print_inline_rtx (gcse_file, insn, 6);
      fprintf (gcse_file, "\n");
    }

  return 1;
}

/* This routine will replace a store with a SET to a specified register.  */

static void
replace_store_insn (reg, del, bb)
     rtx reg, del;
     basic_block bb;
{
  rtx insn;

  insn = gen_move_insn (reg, SET_SRC (PATTERN (del)));
  insn = emit_insn_after (insn, del);
  set_block_for_new_insns (insn, bb);
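  /* E.g. a store (set (mem:SI (reg 100)) (reg 101)) is rewritten here
     as (set (reg 200) (reg 101)), where reg 200 is the expression's
     reaching_reg; insert_store later materializes
     (set (mem:SI (reg 100)) (reg 200)) at the points LCM chose.
     (Register numbers made up for illustration.)  */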

  if (gcse_file)
    {
      fprintf (gcse_file,
	       "STORE_MOTION delete insn in BB %d:\n ", bb->index);
      print_inline_rtx (gcse_file, del, 6);
      fprintf (gcse_file, "\nSTORE_MOTION replaced with insn:\n ");
      print_inline_rtx (gcse_file, insn, 6);
      fprintf (gcse_file, "\n");
    }

  if (bb->end == del)
    bb->end = insn;

  if (bb->head == del)
    bb->head = insn;

  delete_insn (del);
}

/* Delete a store, but copy the value that would have been stored into
   the reaching_reg for later storing.  */

static void
delete_store (expr, bb)
     struct ls_expr * expr;
     basic_block bb;
{
  rtx reg, i, del;

  if (expr->reaching_reg == NULL_RTX)
    expr->reaching_reg = gen_reg_rtx (GET_MODE (expr->pattern));
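  /* Note the same reaching_reg is shared by every block this
     expression is deleted in and by the stores insert_store adds, so
     all copies of the value funnel through one pseudo.  */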

  /* If there is more than one store, the earlier ones will be dead,
     but it doesn't hurt to replace them here.  */
  reg = expr->reaching_reg;

  for (i = AVAIL_STORE_LIST (expr); i; i = XEXP (i, 1))
    {
      del = XEXP (i, 0);
      if (BLOCK_FOR_INSN (del) == bb)
	{
	  /* We know there is only one in this block, since we deleted
	     redundant ones during the available computation.  */
	  replace_store_insn (reg, del, bb);
	  break;
	}
    }
}

/* Free memory used by store motion.  */

static void
free_store_memory ()
{
  free_ldst_mems ();

  if (ae_gen)
    sbitmap_vector_free (ae_gen);
  if (ae_kill)
    sbitmap_vector_free (ae_kill);
  if (transp)
    sbitmap_vector_free (transp);
  if (st_antloc)
    sbitmap_vector_free (st_antloc);
  if (pre_insert_map)
    sbitmap_vector_free (pre_insert_map);
  if (pre_delete_map)
    sbitmap_vector_free (pre_delete_map);

  ae_gen = ae_kill = transp = st_antloc = NULL;
  pre_insert_map = pre_delete_map = reg_set_in_block = NULL;
}

/* Perform store motion.  Much like gcse, except we move expressions the
   other way by looking at the flowgraph in reverse.  */

static void
store_motion ()
{
  int x;
  struct ls_expr * ptr;
  sbitmap trapping_expr;
  int i;
  int update_flow = 0;

  if (gcse_file)
    {
      fprintf (gcse_file, "before store motion\n");
      print_rtl (gcse_file, get_insns ());
    }

  init_alias_analysis ();
  df_analyzer = df_init ();
  df_analyse (df_analyzer, 0, DF_RD_CHAIN | DF_HARD_REGS);
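  /* As I read the df.c interface: DF_RD_CHAIN requests
     reaching-definition chains and DF_HARD_REGS includes hard
     registers in the analysis.  The results are presumably what the
     kill/transparency tests consult; df_finish below releases them.  */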

  /* Find all the stores that are live to the end of their block.  */
  num_stores = compute_store_table ();
  if (num_stores == 0)
    {
      df_finish (df_analyzer);
      end_alias_analysis ();
      return;
    }

  /* Now compute what's actually available to move.  */
  add_noreturn_fake_exit_edges ();
  build_store_vectors ();

  /* Collect expressions which might trap.  */
  trapping_expr = sbitmap_alloc (num_stores);
  sbitmap_zero (trapping_expr);
  for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
    {
      if (may_trap_p (ptr->pattern))
	SET_BIT (trapping_expr, ptr->index);
    }

  for (i = 0; i < n_basic_blocks; i++)
    {
      edge e;

      /* If the current block is the destination of an abnormal edge, we
	 kill all trapping expressions because we won't be able to properly
	 place the instruction on the edge.  So make them neither
	 anticipatable nor transparent.  This is fairly conservative.  */
      for (e = BASIC_BLOCK (i)->pred; e ; e = e->pred_next)
	if (e->flags & EDGE_ABNORMAL)
	  {
	    sbitmap_difference (st_antloc[i], st_antloc[i], trapping_expr);
	    sbitmap_difference (transp[i], transp[i], trapping_expr);
	    break;
	  }
    }

  edge_list = pre_edge_rev_lcm (gcse_file, num_stores, transp, ae_gen,
				st_antloc, ae_kill, &pre_insert_map,
				&pre_delete_map);
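  /* pre_edge_rev_lcm runs the PRE/LCM dataflow problem on the reverse
     flowgraph, which is what turns expression hoisting into store
     sinking: pre_delete_map[bb] marks stores to remove from a block,
     and pre_insert_map[edge] marks edges needing a copy inserted.  */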

  /* Now we want to insert the new stores which are going to be needed.  */
  for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
    {
      for (x = 0; x < n_basic_blocks; x++)
	if (TEST_BIT (pre_delete_map[x], ptr->index))
	  delete_store (ptr, BASIC_BLOCK (x));

      for (x = 0; x < NUM_EDGES (edge_list); x++)
	if (TEST_BIT (pre_insert_map[x], ptr->index))
	  update_flow |= insert_store (ptr, INDEX_EDGE (edge_list, x));
    }

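  /* Note the ordering above: all deletions for an expression run
     before its insertions, since insert_store bails out when
     reaching_reg was never set by a delete_store.  Committing the
     queued edge insertions below may split edges and create new
     basic blocks.  */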
  if (update_flow)
    commit_edge_insertions ();

  sbitmap_free (trapping_expr);
  free_store_memory ();
  free_edge_list (edge_list);
  remove_fake_edges ();
  end_alias_analysis ();
  df_finish (df_analyzer);
}