]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/gcse.c
* emit-rtl.c (gen_lowpart_common): Add missing 'c' variable.
[thirdparty/gcc.git] / gcc / gcse.c
CommitLineData
f4e584dc 1/* Global common subexpression elimination/Partial redundancy elimination
7506f491 2 and global constant/copy propagation for GNU compiler.
dd1bd863 3 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
7506f491
DE
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
19the Free Software Foundation, 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA. */
21
22/* TODO
23 - reordering of memory allocation and freeing to be more space efficient
24 - do rough calc of how many regs are needed in each block, and a rough
25 calc of how many regs are available in each class and use that to
26 throttle back the code in cases where RTX_COST is minimal.
f4e584dc
JL
27 - dead store elimination
28 - a store to the same address as a load does not kill the load if the
29 source of the store is also the destination of the load. Handling this
30 allows more load motion, particularly out of loops.
7506f491
DE
31 - ability to realloc sbitmap vectors would allow one initial computation
32 of reg_set_in_block with only subsequent additions, rather than
33 recomputing it for each pass
34
7506f491
DE
35*/
36
37/* References searched while implementing this.
7506f491
DE
38
39 Compilers Principles, Techniques and Tools
40 Aho, Sethi, Ullman
41 Addison-Wesley, 1988
42
43 Global Optimization by Suppression of Partial Redundancies
44 E. Morel, C. Renvoise
45 communications of the acm, Vol. 22, Num. 2, Feb. 1979
46
47 A Portable Machine-Independent Global Optimizer - Design and Measurements
48 Frederick Chow
49 Stanford Ph.D. thesis, Dec. 1983
50
7506f491
DE
51 A Fast Algorithm for Code Movement Optimization
52 D.M. Dhamdhere
53 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
54
55 A Solution to a Problem with Morel and Renvoise's
56 Global Optimization by Suppression of Partial Redundancies
57 K-H Drechsler, M.P. Stadel
58 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
59
60 Practical Adaptation of the Global Optimization
61 Algorithm of Morel and Renvoise
62 D.M. Dhamdhere
63 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
64
65 Efficiently Computing Static Single Assignment Form and the Control
66 Dependence Graph
67 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
68 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
69
7506f491
DE
70 Lazy Code Motion
71 J. Knoop, O. Ruthing, B. Steffen
72 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
73
74 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
75 Time for Reducible Flow Control
76 Thomas Ball
77 ACM Letters on Programming Languages and Systems,
78 Vol. 2, Num. 1-4, Mar-Dec 1993
79
80 An Efficient Representation for Sparse Sets
81 Preston Briggs, Linda Torczon
82 ACM Letters on Programming Languages and Systems,
83 Vol. 2, Num. 1-4, Mar-Dec 1993
84
85 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
86 K-H Drechsler, M.P. Stadel
87 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
88
89 Partial Dead Code Elimination
90 J. Knoop, O. Ruthing, B. Steffen
91 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
92
93 Effective Partial Redundancy Elimination
94 P. Briggs, K.D. Cooper
95 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
96
97 The Program Structure Tree: Computing Control Regions in Linear Time
98 R. Johnson, D. Pearson, K. Pingali
99 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
100
101 Optimal Code Motion: Theory and Practice
102 J. Knoop, O. Ruthing, B. Steffen
103 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
104
105 The power of assignment motion
106 J. Knoop, O. Ruthing, B. Steffen
107 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
108
109 Global code motion / global value numbering
110 C. Click
111 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
112
113 Value Driven Redundancy Elimination
114 L.T. Simpson
115 Rice University Ph.D. thesis, Apr. 1996
116
117 Value Numbering
118 L.T. Simpson
119 Massively Scalar Compiler Project, Rice University, Sep. 1996
120
121 High Performance Compilers for Parallel Computing
122 Michael Wolfe
123 Addison-Wesley, 1996
124
f4e584dc
JL
125 Advanced Compiler Design and Implementation
126 Steven Muchnick
127 Morgan Kaufmann, 1997
128
a42cd965
AM
129 Building an Optimizing Compiler
130 Robert Morgan
131 Digital Press, 1998
132
f4e584dc
JL
133 People wishing to speed up the code here should read:
134 Elimination Algorithms for Data Flow Analysis
135 B.G. Ryder, M.C. Paull
136 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
137
138 How to Analyze Large Programs Efficiently and Informatively
139 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
140 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
141
7506f491
DE
142 People wishing to do something different can find various possibilities
143 in the above papers and elsewhere.
144*/
145
146#include "config.h"
50b2596f 147#include "system.h"
01198c2f 148#include "toplev.h"
7506f491
DE
149
150#include "rtl.h"
6baf1cc8 151#include "tm_p.h"
7506f491
DE
152#include "regs.h"
153#include "hard-reg-set.h"
154#include "flags.h"
155#include "real.h"
156#include "insn-config.h"
157#include "recog.h"
158#include "basic-block.h"
50b2596f 159#include "output.h"
49ad7cfa 160#include "function.h"
3cdbd1f8 161#include "expr.h"
7506f491
DE
162
163#include "obstack.h"
164#define obstack_chunk_alloc gmalloc
165#define obstack_chunk_free free
166
167/* Maximum number of passes to perform. */
168#define MAX_PASSES 1
169
170/* Propagate flow information through back edges and thus enable PRE's
171 moving loop invariant calculations out of loops.
172
173 Originally this tended to create worse overall code, but several
174 improvements during the development of PRE seem to have made following
175 back edges generally a win.
176
177 Note much of the loop invariant code motion done here would normally
178 be done by loop.c, which has more heuristics for when to move invariants
179 out of loops. At some point we might need to move some of those
180 heuristics into gcse.c. */
181#define FOLLOW_BACK_EDGES 1
182
f4e584dc
JL
183/* We support GCSE via Partial Redundancy Elimination. PRE optimizations
184 are a superset of those done by GCSE.
7506f491 185
f4e584dc 186 We perform the following steps:
7506f491
DE
187
188 1) Compute basic block information.
189
190 2) Compute table of places where registers are set.
191
192 3) Perform copy/constant propagation.
193
194 4) Perform global cse.
195
e78d9500 196 5) Perform another pass of copy/constant propagation.
7506f491
DE
197
198 Two passes of copy/constant propagation are done because the first one
199 enables more GCSE and the second one helps to clean up the copies that
200 GCSE creates. This is needed more for PRE than for Classic because Classic
201 GCSE will try to use an existing register containing the common
202 subexpression rather than create a new one. This is harder to do for PRE
203 because of the code motion (which Classic GCSE doesn't do).
204
205 Expressions we are interested in GCSE-ing are of the form
206 (set (pseudo-reg) (expression)).
207 Function want_to_gcse_p says what these are.
208
209 PRE handles moving invariant expressions out of loops (by treating them as
f4e584dc 210 partially redundant).
7506f491
DE
211
212 Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
213 assignment) based GVN (global value numbering). L. T. Simpson's paper
214 (Rice University) on value numbering is a useful reference for this.
215
216 **********************
217
218 We used to support multiple passes but there are diminishing returns in
219 doing so. The first pass usually makes 90% of the changes that are doable.
220 A second pass can make a few more changes made possible by the first pass.
221 Experiments show any further passes don't make enough changes to justify
222 the expense.
223
224 A study of spec92 using an unlimited number of passes:
225 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
226 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
227 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
228
229 It was found doing copy propagation between each pass enables further
230 substitutions.
231
232 PRE is quite expensive in complicated functions because the DFA can take
233 awhile to converge. Hence we only perform one pass. Macro MAX_PASSES can
234 be modified if one wants to experiment.
235
236 **********************
237
238 The steps for PRE are:
239
240 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
241
242 2) Perform the data flow analysis for PRE.
243
244 3) Delete the redundant instructions
245
246 4) Insert the required copies [if any] that make the partially
247 redundant instructions fully redundant.
248
249 5) For other reaching expressions, insert an instruction to copy the value
250 to a newly created pseudo that will reach the redundant instruction.
251
252 The deletion is done first so that when we do insertions we
253 know which pseudo reg to use.
254
255 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
256 argue it is not. The number of iterations for the algorithm to converge
257 is typically 2-4 so I don't view it as that expensive (relatively speaking).
258
f4e584dc 259 PRE GCSE depends heavily on the second CSE pass to clean up the copies
7506f491
DE
260 we create. To make an expression reach the place where it's redundant,
261 the result of the expression is copied to a new register, and the redundant
262 expression is deleted by replacing it with this new register. Classic GCSE
263 doesn't have this problem as much as it computes the reaching defs of
264 each register in each block and thus can try to use an existing register.
265
266 **********************
267
7506f491
DE
268 A fair bit of simplicity is created by creating small functions for simple
269 tasks, even when the function is only called in one place. This may
270 measurably slow things down [or may not] by creating more function call
271 overhead than is necessary. The source is laid out so that it's trivial
272 to make the affected functions inline so that one can measure what speed
273 up, if any, can be achieved, and maybe later when things settle things can
274 be rearranged.
275
276 Help stamp out big monolithic functions! */
277\f
278/* GCSE global vars. */
279
280/* -dG dump file. */
281static FILE *gcse_file;
282
f4e584dc
JL
283/* Note whether or not we should run jump optimization after gcse. We
284 want to do this for two cases.
285
286 * If we changed any jumps via cprop.
287
288 * If we added any labels via edge splitting. */
289
290static int run_jump_opt_after_gcse;
291
7506f491
DE
292/* Bitmaps are normally not included in debugging dumps.
293 However it's useful to be able to print them from GDB.
294 We could create special functions for this, but it's simpler to
295 just allow passing stderr to the dump_foo fns. Since stderr can
296 be a macro, we store a copy here. */
297static FILE *debug_stderr;
298
299/* An obstack for our working variables. */
300static struct obstack gcse_obstack;
301
302/* Non-zero for each mode that supports (set (reg) (reg)).
303 This is trivially true for integer and floating point values.
304 It may or may not be true for condition codes. */
305static char can_copy_p[(int) NUM_MACHINE_MODES];
306
307/* Non-zero if can_copy_p has been initialized. */
308static int can_copy_init_p;
309
c4c81601 310struct reg_use {rtx reg_rtx; };
abd535b6 311
7506f491
DE
312/* Hash table of expressions. */
313
314struct expr
315{
316 /* The expression (SET_SRC for expressions, PATTERN for assignments). */
317 rtx expr;
318 /* Index in the available expression bitmaps. */
319 int bitmap_index;
320 /* Next entry with the same hash. */
321 struct expr *next_same_hash;
322 /* List of anticipatable occurrences in basic blocks in the function.
323 An "anticipatable occurrence" is one that is the first occurrence in the
f4e584dc
JL
324 basic block, the operands are not modified in the basic block prior
325 to the occurrence and the output is not used between the start of
326 the block and the occurrence. */
7506f491
DE
327 struct occr *antic_occr;
328 /* List of available occurrence in basic blocks in the function.
329 An "available occurrence" is one that is the last occurrence in the
330 basic block and the operands are not modified by following statements in
331 the basic block [including this insn]. */
332 struct occr *avail_occr;
333 /* Non-null if the computation is PRE redundant.
334 The value is the newly created pseudo-reg to record a copy of the
335 expression in all the places that reach the redundant copy. */
336 rtx reaching_reg;
337};
338
339/* Occurrence of an expression.
340 There is one per basic block. If a pattern appears more than once the
341 last appearance is used [or first for anticipatable expressions]. */
342
343struct occr
344{
345 /* Next occurrence of this expression. */
346 struct occr *next;
347 /* The insn that computes the expression. */
348 rtx insn;
349 /* Non-zero if this [anticipatable] occurrence has been deleted. */
350 char deleted_p;
351 /* Non-zero if this [available] occurrence has been copied to
352 reaching_reg. */
353 /* ??? This is mutually exclusive with deleted_p, so they could share
354 the same byte. */
355 char copied_p;
356};
357
358/* Expression and copy propagation hash tables.
359 Each hash table is an array of buckets.
360 ??? It is known that if it were an array of entries, structure elements
361 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
362 not clear whether in the final analysis a sufficient amount of memory would
363 be saved as the size of the available expression bitmaps would be larger
364 [one could build a mapping table without holes afterwards though].
c4c81601 365 Someday I'll perform the computation and figure it out. */
7506f491
DE
366
367/* Total size of the expression hash table, in elements. */
2e653e39
RK
368static unsigned int expr_hash_table_size;
369
7506f491
DE
370/* The table itself.
371 This is an array of `expr_hash_table_size' elements. */
372static struct expr **expr_hash_table;
373
374/* Total size of the copy propagation hash table, in elements. */
375static int set_hash_table_size;
c4c81601 376
7506f491
DE
377/* The table itself.
378 This is an array of `set_hash_table_size' elements. */
379static struct expr **set_hash_table;
380
381/* Mapping of uids to cuids.
382 Only real insns get cuids. */
383static int *uid_cuid;
384
385/* Highest UID in UID_CUID. */
386static int max_uid;
387
388/* Get the cuid of an insn. */
b86db3eb
BS
389#ifdef ENABLE_CHECKING
390#define INSN_CUID(INSN) (INSN_UID (INSN) > max_uid ? (abort (), 0) : uid_cuid[INSN_UID (INSN)])
391#else
7506f491 392#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
b86db3eb 393#endif
7506f491
DE
394
395/* Number of cuids. */
396static int max_cuid;
397
398/* Mapping of cuids to insns. */
399static rtx *cuid_insn;
400
401/* Get insn from cuid. */
402#define CUID_INSN(CUID) (cuid_insn[CUID])
403
404/* Maximum register number in function prior to doing gcse + 1.
405 Registers created during this pass have regno >= max_gcse_regno.
406 This is named with "gcse" to not collide with global of same name. */
770ae6cc 407static unsigned int max_gcse_regno;
7506f491
DE
408
409/* Maximum number of cse-able expressions found. */
410static int n_exprs;
c4c81601 411
7506f491
DE
412/* Maximum number of assignments for copy propagation found. */
413static int n_sets;
414
415/* Table of registers that are modified.
c4c81601 416
7506f491
DE
417 For each register, each element is a list of places where the pseudo-reg
418 is set.
419
420 For simplicity, GCSE is done on sets of pseudo-regs only. PRE GCSE only
421 requires knowledge of which blocks kill which regs [and thus could use
f4e584dc 422 a bitmap instead of the lists `reg_set_table' uses].
7506f491 423
c4c81601
RK
424 `reg_set_table' and could be turned into an array of bitmaps (num-bbs x
425 num-regs) [however perhaps it may be useful to keep the data as is]. One
426 advantage of recording things this way is that `reg_set_table' is fairly
427 sparse with respect to pseudo regs but for hard regs could be fairly dense
428 [relatively speaking]. And recording sets of pseudo-regs in lists speeds
7506f491
DE
429 up functions like compute_transp since in the case of pseudo-regs we only
430 need to iterate over the number of times a pseudo-reg is set, not over the
431 number of basic blocks [clearly there is a bit of a slow down in the cases
432 where a pseudo is set more than once in a block, however it is believed
433 that the net effect is to speed things up]. This isn't done for hard-regs
434 because recording call-clobbered hard-regs in `reg_set_table' at each
c4c81601
RK
435 function call can consume a fair bit of memory, and iterating over
436 hard-regs stored this way in compute_transp will be more expensive. */
7506f491 437
c4c81601
RK
438typedef struct reg_set
439{
7506f491
DE
440 /* The next setting of this register. */
441 struct reg_set *next;
442 /* The insn where it was set. */
443 rtx insn;
444} reg_set;
c4c81601 445
7506f491 446static reg_set **reg_set_table;
c4c81601 447
7506f491
DE
448/* Size of `reg_set_table'.
449 The table starts out at max_gcse_regno + slop, and is enlarged as
450 necessary. */
451static int reg_set_table_size;
c4c81601 452
7506f491
DE
453/* Amount to grow `reg_set_table' by when it's full. */
454#define REG_SET_TABLE_SLOP 100
455
456/* Bitmap containing one bit for each register in the program.
457 Used when performing GCSE to track which registers have been set since
458 the start of the basic block. */
459static sbitmap reg_set_bitmap;
460
461/* For each block, a bitmap of registers set in the block.
462 This is used by expr_killed_p and compute_transp.
463 It is computed during hash table computation and not by compute_sets
464 as it includes registers added since the last pass (or between cprop and
465 gcse) and it's currently not easy to realloc sbitmap vectors. */
466static sbitmap *reg_set_in_block;
467
468/* For each block, non-zero if memory is set in that block.
469 This is computed during hash table computation and is used by
470 expr_killed_p and compute_transp.
471 ??? Handling of memory is very simple, we don't make any attempt
472 to optimize things (later).
473 ??? This can be computed by compute_sets since the information
474 doesn't change. */
475static char *mem_set_in_block;
476
477/* Various variables for statistics gathering. */
478
479/* Memory used in a pass.
480 This isn't intended to be absolutely precise. Its intent is only
481 to keep an eye on memory usage. */
482static int bytes_used;
c4c81601 483
7506f491
DE
484/* GCSE substitutions made. */
485static int gcse_subst_count;
486/* Number of copy instructions created. */
487static int gcse_create_count;
488/* Number of constants propagated. */
489static int const_prop_count;
490/* Number of copys propagated. */
491static int copy_prop_count;
7506f491
DE
492\f
493/* These variables are used by classic GCSE.
494 Normally they'd be defined a bit later, but `rd_gen' needs to
495 be declared sooner. */
496
7506f491
DE
497/* Each block has a bitmap of each type.
498 The length of each blocks bitmap is:
499
500 max_cuid - for reaching definitions
501 n_exprs - for available expressions
502
503 Thus we view the bitmaps as 2 dimensional arrays. i.e.
504 rd_kill[block_num][cuid_num]
c4c81601 505 ae_kill[block_num][expr_num] */
7506f491
DE
506
507/* For reaching defs */
508static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
509
510/* for available exprs */
511static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
b5ce41ff 512
0511851c
MM
513/* Objects of this type are passed around by the null-pointer check
514 removal routines. */
c4c81601
RK
515struct null_pointer_info
516{
0511851c
MM
517 /* The basic block being processed. */
518 int current_block;
519 /* The first register to be handled in this pass. */
770ae6cc 520 unsigned int min_reg;
0511851c 521 /* One greater than the last register to be handled in this pass. */
770ae6cc 522 unsigned int max_reg;
0511851c
MM
523 sbitmap *nonnull_local;
524 sbitmap *nonnull_killed;
525};
7506f491 526\f
c4c81601
RK
527static void compute_can_copy PARAMS ((void));
528static char *gmalloc PARAMS ((unsigned int));
529static char *grealloc PARAMS ((char *, unsigned int));
530static char *gcse_alloc PARAMS ((unsigned long));
531static void alloc_gcse_mem PARAMS ((rtx));
532static void free_gcse_mem PARAMS ((void));
533static void alloc_reg_set_mem PARAMS ((int));
534static void free_reg_set_mem PARAMS ((void));
535static int get_bitmap_width PARAMS ((int, int, int));
536static void record_one_set PARAMS ((int, rtx));
537static void record_set_info PARAMS ((rtx, rtx, void *));
538static void compute_sets PARAMS ((rtx));
539static void hash_scan_insn PARAMS ((rtx, int, int));
540static void hash_scan_set PARAMS ((rtx, rtx, int));
541static void hash_scan_clobber PARAMS ((rtx, rtx));
542static void hash_scan_call PARAMS ((rtx, rtx));
543static int want_to_gcse_p PARAMS ((rtx));
544static int oprs_unchanged_p PARAMS ((rtx, rtx, int));
545static int oprs_anticipatable_p PARAMS ((rtx, rtx));
546static int oprs_available_p PARAMS ((rtx, rtx));
547static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
548 int, int));
549static void insert_set_in_table PARAMS ((rtx, rtx));
550static unsigned int hash_expr PARAMS ((rtx, enum machine_mode, int *, int));
551static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
552static unsigned int hash_set PARAMS ((int, int));
553static int expr_equiv_p PARAMS ((rtx, rtx));
554static void record_last_reg_set_info PARAMS ((rtx, int));
555static void record_last_mem_set_info PARAMS ((rtx));
556static void record_last_set_info PARAMS ((rtx, rtx, void *));
711d877c 557static void compute_hash_table PARAMS ((int));
c4c81601
RK
558static void alloc_set_hash_table PARAMS ((int));
559static void free_set_hash_table PARAMS ((void));
560static void compute_set_hash_table PARAMS ((void));
2e653e39 561static void alloc_expr_hash_table PARAMS ((unsigned int));
c4c81601
RK
562static void free_expr_hash_table PARAMS ((void));
563static void compute_expr_hash_table PARAMS ((void));
564static void dump_hash_table PARAMS ((FILE *, const char *, struct expr **,
565 int, int));
566static struct expr *lookup_expr PARAMS ((rtx));
770ae6cc
RK
567static struct expr *lookup_set PARAMS ((unsigned int, rtx));
568static struct expr *next_set PARAMS ((unsigned int, struct expr *));
c4c81601
RK
569static void reset_opr_set_tables PARAMS ((void));
570static int oprs_not_set_p PARAMS ((rtx, rtx));
571static void mark_call PARAMS ((rtx));
572static void mark_set PARAMS ((rtx, rtx));
573static void mark_clobber PARAMS ((rtx, rtx));
574static void mark_oprs_set PARAMS ((rtx));
575static void alloc_cprop_mem PARAMS ((int, int));
576static void free_cprop_mem PARAMS ((void));
577static void compute_transp PARAMS ((rtx, int, sbitmap *, int));
578static void compute_transpout PARAMS ((void));
579static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
580 int));
711d877c 581static void compute_cprop_data PARAMS ((void));
c4c81601
RK
582static void find_used_regs PARAMS ((rtx));
583static int try_replace_reg PARAMS ((rtx, rtx, rtx));
584static struct expr *find_avail_set PARAMS ((int, rtx));
585static int cprop_jump PARAMS ((rtx, rtx, struct reg_use *, rtx));
e2bef702 586#ifdef HAVE_cc0
c4c81601 587static int cprop_cc0_jump PARAMS ((rtx, struct reg_use *, rtx));
e2bef702 588#endif
c4c81601
RK
589static int cprop_insn PARAMS ((rtx, int));
590static int cprop PARAMS ((int));
591static int one_cprop_pass PARAMS ((int, int));
592static void alloc_pre_mem PARAMS ((int, int));
593static void free_pre_mem PARAMS ((void));
594static void compute_pre_data PARAMS ((void));
595static int pre_expr_reaches_here_p PARAMS ((int, struct expr *, int));
711d877c 596static void insert_insn_end_bb PARAMS ((struct expr *, int, int));
c4c81601
RK
597static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
598static void pre_insert_copies PARAMS ((void));
599static int pre_delete PARAMS ((void));
600static int pre_gcse PARAMS ((void));
601static int one_pre_gcse_pass PARAMS ((int));
602static void add_label_notes PARAMS ((rtx, rtx));
603static void alloc_code_hoist_mem PARAMS ((int, int));
604static void free_code_hoist_mem PARAMS ((void));
711d877c 605static void compute_code_hoist_vbeinout PARAMS ((void));
c4c81601
RK
606static void compute_code_hoist_data PARAMS ((void));
607static int hoist_expr_reaches_here_p PARAMS ((int, int, int, char *));
608static void hoist_code PARAMS ((void));
609static int one_code_hoisting_pass PARAMS ((void));
610static void alloc_rd_mem PARAMS ((int, int));
611static void free_rd_mem PARAMS ((void));
711d877c 612static void handle_rd_kill_set PARAMS ((rtx, int, int));
c4c81601 613static void compute_kill_rd PARAMS ((void));
711d877c 614static void compute_rd PARAMS ((void));
c4c81601
RK
615static void alloc_avail_expr_mem PARAMS ((int, int));
616static void free_avail_expr_mem PARAMS ((void));
617static void compute_ae_gen PARAMS ((void));
618static int expr_killed_p PARAMS ((rtx, int));
619static void compute_ae_kill PARAMS ((sbitmap *, sbitmap *));
711d877c
KG
620static int expr_reaches_here_p PARAMS ((struct occr *, struct expr *,
621 int, int));
c4c81601
RK
622static rtx computing_insn PARAMS ((struct expr *, rtx));
623static int def_reaches_here_p PARAMS ((rtx, rtx));
624static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
625static int handle_avail_expr PARAMS ((rtx, struct expr *));
626static int classic_gcse PARAMS ((void));
627static int one_classic_gcse_pass PARAMS ((int));
628static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
770ae6cc
RK
629static void delete_null_pointer_checks_1 PARAMS ((unsigned int *, sbitmap *,
630 sbitmap *,
711d877c
KG
631 struct null_pointer_info *));
632static rtx process_insert_insn PARAMS ((struct expr *));
633static int pre_edge_insert PARAMS ((struct edge_list *, struct expr **));
c4c81601
RK
634static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
635 int, int, char *));
711d877c
KG
636static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
637 int, char *));
7506f491
DE
638\f
639/* Entry point for global common subexpression elimination.
640 F is the first instruction in the function. */
641
e78d9500 642int
7506f491
DE
643gcse_main (f, file)
644 rtx f;
645 FILE *file;
646{
647 int changed, pass;
648 /* Bytes used at start of pass. */
649 int initial_bytes_used;
650 /* Maximum number of bytes used by a pass. */
651 int max_pass_bytes;
652 /* Point to release obstack data from for each pass. */
653 char *gcse_obstack_bottom;
654
b5ce41ff
JL
655 /* We do not construct an accurate cfg in functions which call
656 setjmp, so just punt to be safe. */
7506f491 657 if (current_function_calls_setjmp)
e78d9500 658 return 0;
7506f491 659
b5ce41ff
JL
660 /* Assume that we do not need to run jump optimizations after gcse. */
661 run_jump_opt_after_gcse = 0;
662
7506f491
DE
663 /* For calling dump_foo fns from gdb. */
664 debug_stderr = stderr;
b5ce41ff 665 gcse_file = file;
7506f491 666
b5ce41ff
JL
667 /* Identify the basic block information for this function, including
668 successors and predecessors. */
7506f491 669 max_gcse_regno = max_reg_num ();
7506f491 670
a42cd965
AM
671 if (file)
672 dump_flow_info (file);
673
7506f491
DE
674 /* Return if there's nothing to do. */
675 if (n_basic_blocks <= 1)
a18820c6 676 return 0;
7506f491 677
55f7891b
JL
678 /* Trying to perform global optimizations on flow graphs which have
679 a high connectivity will take a long time and is unlikely to be
680 particularly useful.
681
682 In normal circumstances a cfg should have about twice has many edges
683 as blocks. But we do not want to punish small functions which have
684 a couple switch statements. So we require a relatively large number
685 of basic blocks and the ratio of edges to blocks to be high. */
686 if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
a18820c6 687 return 0;
55f7891b 688
7506f491
DE
689 /* See what modes support reg/reg copy operations. */
690 if (! can_copy_init_p)
691 {
692 compute_can_copy ();
693 can_copy_init_p = 1;
694 }
695
696 gcc_obstack_init (&gcse_obstack);
a42cd965 697 bytes_used = 0;
7506f491 698
c4c81601
RK
699 /* Record where pseudo-registers are set. This data is kept accurate
700 during each pass. ??? We could also record hard-reg information here
701 [since it's unchanging], however it is currently done during hash table
702 computation.
b5ce41ff 703
c4c81601
RK
704 It may be tempting to compute MEM set information here too, but MEM sets
705 will be subject to code motion one day and thus we need to compute
b5ce41ff 706 information about memory sets when we build the hash tables. */
7506f491
DE
707
708 alloc_reg_set_mem (max_gcse_regno);
709 compute_sets (f);
710
711 pass = 0;
712 initial_bytes_used = bytes_used;
713 max_pass_bytes = 0;
714 gcse_obstack_bottom = gcse_alloc (1);
715 changed = 1;
716 while (changed && pass < MAX_PASSES)
717 {
718 changed = 0;
719 if (file)
720 fprintf (file, "GCSE pass %d\n\n", pass + 1);
721
722 /* Initialize bytes_used to the space for the pred/succ lists,
723 and the reg_set_table data. */
724 bytes_used = initial_bytes_used;
725
726 /* Each pass may create new registers, so recalculate each time. */
727 max_gcse_regno = max_reg_num ();
728
729 alloc_gcse_mem (f);
730
b5ce41ff
JL
731 /* Don't allow constant propagation to modify jumps
732 during this pass. */
733 changed = one_cprop_pass (pass + 1, 0);
7506f491
DE
734
735 if (optimize_size)
b5ce41ff 736 changed |= one_classic_gcse_pass (pass + 1);
7506f491 737 else
a42cd965
AM
738 {
739 changed |= one_pre_gcse_pass (pass + 1);
740 free_reg_set_mem ();
741 alloc_reg_set_mem (max_reg_num ());
742 compute_sets (f);
743 run_jump_opt_after_gcse = 1;
744 }
7506f491
DE
745
746 if (max_pass_bytes < bytes_used)
747 max_pass_bytes = bytes_used;
748
bb457bd9
JL
749 /* Free up memory, then reallocate for code hoisting. We can
750 not re-use the existing allocated memory because the tables
751 will not have info for the insns or registers created by
752 partial redundancy elimination. */
7506f491
DE
753 free_gcse_mem ();
754
bb457bd9
JL
755 /* It does not make sense to run code hoisting unless we optimizing
756 for code size -- it rarely makes programs faster, and can make
757 them bigger if we did partial redundancy elimination (when optimizing
758 for space, we use a classic gcse algorithm instead of partial
759 redundancy algorithms). */
760 if (optimize_size)
761 {
762 max_gcse_regno = max_reg_num ();
763 alloc_gcse_mem (f);
764 changed |= one_code_hoisting_pass ();
765 free_gcse_mem ();
766
767 if (max_pass_bytes < bytes_used)
768 max_pass_bytes = bytes_used;
769 }
770
7506f491
DE
771 if (file)
772 {
773 fprintf (file, "\n");
774 fflush (file);
775 }
c4c81601 776
7506f491
DE
777 obstack_free (&gcse_obstack, gcse_obstack_bottom);
778 pass++;
779 }
780
b5ce41ff
JL
781 /* Do one last pass of copy propagation, including cprop into
782 conditional jumps. */
783
784 max_gcse_regno = max_reg_num ();
785 alloc_gcse_mem (f);
786 /* This time, go ahead and allow cprop to alter jumps. */
787 one_cprop_pass (pass + 1, 1);
788 free_gcse_mem ();
7506f491
DE
789
790 if (file)
791 {
792 fprintf (file, "GCSE of %s: %d basic blocks, ",
793 current_function_name, n_basic_blocks);
794 fprintf (file, "%d pass%s, %d bytes\n\n",
795 pass, pass > 1 ? "es" : "", max_pass_bytes);
796 }
797
7506f491 798 obstack_free (&gcse_obstack, NULL_PTR);
7506f491 799 free_reg_set_mem ();
e78d9500 800 return run_jump_opt_after_gcse;
7506f491
DE
801}
802\f
803/* Misc. utilities. */
804
805/* Compute which modes support reg/reg copy operations. */
806
807static void
808compute_can_copy ()
809{
810 int i;
50b2596f 811#ifndef AVOID_CCMODE_COPIES
7506f491 812 rtx reg,insn;
50b2596f 813#endif
7506f491
DE
814 char *free_point = (char *) oballoc (1);
815
816 bzero (can_copy_p, NUM_MACHINE_MODES);
817
818 start_sequence ();
819 for (i = 0; i < NUM_MACHINE_MODES; i++)
c4c81601
RK
820 if (GET_MODE_CLASS (i) == MODE_CC)
821 {
7506f491 822#ifdef AVOID_CCMODE_COPIES
c4c81601 823 can_copy_p[i] = 0;
7506f491 824#else
c4c81601
RK
825 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
826 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
827 if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
828 can_copy_p[i] = 1;
7506f491 829#endif
c4c81601 830 }
141b5810
AO
831 else
832 can_copy_p[i] = 1;
c4c81601 833
7506f491
DE
834 end_sequence ();
835
836 /* Free the objects we just allocated. */
837 obfree (free_point);
838}
839\f
840/* Cover function to xmalloc to record bytes allocated. */
841
842static char *
843gmalloc (size)
844 unsigned int size;
845{
846 bytes_used += size;
847 return xmalloc (size);
848}
849
850/* Cover function to xrealloc.
851 We don't record the additional size since we don't know it.
852 It won't affect memory usage stats much anyway. */
853
854static char *
855grealloc (ptr, size)
856 char *ptr;
857 unsigned int size;
858{
859 return xrealloc (ptr, size);
860}
861
862/* Cover function to obstack_alloc.
863 We don't need to record the bytes allocated here since
864 obstack_chunk_alloc is set to gmalloc. */
865
866static char *
867gcse_alloc (size)
868 unsigned long size;
869{
870 return (char *) obstack_alloc (&gcse_obstack, size);
871}
872
873/* Allocate memory for the cuid mapping array,
874 and reg/memory set tracking tables.
875
876 This is called at the start of each pass. */
877
878static void
879alloc_gcse_mem (f)
880 rtx f;
881{
882 int i,n;
883 rtx insn;
884
885 /* Find the largest UID and create a mapping from UIDs to CUIDs.
886 CUIDs are like UIDs except they increase monotonically, have no gaps,
887 and only apply to real insns. */
888
889 max_uid = get_max_uid ();
890 n = (max_uid + 1) * sizeof (int);
891 uid_cuid = (int *) gmalloc (n);
892 bzero ((char *) uid_cuid, n);
893 for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
894 {
895 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
b86db3eb 896 uid_cuid[INSN_UID (insn)] = i++;
7506f491 897 else
b86db3eb 898 uid_cuid[INSN_UID (insn)] = i;
7506f491
DE
899 }
900
901 /* Create a table mapping cuids to insns. */
902
903 max_cuid = i;
904 n = (max_cuid + 1) * sizeof (rtx);
905 cuid_insn = (rtx *) gmalloc (n);
906 bzero ((char *) cuid_insn, n);
907 for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
c4c81601
RK
908 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
909 CUID_INSN (i++) = insn;
7506f491
DE
910
911 /* Allocate vars to track sets of regs. */
7506f491
DE
912 reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
913
914 /* Allocate vars to track sets of regs, memory per block. */
7506f491
DE
915 reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
916 max_gcse_regno);
917 mem_set_in_block = (char *) gmalloc (n_basic_blocks);
918}
919
920/* Free memory allocated by alloc_gcse_mem. */
921
922static void
923free_gcse_mem ()
924{
925 free (uid_cuid);
926 free (cuid_insn);
927
928 free (reg_set_bitmap);
929
930 free (reg_set_in_block);
931 free (mem_set_in_block);
932}
933
0511851c
MM
934/* Many of the global optimization algorithms work by solving dataflow
935 equations for various expressions. Initially, some local value is
c4c81601
RK
936 computed for each expression in each block. Then, the values across the
937 various blocks are combined (by following flow graph edges) to arrive at
938 global values. Conceptually, each set of equations is independent. We
939 may therefore solve all the equations in parallel, solve them one at a
940 time, or pick any intermediate approach.
941
942 When you're going to need N two-dimensional bitmaps, each X (say, the
943 number of blocks) by Y (say, the number of expressions), call this
944 function. It's not important what X and Y represent; only that Y
945 correspond to the things that can be done in parallel. This function will
946 return an appropriate chunking factor C; you should solve C sets of
947 equations in parallel. By going through this function, we can easily
948 trade space against time; by solving fewer equations in parallel we use
949 less space. */
0511851c
MM
950
951static int
952get_bitmap_width (n, x, y)
953 int n;
954 int x;
955 int y;
956{
957 /* It's not really worth figuring out *exactly* how much memory will
958 be used by a particular choice. The important thing is to get
959 something approximately right. */
960 size_t max_bitmap_memory = 10 * 1024 * 1024;
961
962 /* The number of bytes we'd use for a single column of minimum
963 width. */
964 size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
965
966 /* Often, it's reasonable just to solve all the equations in
967 parallel. */
968 if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
969 return y;
970
971 /* Otherwise, pick the largest width we can, without going over the
972 limit. */
973 return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
974 / column_size);
975}
b5ce41ff
JL
976\f
977/* Compute the local properties of each recorded expression.
c4c81601
RK
978
979 Local properties are those that are defined by the block, irrespective of
980 other blocks.
b5ce41ff
JL
981
982 An expression is transparent in a block if its operands are not modified
983 in the block.
984
985 An expression is computed (locally available) in a block if it is computed
986 at least once and expression would contain the same value if the
987 computation was moved to the end of the block.
988
989 An expression is locally anticipatable in a block if it is computed at
990 least once and expression would contain the same value if the computation
991 was moved to the beginning of the block.
992
c4c81601
RK
993 We call this routine for cprop, pre and code hoisting. They all compute
994 basically the same information and thus can easily share this code.
7506f491 995
c4c81601
RK
996 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
997 properties. If NULL, then it is not necessary to compute or record that
998 particular property.
b5ce41ff 999
c4c81601
RK
1000 SETP controls which hash table to look at. If zero, this routine looks at
1001 the expr hash table; if nonzero this routine looks at the set hash table.
1002 Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
1003 ABSALTERED. */
b5ce41ff
JL
1004
1005static void
1006compute_local_properties (transp, comp, antloc, setp)
1007 sbitmap *transp;
1008 sbitmap *comp;
1009 sbitmap *antloc;
1010 int setp;
1011{
2e653e39 1012 unsigned int i, hash_table_size;
b5ce41ff
JL
1013 struct expr **hash_table;
1014
1015 /* Initialize any bitmaps that were passed in. */
1016 if (transp)
695ab36a
BS
1017 {
1018 if (setp)
1019 sbitmap_vector_zero (transp, n_basic_blocks);
1020 else
1021 sbitmap_vector_ones (transp, n_basic_blocks);
1022 }
c4c81601 1023
b5ce41ff
JL
1024 if (comp)
1025 sbitmap_vector_zero (comp, n_basic_blocks);
1026 if (antloc)
1027 sbitmap_vector_zero (antloc, n_basic_blocks);
1028
1029 /* We use the same code for cprop, pre and hoisting. For cprop
1030 we care about the set hash table, for pre and hoisting we
1031 care about the expr hash table. */
1032 hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1033 hash_table = setp ? set_hash_table : expr_hash_table;
1034
1035 for (i = 0; i < hash_table_size; i++)
7506f491 1036 {
b5ce41ff
JL
1037 struct expr *expr;
1038
1039 for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1040 {
b5ce41ff 1041 int indx = expr->bitmap_index;
c4c81601 1042 struct occr *occr;
b5ce41ff
JL
1043
1044 /* The expression is transparent in this block if it is not killed.
1045 We start by assuming all are transparent [none are killed], and
1046 then reset the bits for those that are. */
b5ce41ff
JL
1047 if (transp)
1048 compute_transp (expr->expr, indx, transp, setp);
1049
1050 /* The occurrences recorded in antic_occr are exactly those that
1051 we want to set to non-zero in ANTLOC. */
b5ce41ff 1052 if (antloc)
c4c81601
RK
1053 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1054 {
1055 SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);
b5ce41ff 1056
c4c81601
RK
1057 /* While we're scanning the table, this is a good place to
1058 initialize this. */
1059 occr->deleted_p = 0;
1060 }
b5ce41ff
JL
1061
1062 /* The occurrences recorded in avail_occr are exactly those that
1063 we want to set to non-zero in COMP. */
1064 if (comp)
c4c81601
RK
1065 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1066 {
1067 SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);
b5ce41ff 1068
c4c81601
RK
1069 /* While we're scanning the table, this is a good place to
1070 initialize this. */
1071 occr->copied_p = 0;
1072 }
b5ce41ff
JL
1073
1074 /* While we're scanning the table, this is a good place to
1075 initialize this. */
1076 expr->reaching_reg = 0;
1077 }
7506f491 1078 }
7506f491
DE
1079}
1080\f
1081/* Register set information.
1082
1083 `reg_set_table' records where each register is set or otherwise
1084 modified. */
1085
1086static struct obstack reg_set_obstack;
1087
1088static void
1089alloc_reg_set_mem (n_regs)
1090 int n_regs;
1091{
c4c81601 1092 unsigned int n;
7506f491
DE
1093
1094 reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1095 n = reg_set_table_size * sizeof (struct reg_set *);
1096 reg_set_table = (struct reg_set **) gmalloc (n);
1097 bzero ((char *) reg_set_table, n);
1098
1099 gcc_obstack_init (&reg_set_obstack);
1100}
1101
1102static void
1103free_reg_set_mem ()
1104{
1105 free (reg_set_table);
1106 obstack_free (&reg_set_obstack, NULL_PTR);
1107}
1108
1109/* Record REGNO in the reg_set table. */
1110
1111static void
1112record_one_set (regno, insn)
1113 int regno;
1114 rtx insn;
1115{
1116 /* allocate a new reg_set element and link it onto the list */
63bc1d05 1117 struct reg_set *new_reg_info;
7506f491
DE
1118
1119 /* If the table isn't big enough, enlarge it. */
1120 if (regno >= reg_set_table_size)
1121 {
1122 int new_size = regno + REG_SET_TABLE_SLOP;
c4c81601
RK
1123
1124 reg_set_table
1125 = (struct reg_set **) grealloc ((char *) reg_set_table,
1126 new_size * sizeof (struct reg_set *));
7506f491
DE
1127 bzero ((char *) (reg_set_table + reg_set_table_size),
1128 (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1129 reg_set_table_size = new_size;
1130 }
1131
1132 new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
1133 sizeof (struct reg_set));
1134 bytes_used += sizeof (struct reg_set);
1135 new_reg_info->insn = insn;
274969ea
MM
1136 new_reg_info->next = reg_set_table[regno];
1137 reg_set_table[regno] = new_reg_info;
7506f491
DE
1138}
1139
c4c81601
RK
1140/* Called from compute_sets via note_stores to handle one SET or CLOBBER in
1141 an insn. The DATA is really the instruction in which the SET is
1142 occurring. */
7506f491
DE
1143
1144static void
84832317 1145record_set_info (dest, setter, data)
50b2596f 1146 rtx dest, setter ATTRIBUTE_UNUSED;
84832317 1147 void *data;
7506f491 1148{
84832317
MM
1149 rtx record_set_insn = (rtx) data;
1150
c4c81601
RK
1151 if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1152 record_one_set (REGNO (dest), record_set_insn);
7506f491
DE
1153}
1154
1155/* Scan the function and record each set of each pseudo-register.
1156
c4c81601
RK
1157 This is called once, at the start of the gcse pass. See the comments for
1158 `reg_set_table' for further documenation. */
7506f491
DE
1159
1160static void
1161compute_sets (f)
1162 rtx f;
1163{
c4c81601 1164 rtx insn;
7506f491 1165
c4c81601
RK
1166 for (insn = f; insn != 0; insn = NEXT_INSN (insn))
1167 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
1168 note_stores (PATTERN (insn), record_set_info, insn);
7506f491
DE
1169}
1170\f
1171/* Hash table support. */
1172
1173/* For each register, the cuid of the first/last insn in the block to set it,
e7d99f1e 1174 or -1 if not set. */
c4c81601 1175#define NEVER_SET -1
7506f491
DE
1176static int *reg_first_set;
1177static int *reg_last_set;
1178
1179/* While computing "first/last set" info, this is the CUID of first/last insn
e7d99f1e 1180 to set memory or -1 if not set. `mem_last_set' is also used when
7506f491
DE
1181 performing GCSE to record whether memory has been set since the beginning
1182 of the block.
c4c81601 1183
7506f491
DE
1184 Note that handling of memory is very simple, we don't make any attempt
1185 to optimize things (later). */
1186static int mem_first_set;
1187static int mem_last_set;
1188
7506f491
DE
1189/* Perform a quick check whether X, the source of a set, is something
1190 we want to consider for GCSE. */
1191
1192static int
1193want_to_gcse_p (x)
1194 rtx x;
1195{
c4c81601 1196 switch (GET_CODE (x))
7506f491
DE
1197 {
1198 case REG:
1199 case SUBREG:
1200 case CONST_INT:
1201 case CONST_DOUBLE:
1202 case CALL:
1203 return 0;
1204
1205 default:
1206 break;
1207 }
1208
1209 return 1;
1210}
1211
1212/* Return non-zero if the operands of expression X are unchanged from the
1213 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1214 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
1215
1216static int
1217oprs_unchanged_p (x, insn, avail_p)
1218 rtx x, insn;
1219 int avail_p;
1220{
c4c81601 1221 int i, j;
7506f491 1222 enum rtx_code code;
6f7d635c 1223 const char *fmt;
7506f491 1224
7506f491
DE
1225 if (x == 0)
1226 return 1;
1227
1228 code = GET_CODE (x);
1229 switch (code)
1230 {
1231 case REG:
1232 if (avail_p)
b86ba9c8 1233 return (reg_last_set[REGNO (x)] == NEVER_SET
7506f491
DE
1234 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1235 else
b86ba9c8 1236 return (reg_first_set[REGNO (x)] == NEVER_SET
7506f491
DE
1237 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
1238
1239 case MEM:
c4c81601
RK
1240 if (avail_p && mem_last_set != NEVER_SET
1241 && mem_last_set >= INSN_CUID (insn))
1242 return 0;
1243 else if (! avail_p && mem_first_set != NEVER_SET
1244 && mem_first_set < INSN_CUID (insn))
1245 return 0;
7506f491 1246 else
c4c81601 1247 return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
7506f491
DE
1248
1249 case PRE_DEC:
1250 case PRE_INC:
1251 case POST_DEC:
1252 case POST_INC:
1253 return 0;
1254
1255 case PC:
1256 case CC0: /*FIXME*/
1257 case CONST:
1258 case CONST_INT:
1259 case CONST_DOUBLE:
1260 case SYMBOL_REF:
1261 case LABEL_REF:
1262 case ADDR_VEC:
1263 case ADDR_DIFF_VEC:
1264 return 1;
1265
1266 default:
1267 break;
1268 }
1269
c4c81601 1270 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
7506f491
DE
1271 {
1272 if (fmt[i] == 'e')
1273 {
c4c81601
RK
1274 /* If we are about to do the last recursive call needed at this
1275 level, change it into iteration. This function is called enough
1276 to be worth it. */
7506f491 1277 if (i == 0)
c4c81601
RK
1278 return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
1279
1280 else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
7506f491
DE
1281 return 0;
1282 }
1283 else if (fmt[i] == 'E')
c4c81601
RK
1284 for (j = 0; j < XVECLEN (x, i); j++)
1285 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1286 return 0;
7506f491
DE
1287 }
1288
1289 return 1;
1290}
1291
1292/* Return non-zero if the operands of expression X are unchanged from
1293 the start of INSN's basic block up to but not including INSN. */
1294
1295static int
1296oprs_anticipatable_p (x, insn)
1297 rtx x, insn;
1298{
1299 return oprs_unchanged_p (x, insn, 0);
1300}
1301
1302/* Return non-zero if the operands of expression X are unchanged from
1303 INSN to the end of INSN's basic block. */
1304
1305static int
1306oprs_available_p (x, insn)
1307 rtx x, insn;
1308{
1309 return oprs_unchanged_p (x, insn, 1);
1310}
1311
1312/* Hash expression X.
c4c81601
RK
1313
1314 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1315 indicating if a volatile operand is found or if the expression contains
1316 something we don't want to insert in the table.
7506f491
DE
1317
1318 ??? One might want to merge this with canon_hash. Later. */
1319
1320static unsigned int
1321hash_expr (x, mode, do_not_record_p, hash_table_size)
1322 rtx x;
1323 enum machine_mode mode;
1324 int *do_not_record_p;
1325 int hash_table_size;
1326{
1327 unsigned int hash;
1328
1329 *do_not_record_p = 0;
1330
1331 hash = hash_expr_1 (x, mode, do_not_record_p);
1332 return hash % hash_table_size;
1333}
1334
1335/* Subroutine of hash_expr to do the actual work. */
1336
1337static unsigned int
1338hash_expr_1 (x, mode, do_not_record_p)
1339 rtx x;
1340 enum machine_mode mode;
1341 int *do_not_record_p;
1342{
1343 int i, j;
1344 unsigned hash = 0;
1345 enum rtx_code code;
6f7d635c 1346 const char *fmt;
7506f491 1347
c4c81601
RK
1348 /* Used to turn recursion into iteration. We can't rely on GCC's
1349 tail-recursion eliminatio since we need to keep accumulating values
1350 in HASH. */
7506f491
DE
1351
1352 if (x == 0)
1353 return hash;
1354
c4c81601 1355 repeat:
7506f491
DE
1356 code = GET_CODE (x);
1357 switch (code)
1358 {
1359 case REG:
c4c81601
RK
1360 hash += ((unsigned int) REG << 7) + REGNO (x);
1361 return hash;
7506f491
DE
1362
1363 case CONST_INT:
c4c81601
RK
1364 hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
1365 + (unsigned int) INTVAL (x));
1366 return hash;
7506f491
DE
1367
1368 case CONST_DOUBLE:
1369 /* This is like the general case, except that it only counts
1370 the integers representing the constant. */
c4c81601 1371 hash += (unsigned int) code + (unsigned int) GET_MODE (x);
7506f491
DE
1372 if (GET_MODE (x) != VOIDmode)
1373 for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
c4c81601 1374 hash += (unsigned int) XWINT (x, i);
7506f491 1375 else
c4c81601
RK
1376 hash += ((unsigned int) CONST_DOUBLE_LOW (x)
1377 + (unsigned int) CONST_DOUBLE_HIGH (x));
7506f491
DE
1378 return hash;
1379
1380 /* Assume there is only one rtx object for any given label. */
1381 case LABEL_REF:
1382 /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1383 differences and differences between each stage's debugging dumps. */
c4c81601
RK
1384 hash += (((unsigned int) LABEL_REF << 7)
1385 + CODE_LABEL_NUMBER (XEXP (x, 0)));
7506f491
DE
1386 return hash;
1387
1388 case SYMBOL_REF:
1389 {
1390 /* Don't hash on the symbol's address to avoid bootstrap differences.
1391 Different hash values may cause expressions to be recorded in
1392 different orders and thus different registers to be used in the
1393 final assembler. This also avoids differences in the dump files
1394 between various stages. */
1395 unsigned int h = 0;
3cce094d 1396 const unsigned char *p = (const unsigned char *) XSTR (x, 0);
c4c81601 1397
7506f491
DE
1398 while (*p)
1399 h += (h << 7) + *p++; /* ??? revisit */
c4c81601
RK
1400
1401 hash += ((unsigned int) SYMBOL_REF << 7) + h;
7506f491
DE
1402 return hash;
1403 }
1404
1405 case MEM:
1406 if (MEM_VOLATILE_P (x))
1407 {
1408 *do_not_record_p = 1;
1409 return 0;
1410 }
c4c81601
RK
1411
1412 hash += (unsigned int) MEM;
297c3335 1413 hash += MEM_ALIAS_SET (x);
7506f491
DE
1414 x = XEXP (x, 0);
1415 goto repeat;
1416
1417 case PRE_DEC:
1418 case PRE_INC:
1419 case POST_DEC:
1420 case POST_INC:
1421 case PC:
1422 case CC0:
1423 case CALL:
1424 case UNSPEC_VOLATILE:
1425 *do_not_record_p = 1;
1426 return 0;
1427
1428 case ASM_OPERANDS:
1429 if (MEM_VOLATILE_P (x))
1430 {
1431 *do_not_record_p = 1;
1432 return 0;
1433 }
1434
1435 default:
1436 break;
1437 }
1438
7506f491 1439 hash += (unsigned) code + (unsigned) GET_MODE (x);
c4c81601 1440 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
7506f491
DE
1441 {
1442 if (fmt[i] == 'e')
1443 {
7506f491
DE
1444 /* If we are about to do the last recursive call
1445 needed at this level, change it into iteration.
1446 This function is called enough to be worth it. */
1447 if (i == 0)
1448 {
c4c81601 1449 x = XEXP (x, i);
7506f491
DE
1450 goto repeat;
1451 }
c4c81601
RK
1452
1453 hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
7506f491
DE
1454 if (*do_not_record_p)
1455 return 0;
1456 }
c4c81601 1457
7506f491
DE
1458 else if (fmt[i] == 'E')
1459 for (j = 0; j < XVECLEN (x, i); j++)
1460 {
1461 hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1462 if (*do_not_record_p)
1463 return 0;
1464 }
c4c81601 1465
7506f491
DE
1466 else if (fmt[i] == 's')
1467 {
3cce094d
KG
1468 register const unsigned char *p =
1469 (const unsigned char *) XSTR (x, i);
c4c81601 1470
7506f491
DE
1471 if (p)
1472 while (*p)
1473 hash += *p++;
1474 }
1475 else if (fmt[i] == 'i')
c4c81601 1476 hash += (unsigned int) XINT (x, i);
7506f491
DE
1477 else
1478 abort ();
1479 }
1480
1481 return hash;
1482}
1483
1484/* Hash a set of register REGNO.
1485
c4c81601
RK
1486 Sets are hashed on the register that is set. This simplifies the PRE copy
1487 propagation code.
7506f491
DE
1488
1489 ??? May need to make things more elaborate. Later, as necessary. */
1490
1491static unsigned int
1492hash_set (regno, hash_table_size)
1493 int regno;
1494 int hash_table_size;
1495{
1496 unsigned int hash;
1497
1498 hash = regno;
1499 return hash % hash_table_size;
1500}
1501
1502/* Return non-zero if exp1 is equivalent to exp2.
1503 ??? Borrowed from cse.c. Might want to remerge with cse.c. Later. */
1504
1505static int
1506expr_equiv_p (x, y)
1507 rtx x, y;
1508{
1509 register int i, j;
1510 register enum rtx_code code;
6f7d635c 1511 register const char *fmt;
7506f491
DE
1512
1513 if (x == y)
1514 return 1;
c4c81601 1515
7506f491
DE
1516 if (x == 0 || y == 0)
1517 return x == y;
1518
1519 code = GET_CODE (x);
1520 if (code != GET_CODE (y))
1521 return 0;
1522
1523 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent. */
1524 if (GET_MODE (x) != GET_MODE (y))
1525 return 0;
1526
1527 switch (code)
1528 {
1529 case PC:
1530 case CC0:
1531 return x == y;
1532
1533 case CONST_INT:
1534 return INTVAL (x) == INTVAL (y);
1535
1536 case LABEL_REF:
1537 return XEXP (x, 0) == XEXP (y, 0);
1538
1539 case SYMBOL_REF:
1540 return XSTR (x, 0) == XSTR (y, 0);
1541
1542 case REG:
1543 return REGNO (x) == REGNO (y);
1544
297c3335
RH
1545 case MEM:
1546 /* Can't merge two expressions in different alias sets, since we can
1547 decide that the expression is transparent in a block when it isn't,
1548 due to it being set with the different alias set. */
1549 if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1550 return 0;
1551 break;
1552
7506f491
DE
1553 /* For commutative operations, check both orders. */
1554 case PLUS:
1555 case MULT:
1556 case AND:
1557 case IOR:
1558 case XOR:
1559 case NE:
1560 case EQ:
1561 return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1562 && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1563 || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1564 && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1565
1566 default:
1567 break;
1568 }
1569
1570 /* Compare the elements. If any pair of corresponding elements
1571 fail to match, return 0 for the whole thing. */
1572
1573 fmt = GET_RTX_FORMAT (code);
1574 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1575 {
1576 switch (fmt[i])
1577 {
1578 case 'e':
1579 if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1580 return 0;
1581 break;
1582
1583 case 'E':
1584 if (XVECLEN (x, i) != XVECLEN (y, i))
1585 return 0;
1586 for (j = 0; j < XVECLEN (x, i); j++)
1587 if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1588 return 0;
1589 break;
1590
1591 case 's':
1592 if (strcmp (XSTR (x, i), XSTR (y, i)))
1593 return 0;
1594 break;
1595
1596 case 'i':
1597 if (XINT (x, i) != XINT (y, i))
1598 return 0;
1599 break;
1600
1601 case 'w':
1602 if (XWINT (x, i) != XWINT (y, i))
1603 return 0;
1604 break;
1605
1606 case '0':
1607 break;
1608
1609 default:
1610 abort ();
1611 }
1612 }
1613
1614 return 1;
1615}
1616
1617/* Insert expression X in INSN in the hash table.
1618 If it is already present, record it as the last occurrence in INSN's
1619 basic block.
1620
1621 MODE is the mode of the value X is being stored into.
1622 It is only used if X is a CONST_INT.
1623
1624 ANTIC_P is non-zero if X is an anticipatable expression.
1625 AVAIL_P is non-zero if X is an available expression. */
1626
1627static void
1628insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1629 rtx x;
1630 enum machine_mode mode;
1631 rtx insn;
1632 int antic_p, avail_p;
1633{
1634 int found, do_not_record_p;
1635 unsigned int hash;
1636 struct expr *cur_expr, *last_expr = NULL;
1637 struct occr *antic_occr, *avail_occr;
1638 struct occr *last_occr = NULL;
1639
1640 hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1641
1642 /* Do not insert expression in table if it contains volatile operands,
1643 or if hash_expr determines the expression is something we don't want
1644 to or can't handle. */
1645 if (do_not_record_p)
1646 return;
1647
1648 cur_expr = expr_hash_table[hash];
1649 found = 0;
1650
c4c81601 1651 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
7506f491
DE
1652 {
1653 /* If the expression isn't found, save a pointer to the end of
1654 the list. */
1655 last_expr = cur_expr;
1656 cur_expr = cur_expr->next_same_hash;
1657 }
1658
1659 if (! found)
1660 {
1661 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1662 bytes_used += sizeof (struct expr);
1663 if (expr_hash_table[hash] == NULL)
c4c81601
RK
1664 /* This is the first pattern that hashed to this index. */
1665 expr_hash_table[hash] = cur_expr;
7506f491 1666 else
c4c81601
RK
1667 /* Add EXPR to end of this hash chain. */
1668 last_expr->next_same_hash = cur_expr;
1669
7506f491
DE
1670 /* Set the fields of the expr element. */
1671 cur_expr->expr = x;
1672 cur_expr->bitmap_index = n_exprs++;
1673 cur_expr->next_same_hash = NULL;
1674 cur_expr->antic_occr = NULL;
1675 cur_expr->avail_occr = NULL;
1676 }
1677
1678 /* Now record the occurrence(s). */
7506f491
DE
1679 if (antic_p)
1680 {
1681 antic_occr = cur_expr->antic_occr;
1682
1683 /* Search for another occurrence in the same basic block. */
1684 while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1685 {
1686 /* If an occurrence isn't found, save a pointer to the end of
1687 the list. */
1688 last_occr = antic_occr;
1689 antic_occr = antic_occr->next;
1690 }
1691
1692 if (antic_occr)
c4c81601
RK
1693 /* Found another instance of the expression in the same basic block.
1694 Prefer the currently recorded one. We want the first one in the
1695 block and the block is scanned from start to end. */
1696 ; /* nothing to do */
7506f491
DE
1697 else
1698 {
1699 /* First occurrence of this expression in this basic block. */
1700 antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1701 bytes_used += sizeof (struct occr);
1702 /* First occurrence of this expression in any block? */
1703 if (cur_expr->antic_occr == NULL)
1704 cur_expr->antic_occr = antic_occr;
1705 else
1706 last_occr->next = antic_occr;
c4c81601 1707
7506f491
DE
1708 antic_occr->insn = insn;
1709 antic_occr->next = NULL;
1710 }
1711 }
1712
1713 if (avail_p)
1714 {
1715 avail_occr = cur_expr->avail_occr;
1716
1717 /* Search for another occurrence in the same basic block. */
1718 while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1719 {
1720 /* If an occurrence isn't found, save a pointer to the end of
1721 the list. */
1722 last_occr = avail_occr;
1723 avail_occr = avail_occr->next;
1724 }
1725
1726 if (avail_occr)
c4c81601
RK
1727 /* Found another instance of the expression in the same basic block.
1728 Prefer this occurrence to the currently recorded one. We want
1729 the last one in the block and the block is scanned from start
1730 to end. */
1731 avail_occr->insn = insn;
7506f491
DE
1732 else
1733 {
1734 /* First occurrence of this expression in this basic block. */
1735 avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1736 bytes_used += sizeof (struct occr);
c4c81601 1737
7506f491
DE
1738 /* First occurrence of this expression in any block? */
1739 if (cur_expr->avail_occr == NULL)
1740 cur_expr->avail_occr = avail_occr;
1741 else
1742 last_occr->next = avail_occr;
c4c81601 1743
7506f491
DE
1744 avail_occr->insn = insn;
1745 avail_occr->next = NULL;
1746 }
1747 }
1748}
1749
1750/* Insert pattern X in INSN in the hash table.
1751 X is a SET of a reg to either another reg or a constant.
1752 If it is already present, record it as the last occurrence in INSN's
1753 basic block. */
1754
1755static void
1756insert_set_in_table (x, insn)
1757 rtx x;
1758 rtx insn;
1759{
1760 int found;
1761 unsigned int hash;
1762 struct expr *cur_expr, *last_expr = NULL;
1763 struct occr *cur_occr, *last_occr = NULL;
1764
1765 if (GET_CODE (x) != SET
1766 || GET_CODE (SET_DEST (x)) != REG)
1767 abort ();
1768
1769 hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1770
1771 cur_expr = set_hash_table[hash];
1772 found = 0;
1773
c4c81601 1774 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
7506f491
DE
1775 {
1776 /* If the expression isn't found, save a pointer to the end of
1777 the list. */
1778 last_expr = cur_expr;
1779 cur_expr = cur_expr->next_same_hash;
1780 }
1781
1782 if (! found)
1783 {
1784 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1785 bytes_used += sizeof (struct expr);
1786 if (set_hash_table[hash] == NULL)
c4c81601
RK
1787 /* This is the first pattern that hashed to this index. */
1788 set_hash_table[hash] = cur_expr;
7506f491 1789 else
c4c81601
RK
1790 /* Add EXPR to end of this hash chain. */
1791 last_expr->next_same_hash = cur_expr;
1792
7506f491
DE
1793 /* Set the fields of the expr element.
1794 We must copy X because it can be modified when copy propagation is
1795 performed on its operands. */
1796 /* ??? Should this go in a different obstack? */
1797 cur_expr->expr = copy_rtx (x);
1798 cur_expr->bitmap_index = n_sets++;
1799 cur_expr->next_same_hash = NULL;
1800 cur_expr->antic_occr = NULL;
1801 cur_expr->avail_occr = NULL;
1802 }
1803
1804 /* Now record the occurrence. */
7506f491
DE
1805 cur_occr = cur_expr->avail_occr;
1806
1807 /* Search for another occurrence in the same basic block. */
1808 while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1809 {
1810 /* If an occurrence isn't found, save a pointer to the end of
1811 the list. */
1812 last_occr = cur_occr;
1813 cur_occr = cur_occr->next;
1814 }
1815
1816 if (cur_occr)
c4c81601
RK
1817 /* Found another instance of the expression in the same basic block.
1818 Prefer this occurrence to the currently recorded one. We want the
1819 last one in the block and the block is scanned from start to end. */
1820 cur_occr->insn = insn;
7506f491
DE
1821 else
1822 {
1823 /* First occurrence of this expression in this basic block. */
1824 cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1825 bytes_used += sizeof (struct occr);
c4c81601 1826
7506f491
DE
1827 /* First occurrence of this expression in any block? */
1828 if (cur_expr->avail_occr == NULL)
1829 cur_expr->avail_occr = cur_occr;
1830 else
1831 last_occr->next = cur_occr;
c4c81601 1832
7506f491
DE
1833 cur_occr->insn = insn;
1834 cur_occr->next = NULL;
1835 }
1836}
1837
c4c81601
RK
1838/* Scan pattern PAT of INSN and add an entry to the hash table. If SET_P is
1839 non-zero, this is for the assignment hash table, otherwise it is for the
1840 expression hash table. */
7506f491
DE
1841
1842static void
1843hash_scan_set (pat, insn, set_p)
1844 rtx pat, insn;
1845 int set_p;
1846{
1847 rtx src = SET_SRC (pat);
1848 rtx dest = SET_DEST (pat);
1849
1850 if (GET_CODE (src) == CALL)
1851 hash_scan_call (src, insn);
1852
1853 if (GET_CODE (dest) == REG)
1854 {
1855 int regno = REGNO (dest);
1856 rtx tmp;
1857
1858 /* Only record sets of pseudo-regs in the hash table. */
1859 if (! set_p
1860 && regno >= FIRST_PSEUDO_REGISTER
1861 /* Don't GCSE something if we can't do a reg/reg copy. */
1862 && can_copy_p [GET_MODE (dest)]
1863 /* Is SET_SRC something we want to gcse? */
1864 && want_to_gcse_p (src))
1865 {
1866 /* An expression is not anticipatable if its operands are
1867 modified before this insn. */
3cce638b 1868 int antic_p = oprs_anticipatable_p (src, insn);
7506f491
DE
1869 /* An expression is not available if its operands are
1870 subsequently modified, including this insn. */
1871 int avail_p = oprs_available_p (src, insn);
c4c81601 1872
7506f491
DE
1873 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1874 }
c4c81601 1875
7506f491
DE
1876 /* Record sets for constant/copy propagation. */
1877 else if (set_p
1878 && regno >= FIRST_PSEUDO_REGISTER
1879 && ((GET_CODE (src) == REG
1880 && REGNO (src) >= FIRST_PSEUDO_REGISTER
1881 && can_copy_p [GET_MODE (dest)])
e78d9500 1882 || GET_CODE (src) == CONST_INT
05f6f07c 1883 || GET_CODE (src) == SYMBOL_REF
e78d9500 1884 || GET_CODE (src) == CONST_DOUBLE)
7506f491
DE
1885 /* A copy is not available if its src or dest is subsequently
1886 modified. Here we want to search from INSN+1 on, but
1887 oprs_available_p searches from INSN on. */
1888 && (insn == BLOCK_END (BLOCK_NUM (insn))
1889 || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1890 && oprs_available_p (pat, tmp))))
1891 insert_set_in_table (pat, insn);
1892 }
7506f491
DE
1893}
1894
1895static void
1896hash_scan_clobber (x, insn)
50b2596f 1897 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
7506f491
DE
1898{
1899 /* Currently nothing to do. */
1900}
1901
1902static void
1903hash_scan_call (x, insn)
50b2596f 1904 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
7506f491
DE
1905{
1906 /* Currently nothing to do. */
1907}
1908
1909/* Process INSN and add hash table entries as appropriate.
1910
1911 Only available expressions that set a single pseudo-reg are recorded.
1912
1913 Single sets in a PARALLEL could be handled, but it's an extra complication
1914 that isn't dealt with right now. The trick is handling the CLOBBERs that
1915 are also in the PARALLEL. Later.
1916
1917 If SET_P is non-zero, this is for the assignment hash table,
ed79bb3d
R
1918 otherwise it is for the expression hash table.
1919 If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
1920 not record any expressions. */
7506f491
DE
1921
1922static void
ed79bb3d 1923hash_scan_insn (insn, set_p, in_libcall_block)
7506f491
DE
1924 rtx insn;
1925 int set_p;
48e87cef 1926 int in_libcall_block;
7506f491
DE
1927{
1928 rtx pat = PATTERN (insn);
c4c81601 1929 int i;
7506f491
DE
1930
1931 /* Pick out the sets of INSN and for other forms of instructions record
1932 what's been modified. */
1933
ed79bb3d 1934 if (GET_CODE (pat) == SET && ! in_libcall_block)
21e3a717
BS
1935 {
1936 /* Ignore obvious no-ops. */
1937 if (SET_SRC (pat) != SET_DEST (pat))
1938 hash_scan_set (pat, insn, set_p);
1939 }
7506f491 1940 else if (GET_CODE (pat) == PARALLEL)
c4c81601
RK
1941 for (i = 0; i < XVECLEN (pat, 0); i++)
1942 {
1943 rtx x = XVECEXP (pat, 0, i);
7506f491 1944
c4c81601
RK
1945 if (GET_CODE (x) == SET)
1946 {
1947 if (GET_CODE (SET_SRC (x)) == CALL)
1948 hash_scan_call (SET_SRC (x), insn);
1949 }
1950 else if (GET_CODE (x) == CLOBBER)
1951 hash_scan_clobber (x, insn);
1952 else if (GET_CODE (x) == CALL)
1953 hash_scan_call (x, insn);
1954 }
7506f491 1955
7506f491
DE
1956 else if (GET_CODE (pat) == CLOBBER)
1957 hash_scan_clobber (pat, insn);
1958 else if (GET_CODE (pat) == CALL)
1959 hash_scan_call (pat, insn);
1960}
1961
1962static void
1963dump_hash_table (file, name, table, table_size, total_size)
1964 FILE *file;
dff01034 1965 const char *name;
7506f491
DE
1966 struct expr **table;
1967 int table_size, total_size;
1968{
1969 int i;
1970 /* Flattened out table, so it's printed in proper order. */
4da896b2
MM
1971 struct expr **flat_table;
1972 unsigned int *hash_val;
c4c81601 1973 struct expr *expr;
4da896b2
MM
1974
1975 flat_table
1976 = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
1977 hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
7506f491 1978
7506f491 1979 for (i = 0; i < table_size; i++)
c4c81601
RK
1980 for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
1981 {
1982 flat_table[expr->bitmap_index] = expr;
1983 hash_val[expr->bitmap_index] = i;
1984 }
7506f491
DE
1985
1986 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1987 name, table_size, total_size);
1988
1989 for (i = 0; i < total_size; i++)
21318741
RK
1990 if (flat_table[i] != 0)
1991 {
a0ac9e5a 1992 expr = flat_table[i];
21318741
RK
1993 fprintf (file, "Index %d (hash value %d)\n ",
1994 expr->bitmap_index, hash_val[i]);
a0ac9e5a 1995 print_rtl (file, expr->expr);
21318741
RK
1996 fprintf (file, "\n");
1997 }
7506f491
DE
1998
1999 fprintf (file, "\n");
4da896b2 2000
4da896b2
MM
2001 free (flat_table);
2002 free (hash_val);
7506f491
DE
2003}
2004
2005/* Record register first/last/block set information for REGNO in INSN.
c4c81601 2006
7506f491
DE
2007 reg_first_set records the first place in the block where the register
2008 is set and is used to compute "anticipatability".
c4c81601 2009
7506f491
DE
2010 reg_last_set records the last place in the block where the register
2011 is set and is used to compute "availability".
c4c81601 2012
7506f491
DE
2013 reg_set_in_block records whether the register is set in the block
2014 and is used to compute "transparency". */
2015
2016static void
2017record_last_reg_set_info (insn, regno)
2018 rtx insn;
2019 int regno;
2020{
b86ba9c8 2021 if (reg_first_set[regno] == NEVER_SET)
7506f491 2022 reg_first_set[regno] = INSN_CUID (insn);
c4c81601 2023
7506f491
DE
2024 reg_last_set[regno] = INSN_CUID (insn);
2025 SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2026}
2027
2028/* Record memory first/last/block set information for INSN. */
2029
2030static void
2031record_last_mem_set_info (insn)
2032 rtx insn;
2033{
b86ba9c8 2034 if (mem_first_set == NEVER_SET)
7506f491 2035 mem_first_set = INSN_CUID (insn);
c4c81601 2036
7506f491
DE
2037 mem_last_set = INSN_CUID (insn);
2038 mem_set_in_block[BLOCK_NUM (insn)] = 1;
2039}
2040
7506f491 2041/* Called from compute_hash_table via note_stores to handle one
84832317
MM
2042 SET or CLOBBER in an insn. DATA is really the instruction in which
2043 the SET is taking place. */
7506f491
DE
2044
2045static void
84832317 2046record_last_set_info (dest, setter, data)
50b2596f 2047 rtx dest, setter ATTRIBUTE_UNUSED;
84832317 2048 void *data;
7506f491 2049{
84832317
MM
2050 rtx last_set_insn = (rtx) data;
2051
7506f491
DE
2052 if (GET_CODE (dest) == SUBREG)
2053 dest = SUBREG_REG (dest);
2054
2055 if (GET_CODE (dest) == REG)
2056 record_last_reg_set_info (last_set_insn, REGNO (dest));
2057 else if (GET_CODE (dest) == MEM
2058 /* Ignore pushes, they clobber nothing. */
2059 && ! push_operand (dest, GET_MODE (dest)))
2060 record_last_mem_set_info (last_set_insn);
2061}
2062
2063/* Top level function to create an expression or assignment hash table.
2064
2065 Expression entries are placed in the hash table if
2066 - they are of the form (set (pseudo-reg) src),
2067 - src is something we want to perform GCSE on,
2068 - none of the operands are subsequently modified in the block
2069
2070 Assignment entries are placed in the hash table if
2071 - they are of the form (set (pseudo-reg) src),
2072 - src is something we want to perform const/copy propagation on,
2073 - none of the operands or target are subsequently modified in the block
c4c81601 2074
7506f491
DE
2075 Currently src must be a pseudo-reg or a const_int.
2076
2077 F is the first insn.
2078 SET_P is non-zero for computing the assignment hash table. */
2079
2080static void
b5ce41ff 2081compute_hash_table (set_p)
7506f491
DE
2082 int set_p;
2083{
2084 int bb;
2085
2086 /* While we compute the hash table we also compute a bit array of which
2087 registers are set in which blocks.
2088 We also compute which blocks set memory, in the absence of aliasing
2089 support [which is TODO].
2090 ??? This isn't needed during const/copy propagation, but it's cheap to
2091 compute. Later. */
2092 sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2093 bzero ((char *) mem_set_in_block, n_basic_blocks);
2094
2095 /* Some working arrays used to track first and last set in each block. */
2096 /* ??? One could use alloca here, but at some size a threshold is crossed
2097 beyond which one should use malloc. Are we at that threshold here? */
2098 reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2099 reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2100
2101 for (bb = 0; bb < n_basic_blocks; bb++)
2102 {
2103 rtx insn;
770ae6cc 2104 unsigned int regno;
ed79bb3d 2105 int in_libcall_block;
770ae6cc 2106 unsigned int i;
7506f491
DE
2107
2108 /* First pass over the instructions records information used to
2109 determine when registers and memory are first and last set.
2110 ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2111 could be moved to compute_sets since they currently don't change. */
2112
b86ba9c8
GK
2113 for (i = 0; i < max_gcse_regno; i++)
2114 reg_first_set[i] = reg_last_set[i] = NEVER_SET;
770ae6cc 2115
b86ba9c8
GK
2116 mem_first_set = NEVER_SET;
2117 mem_last_set = NEVER_SET;
7506f491 2118
3b413743
RH
2119 for (insn = BLOCK_HEAD (bb);
2120 insn && insn != NEXT_INSN (BLOCK_END (bb));
7506f491
DE
2121 insn = NEXT_INSN (insn))
2122 {
2123#ifdef NON_SAVING_SETJMP
2124 if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2125 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2126 {
2127 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2128 record_last_reg_set_info (insn, regno);
2129 continue;
2130 }
2131#endif
2132
2133 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
2134 continue;
2135
2136 if (GET_CODE (insn) == CALL_INSN)
2137 {
2138 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
15f8470f
JL
2139 if ((call_used_regs[regno]
2140 && regno != STACK_POINTER_REGNUM
2141#if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2142 && regno != HARD_FRAME_POINTER_REGNUM
2143#endif
2144#if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2145 && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2146#endif
2147#if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2148 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2149#endif
2150
2151 && regno != FRAME_POINTER_REGNUM)
2152 || global_regs[regno])
7506f491 2153 record_last_reg_set_info (insn, regno);
c4c81601 2154
7506f491
DE
2155 if (! CONST_CALL_P (insn))
2156 record_last_mem_set_info (insn);
2157 }
2158
84832317 2159 note_stores (PATTERN (insn), record_last_set_info, insn);
7506f491
DE
2160 }
2161
2162 /* The next pass builds the hash table. */
2163
3b413743
RH
2164 for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2165 insn && insn != NEXT_INSN (BLOCK_END (bb));
7506f491 2166 insn = NEXT_INSN (insn))
c4c81601
RK
2167 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2168 {
2169 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2170 in_libcall_block = 1;
2171 else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2172 in_libcall_block = 0;
2173 hash_scan_insn (insn, set_p, in_libcall_block);
7506f491
DE
2174 }
2175 }
2176
2177 free (reg_first_set);
2178 free (reg_last_set);
c4c81601 2179
7506f491
DE
2180 /* Catch bugs early. */
2181 reg_first_set = reg_last_set = 0;
2182}
2183
2184/* Allocate space for the set hash table.
2185 N_INSNS is the number of instructions in the function.
2186 It is used to determine the number of buckets to use. */
2187
2188static void
2189alloc_set_hash_table (n_insns)
2190 int n_insns;
2191{
2192 int n;
2193
2194 set_hash_table_size = n_insns / 4;
2195 if (set_hash_table_size < 11)
2196 set_hash_table_size = 11;
c4c81601 2197
7506f491
DE
2198 /* Attempt to maintain efficient use of hash table.
2199 Making it an odd number is simplest for now.
2200 ??? Later take some measurements. */
2201 set_hash_table_size |= 1;
2202 n = set_hash_table_size * sizeof (struct expr *);
2203 set_hash_table = (struct expr **) gmalloc (n);
2204}
2205
2206/* Free things allocated by alloc_set_hash_table. */
2207
2208static void
2209free_set_hash_table ()
2210{
2211 free (set_hash_table);
2212}
2213
2214/* Compute the hash table for doing copy/const propagation. */
2215
2216static void
b5ce41ff 2217compute_set_hash_table ()
7506f491
DE
2218{
2219 /* Initialize count of number of entries in hash table. */
2220 n_sets = 0;
c4c81601
RK
2221 bzero ((char *) set_hash_table,
2222 set_hash_table_size * sizeof (struct expr *));
7506f491 2223
b5ce41ff 2224 compute_hash_table (1);
7506f491
DE
2225}
2226
2227/* Allocate space for the expression hash table.
2228 N_INSNS is the number of instructions in the function.
2229 It is used to determine the number of buckets to use. */
2230
2231static void
2232alloc_expr_hash_table (n_insns)
2e653e39 2233 unsigned int n_insns;
7506f491
DE
2234{
2235 int n;
2236
2237 expr_hash_table_size = n_insns / 2;
2238 /* Make sure the amount is usable. */
2239 if (expr_hash_table_size < 11)
2240 expr_hash_table_size = 11;
c4c81601 2241
7506f491
DE
2242 /* Attempt to maintain efficient use of hash table.
2243 Making it an odd number is simplest for now.
2244 ??? Later take some measurements. */
2245 expr_hash_table_size |= 1;
2246 n = expr_hash_table_size * sizeof (struct expr *);
2247 expr_hash_table = (struct expr **) gmalloc (n);
2248}
2249
2250/* Free things allocated by alloc_expr_hash_table. */
2251
2252static void
2253free_expr_hash_table ()
2254{
2255 free (expr_hash_table);
2256}
2257
2258/* Compute the hash table for doing GCSE. */
2259
2260static void
b5ce41ff 2261compute_expr_hash_table ()
7506f491
DE
2262{
2263 /* Initialize count of number of entries in hash table. */
2264 n_exprs = 0;
c4c81601
RK
2265 bzero ((char *) expr_hash_table,
2266 expr_hash_table_size * sizeof (struct expr *));
7506f491 2267
b5ce41ff 2268 compute_hash_table (0);
7506f491
DE
2269}
2270\f
2271/* Expression tracking support. */
2272
2273/* Lookup pattern PAT in the expression table.
2274 The result is a pointer to the table entry, or NULL if not found. */
2275
2276static struct expr *
2277lookup_expr (pat)
2278 rtx pat;
2279{
2280 int do_not_record_p;
2281 unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2282 expr_hash_table_size);
2283 struct expr *expr;
2284
2285 if (do_not_record_p)
2286 return NULL;
2287
2288 expr = expr_hash_table[hash];
2289
2290 while (expr && ! expr_equiv_p (expr->expr, pat))
2291 expr = expr->next_same_hash;
2292
2293 return expr;
2294}
2295
c4c81601
RK
2296/* Lookup REGNO in the set table. If PAT is non-NULL look for the entry that
2297 matches it, otherwise return the first entry for REGNO. The result is a
2298 pointer to the table entry, or NULL if not found. */
7506f491
DE
2299
2300static struct expr *
2301lookup_set (regno, pat)
770ae6cc 2302 unsigned int regno;
7506f491
DE
2303 rtx pat;
2304{
2305 unsigned int hash = hash_set (regno, set_hash_table_size);
2306 struct expr *expr;
2307
2308 expr = set_hash_table[hash];
2309
2310 if (pat)
2311 {
2312 while (expr && ! expr_equiv_p (expr->expr, pat))
2313 expr = expr->next_same_hash;
2314 }
2315 else
2316 {
2317 while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2318 expr = expr->next_same_hash;
2319 }
2320
2321 return expr;
2322}
2323
2324/* Return the next entry for REGNO in list EXPR. */
2325
2326static struct expr *
2327next_set (regno, expr)
770ae6cc 2328 unsigned int regno;
7506f491
DE
2329 struct expr *expr;
2330{
2331 do
2332 expr = expr->next_same_hash;
2333 while (expr && REGNO (SET_DEST (expr->expr)) != regno);
c4c81601 2334
7506f491
DE
2335 return expr;
2336}
2337
2338/* Reset tables used to keep track of what's still available [since the
2339 start of the block]. */
2340
2341static void
2342reset_opr_set_tables ()
2343{
2344 /* Maintain a bitmap of which regs have been set since beginning of
2345 the block. */
2346 sbitmap_zero (reg_set_bitmap);
c4c81601 2347
7506f491
DE
2348 /* Also keep a record of the last instruction to modify memory.
2349 For now this is very trivial, we only record whether any memory
2350 location has been modified. */
2351 mem_last_set = 0;
2352}
2353
2354/* Return non-zero if the operands of X are not set before INSN in
2355 INSN's basic block. */
2356
2357static int
2358oprs_not_set_p (x, insn)
2359 rtx x, insn;
2360{
c4c81601 2361 int i, j;
7506f491 2362 enum rtx_code code;
6f7d635c 2363 const char *fmt;
7506f491 2364
7506f491
DE
2365 if (x == 0)
2366 return 1;
2367
2368 code = GET_CODE (x);
2369 switch (code)
2370 {
2371 case PC:
2372 case CC0:
2373 case CONST:
2374 case CONST_INT:
2375 case CONST_DOUBLE:
2376 case SYMBOL_REF:
2377 case LABEL_REF:
2378 case ADDR_VEC:
2379 case ADDR_DIFF_VEC:
2380 return 1;
2381
2382 case MEM:
2383 if (mem_last_set != 0)
2384 return 0;
c4c81601
RK
2385 else
2386 return oprs_not_set_p (XEXP (x, 0), insn);
7506f491
DE
2387
2388 case REG:
2389 return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2390
2391 default:
2392 break;
2393 }
2394
c4c81601 2395 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
7506f491
DE
2396 {
2397 if (fmt[i] == 'e')
2398 {
7506f491
DE
2399 /* If we are about to do the last recursive call
2400 needed at this level, change it into iteration.
2401 This function is called enough to be worth it. */
2402 if (i == 0)
c4c81601
RK
2403 return oprs_not_set_p (XEXP (x, i), insn);
2404
2405 if (! oprs_not_set_p (XEXP (x, i), insn))
7506f491
DE
2406 return 0;
2407 }
2408 else if (fmt[i] == 'E')
c4c81601
RK
2409 for (j = 0; j < XVECLEN (x, i); j++)
2410 if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2411 return 0;
7506f491
DE
2412 }
2413
2414 return 1;
2415}
2416
2417/* Mark things set by a CALL. */
2418
2419static void
b5ce41ff
JL
2420mark_call (insn)
2421 rtx insn;
7506f491
DE
2422{
2423 mem_last_set = INSN_CUID (insn);
2424}
2425
2426/* Mark things set by a SET. */
2427
2428static void
2429mark_set (pat, insn)
2430 rtx pat, insn;
2431{
2432 rtx dest = SET_DEST (pat);
2433
2434 while (GET_CODE (dest) == SUBREG
2435 || GET_CODE (dest) == ZERO_EXTRACT
2436 || GET_CODE (dest) == SIGN_EXTRACT
2437 || GET_CODE (dest) == STRICT_LOW_PART)
2438 dest = XEXP (dest, 0);
2439
2440 if (GET_CODE (dest) == REG)
2441 SET_BIT (reg_set_bitmap, REGNO (dest));
2442 else if (GET_CODE (dest) == MEM)
2443 mem_last_set = INSN_CUID (insn);
2444
2445 if (GET_CODE (SET_SRC (pat)) == CALL)
b5ce41ff 2446 mark_call (insn);
7506f491
DE
2447}
2448
2449/* Record things set by a CLOBBER. */
2450
2451static void
2452mark_clobber (pat, insn)
2453 rtx pat, insn;
2454{
2455 rtx clob = XEXP (pat, 0);
2456
2457 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2458 clob = XEXP (clob, 0);
2459
2460 if (GET_CODE (clob) == REG)
2461 SET_BIT (reg_set_bitmap, REGNO (clob));
2462 else
2463 mem_last_set = INSN_CUID (insn);
2464}
2465
2466/* Record things set by INSN.
2467 This data is used by oprs_not_set_p. */
2468
2469static void
2470mark_oprs_set (insn)
2471 rtx insn;
2472{
2473 rtx pat = PATTERN (insn);
c4c81601 2474 int i;
7506f491
DE
2475
2476 if (GET_CODE (pat) == SET)
2477 mark_set (pat, insn);
2478 else if (GET_CODE (pat) == PARALLEL)
c4c81601
RK
2479 for (i = 0; i < XVECLEN (pat, 0); i++)
2480 {
2481 rtx x = XVECEXP (pat, 0, i);
2482
2483 if (GET_CODE (x) == SET)
2484 mark_set (x, insn);
2485 else if (GET_CODE (x) == CLOBBER)
2486 mark_clobber (x, insn);
2487 else if (GET_CODE (x) == CALL)
2488 mark_call (insn);
2489 }
7506f491 2490
7506f491
DE
2491 else if (GET_CODE (pat) == CLOBBER)
2492 mark_clobber (pat, insn);
2493 else if (GET_CODE (pat) == CALL)
b5ce41ff 2494 mark_call (insn);
7506f491 2495}
b5ce41ff 2496
7506f491
DE
2497\f
2498/* Classic GCSE reaching definition support. */
2499
2500/* Allocate reaching def variables. */
2501
2502static void
2503alloc_rd_mem (n_blocks, n_insns)
2504 int n_blocks, n_insns;
2505{
2506 rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2507 sbitmap_vector_zero (rd_kill, n_basic_blocks);
2508
2509 rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2510 sbitmap_vector_zero (rd_gen, n_basic_blocks);
2511
2512 reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2513 sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2514
2515 rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2516 sbitmap_vector_zero (rd_out, n_basic_blocks);
2517}
2518
2519/* Free reaching def variables. */
2520
2521static void
2522free_rd_mem ()
2523{
2524 free (rd_kill);
2525 free (rd_gen);
2526 free (reaching_defs);
2527 free (rd_out);
2528}
2529
c4c81601 2530/* Add INSN to the kills of BB. REGNO, set in BB, is killed by INSN. */
7506f491
DE
2531
2532static void
2533handle_rd_kill_set (insn, regno, bb)
2534 rtx insn;
2535 int regno, bb;
2536{
c4c81601 2537 struct reg_set *this_reg;
7506f491 2538
c4c81601
RK
2539 for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg ->next)
2540 if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2541 SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
7506f491
DE
2542}
2543
7506f491
DE
2544/* Compute the set of kill's for reaching definitions. */
2545
2546static void
2547compute_kill_rd ()
2548{
c4c81601
RK
2549 int bb, cuid;
2550 int regno, i;
7506f491
DE
2551
2552 /* For each block
2553 For each set bit in `gen' of the block (i.e each insn which
ac7c5af5
JL
2554 generates a definition in the block)
2555 Call the reg set by the insn corresponding to that bit regx
2556 Look at the linked list starting at reg_set_table[regx]
2557 For each setting of regx in the linked list, which is not in
2558 this block
c4c81601 2559 Set the bit in `kill' corresponding to that insn. */
7506f491 2560 for (bb = 0; bb < n_basic_blocks; bb++)
c4c81601
RK
2561 for (cuid = 0; cuid < max_cuid; cuid++)
2562 if (TEST_BIT (rd_gen[bb], cuid))
7506f491 2563 {
c4c81601
RK
2564 rtx insn = CUID_INSN (cuid);
2565 rtx pat = PATTERN (insn);
7506f491 2566
c4c81601
RK
2567 if (GET_CODE (insn) == CALL_INSN)
2568 {
2569 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
ac7c5af5 2570 {
c4c81601
RK
2571 if ((call_used_regs[regno]
2572 && regno != STACK_POINTER_REGNUM
15f8470f 2573#if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
c4c81601 2574 && regno != HARD_FRAME_POINTER_REGNUM
15f8470f
JL
2575#endif
2576#if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
c4c81601
RK
2577 && ! (regno == ARG_POINTER_REGNUM
2578 && fixed_regs[regno])
15f8470f
JL
2579#endif
2580#if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
c4c81601 2581 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
15f8470f 2582#endif
c4c81601
RK
2583 && regno != FRAME_POINTER_REGNUM)
2584 || global_regs[regno])
2585 handle_rd_kill_set (insn, regno, bb);
ac7c5af5 2586 }
c4c81601 2587 }
7506f491 2588
c4c81601
RK
2589 if (GET_CODE (pat) == PARALLEL)
2590 {
2591 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
7506f491 2592 {
c4c81601 2593 enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
7506f491 2594
c4c81601
RK
2595 if ((code == SET || code == CLOBBER)
2596 && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2597 handle_rd_kill_set (insn,
2598 REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2599 bb);
ac7c5af5 2600 }
ac7c5af5 2601 }
c4c81601
RK
2602 else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
2603 /* Each setting of this register outside of this block
2604 must be marked in the set of kills in this block. */
2605 handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
7506f491 2606 }
7506f491
DE
2607}
2608
2609/* Compute the reaching definitions as in
2610 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2611 Chapter 10. It is the same algorithm as used for computing available
2612 expressions but applied to the gens and kills of reaching definitions. */
2613
2614static void
2615compute_rd ()
2616{
2617 int bb, changed, passes;
2618
2619 for (bb = 0; bb < n_basic_blocks; bb++)
2620 sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2621
2622 passes = 0;
2623 changed = 1;
2624 while (changed)
2625 {
2626 changed = 0;
2627 for (bb = 0; bb < n_basic_blocks; bb++)
ac7c5af5 2628 {
36349f8b 2629 sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
7506f491
DE
2630 changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2631 reaching_defs[bb], rd_kill[bb]);
ac7c5af5 2632 }
7506f491
DE
2633 passes++;
2634 }
2635
2636 if (gcse_file)
2637 fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2638}
2639\f
2640/* Classic GCSE available expression support. */
2641
2642/* Allocate memory for available expression computation. */
2643
2644static void
2645alloc_avail_expr_mem (n_blocks, n_exprs)
2646 int n_blocks, n_exprs;
2647{
2648 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2649 sbitmap_vector_zero (ae_kill, n_basic_blocks);
2650
2651 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2652 sbitmap_vector_zero (ae_gen, n_basic_blocks);
2653
2654 ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2655 sbitmap_vector_zero (ae_in, n_basic_blocks);
2656
2657 ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2658 sbitmap_vector_zero (ae_out, n_basic_blocks);
7506f491
DE
2659}
2660
2661static void
2662free_avail_expr_mem ()
2663{
2664 free (ae_kill);
2665 free (ae_gen);
2666 free (ae_in);
2667 free (ae_out);
7506f491
DE
2668}
2669
2670/* Compute the set of available expressions generated in each basic block. */
2671
2672static void
2673compute_ae_gen ()
2674{
2e653e39 2675 unsigned int i;
c4c81601
RK
2676 struct expr *expr;
2677 struct occr *occr;
7506f491
DE
2678
2679 /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2680 This is all we have to do because an expression is not recorded if it
2681 is not available, and the only expressions we want to work with are the
2682 ones that are recorded. */
7506f491 2683 for (i = 0; i < expr_hash_table_size; i++)
c4c81601
RK
2684 for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash)
2685 for (occr = expr->avail_occr; occr != 0; occr = occr->next)
2686 SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
7506f491
DE
2687}
2688
2689/* Return non-zero if expression X is killed in BB. */
2690
2691static int
2692expr_killed_p (x, bb)
2693 rtx x;
2694 int bb;
2695{
c4c81601 2696 int i, j;
7506f491 2697 enum rtx_code code;
6f7d635c 2698 const char *fmt;
7506f491 2699
7506f491
DE
2700 if (x == 0)
2701 return 1;
2702
2703 code = GET_CODE (x);
2704 switch (code)
2705 {
2706 case REG:
2707 return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2708
2709 case MEM:
2710 if (mem_set_in_block[bb])
2711 return 1;
c4c81601
RK
2712 else
2713 return expr_killed_p (XEXP (x, 0), bb);
7506f491
DE
2714
2715 case PC:
2716 case CC0: /*FIXME*/
2717 case CONST:
2718 case CONST_INT:
2719 case CONST_DOUBLE:
2720 case SYMBOL_REF:
2721 case LABEL_REF:
2722 case ADDR_VEC:
2723 case ADDR_DIFF_VEC:
2724 return 0;
2725
2726 default:
2727 break;
2728 }
2729
c4c81601 2730 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
7506f491
DE
2731 {
2732 if (fmt[i] == 'e')
2733 {
7506f491
DE
2734 /* If we are about to do the last recursive call
2735 needed at this level, change it into iteration.
2736 This function is called enough to be worth it. */
2737 if (i == 0)
c4c81601
RK
2738 return expr_killed_p (XEXP (x, i), bb);
2739 else if (expr_killed_p (XEXP (x, i), bb))
7506f491
DE
2740 return 1;
2741 }
2742 else if (fmt[i] == 'E')
c4c81601
RK
2743 for (j = 0; j < XVECLEN (x, i); j++)
2744 if (expr_killed_p (XVECEXP (x, i, j), bb))
2745 return 1;
7506f491
DE
2746 }
2747
2748 return 0;
2749}
2750
2751/* Compute the set of available expressions killed in each basic block. */
2752
2753static void
a42cd965
AM
2754compute_ae_kill (ae_gen, ae_kill)
2755 sbitmap *ae_gen, *ae_kill;
7506f491 2756{
2e653e39
RK
2757 int bb;
2758 unsigned int i;
c4c81601 2759 struct expr *expr;
7506f491
DE
2760
2761 for (bb = 0; bb < n_basic_blocks; bb++)
c4c81601
RK
2762 for (i = 0; i < expr_hash_table_size; i++)
2763 for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
7506f491 2764 {
c4c81601
RK
2765 /* Skip EXPR if generated in this block. */
2766 if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2767 continue;
7506f491 2768
c4c81601
RK
2769 if (expr_killed_p (expr->expr, bb))
2770 SET_BIT (ae_kill[bb], expr->bitmap_index);
7506f491 2771 }
7506f491 2772}
7506f491
DE
2773\f
2774/* Actually perform the Classic GCSE optimizations. */
2775
2776/* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2777
2778 CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2779 as a positive reach. We want to do this when there are two computations
2780 of the expression in the block.
2781
2782 VISITED is a pointer to a working buffer for tracking which BB's have
2783 been visited. It is NULL for the top-level call.
2784
2785 We treat reaching expressions that go through blocks containing the same
2786 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2787 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2788 2 as not reaching. The intent is to improve the probability of finding
2789 only one reaching expression and to reduce register lifetimes by picking
2790 the closest such expression. */
2791
2792static int
283a2545 2793expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
7506f491
DE
2794 struct occr *occr;
2795 struct expr *expr;
2796 int bb;
2797 int check_self_loop;
2798 char *visited;
2799{
36349f8b 2800 edge pred;
7506f491 2801
36349f8b 2802 for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
7506f491 2803 {
36349f8b 2804 int pred_bb = pred->src->index;
7506f491
DE
2805
2806 if (visited[pred_bb])
c4c81601 2807 /* This predecessor has already been visited. Nothing to do. */
7506f491 2808 ;
7506f491 2809 else if (pred_bb == bb)
ac7c5af5 2810 {
7506f491
DE
2811 /* BB loops on itself. */
2812 if (check_self_loop
2813 && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2814 && BLOCK_NUM (occr->insn) == pred_bb)
2815 return 1;
c4c81601 2816
7506f491 2817 visited[pred_bb] = 1;
ac7c5af5 2818 }
c4c81601 2819
7506f491
DE
2820 /* Ignore this predecessor if it kills the expression. */
2821 else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2822 visited[pred_bb] = 1;
c4c81601 2823
7506f491
DE
2824 /* Does this predecessor generate this expression? */
2825 else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2826 {
2827 /* Is this the occurrence we're looking for?
2828 Note that there's only one generating occurrence per block
2829 so we just need to check the block number. */
2830 if (BLOCK_NUM (occr->insn) == pred_bb)
2831 return 1;
c4c81601 2832
7506f491
DE
2833 visited[pred_bb] = 1;
2834 }
c4c81601 2835
7506f491
DE
2836 /* Neither gen nor kill. */
2837 else
ac7c5af5 2838 {
7506f491 2839 visited[pred_bb] = 1;
283a2545
RL
2840 if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2841 visited))
c4c81601 2842
7506f491 2843 return 1;
ac7c5af5 2844 }
7506f491
DE
2845 }
2846
2847 /* All paths have been checked. */
2848 return 0;
2849}
2850
283a2545
RL
2851/* This wrapper for expr_reaches_here_p_work() is to ensure that any
2852 memory allocated for that function is returned. */
2853
2854static int
2855expr_reaches_here_p (occr, expr, bb, check_self_loop)
2856 struct occr *occr;
2857 struct expr *expr;
2858 int bb;
2859 int check_self_loop;
2860{
2861 int rval;
c4c81601 2862 char *visited = (char *) xcalloc (n_basic_blocks, 1);
283a2545 2863
c4c81601 2864 rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);
283a2545
RL
2865
2866 free (visited);
c4c81601 2867 return rval;
283a2545
RL
2868}
2869
7506f491
DE
2870/* Return the instruction that computes EXPR that reaches INSN's basic block.
2871 If there is more than one such instruction, return NULL.
2872
2873 Called only by handle_avail_expr. */
2874
2875static rtx
2876computing_insn (expr, insn)
2877 struct expr *expr;
2878 rtx insn;
2879{
2880 int bb = BLOCK_NUM (insn);
2881
2882 if (expr->avail_occr->next == NULL)
2883 {
2884 if (BLOCK_NUM (expr->avail_occr->insn) == bb)
c4c81601
RK
2885 /* The available expression is actually itself
2886 (i.e. a loop in the flow graph) so do nothing. */
2887 return NULL;
2888
7506f491
DE
2889 /* (FIXME) Case that we found a pattern that was created by
2890 a substitution that took place. */
2891 return expr->avail_occr->insn;
2892 }
2893 else
2894 {
2895 /* Pattern is computed more than once.
2896 Search backwards from this insn to see how many of these
2897 computations actually reach this insn. */
2898 struct occr *occr;
2899 rtx insn_computes_expr = NULL;
2900 int can_reach = 0;
2901
2902 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
2903 {
2904 if (BLOCK_NUM (occr->insn) == bb)
2905 {
2906 /* The expression is generated in this block.
2907 The only time we care about this is when the expression
2908 is generated later in the block [and thus there's a loop].
2909 We let the normal cse pass handle the other cases. */
c4c81601
RK
2910 if (INSN_CUID (insn) < INSN_CUID (occr->insn)
2911 && expr_reaches_here_p (occr, expr, bb, 1))
7506f491
DE
2912 {
2913 can_reach++;
2914 if (can_reach > 1)
2915 return NULL;
c4c81601 2916
7506f491
DE
2917 insn_computes_expr = occr->insn;
2918 }
2919 }
c4c81601
RK
2920 else if (expr_reaches_here_p (occr, expr, bb, 0))
2921 {
2922 can_reach++;
2923 if (can_reach > 1)
2924 return NULL;
2925
2926 insn_computes_expr = occr->insn;
2927 }
7506f491
DE
2928 }
2929
2930 if (insn_computes_expr == NULL)
2931 abort ();
c4c81601 2932
7506f491
DE
2933 return insn_computes_expr;
2934 }
2935}
2936
2937/* Return non-zero if the definition in DEF_INSN can reach INSN.
2938 Only called by can_disregard_other_sets. */
2939
2940static int
2941def_reaches_here_p (insn, def_insn)
2942 rtx insn, def_insn;
2943{
2944 rtx reg;
2945
2946 if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
2947 return 1;
2948
2949 if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
2950 {
2951 if (INSN_CUID (def_insn) < INSN_CUID (insn))
ac7c5af5 2952 {
7506f491
DE
2953 if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
2954 return 1;
c4c81601 2955 else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
7506f491
DE
2956 reg = XEXP (PATTERN (def_insn), 0);
2957 else if (GET_CODE (PATTERN (def_insn)) == SET)
2958 reg = SET_DEST (PATTERN (def_insn));
2959 else
2960 abort ();
c4c81601 2961
7506f491
DE
2962 return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
2963 }
2964 else
2965 return 0;
2966 }
2967
2968 return 0;
2969}
2970
c4c81601
RK
2971/* Return non-zero if *ADDR_THIS_REG can only have one value at INSN. The
2972 value returned is the number of definitions that reach INSN. Returning a
2973 value of zero means that [maybe] more than one definition reaches INSN and
2974 the caller can't perform whatever optimization it is trying. i.e. it is
2975 always safe to return zero. */
7506f491
DE
2976
2977static int
2978can_disregard_other_sets (addr_this_reg, insn, for_combine)
2979 struct reg_set **addr_this_reg;
2980 rtx insn;
2981 int for_combine;
2982{
2983 int number_of_reaching_defs = 0;
c4c81601 2984 struct reg_set *this_reg;
7506f491 2985
c4c81601
RK
2986 for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
2987 if (def_reaches_here_p (insn, this_reg->insn))
2988 {
2989 number_of_reaching_defs++;
2990 /* Ignore parallels for now. */
2991 if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
2992 return 0;
2993
2994 if (!for_combine
2995 && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
2996 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
2997 SET_SRC (PATTERN (insn)))))
2998 /* A setting of the reg to a different value reaches INSN. */
2999 return 0;
3000
3001 if (number_of_reaching_defs > 1)
3002 {
3003 /* If in this setting the value the register is being set to is
3004 equal to the previous value the register was set to and this
3005 setting reaches the insn we are trying to do the substitution
3006 on then we are ok. */
3007 if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
7506f491 3008 return 0;
c4c81601
RK
3009 else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3010 SET_SRC (PATTERN (insn))))
3011 return 0;
3012 }
7506f491 3013
c4c81601
RK
3014 *addr_this_reg = this_reg;
3015 }
7506f491
DE
3016
3017 return number_of_reaching_defs;
3018}
3019
3020/* Expression computed by insn is available and the substitution is legal,
3021 so try to perform the substitution.
3022
3023 The result is non-zero if any changes were made. */
3024
3025static int
3026handle_avail_expr (insn, expr)
3027 rtx insn;
3028 struct expr *expr;
3029{
3030 rtx pat, insn_computes_expr;
3031 rtx to;
3032 struct reg_set *this_reg;
3033 int found_setting, use_src;
3034 int changed = 0;
3035
3036 /* We only handle the case where one computation of the expression
3037 reaches this instruction. */
3038 insn_computes_expr = computing_insn (expr, insn);
3039 if (insn_computes_expr == NULL)
3040 return 0;
3041
3042 found_setting = 0;
3043 use_src = 0;
3044
3045 /* At this point we know only one computation of EXPR outside of this
3046 block reaches this insn. Now try to find a register that the
3047 expression is computed into. */
7506f491
DE
3048 if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3049 {
3050 /* This is the case when the available expression that reaches
3051 here has already been handled as an available expression. */
770ae6cc 3052 unsigned int regnum_for_replacing
c4c81601
RK
3053 = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3054
7506f491
DE
3055 /* If the register was created by GCSE we can't use `reg_set_table',
3056 however we know it's set only once. */
3057 if (regnum_for_replacing >= max_gcse_regno
3058 /* If the register the expression is computed into is set only once,
3059 or only one set reaches this insn, we can use it. */
3060 || (((this_reg = reg_set_table[regnum_for_replacing]),
3061 this_reg->next == NULL)
3062 || can_disregard_other_sets (&this_reg, insn, 0)))
3063 {
3064 use_src = 1;
3065 found_setting = 1;
3066 }
3067 }
3068
3069 if (!found_setting)
3070 {
770ae6cc 3071 unsigned int regnum_for_replacing
c4c81601
RK
3072 = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3073
7506f491
DE
3074 /* This shouldn't happen. */
3075 if (regnum_for_replacing >= max_gcse_regno)
3076 abort ();
c4c81601 3077
7506f491 3078 this_reg = reg_set_table[regnum_for_replacing];
c4c81601 3079
7506f491
DE
3080 /* If the register the expression is computed into is set only once,
3081 or only one set reaches this insn, use it. */
3082 if (this_reg->next == NULL
3083 || can_disregard_other_sets (&this_reg, insn, 0))
3084 found_setting = 1;
3085 }
3086
3087 if (found_setting)
3088 {
3089 pat = PATTERN (insn);
3090 if (use_src)
3091 to = SET_SRC (PATTERN (insn_computes_expr));
3092 else
3093 to = SET_DEST (PATTERN (insn_computes_expr));
3094 changed = validate_change (insn, &SET_SRC (pat), to, 0);
3095
3096 /* We should be able to ignore the return code from validate_change but
3097 to play it safe we check. */
3098 if (changed)
3099 {
3100 gcse_subst_count++;
3101 if (gcse_file != NULL)
3102 {
c4c81601
RK
3103 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
3104 INSN_UID (insn));
3105 fprintf (gcse_file, " reg %d %s insn %d\n",
3106 REGNO (to), use_src ? "from" : "set in",
7506f491
DE
3107 INSN_UID (insn_computes_expr));
3108 }
7506f491
DE
3109 }
3110 }
c4c81601 3111
7506f491
DE
3112 /* The register that the expr is computed into is set more than once. */
3113 else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3114 {
3115 /* Insert an insn after insnx that copies the reg set in insnx
3116 into a new pseudo register call this new register REGN.
3117 From insnb until end of basic block or until REGB is set
3118 replace all uses of REGB with REGN. */
3119 rtx new_insn;
3120
3121 to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3122
3123 /* Generate the new insn. */
3124 /* ??? If the change fails, we return 0, even though we created
3125 an insn. I think this is ok. */
9e6a5703
JC
3126 new_insn
3127 = emit_insn_after (gen_rtx_SET (VOIDmode, to,
c4c81601
RK
3128 SET_DEST (PATTERN
3129 (insn_computes_expr))),
3130 insn_computes_expr);
3131
7506f491
DE
3132 /* Keep block number table up to date. */
3133 set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
c4c81601 3134
7506f491
DE
3135 /* Keep register set table up to date. */
3136 record_one_set (REGNO (to), new_insn);
3137
3138 gcse_create_count++;
3139 if (gcse_file != NULL)
ac7c5af5 3140 {
c4c81601 3141 fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
7506f491 3142 INSN_UID (NEXT_INSN (insn_computes_expr)),
c4c81601
RK
3143 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
3144 fprintf (gcse_file, ", computed in insn %d,\n",
7506f491 3145 INSN_UID (insn_computes_expr));
c4c81601
RK
3146 fprintf (gcse_file, " into newly allocated reg %d\n",
3147 REGNO (to));
ac7c5af5 3148 }
7506f491
DE
3149
3150 pat = PATTERN (insn);
3151
3152 /* Do register replacement for INSN. */
3153 changed = validate_change (insn, &SET_SRC (pat),
c4c81601
RK
3154 SET_DEST (PATTERN
3155 (NEXT_INSN (insn_computes_expr))),
7506f491
DE
3156 0);
3157
3158 /* We should be able to ignore the return code from validate_change but
3159 to play it safe we check. */
3160 if (changed)
3161 {
3162 gcse_subst_count++;
3163 if (gcse_file != NULL)
3164 {
c4c81601
RK
3165 fprintf (gcse_file,
3166 "GCSE: Replacing the source in insn %d with reg %d ",
7506f491 3167 INSN_UID (insn),
c4c81601
RK
3168 REGNO (SET_DEST (PATTERN (NEXT_INSN
3169 (insn_computes_expr)))));
3170 fprintf (gcse_file, "set in insn %d\n",
7506f491
DE
3171 INSN_UID (insn_computes_expr));
3172 }
7506f491
DE
3173 }
3174 }
3175
3176 return changed;
3177}
3178
c4c81601
RK
3179/* Perform classic GCSE. This is called by one_classic_gcse_pass after all
3180 the dataflow analysis has been done.
7506f491
DE
3181
3182 The result is non-zero if a change was made. */
3183
3184static int
3185classic_gcse ()
3186{
3187 int bb, changed;
3188 rtx insn;
3189
3190 /* Note we start at block 1. */
3191
3192 changed = 0;
3193 for (bb = 1; bb < n_basic_blocks; bb++)
3194 {
3195 /* Reset tables used to keep track of what's still valid [since the
3196 start of the block]. */
3197 reset_opr_set_tables ();
3198
3b413743
RH
3199 for (insn = BLOCK_HEAD (bb);
3200 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
7506f491
DE
3201 insn = NEXT_INSN (insn))
3202 {
3203 /* Is insn of form (set (pseudo-reg) ...)? */
7506f491
DE
3204 if (GET_CODE (insn) == INSN
3205 && GET_CODE (PATTERN (insn)) == SET
3206 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3207 && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3208 {
3209 rtx pat = PATTERN (insn);
3210 rtx src = SET_SRC (pat);
3211 struct expr *expr;
3212
3213 if (want_to_gcse_p (src)
3214 /* Is the expression recorded? */
3215 && ((expr = lookup_expr (src)) != NULL)
3216 /* Is the expression available [at the start of the
3217 block]? */
3218 && TEST_BIT (ae_in[bb], expr->bitmap_index)
3219 /* Are the operands unchanged since the start of the
3220 block? */
3221 && oprs_not_set_p (src, insn))
3222 changed |= handle_avail_expr (insn, expr);
3223 }
3224
3225 /* Keep track of everything modified by this insn. */
3226 /* ??? Need to be careful w.r.t. mods done to INSN. */
3227 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3228 mark_oprs_set (insn);
ac7c5af5 3229 }
7506f491
DE
3230 }
3231
3232 return changed;
3233}
3234
3235/* Top level routine to perform one classic GCSE pass.
3236
3237 Return non-zero if a change was made. */
3238
3239static int
b5ce41ff 3240one_classic_gcse_pass (pass)
7506f491
DE
3241 int pass;
3242{
3243 int changed = 0;
3244
3245 gcse_subst_count = 0;
3246 gcse_create_count = 0;
3247
3248 alloc_expr_hash_table (max_cuid);
3249 alloc_rd_mem (n_basic_blocks, max_cuid);
b5ce41ff 3250 compute_expr_hash_table ();
7506f491
DE
3251 if (gcse_file)
3252 dump_hash_table (gcse_file, "Expression", expr_hash_table,
3253 expr_hash_table_size, n_exprs);
c4c81601 3254
7506f491
DE
3255 if (n_exprs > 0)
3256 {
3257 compute_kill_rd ();
3258 compute_rd ();
3259 alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3260 compute_ae_gen ();
a42cd965 3261 compute_ae_kill (ae_gen, ae_kill);
bd0eaec2 3262 compute_available (ae_gen, ae_kill, ae_out, ae_in);
7506f491
DE
3263 changed = classic_gcse ();
3264 free_avail_expr_mem ();
3265 }
c4c81601 3266
7506f491
DE
3267 free_rd_mem ();
3268 free_expr_hash_table ();
3269
3270 if (gcse_file)
3271 {
3272 fprintf (gcse_file, "\n");
c4c81601
RK
3273 fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3274 current_function_name, pass, bytes_used, gcse_subst_count);
3275 fprintf (gcse_file, "%d insns created\n", gcse_create_count);
7506f491
DE
3276 }
3277
3278 return changed;
3279}
3280\f
3281/* Compute copy/constant propagation working variables. */
3282
3283/* Local properties of assignments. */
7506f491
DE
3284static sbitmap *cprop_pavloc;
3285static sbitmap *cprop_absaltered;
3286
3287/* Global properties of assignments (computed from the local properties). */
7506f491
DE
3288static sbitmap *cprop_avin;
3289static sbitmap *cprop_avout;
3290
c4c81601
RK
3291/* Allocate vars used for copy/const propagation. N_BLOCKS is the number of
3292 basic blocks. N_SETS is the number of sets. */
7506f491
DE
3293
3294static void
3295alloc_cprop_mem (n_blocks, n_sets)
3296 int n_blocks, n_sets;
3297{
3298 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3299 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3300
3301 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3302 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3303}
3304
3305/* Free vars used by copy/const propagation. */
3306
3307static void
3308free_cprop_mem ()
3309{
3310 free (cprop_pavloc);
3311 free (cprop_absaltered);
3312 free (cprop_avin);
3313 free (cprop_avout);
3314}
3315
c4c81601
RK
3316/* For each block, compute whether X is transparent. X is either an
3317 expression or an assignment [though we don't care which, for this context
3318 an assignment is treated as an expression]. For each block where an
3319 element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3320 bit in BMAP. */
7506f491
DE
3321
3322static void
3323compute_transp (x, indx, bmap, set_p)
3324 rtx x;
3325 int indx;
3326 sbitmap *bmap;
3327 int set_p;
3328{
c4c81601 3329 int bb, i, j;
7506f491 3330 enum rtx_code code;
c4c81601 3331 reg_set *r;
6f7d635c 3332 const char *fmt;
7506f491 3333
c4c81601
RK
3334 /* repeat is used to turn tail-recursion into iteration since GCC
3335 can't do it when there's no return value. */
7506f491
DE
3336 repeat:
3337
3338 if (x == 0)
3339 return;
3340
3341 code = GET_CODE (x);
3342 switch (code)
3343 {
3344 case REG:
c4c81601
RK
3345 if (set_p)
3346 {
3347 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3348 {
3349 for (bb = 0; bb < n_basic_blocks; bb++)
3350 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3351 SET_BIT (bmap[bb], indx);
3352 }
3353 else
3354 {
3355 for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3356 SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3357 }
3358 }
3359 else
3360 {
3361 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3362 {
3363 for (bb = 0; bb < n_basic_blocks; bb++)
3364 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3365 RESET_BIT (bmap[bb], indx);
3366 }
3367 else
3368 {
3369 for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3370 RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3371 }
3372 }
7506f491 3373
c4c81601 3374 return;
7506f491
DE
3375
3376 case MEM:
3377 if (set_p)
3378 {
3379 for (bb = 0; bb < n_basic_blocks; bb++)
3380 if (mem_set_in_block[bb])
3381 SET_BIT (bmap[bb], indx);
3382 }
3383 else
3384 {
3385 for (bb = 0; bb < n_basic_blocks; bb++)
3386 if (mem_set_in_block[bb])
3387 RESET_BIT (bmap[bb], indx);
3388 }
c4c81601 3389
7506f491
DE
3390 x = XEXP (x, 0);
3391 goto repeat;
3392
3393 case PC:
3394 case CC0: /*FIXME*/
3395 case CONST:
3396 case CONST_INT:
3397 case CONST_DOUBLE:
3398 case SYMBOL_REF:
3399 case LABEL_REF:
3400 case ADDR_VEC:
3401 case ADDR_DIFF_VEC:
3402 return;
3403
3404 default:
3405 break;
3406 }
3407
c4c81601 3408 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
7506f491
DE
3409 {
3410 if (fmt[i] == 'e')
3411 {
7506f491
DE
3412 /* If we are about to do the last recursive call
3413 needed at this level, change it into iteration.
3414 This function is called enough to be worth it. */
3415 if (i == 0)
3416 {
c4c81601 3417 x = XEXP (x, i);
7506f491
DE
3418 goto repeat;
3419 }
c4c81601
RK
3420
3421 compute_transp (XEXP (x, i), indx, bmap, set_p);
7506f491
DE
3422 }
3423 else if (fmt[i] == 'E')
c4c81601
RK
3424 for (j = 0; j < XVECLEN (x, i); j++)
3425 compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
7506f491
DE
3426 }
3427}
3428
7506f491
DE
3429/* Top level routine to do the dataflow analysis needed by copy/const
3430 propagation. */
3431
3432static void
3433compute_cprop_data ()
3434{
b5ce41ff 3435 compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
ce724250
JL
3436 compute_available (cprop_pavloc, cprop_absaltered,
3437 cprop_avout, cprop_avin);
7506f491
DE
3438}
3439\f
3440/* Copy/constant propagation. */
3441
7506f491
DE
3442/* Maximum number of register uses in an insn that we handle. */
3443#define MAX_USES 8
3444
3445/* Table of uses found in an insn.
3446 Allocated statically to avoid alloc/free complexity and overhead. */
3447static struct reg_use reg_use_table[MAX_USES];
3448
3449/* Index into `reg_use_table' while building it. */
3450static int reg_use_count;
3451
c4c81601
RK
3452/* Set up a list of register numbers used in INSN. The found uses are stored
3453 in `reg_use_table'. `reg_use_count' is initialized to zero before entry,
3454 and contains the number of uses in the table upon exit.
7506f491 3455
c4c81601
RK
3456 ??? If a register appears multiple times we will record it multiple times.
3457 This doesn't hurt anything but it will slow things down. */
7506f491
DE
3458
3459static void
3460find_used_regs (x)
3461 rtx x;
3462{
c4c81601 3463 int i, j;
7506f491 3464 enum rtx_code code;
6f7d635c 3465 const char *fmt;
7506f491 3466
c4c81601
RK
3467 /* repeat is used to turn tail-recursion into iteration since GCC
3468 can't do it when there's no return value. */
7506f491
DE
3469 repeat:
3470
3471 if (x == 0)
3472 return;
3473
3474 code = GET_CODE (x);
3475 switch (code)
3476 {
3477 case REG:
3478 if (reg_use_count == MAX_USES)
3479 return;
c4c81601 3480
7506f491
DE
3481 reg_use_table[reg_use_count].reg_rtx = x;
3482 reg_use_count++;
3483 return;
3484
3485 case MEM:
3486 x = XEXP (x, 0);
3487 goto repeat;
3488
3489 case PC:
3490 case CC0:
3491 case CONST:
3492 case CONST_INT:
3493 case CONST_DOUBLE:
3494 case SYMBOL_REF:
3495 case LABEL_REF:
3496 case CLOBBER:
3497 case ADDR_VEC:
3498 case ADDR_DIFF_VEC:
3499 case ASM_INPUT: /*FIXME*/
3500 return;
3501
3502 case SET:
3503 if (GET_CODE (SET_DEST (x)) == MEM)
3504 find_used_regs (SET_DEST (x));
3505 x = SET_SRC (x);
3506 goto repeat;
3507
3508 default:
3509 break;
3510 }
3511
3512 /* Recursively scan the operands of this expression. */
3513
c4c81601 3514 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
7506f491
DE
3515 {
3516 if (fmt[i] == 'e')
3517 {
3518 /* If we are about to do the last recursive call
3519 needed at this level, change it into iteration.
3520 This function is called enough to be worth it. */
3521 if (i == 0)
3522 {
3523 x = XEXP (x, 0);
3524 goto repeat;
3525 }
c4c81601 3526
7506f491
DE
3527 find_used_regs (XEXP (x, i));
3528 }
3529 else if (fmt[i] == 'E')
c4c81601
RK
3530 for (j = 0; j < XVECLEN (x, i); j++)
3531 find_used_regs (XVECEXP (x, i, j));
7506f491
DE
3532 }
3533}
3534
3535/* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3536 Returns non-zero is successful. */
3537
3538static int
3539try_replace_reg (from, to, insn)
3540 rtx from, to, insn;
3541{
833fc3ad
JH
3542 rtx note;
3543 rtx src;
3544 int success;
3545 rtx set;
3546
3547 note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3548
3549 if (!note)
3550 note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3551
e78d9500
JL
3552 /* If this fails we could try to simplify the result of the
3553 replacement and attempt to recognize the simplified insn.
3554
3555 But we need a general simplify_rtx that doesn't have pass
3556 specific state variables. I'm not aware of one at the moment. */
7506f491 3557
833fc3ad
JH
3558 success = validate_replace_src (from, to, insn);
3559 set = single_set (insn);
3560
3561 /* We've failed to do replacement. Try to add REG_EQUAL note to not loose
3562 information. */
3563 if (!success && !note)
3564 {
3565 if (!set)
3566 return 0;
c4c81601 3567
833fc3ad
JH
3568 note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL,
3569 copy_rtx (SET_SRC (set)),
3570 REG_NOTES (insn));
3571 }
3572
3573 /* Always do the replacement in REQ_EQUAL and REG_EQUIV notes. Also
3574 try to simplify them. */
3575 if (note)
3576 {
3577 rtx simplified;
c4c81601 3578
833fc3ad
JH
3579 src = XEXP (note, 0);
3580 replace_rtx (src, from, to);
3581
3582 /* Try to simplify resulting note. */
3583 simplified = simplify_rtx (src);
3584 if (simplified)
3585 {
3586 src = simplified;
3587 XEXP (note, 0) = src;
3588 }
3589
3590 /* REG_EQUAL may get simplified into register.
3591 We don't allow that. Remove that note. This code ought
3592 not to hapen, because previous code ought to syntetize
3593 reg-reg move, but be on the safe side. */
3594 else if (REG_P (src))
3595 remove_note (insn, note);
3596 }
3597 return success;
3598}
c4c81601
RK
3599
3600/* Find a set of REGNOs that are available on entry to INSN's block. Returns
3601 NULL no such set is found. */
7506f491
DE
3602
3603static struct expr *
3604find_avail_set (regno, insn)
3605 int regno;
3606 rtx insn;
3607{
cafba495
BS
3608 /* SET1 contains the last set found that can be returned to the caller for
3609 use in a substitution. */
3610 struct expr *set1 = 0;
3611
3612 /* Loops are not possible here. To get a loop we would need two sets
3613 available at the start of the block containing INSN. ie we would
3614 need two sets like this available at the start of the block:
3615
3616 (set (reg X) (reg Y))
3617 (set (reg Y) (reg X))
3618
3619 This can not happen since the set of (reg Y) would have killed the
3620 set of (reg X) making it unavailable at the start of this block. */
3621 while (1)
3622 {
3623 rtx src;
3624 struct expr *set = lookup_set (regno, NULL_RTX);
3625
3626 /* Find a set that is available at the start of the block
3627 which contains INSN. */
3628 while (set)
3629 {
3630 if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3631 break;
3632 set = next_set (regno, set);
3633 }
7506f491 3634
cafba495
BS
3635 /* If no available set was found we've reached the end of the
3636 (possibly empty) copy chain. */
3637 if (set == 0)
3638 break;
3639
3640 if (GET_CODE (set->expr) != SET)
3641 abort ();
3642
3643 src = SET_SRC (set->expr);
3644
3645 /* We know the set is available.
3646 Now check that SRC is ANTLOC (i.e. none of the source operands
3647 have changed since the start of the block).
3648
3649 If the source operand changed, we may still use it for the next
3650 iteration of this loop, but we may not use it for substitutions. */
c4c81601 3651
cafba495
BS
3652 if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3653 set1 = set;
3654
3655 /* If the source of the set is anything except a register, then
3656 we have reached the end of the copy chain. */
3657 if (GET_CODE (src) != REG)
7506f491 3658 break;
7506f491 3659
cafba495
BS
3660 /* Follow the copy chain, ie start another iteration of the loop
3661 and see if we have an available copy into SRC. */
3662 regno = REGNO (src);
3663 }
3664
3665 /* SET1 holds the last set that was available and anticipatable at
3666 INSN. */
3667 return set1;
7506f491
DE
3668}
3669
abd535b6
BS
3670/* Subroutine of cprop_insn that tries to propagate constants into
3671 JUMP_INSNS. INSN must be a conditional jump; COPY is a copy of it
3672 that we can use for substitutions.
3673 REG_USED is the use we will try to replace, SRC is the constant we
3674 will try to substitute for it.
3675 Returns nonzero if a change was made. */
c4c81601 3676
abd535b6
BS
3677static int
3678cprop_jump (insn, copy, reg_used, src)
3679 rtx insn, copy;
3680 struct reg_use *reg_used;
3681 rtx src;
3682{
3683 rtx set = PATTERN (copy);
3684 rtx temp;
3685
3686 /* Replace the register with the appropriate constant. */
3687 replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3688
3689 temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3690 GET_MODE (SET_SRC (set)),
3691 GET_MODE (XEXP (SET_SRC (set), 0)),
3692 XEXP (SET_SRC (set), 0),
3693 XEXP (SET_SRC (set), 1),
3694 XEXP (SET_SRC (set), 2));
3695
3696 /* If no simplification can be made, then try the next
3697 register. */
3698 if (temp == 0)
3699 return 0;
3700
3701 SET_SRC (set) = temp;
3702
3703 /* That may have changed the structure of TEMP, so
3704 force it to be rerecognized if it has not turned
3705 into a nop or unconditional jump. */
3706
3707 INSN_CODE (copy) = -1;
3708 if ((SET_DEST (set) == pc_rtx
3709 && (SET_SRC (set) == pc_rtx
3710 || GET_CODE (SET_SRC (set)) == LABEL_REF))
3711 || recog (PATTERN (copy), copy, NULL) >= 0)
3712 {
3713 /* This has either become an unconditional jump
3714 or a nop-jump. We'd like to delete nop jumps
3715 here, but doing so confuses gcse. So we just
3716 make the replacement and let later passes
3717 sort things out. */
3718 PATTERN (insn) = set;
3719 INSN_CODE (insn) = -1;
3720
3721 /* One less use of the label this insn used to jump to
3722 if we turned this into a NOP jump. */
3723 if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3724 --LABEL_NUSES (JUMP_LABEL (insn));
3725
3726 /* If this has turned into an unconditional jump,
3727 then put a barrier after it so that the unreachable
3728 code will be deleted. */
3729 if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3730 emit_barrier_after (insn);
3731
3732 run_jump_opt_after_gcse = 1;
3733
3734 const_prop_count++;
3735 if (gcse_file != NULL)
3736 {
c4c81601
RK
3737 fprintf (gcse_file,
3738 "CONST-PROP: Replacing reg %d in insn %d with constant ",
3739 REGNO (reg_used->reg_rtx), INSN_UID (insn));
abd535b6
BS
3740 print_rtl (gcse_file, src);
3741 fprintf (gcse_file, "\n");
3742 }
c4c81601 3743
abd535b6
BS
3744 return 1;
3745 }
3746 return 0;
3747}
3748
3749#ifdef HAVE_cc0
c4c81601
RK
3750
3751/* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
3752 for machines that have CC0. INSN is a single set that stores into CC0;
3753 the insn following it is a conditional jump. REG_USED is the use we will
3754 try to replace, SRC is the constant we will try to substitute for it.
abd535b6 3755 Returns nonzero if a change was made. */
c4c81601 3756
abd535b6
BS
3757static int
3758cprop_cc0_jump (insn, reg_used, src)
3759 rtx insn;
3760 struct reg_use *reg_used;
3761 rtx src;
3762{
3763 rtx jump = NEXT_INSN (insn);
3764 rtx copy = copy_rtx (jump);
3765 rtx set = PATTERN (copy);
3766
3767 /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3768 substitute into it. */
3769 replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3770 if (! cprop_jump (jump, copy, reg_used, src))
3771 return 0;
3772
3773 /* If we succeeded, delete the cc0 setter. */
3774 PUT_CODE (insn, NOTE);
3775 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3776 NOTE_SOURCE_FILE (insn) = 0;
3777 return 1;
3778 }
3779#endif
3780
7506f491
DE
3781/* Perform constant and copy propagation on INSN.
3782 The result is non-zero if a change was made. */
3783
3784static int
b5ce41ff 3785cprop_insn (insn, alter_jumps)
7506f491 3786 rtx insn;
b5ce41ff 3787 int alter_jumps;
7506f491
DE
3788{
3789 struct reg_use *reg_used;
3790 int changed = 0;
833fc3ad 3791 rtx note;
7506f491 3792
e78d9500
JL
3793 /* Only propagate into SETs. Note that a conditional jump is a
3794 SET with pc_rtx as the destination. */
3795 if ((GET_CODE (insn) != INSN
3796 && GET_CODE (insn) != JUMP_INSN)
7506f491
DE
3797 || GET_CODE (PATTERN (insn)) != SET)
3798 return 0;
3799
3800 reg_use_count = 0;
3801 find_used_regs (PATTERN (insn));
833fc3ad
JH
3802
3803 note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3804 if (!note)
3805 note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3806
3807 /* We may win even when propagating constants into notes. */
3808 if (note)
3809 find_used_regs (XEXP (note, 0));
7506f491 3810
c4c81601
RK
3811 for (reg_used = &reg_use_table[0]; reg_use_count > 0;
3812 reg_used++, reg_use_count--)
7506f491 3813 {
770ae6cc 3814 unsigned int regno = REGNO (reg_used->reg_rtx);
7506f491
DE
3815 rtx pat, src;
3816 struct expr *set;
7506f491
DE
3817
3818 /* Ignore registers created by GCSE.
3819 We do this because ... */
3820 if (regno >= max_gcse_regno)
3821 continue;
3822
3823 /* If the register has already been set in this block, there's
3824 nothing we can do. */
3825 if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3826 continue;
3827
3828 /* Find an assignment that sets reg_used and is available
3829 at the start of the block. */
3830 set = find_avail_set (regno, insn);
3831 if (! set)
3832 continue;
3833
3834 pat = set->expr;
3835 /* ??? We might be able to handle PARALLELs. Later. */
3836 if (GET_CODE (pat) != SET)
3837 abort ();
c4c81601 3838
7506f491
DE
3839 src = SET_SRC (pat);
3840
e78d9500 3841 /* Constant propagation. */
05f6f07c
BS
3842 if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3843 || GET_CODE (src) == SYMBOL_REF)
7506f491 3844 {
e78d9500
JL
3845 /* Handle normal insns first. */
3846 if (GET_CODE (insn) == INSN
3847 && try_replace_reg (reg_used->reg_rtx, src, insn))
7506f491
DE
3848 {
3849 changed = 1;
3850 const_prop_count++;
3851 if (gcse_file != NULL)
3852 {
c4c81601
RK
3853 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
3854 regno);
3855 fprintf (gcse_file, "insn %d with constant ",
3856 INSN_UID (insn));
e78d9500 3857 print_rtl (gcse_file, src);
7506f491
DE
3858 fprintf (gcse_file, "\n");
3859 }
3860
3861 /* The original insn setting reg_used may or may not now be
3862 deletable. We leave the deletion to flow. */
3863 }
e78d9500
JL
3864
3865 /* Try to propagate a CONST_INT into a conditional jump.
3866 We're pretty specific about what we will handle in this
3867 code, we can extend this as necessary over time.
3868
3869 Right now the insn in question must look like
abd535b6 3870 (set (pc) (if_then_else ...)) */
b5ce41ff 3871 else if (alter_jumps
6e9a3c38
JL
3872 && GET_CODE (insn) == JUMP_INSN
3873 && condjump_p (insn)
3874 && ! simplejump_p (insn))
abd535b6
BS
3875 changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
3876#ifdef HAVE_cc0
3877 /* Similar code for machines that use a pair of CC0 setter and
3878 conditional jump insn. */
3879 else if (alter_jumps
3880 && GET_CODE (PATTERN (insn)) == SET
3881 && SET_DEST (PATTERN (insn)) == cc0_rtx
3882 && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3883 && condjump_p (NEXT_INSN (insn))
3884 && ! simplejump_p (NEXT_INSN (insn)))
3885 changed |= cprop_cc0_jump (insn, reg_used, src);
3886#endif
7506f491
DE
3887 }
3888 else if (GET_CODE (src) == REG
3889 && REGNO (src) >= FIRST_PSEUDO_REGISTER
3890 && REGNO (src) != regno)
3891 {
cafba495 3892 if (try_replace_reg (reg_used->reg_rtx, src, insn))
7506f491 3893 {
cafba495
BS
3894 changed = 1;
3895 copy_prop_count++;
3896 if (gcse_file != NULL)
7506f491 3897 {
c4c81601
RK
3898 fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
3899 regno, INSN_UID (insn));
3900 fprintf (gcse_file, " with reg %d\n", REGNO (src));
7506f491 3901 }
cafba495
BS
3902
3903 /* The original insn setting reg_used may or may not now be
3904 deletable. We leave the deletion to flow. */
3905 /* FIXME: If it turns out that the insn isn't deletable,
3906 then we may have unnecessarily extended register lifetimes
3907 and made things worse. */
7506f491
DE
3908 }
3909 }
3910 }
3911
3912 return changed;
3913}
3914
c4c81601
RK
3915/* Forward propagate copies. This includes copies and constants. Return
3916 non-zero if a change was made. */
7506f491
DE
3917
3918static int
b5ce41ff
JL
3919cprop (alter_jumps)
3920 int alter_jumps;
7506f491
DE
3921{
3922 int bb, changed;
3923 rtx insn;
3924
3925 /* Note we start at block 1. */
3926
3927 changed = 0;
3928 for (bb = 1; bb < n_basic_blocks; bb++)
3929 {
3930 /* Reset tables used to keep track of what's still valid [since the
3931 start of the block]. */
3932 reset_opr_set_tables ();
3933
3b413743
RH
3934 for (insn = BLOCK_HEAD (bb);
3935 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
7506f491
DE
3936 insn = NEXT_INSN (insn))
3937 {
3938 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3939 {
b5ce41ff 3940 changed |= cprop_insn (insn, alter_jumps);
7506f491
DE
3941
3942 /* Keep track of everything modified by this insn. */
abd535b6
BS
3943 /* ??? Need to be careful w.r.t. mods done to INSN. Don't
3944 call mark_oprs_set if we turned the insn into a NOTE. */
3945 if (GET_CODE (insn) != NOTE)
3946 mark_oprs_set (insn);
7506f491 3947 }
ac7c5af5 3948 }
7506f491
DE
3949 }
3950
3951 if (gcse_file != NULL)
3952 fprintf (gcse_file, "\n");
3953
3954 return changed;
3955}
3956
3957/* Perform one copy/constant propagation pass.
3958 F is the first insn in the function.
3959 PASS is the pass count. */
3960
3961static int
b5ce41ff 3962one_cprop_pass (pass, alter_jumps)
7506f491 3963 int pass;
b5ce41ff 3964 int alter_jumps;
7506f491
DE
3965{
3966 int changed = 0;
3967
3968 const_prop_count = 0;
3969 copy_prop_count = 0;
3970
3971 alloc_set_hash_table (max_cuid);
b5ce41ff 3972 compute_set_hash_table ();
7506f491
DE
3973 if (gcse_file)
3974 dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
3975 n_sets);
3976 if (n_sets > 0)
3977 {
3978 alloc_cprop_mem (n_basic_blocks, n_sets);
3979 compute_cprop_data ();
b5ce41ff 3980 changed = cprop (alter_jumps);
7506f491
DE
3981 free_cprop_mem ();
3982 }
c4c81601 3983
7506f491
DE
3984 free_set_hash_table ();
3985
3986 if (gcse_file)
3987 {
c4c81601
RK
3988 fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
3989 current_function_name, pass, bytes_used);
3990 fprintf (gcse_file, "%d const props, %d copy props\n\n",
3991 const_prop_count, copy_prop_count);
7506f491
DE
3992 }
3993
3994 return changed;
3995}
3996\f
a65f3558 3997/* Compute PRE+LCM working variables. */
7506f491
DE
3998
3999/* Local properties of expressions. */
4000/* Nonzero for expressions that are transparent in the block. */
a65f3558 4001static sbitmap *transp;
7506f491 4002
5c35539b
RH
4003/* Nonzero for expressions that are transparent at the end of the block.
4004 This is only zero for expressions killed by abnormal critical edge
4005 created by a calls. */
a65f3558 4006static sbitmap *transpout;
5c35539b 4007
a65f3558
JL
4008/* Nonzero for expressions that are computed (available) in the block. */
4009static sbitmap *comp;
7506f491 4010
a65f3558
JL
4011/* Nonzero for expressions that are locally anticipatable in the block. */
4012static sbitmap *antloc;
7506f491 4013
a65f3558
JL
4014/* Nonzero for expressions where this block is an optimal computation
4015 point. */
4016static sbitmap *pre_optimal;
5c35539b 4017
a65f3558
JL
4018/* Nonzero for expressions which are redundant in a particular block. */
4019static sbitmap *pre_redundant;
7506f491 4020
a42cd965
AM
4021/* Nonzero for expressions which should be inserted on a specific edge. */
4022static sbitmap *pre_insert_map;
4023
4024/* Nonzero for expressions which should be deleted in a specific block. */
4025static sbitmap *pre_delete_map;
4026
4027/* Contains the edge_list returned by pre_edge_lcm. */
4028static struct edge_list *edge_list;
4029
a65f3558
JL
4030/* Redundant insns. */
4031static sbitmap pre_redundant_insns;
7506f491 4032
a65f3558 4033/* Allocate vars used for PRE analysis. */
7506f491
DE
4034
4035static void
a65f3558
JL
4036alloc_pre_mem (n_blocks, n_exprs)
4037 int n_blocks, n_exprs;
7506f491 4038{
a65f3558
JL
4039 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4040 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4041 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5faf03ae 4042
a42cd965
AM
4043 pre_optimal = NULL;
4044 pre_redundant = NULL;
4045 pre_insert_map = NULL;
4046 pre_delete_map = NULL;
4047 ae_in = NULL;
4048 ae_out = NULL;
a42cd965 4049 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
c4c81601 4050
a42cd965 4051 /* pre_insert and pre_delete are allocated later. */
7506f491
DE
4052}
4053
a65f3558 4054/* Free vars used for PRE analysis. */
7506f491
DE
4055
4056static void
a65f3558 4057free_pre_mem ()
7506f491 4058{
a65f3558
JL
4059 free (transp);
4060 free (comp);
bd3675fc
JL
4061
4062 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
7506f491 4063
a42cd965
AM
4064 if (pre_optimal)
4065 free (pre_optimal);
4066 if (pre_redundant)
4067 free (pre_redundant);
4068 if (pre_insert_map)
4069 free (pre_insert_map);
4070 if (pre_delete_map)
4071 free (pre_delete_map);
a42cd965
AM
4072
4073 if (ae_in)
4074 free (ae_in);
4075 if (ae_out)
4076 free (ae_out);
a42cd965 4077
bd3675fc 4078 transp = comp = NULL;
a42cd965 4079 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
55d3f917 4080 ae_in = ae_out = NULL;
7506f491
DE
4081}
4082
4083/* Top level routine to do the dataflow analysis needed by PRE. */
4084
4085static void
4086compute_pre_data ()
4087{
c66e8ae9
JL
4088 int i;
4089
a65f3558 4090 compute_local_properties (transp, comp, antloc, 0);
a42cd965 4091 sbitmap_vector_zero (ae_kill, n_basic_blocks);
c66e8ae9
JL
4092
4093 /* Compute ae_kill for each basic block using:
4094
4095 ~(TRANSP | COMP)
4096
a2e90653 4097 This is significantly faster than compute_ae_kill. */
c66e8ae9
JL
4098
4099 for (i = 0; i < n_basic_blocks; i++)
4100 {
4101 sbitmap_a_or_b (ae_kill[i], transp[i], comp[i]);
4102 sbitmap_not (ae_kill[i], ae_kill[i]);
4103 }
4104
a42cd965
AM
4105 edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4106 ae_kill, &pre_insert_map, &pre_delete_map);
bd3675fc
JL
4107 free (antloc);
4108 antloc = NULL;
4109 free (ae_kill);
4110 ae_kill = NULL;
7506f491
DE
4111}
4112\f
4113/* PRE utilities */
4114
a65f3558
JL
4115/* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4116 block BB.
7506f491
DE
4117
4118 VISITED is a pointer to a working buffer for tracking which BB's have
4119 been visited. It is NULL for the top-level call.
4120
4121 We treat reaching expressions that go through blocks containing the same
4122 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
4123 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4124 2 as not reaching. The intent is to improve the probability of finding
4125 only one reaching expression and to reduce register lifetimes by picking
4126 the closest such expression. */
4127
4128static int
89e606c9 4129pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
a65f3558 4130 int occr_bb;
7506f491
DE
4131 struct expr *expr;
4132 int bb;
4133 char *visited;
4134{
36349f8b 4135 edge pred;
7506f491 4136
36349f8b 4137 for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
7506f491 4138 {
36349f8b 4139 int pred_bb = pred->src->index;
7506f491 4140
36349f8b 4141 if (pred->src == ENTRY_BLOCK_PTR
7506f491
DE
4142 /* Has predecessor has already been visited? */
4143 || visited[pred_bb])
c4c81601
RK
4144 ;/* Nothing to do. */
4145
7506f491 4146 /* Does this predecessor generate this expression? */
89e606c9 4147 else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
7506f491
DE
4148 {
4149 /* Is this the occurrence we're looking for?
4150 Note that there's only one generating occurrence per block
4151 so we just need to check the block number. */
a65f3558 4152 if (occr_bb == pred_bb)
7506f491 4153 return 1;
c4c81601 4154
7506f491
DE
4155 visited[pred_bb] = 1;
4156 }
4157 /* Ignore this predecessor if it kills the expression. */
a65f3558 4158 else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
7506f491 4159 visited[pred_bb] = 1;
c4c81601 4160
7506f491
DE
4161 /* Neither gen nor kill. */
4162 else
ac7c5af5 4163 {
7506f491 4164 visited[pred_bb] = 1;
89e606c9 4165 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
7506f491 4166 return 1;
ac7c5af5 4167 }
7506f491
DE
4168 }
4169
4170 /* All paths have been checked. */
4171 return 0;
4172}
283a2545
RL
4173
4174/* The wrapper for pre_expr_reaches_here_work that ensures that any
4175 memory allocated for that function is returned. */
4176
4177static int
89e606c9 4178pre_expr_reaches_here_p (occr_bb, expr, bb)
283a2545
RL
4179 int occr_bb;
4180 struct expr *expr;
4181 int bb;
283a2545
RL
4182{
4183 int rval;
c4c81601 4184 char *visited = (char *) xcalloc (n_basic_blocks, 1);
283a2545 4185
89e606c9 4186 rval = pre_expr_reaches_here_p_work(occr_bb, expr, bb, visited);
283a2545
RL
4187
4188 free (visited);
c4c81601 4189 return rval;
283a2545 4190}
7506f491 4191\f
a42cd965
AM
4192
4193/* Given an expr, generate RTL which we can insert at the end of a BB,
4194 or on an edge. Set the block number of any insns generated to
4195 the value of BB. */
4196
4197static rtx
4198process_insert_insn (expr)
4199 struct expr *expr;
4200{
4201 rtx reg = expr->reaching_reg;
4202 rtx pat, copied_expr;
4203 rtx first_new_insn;
4204
4205 start_sequence ();
4206 copied_expr = copy_rtx (expr->expr);
4207 emit_move_insn (reg, copied_expr);
4208 first_new_insn = get_insns ();
4209 pat = gen_sequence ();
4210 end_sequence ();
4211
4212 return pat;
4213}
4214
a65f3558
JL
4215/* Add EXPR to the end of basic block BB.
4216
4217 This is used by both the PRE and code hoisting.
4218
4219 For PRE, we want to verify that the expr is either transparent
4220 or locally anticipatable in the target block. This check makes
4221 no sense for code hoisting. */
7506f491
DE
4222
4223static void
a65f3558 4224insert_insn_end_bb (expr, bb, pre)
7506f491
DE
4225 struct expr *expr;
4226 int bb;
a65f3558 4227 int pre;
7506f491
DE
4228{
4229 rtx insn = BLOCK_END (bb);
4230 rtx new_insn;
4231 rtx reg = expr->reaching_reg;
4232 int regno = REGNO (reg);
a42cd965 4233 rtx pat;
c4c81601 4234 int i;
7506f491 4235
a42cd965 4236 pat = process_insert_insn (expr);
7506f491
DE
4237
4238 /* If the last insn is a jump, insert EXPR in front [taking care to
4239 handle cc0, etc. properly]. */
4240
4241 if (GET_CODE (insn) == JUMP_INSN)
4242 {
50b2596f 4243#ifdef HAVE_cc0
7506f491 4244 rtx note;
50b2596f 4245#endif
7506f491
DE
4246
4247 /* If this is a jump table, then we can't insert stuff here. Since
4248 we know the previous real insn must be the tablejump, we insert
4249 the new instruction just before the tablejump. */
4250 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4251 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4252 insn = prev_real_insn (insn);
4253
4254#ifdef HAVE_cc0
4255 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4256 if cc0 isn't set. */
4257 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4258 if (note)
4259 insn = XEXP (note, 0);
4260 else
4261 {
4262 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4263 if (maybe_cc0_setter
4264 && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4265 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4266 insn = maybe_cc0_setter;
4267 }
4268#endif
4269 /* FIXME: What if something in cc0/jump uses value set in new insn? */
b5229628 4270 new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
3947e2f9 4271 }
c4c81601 4272
3947e2f9
RH
4273 /* Likewise if the last insn is a call, as will happen in the presence
4274 of exception handling. */
5c35539b 4275 else if (GET_CODE (insn) == CALL_INSN)
3947e2f9
RH
4276 {
4277 HARD_REG_SET parm_regs;
4278 int nparm_regs;
4279 rtx p;
4280
4281 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4282 we search backward and place the instructions before the first
4283 parameter is loaded. Do this for everyone for consistency and a
c4c81601 4284 presumtion that we'll get better code elsewhere as well.
3947e2f9 4285
c4c81601 4286 It should always be the case that we can put these instructions
a65f3558
JL
4287 anywhere in the basic block with performing PRE optimizations.
4288 Check this. */
c4c81601 4289
a65f3558
JL
4290 if (pre
4291 && !TEST_BIT (antloc[bb], expr->bitmap_index)
4292 && !TEST_BIT (transp[bb], expr->bitmap_index))
3947e2f9
RH
4293 abort ();
4294
4295 /* Since different machines initialize their parameter registers
4296 in different orders, assume nothing. Collect the set of all
4297 parameter registers. */
4298 CLEAR_HARD_REG_SET (parm_regs);
4299 nparm_regs = 0;
4300 for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4301 if (GET_CODE (XEXP (p, 0)) == USE
4302 && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4303 {
c4c81601 4304 if (REGNO (XEXP (XEXP (p, 0), 0)) >= FIRST_PSEUDO_REGISTER)
5c35539b 4305 abort ();
c4c81601
RK
4306
4307 SET_HARD_REG_BIT (parm_regs, REGNO (XEXP (XEXP (p, 0), 0)));
3947e2f9
RH
4308 nparm_regs++;
4309 }
4310
4311 /* Search backward for the first set of a register in this set. */
4312 while (nparm_regs && BLOCK_HEAD (bb) != insn)
4313 {
4314 insn = PREV_INSN (insn);
4315 p = single_set (insn);
4316 if (p && GET_CODE (SET_DEST (p)) == REG
4317 && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4318 && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4319 {
4320 CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4321 nparm_regs--;
4322 }
4323 }
4324
b1d26727
JL
4325 /* If we found all the parameter loads, then we want to insert
4326 before the first parameter load.
4327
4328 If we did not find all the parameter loads, then we might have
4329 stopped on the head of the block, which could be a CODE_LABEL.
4330 If we inserted before the CODE_LABEL, then we would be putting
4331 the insn in the wrong basic block. In that case, put the insn
b5229628 4332 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
0a377997 4333 while (GET_CODE (insn) == CODE_LABEL
589ca5cb 4334 || NOTE_INSN_BASIC_BLOCK_P (insn))
b5229628 4335 insn = NEXT_INSN (insn);
c4c81601 4336
b5229628 4337 new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
7506f491
DE
4338 }
4339 else
4340 {
4341 new_insn = emit_insn_after (pat, insn);
4342 BLOCK_END (bb) = new_insn;
7506f491
DE
4343 }
4344
a65f3558
JL
4345 /* Keep block number table up to date.
4346 Note, PAT could be a multiple insn sequence, we have to make
4347 sure that each insn in the sequence is handled. */
4348 if (GET_CODE (pat) == SEQUENCE)
4349 {
a65f3558
JL
4350 for (i = 0; i < XVECLEN (pat, 0); i++)
4351 {
4352 rtx insn = XVECEXP (pat, 0, i);
c4c81601 4353
a65f3558
JL
4354 set_block_num (insn, bb);
4355 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4356 add_label_notes (PATTERN (insn), new_insn);
c4c81601 4357
84832317 4358 note_stores (PATTERN (insn), record_set_info, insn);
a65f3558
JL
4359 }
4360 }
4361 else
4362 {
4363 add_label_notes (SET_SRC (pat), new_insn);
4364 set_block_num (new_insn, bb);
c4c81601 4365
a65f3558
JL
4366 /* Keep register set table up to date. */
4367 record_one_set (regno, new_insn);
4368 }
3947e2f9 4369
7506f491
DE
4370 gcse_create_count++;
4371
4372 if (gcse_file)
4373 {
c4c81601
RK
4374 fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
4375 bb, INSN_UID (new_insn));
4376 fprintf (gcse_file, "copying expression %d to reg %d\n",
4377 expr->bitmap_index, regno);
7506f491
DE
4378 }
4379}
4380
a42cd965
AM
4381/* Insert partially redundant expressions on edges in the CFG to make
4382 the expressions fully redundant. */
7506f491 4383
a42cd965
AM
4384static int
4385pre_edge_insert (edge_list, index_map)
4386 struct edge_list *edge_list;
7506f491
DE
4387 struct expr **index_map;
4388{
c4c81601 4389 int e, i, j, num_edges, set_size, did_insert = 0;
a65f3558
JL
4390 sbitmap *inserted;
4391
a42cd965
AM
4392 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4393 if it reaches any of the deleted expressions. */
7506f491 4394
a42cd965
AM
4395 set_size = pre_insert_map[0]->size;
4396 num_edges = NUM_EDGES (edge_list);
4397 inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4398 sbitmap_vector_zero (inserted, num_edges);
7506f491 4399
a42cd965 4400 for (e = 0; e < num_edges; e++)
7506f491
DE
4401 {
4402 int indx;
a42cd965
AM
4403 basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4404 int bb = pred->index;
a65f3558 4405
a65f3558 4406 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
7506f491 4407 {
a42cd965 4408 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
7506f491 4409
a65f3558 4410 for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
c4c81601
RK
4411 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4412 {
4413 struct expr *expr = index_map[j];
4414 struct occr *occr;
a65f3558 4415
c4c81601
RK
4416 /* Now look at each deleted occurence of this expression. */
4417 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4418 {
4419 if (! occr->deleted_p)
4420 continue;
4421
4422 /* Insert this expression on this edge if if it would
4423 reach the deleted occurence in BB. */
4424 if (!TEST_BIT (inserted[e], j))
4425 {
4426 rtx insn;
4427 edge eg = INDEX_EDGE (edge_list, e);
4428
4429 /* We can't insert anything on an abnormal and
4430 critical edge, so we insert the insn at the end of
4431 the previous block. There are several alternatives
4432 detailed in Morgans book P277 (sec 10.5) for
4433 handling this situation. This one is easiest for
4434 now. */
4435
4436 if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4437 insert_insn_end_bb (index_map[j], bb, 0);
4438 else
4439 {
4440 insn = process_insert_insn (index_map[j]);
4441 insert_insn_on_edge (insn, eg);
4442 }
4443
4444 if (gcse_file)
4445 {
4446 fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
4447 bb,
4448 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
4449 fprintf (gcse_file, "copy expression %d\n",
4450 expr->bitmap_index);
4451 }
4452
4453 SET_BIT (inserted[e], j);
4454 did_insert = 1;
4455 gcse_create_count++;
4456 }
4457 }
4458 }
7506f491
DE
4459 }
4460 }
5faf03ae 4461
5faf03ae 4462 free (inserted);
a42cd965 4463 return did_insert;
7506f491
DE
4464}
4465
c4c81601 4466/* Copy the result of INSN to REG. INDX is the expression number. */
7506f491
DE
4467
4468static void
4469pre_insert_copy_insn (expr, insn)
4470 struct expr *expr;
4471 rtx insn;
4472{
4473 rtx reg = expr->reaching_reg;
4474 int regno = REGNO (reg);
4475 int indx = expr->bitmap_index;
4476 rtx set = single_set (insn);
4477 rtx new_insn;
a42cd965 4478 int bb = BLOCK_NUM (insn);
7506f491
DE
4479
4480 if (!set)
4481 abort ();
c4c81601 4482
9e6a5703 4483 new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
7506f491 4484 insn);
c4c81601 4485
7506f491 4486 /* Keep block number table up to date. */
a42cd965 4487 set_block_num (new_insn, bb);
c4c81601 4488
7506f491
DE
4489 /* Keep register set table up to date. */
4490 record_one_set (regno, new_insn);
a42cd965
AM
4491 if (insn == BLOCK_END (bb))
4492 BLOCK_END (bb) = new_insn;
7506f491
DE
4493
4494 gcse_create_count++;
4495
4496 if (gcse_file)
a42cd965
AM
4497 fprintf (gcse_file,
4498 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4499 BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4500 INSN_UID (insn), regno);
7506f491
DE
4501}
4502
4503/* Copy available expressions that reach the redundant expression
4504 to `reaching_reg'. */
4505
4506static void
4507pre_insert_copies ()
4508{
2e653e39 4509 unsigned int i;
c4c81601
RK
4510 struct expr *expr;
4511 struct occr *occr;
4512 struct occr *avail;
a65f3558 4513
7506f491
DE
4514 /* For each available expression in the table, copy the result to
4515 `reaching_reg' if the expression reaches a deleted one.
4516
4517 ??? The current algorithm is rather brute force.
4518 Need to do some profiling. */
4519
4520 for (i = 0; i < expr_hash_table_size; i++)
c4c81601
RK
4521 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4522 {
4523 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
4524 we don't want to insert a copy here because the expression may not
4525 really be redundant. So only insert an insn if the expression was
4526 deleted. This test also avoids further processing if the
4527 expression wasn't deleted anywhere. */
4528 if (expr->reaching_reg == NULL)
4529 continue;
4530
4531 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4532 {
4533 if (! occr->deleted_p)
4534 continue;
7506f491 4535
c4c81601
RK
4536 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4537 {
4538 rtx insn = avail->insn;
7506f491 4539
c4c81601
RK
4540 /* No need to handle this one if handled already. */
4541 if (avail->copied_p)
4542 continue;
7506f491 4543
c4c81601
RK
4544 /* Don't handle this one if it's a redundant one. */
4545 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4546 continue;
7506f491 4547
c4c81601
RK
4548 /* Or if the expression doesn't reach the deleted one. */
4549 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4550 BLOCK_NUM (occr->insn)))
4551 continue;
7506f491 4552
c4c81601
RK
4553 /* Copy the result of avail to reaching_reg. */
4554 pre_insert_copy_insn (expr, insn);
4555 avail->copied_p = 1;
4556 }
4557 }
4558 }
7506f491
DE
4559}
4560
4561/* Delete redundant computations.
7506f491
DE
4562 Deletion is done by changing the insn to copy the `reaching_reg' of
4563 the expression into the result of the SET. It is left to later passes
4564 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4565
4566 Returns non-zero if a change is made. */
4567
4568static int
4569pre_delete ()
4570{
2e653e39 4571 unsigned int i;
63bc1d05 4572 int changed;
c4c81601
RK
4573 struct expr *expr;
4574 struct occr *occr;
a65f3558 4575
7506f491
DE
4576 changed = 0;
4577 for (i = 0; i < expr_hash_table_size; i++)
c4c81601
RK
4578 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4579 {
4580 int indx = expr->bitmap_index;
7506f491 4581
c4c81601
RK
4582 /* We only need to search antic_occr since we require
4583 ANTLOC != 0. */
7506f491 4584
c4c81601
RK
4585 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4586 {
4587 rtx insn = occr->insn;
4588 rtx set;
4589 int bb = BLOCK_NUM (insn);
7506f491 4590
e20dccad 4591 if (TEST_BIT (pre_delete_map[bb], indx))
c4c81601
RK
4592 {
4593 set = single_set (insn);
4594 if (! set)
4595 abort ();
4596
4597 /* Create a pseudo-reg to store the result of reaching
4598 expressions into. Get the mode for the new pseudo from
4599 the mode of the original destination pseudo. */
4600 if (expr->reaching_reg == NULL)
4601 expr->reaching_reg
4602 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4603
4604 /* In theory this should never fail since we're creating
4605 a reg->reg copy.
4606
4607 However, on the x86 some of the movXX patterns actually
4608 contain clobbers of scratch regs. This may cause the
4609 insn created by validate_change to not match any pattern
4610 and thus cause validate_change to fail. */
4611 if (validate_change (insn, &SET_SRC (set),
4612 expr->reaching_reg, 0))
4613 {
4614 occr->deleted_p = 1;
4615 SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4616 changed = 1;
4617 gcse_subst_count++;
4618 }
7506f491 4619
c4c81601
RK
4620 if (gcse_file)
4621 {
4622 fprintf (gcse_file,
4623 "PRE: redundant insn %d (expression %d) in ",
4624 INSN_UID (insn), indx);
4625 fprintf (gcse_file, "bb %d, reaching reg is %d\n",
4626 bb, REGNO (expr->reaching_reg));
4627 }
4628 }
4629 }
4630 }
7506f491
DE
4631
4632 return changed;
4633}
4634
4635/* Perform GCSE optimizations using PRE.
4636 This is called by one_pre_gcse_pass after all the dataflow analysis
4637 has been done.
4638
c4c81601
RK
4639 This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
4640 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
4641 Compiler Design and Implementation.
7506f491 4642
c4c81601
RK
4643 ??? A new pseudo reg is created to hold the reaching expression. The nice
4644 thing about the classical approach is that it would try to use an existing
4645 reg. If the register can't be adequately optimized [i.e. we introduce
4646 reload problems], one could add a pass here to propagate the new register
4647 through the block.
7506f491 4648
c4c81601
RK
4649 ??? We don't handle single sets in PARALLELs because we're [currently] not
4650 able to copy the rest of the parallel when we insert copies to create full
4651 redundancies from partial redundancies. However, there's no reason why we
4652 can't handle PARALLELs in the cases where there are no partial
7506f491
DE
4653 redundancies. */
4654
4655static int
4656pre_gcse ()
4657{
2e653e39
RK
4658 unsigned int i;
4659 int did_insert, changed;
7506f491 4660 struct expr **index_map;
c4c81601 4661 struct expr *expr;
7506f491
DE
4662
4663 /* Compute a mapping from expression number (`bitmap_index') to
4664 hash table entry. */
4665
dd1bd863 4666 index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
7506f491 4667 for (i = 0; i < expr_hash_table_size; i++)
c4c81601
RK
4668 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4669 index_map[expr->bitmap_index] = expr;
7506f491
DE
4670
4671 /* Reset bitmap used to track which insns are redundant. */
a65f3558
JL
4672 pre_redundant_insns = sbitmap_alloc (max_cuid);
4673 sbitmap_zero (pre_redundant_insns);
7506f491
DE
4674
4675 /* Delete the redundant insns first so that
4676 - we know what register to use for the new insns and for the other
4677 ones with reaching expressions
4678 - we know which insns are redundant when we go to create copies */
c4c81601 4679
7506f491
DE
4680 changed = pre_delete ();
4681
a42cd965 4682 did_insert = pre_edge_insert (edge_list, index_map);
c4c81601 4683
7506f491 4684 /* In other places with reaching expressions, copy the expression to the
a42cd965 4685 specially allocated pseudo-reg that reaches the redundant expr. */
7506f491 4686 pre_insert_copies ();
a42cd965
AM
4687 if (did_insert)
4688 {
4689 commit_edge_insertions ();
4690 changed = 1;
4691 }
7506f491 4692
283a2545 4693 free (index_map);
a65f3558 4694 free (pre_redundant_insns);
7506f491
DE
4695 return changed;
4696}
4697
4698/* Top level routine to perform one PRE GCSE pass.
4699
4700 Return non-zero if a change was made. */
4701
4702static int
b5ce41ff 4703one_pre_gcse_pass (pass)
7506f491
DE
4704 int pass;
4705{
4706 int changed = 0;
4707
4708 gcse_subst_count = 0;
4709 gcse_create_count = 0;
4710
4711 alloc_expr_hash_table (max_cuid);
a42cd965 4712 add_noreturn_fake_exit_edges ();
b5ce41ff 4713 compute_expr_hash_table ();
7506f491
DE
4714 if (gcse_file)
4715 dump_hash_table (gcse_file, "Expression", expr_hash_table,
4716 expr_hash_table_size, n_exprs);
c4c81601 4717
7506f491
DE
4718 if (n_exprs > 0)
4719 {
4720 alloc_pre_mem (n_basic_blocks, n_exprs);
4721 compute_pre_data ();
4722 changed |= pre_gcse ();
a42cd965 4723 free_edge_list (edge_list);
7506f491
DE
4724 free_pre_mem ();
4725 }
c4c81601 4726
a42cd965 4727 remove_fake_edges ();
7506f491
DE
4728 free_expr_hash_table ();
4729
4730 if (gcse_file)
4731 {
c4c81601
RK
4732 fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
4733 current_function_name, pass, bytes_used);
4734 fprintf (gcse_file, "%d substs, %d insns created\n",
4735 gcse_subst_count, gcse_create_count);
7506f491
DE
4736 }
4737
4738 return changed;
4739}
aeb2f500
JW
4740\f
4741/* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4742 We have to add REG_LABEL notes, because the following loop optimization
4743 pass requires them. */
4744
4745/* ??? This is very similar to the loop.c add_label_notes function. We
4746 could probably share code here. */
4747
4748/* ??? If there was a jump optimization pass after gcse and before loop,
4749 then we would not need to do this here, because jump would add the
4750 necessary REG_LABEL notes. */
4751
4752static void
4753add_label_notes (x, insn)
4754 rtx x;
4755 rtx insn;
4756{
4757 enum rtx_code code = GET_CODE (x);
4758 int i, j;
6f7d635c 4759 const char *fmt;
aeb2f500
JW
4760
4761 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4762 {
6b3603c2 4763 /* This code used to ignore labels that referred to dispatch tables to
ac7c5af5 4764 avoid flow generating (slighly) worse code.
6b3603c2 4765
ac7c5af5
JL
4766 We no longer ignore such label references (see LABEL_REF handling in
4767 mark_jump_label for additional information). */
c4c81601 4768
6b3603c2
JL
4769 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4770 REG_NOTES (insn));
aeb2f500
JW
4771 return;
4772 }
4773
c4c81601 4774 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
aeb2f500
JW
4775 {
4776 if (fmt[i] == 'e')
4777 add_label_notes (XEXP (x, i), insn);
4778 else if (fmt[i] == 'E')
4779 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4780 add_label_notes (XVECEXP (x, i, j), insn);
4781 }
4782}
a65f3558
JL
4783
4784/* Compute transparent outgoing information for each block.
4785
4786 An expression is transparent to an edge unless it is killed by
4787 the edge itself. This can only happen with abnormal control flow,
4788 when the edge is traversed through a call. This happens with
4789 non-local labels and exceptions.
4790
4791 This would not be necessary if we split the edge. While this is
4792 normally impossible for abnormal critical edges, with some effort
4793 it should be possible with exception handling, since we still have
4794 control over which handler should be invoked. But due to increased
4795 EH table sizes, this may not be worthwhile. */
4796
4797static void
4798compute_transpout ()
4799{
4800 int bb;
2e653e39 4801 unsigned int i;
c4c81601 4802 struct expr *expr;
a65f3558
JL
4803
4804 sbitmap_vector_ones (transpout, n_basic_blocks);
4805
4806 for (bb = 0; bb < n_basic_blocks; ++bb)
4807 {
a65f3558
JL
4808 /* Note that flow inserted a nop a the end of basic blocks that
4809 end in call instructions for reasons other than abnormal
4810 control flow. */
4811 if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4812 continue;
4813
4814 for (i = 0; i < expr_hash_table_size; i++)
c4c81601
RK
4815 for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4816 if (GET_CODE (expr->expr) == MEM)
4817 {
4818 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4819 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4820 continue;
a65f3558 4821
c4c81601
RK
4822 /* ??? Optimally, we would use interprocedural alias
4823 analysis to determine if this mem is actually killed
4824 by this call. */
4825 RESET_BIT (transpout[bb], expr->bitmap_index);
4826 }
a65f3558
JL
4827 }
4828}
dfdb644f
JL
4829
4830/* Removal of useless null pointer checks */
4831
dfdb644f 4832/* Called via note_stores. X is set by SETTER. If X is a register we must
0511851c
MM
4833 invalidate nonnull_local and set nonnull_killed. DATA is really a
4834 `null_pointer_info *'.
dfdb644f
JL
4835
4836 We ignore hard registers. */
c4c81601 4837
dfdb644f 4838static void
84832317 4839invalidate_nonnull_info (x, setter, data)
dfdb644f
JL
4840 rtx x;
4841 rtx setter ATTRIBUTE_UNUSED;
0511851c 4842 void *data;
dfdb644f 4843{
770ae6cc
RK
4844 unsigned int regno;
4845 struct null_pointer_info *npi = (struct null_pointer_info *) data;
c4c81601 4846
dfdb644f
JL
4847 while (GET_CODE (x) == SUBREG)
4848 x = SUBREG_REG (x);
4849
4850 /* Ignore anything that is not a register or is a hard register. */
4851 if (GET_CODE (x) != REG
0511851c
MM
4852 || REGNO (x) < npi->min_reg
4853 || REGNO (x) >= npi->max_reg)
dfdb644f
JL
4854 return;
4855
0511851c 4856 regno = REGNO (x) - npi->min_reg;
dfdb644f 4857
0511851c
MM
4858 RESET_BIT (npi->nonnull_local[npi->current_block], regno);
4859 SET_BIT (npi->nonnull_killed[npi->current_block], regno);
dfdb644f
JL
4860}
4861
0511851c
MM
4862/* Do null-pointer check elimination for the registers indicated in
4863 NPI. NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
4864 they are not our responsibility to free. */
dfdb644f 4865
0511851c 4866static void
b71a2ff8 4867delete_null_pointer_checks_1 (block_reg, nonnull_avin, nonnull_avout, npi)
770ae6cc 4868 unsigned int *block_reg;
0511851c
MM
4869 sbitmap *nonnull_avin;
4870 sbitmap *nonnull_avout;
4871 struct null_pointer_info *npi;
dfdb644f 4872{
ce724250 4873 int bb;
0511851c
MM
4874 int current_block;
4875 sbitmap *nonnull_local = npi->nonnull_local;
4876 sbitmap *nonnull_killed = npi->nonnull_killed;
dfdb644f 4877
dfdb644f
JL
4878 /* Compute local properties, nonnull and killed. A register will have
4879 the nonnull property if at the end of the current block its value is
4880 known to be nonnull. The killed property indicates that somewhere in
4881 the block any information we had about the register is killed.
4882
4883 Note that a register can have both properties in a single block. That
4884 indicates that it's killed, then later in the block a new value is
4885 computed. */
4886 sbitmap_vector_zero (nonnull_local, n_basic_blocks);
4887 sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
c4c81601 4888
dfdb644f
JL
4889 for (current_block = 0; current_block < n_basic_blocks; current_block++)
4890 {
4891 rtx insn, stop_insn;
4892
0511851c
MM
4893 /* Set the current block for invalidate_nonnull_info. */
4894 npi->current_block = current_block;
4895
dfdb644f
JL
4896 /* Scan each insn in the basic block looking for memory references and
4897 register sets. */
4898 stop_insn = NEXT_INSN (BLOCK_END (current_block));
4899 for (insn = BLOCK_HEAD (current_block);
4900 insn != stop_insn;
4901 insn = NEXT_INSN (insn))
4902 {
4903 rtx set;
0511851c 4904 rtx reg;
dfdb644f
JL
4905
4906 /* Ignore anything that is not a normal insn. */
4907 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4908 continue;
4909
4910 /* Basically ignore anything that is not a simple SET. We do have
4911 to make sure to invalidate nonnull_local and set nonnull_killed
4912 for such insns though. */
4913 set = single_set (insn);
4914 if (!set)
4915 {
0511851c 4916 note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
dfdb644f
JL
4917 continue;
4918 }
4919
4920 /* See if we've got a useable memory load. We handle it first
4921 in case it uses its address register as a dest (which kills
4922 the nonnull property). */
4923 if (GET_CODE (SET_SRC (set)) == MEM
0511851c
MM
4924 && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
4925 && REGNO (reg) >= npi->min_reg
4926 && REGNO (reg) < npi->max_reg)
dfdb644f 4927 SET_BIT (nonnull_local[current_block],
0511851c 4928 REGNO (reg) - npi->min_reg);
dfdb644f
JL
4929
4930 /* Now invalidate stuff clobbered by this insn. */
0511851c 4931 note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
dfdb644f
JL
4932
4933 /* And handle stores, we do these last since any sets in INSN can
4934 not kill the nonnull property if it is derived from a MEM
4935 appearing in a SET_DEST. */
4936 if (GET_CODE (SET_DEST (set)) == MEM
0511851c
MM
4937 && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
4938 && REGNO (reg) >= npi->min_reg
4939 && REGNO (reg) < npi->max_reg)
dfdb644f 4940 SET_BIT (nonnull_local[current_block],
0511851c 4941 REGNO (reg) - npi->min_reg);
dfdb644f
JL
4942 }
4943 }
4944
4945 /* Now compute global properties based on the local properties. This
4946 is a classic global availablity algorithm. */
ce724250
JL
4947 compute_available (nonnull_local, nonnull_killed,
4948 nonnull_avout, nonnull_avin);
dfdb644f
JL
4949
4950 /* Now look at each bb and see if it ends with a compare of a value
4951 against zero. */
4952 for (bb = 0; bb < n_basic_blocks; bb++)
4953 {
4954 rtx last_insn = BLOCK_END (bb);
0511851c 4955 rtx condition, earliest;
dfdb644f
JL
4956 int compare_and_branch;
4957
0511851c
MM
4958 /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
4959 since BLOCK_REG[BB] is zero if this block did not end with a
4960 comparison against zero, this condition works. */
4961 if (block_reg[bb] < npi->min_reg
4962 || block_reg[bb] >= npi->max_reg)
dfdb644f
JL
4963 continue;
4964
4965 /* LAST_INSN is a conditional jump. Get its condition. */
4966 condition = get_condition (last_insn, &earliest);
4967
40d7a3fe
NB
4968 /* If we can't determine the condition then skip. */
4969 if (! condition)
4970 continue;
4971
dfdb644f 4972 /* Is the register known to have a nonzero value? */
0511851c 4973 if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
dfdb644f
JL
4974 continue;
4975
4976 /* Try to compute whether the compare/branch at the loop end is one or
4977 two instructions. */
4978 if (earliest == last_insn)
4979 compare_and_branch = 1;
4980 else if (earliest == prev_nonnote_insn (last_insn))
4981 compare_and_branch = 2;
4982 else
4983 continue;
4984
4985 /* We know the register in this comparison is nonnull at exit from
4986 this block. We can optimize this comparison. */
4987 if (GET_CODE (condition) == NE)
4988 {
4989 rtx new_jump;
4990
4991 new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
4992 last_insn);
4993 JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
4994 LABEL_NUSES (JUMP_LABEL (new_jump))++;
4995 emit_barrier_after (new_jump);
4996 }
4997 delete_insn (last_insn);
4998 if (compare_and_branch == 2)
4999 delete_insn (earliest);
0511851c
MM
5000
5001 /* Don't check this block again. (Note that BLOCK_END is
5002 invalid here; we deleted the last instruction in the
5003 block.) */
5004 block_reg[bb] = 0;
5005 }
5006}
5007
5008/* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5009 at compile time.
5010
5011 This is conceptually similar to global constant/copy propagation and
5012 classic global CSE (it even uses the same dataflow equations as cprop).
5013
5014 If a register is used as memory address with the form (mem (reg)), then we
5015 know that REG can not be zero at that point in the program. Any instruction
5016 which sets REG "kills" this property.
5017
5018 So, if every path leading to a conditional branch has an available memory
5019 reference of that form, then we know the register can not have the value
5020 zero at the conditional branch.
5021
5022 So we merely need to compute the local properies and propagate that data
5023 around the cfg, then optimize where possible.
5024
5025 We run this pass two times. Once before CSE, then again after CSE. This
5026 has proven to be the most profitable approach. It is rare for new
5027 optimization opportunities of this nature to appear after the first CSE
5028 pass.
5029
5030 This could probably be integrated with global cprop with a little work. */
5031
5032void
5033delete_null_pointer_checks (f)
2e653e39 5034 rtx f ATTRIBUTE_UNUSED;
0511851c 5035{
0511851c 5036 sbitmap *nonnull_avin, *nonnull_avout;
770ae6cc 5037 unsigned int *block_reg;
0511851c
MM
5038 int bb;
5039 int reg;
5040 int regs_per_pass;
5041 int max_reg;
5042 struct null_pointer_info npi;
5043
0511851c
MM
5044 /* If we have only a single block, then there's nothing to do. */
5045 if (n_basic_blocks <= 1)
a18820c6 5046 return;
0511851c
MM
5047
5048 /* Trying to perform global optimizations on flow graphs which have
5049 a high connectivity will take a long time and is unlikely to be
5050 particularly useful.
5051
5052 In normal circumstances a cfg should have about twice has many edges
5053 as blocks. But we do not want to punish small functions which have
5054 a couple switch statements. So we require a relatively large number
5055 of basic blocks and the ratio of edges to blocks to be high. */
5056 if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
a18820c6 5057 return;
0511851c 5058
0511851c
MM
5059 /* We need four bitmaps, each with a bit for each register in each
5060 basic block. */
5061 max_reg = max_reg_num ();
5062 regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5063
5064 /* Allocate bitmaps to hold local and global properties. */
5065 npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5066 npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5067 nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5068 nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5069
5070 /* Go through the basic blocks, seeing whether or not each block
5071 ends with a conditional branch whose condition is a comparison
5072 against zero. Record the register compared in BLOCK_REG. */
f9e158c3 5073 block_reg = (unsigned int *) xcalloc (n_basic_blocks, sizeof (int));
0511851c
MM
5074 for (bb = 0; bb < n_basic_blocks; bb++)
5075 {
5076 rtx last_insn = BLOCK_END (bb);
5077 rtx condition, earliest, reg;
5078
5079 /* We only want conditional branches. */
5080 if (GET_CODE (last_insn) != JUMP_INSN
7f1c097d
JH
5081 || !any_condjump_p (last_insn)
5082 || !onlyjump_p (last_insn))
0511851c
MM
5083 continue;
5084
5085 /* LAST_INSN is a conditional jump. Get its condition. */
5086 condition = get_condition (last_insn, &earliest);
5087
5088 /* If we were unable to get the condition, or it is not a equality
5089 comparison against zero then there's nothing we can do. */
5090 if (!condition
5091 || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5092 || GET_CODE (XEXP (condition, 1)) != CONST_INT
5093 || (XEXP (condition, 1)
5094 != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5095 continue;
5096
5097 /* We must be checking a register against zero. */
5098 reg = XEXP (condition, 0);
5099 if (GET_CODE (reg) != REG)
5100 continue;
5101
5102 block_reg[bb] = REGNO (reg);
5103 }
5104
5105 /* Go through the algorithm for each block of registers. */
5106 for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5107 {
5108 npi.min_reg = reg;
5109 npi.max_reg = MIN (reg + regs_per_pass, max_reg);
b71a2ff8 5110 delete_null_pointer_checks_1 (block_reg, nonnull_avin,
0511851c 5111 nonnull_avout, &npi);
dfdb644f
JL
5112 }
5113
0511851c
MM
5114 /* Free the table of registers compared at the end of every block. */
5115 free (block_reg);
5116
dfdb644f 5117 /* Free bitmaps. */
0511851c
MM
5118 free (npi.nonnull_local);
5119 free (npi.nonnull_killed);
dfdb644f
JL
5120 free (nonnull_avin);
5121 free (nonnull_avout);
5122}
bb457bd9
JL
5123
5124/* Code Hoisting variables and subroutines. */
5125
5126/* Very busy expressions. */
5127static sbitmap *hoist_vbein;
5128static sbitmap *hoist_vbeout;
5129
5130/* Hoistable expressions. */
5131static sbitmap *hoist_exprs;
5132
5133/* Dominator bitmaps. */
5134static sbitmap *dominators;
bb457bd9
JL
5135
5136/* ??? We could compute post dominators and run this algorithm in
5137 reverse to to perform tail merging, doing so would probably be
5138 more effective than the tail merging code in jump.c.
5139
5140 It's unclear if tail merging could be run in parallel with
5141 code hoisting. It would be nice. */
5142
5143/* Allocate vars used for code hoisting analysis. */
5144
5145static void
5146alloc_code_hoist_mem (n_blocks, n_exprs)
5147 int n_blocks, n_exprs;
5148{
5149 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5150 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5151 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5152
5153 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5154 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5155 hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5156 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5157
5158 dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
bb457bd9
JL
5159}
5160
5161/* Free vars used for code hoisting analysis. */
5162
5163static void
5164free_code_hoist_mem ()
5165{
5166 free (antloc);
5167 free (transp);
5168 free (comp);
5169
5170 free (hoist_vbein);
5171 free (hoist_vbeout);
5172 free (hoist_exprs);
5173 free (transpout);
5174
5175 free (dominators);
bb457bd9
JL
5176}
5177
5178/* Compute the very busy expressions at entry/exit from each block.
5179
5180 An expression is very busy if all paths from a given point
5181 compute the expression. */
5182
5183static void
5184compute_code_hoist_vbeinout ()
5185{
5186 int bb, changed, passes;
5187
5188 sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5189 sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5190
5191 passes = 0;
5192 changed = 1;
c4c81601 5193
bb457bd9
JL
5194 while (changed)
5195 {
5196 changed = 0;
c4c81601 5197
bb457bd9
JL
5198 /* We scan the blocks in the reverse order to speed up
5199 the convergence. */
5200 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5201 {
5202 changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5203 hoist_vbeout[bb], transp[bb]);
5204 if (bb != n_basic_blocks - 1)
a42cd965 5205 sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
bb457bd9 5206 }
c4c81601 5207
bb457bd9
JL
5208 passes++;
5209 }
5210
5211 if (gcse_file)
5212 fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5213}
5214
5215/* Top level routine to do the dataflow analysis needed by code hoisting. */
5216
5217static void
5218compute_code_hoist_data ()
5219{
5220 compute_local_properties (transp, comp, antloc, 0);
5221 compute_transpout ();
5222 compute_code_hoist_vbeinout ();
092ae4ba 5223 compute_flow_dominators (dominators, NULL);
bb457bd9
JL
5224 if (gcse_file)
5225 fprintf (gcse_file, "\n");
5226}
5227
5228/* Determine if the expression identified by EXPR_INDEX would
5229 reach BB unimpared if it was placed at the end of EXPR_BB.
5230
5231 It's unclear exactly what Muchnick meant by "unimpared". It seems
5232 to me that the expression must either be computed or transparent in
5233 *every* block in the path(s) from EXPR_BB to BB. Any other definition
5234 would allow the expression to be hoisted out of loops, even if
5235 the expression wasn't a loop invariant.
5236
5237 Contrast this to reachability for PRE where an expression is
5238 considered reachable if *any* path reaches instead of *all*
5239 paths. */
5240
5241static int
5242hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5243 int expr_bb;
5244 int expr_index;
5245 int bb;
5246 char *visited;
5247{
5248 edge pred;
283a2545
RL
5249 int visited_allocated_locally = 0;
5250
bb457bd9
JL
5251
5252 if (visited == NULL)
5253 {
283a2545
RL
5254 visited_allocated_locally = 1;
5255 visited = xcalloc (n_basic_blocks, 1);
bb457bd9
JL
5256 }
5257
5258 visited[expr_bb] = 1;
5259 for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5260 {
5261 int pred_bb = pred->src->index;
5262
5263 if (pred->src == ENTRY_BLOCK_PTR)
5264 break;
5265 else if (visited[pred_bb])
5266 continue;
c4c81601 5267
bb457bd9
JL
5268 /* Does this predecessor generate this expression? */
5269 else if (TEST_BIT (comp[pred_bb], expr_index))
5270 break;
5271 else if (! TEST_BIT (transp[pred_bb], expr_index))
5272 break;
c4c81601 5273
bb457bd9
JL
5274 /* Not killed. */
5275 else
5276 {
5277 visited[pred_bb] = 1;
5278 if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5279 pred_bb, visited))
5280 break;
5281 }
5282 }
283a2545
RL
5283 if (visited_allocated_locally)
5284 free (visited);
c4c81601 5285
bb457bd9
JL
5286 return (pred == NULL);
5287}
5288\f
5289/* Actually perform code hoisting. */
c4c81601 5290
bb457bd9
JL
5291static void
5292hoist_code ()
5293{
2e653e39
RK
5294 int bb, dominated;
5295 unsigned int i;
bb457bd9 5296 struct expr **index_map;
c4c81601 5297 struct expr *expr;
bb457bd9
JL
5298
5299 sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5300
5301 /* Compute a mapping from expression number (`bitmap_index') to
5302 hash table entry. */
5303
dd1bd863 5304 index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
bb457bd9 5305 for (i = 0; i < expr_hash_table_size; i++)
c4c81601
RK
5306 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5307 index_map[expr->bitmap_index] = expr;
bb457bd9
JL
5308
5309 /* Walk over each basic block looking for potentially hoistable
5310 expressions, nothing gets hoisted from the entry block. */
5311 for (bb = 0; bb < n_basic_blocks; bb++)
5312 {
5313 int found = 0;
5314 int insn_inserted_p;
5315
5316 /* Examine each expression that is very busy at the exit of this
5317 block. These are the potentially hoistable expressions. */
5318 for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5319 {
5320 int hoistable = 0;
c4c81601
RK
5321
5322 if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
bb457bd9
JL
5323 {
5324 /* We've found a potentially hoistable expression, now
5325 we look at every block BB dominates to see if it
5326 computes the expression. */
5327 for (dominated = 0; dominated < n_basic_blocks; dominated++)
5328 {
5329 /* Ignore self dominance. */
5330 if (bb == dominated
5331 || ! TEST_BIT (dominators[dominated], bb))
5332 continue;
5333
5334 /* We've found a dominated block, now see if it computes
5335 the busy expression and whether or not moving that
5336 expression to the "beginning" of that block is safe. */
5337 if (!TEST_BIT (antloc[dominated], i))
5338 continue;
5339
5340 /* Note if the expression would reach the dominated block
5341 unimpared if it was placed at the end of BB.
5342
5343 Keep track of how many times this expression is hoistable
5344 from a dominated block into BB. */
5345 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5346 hoistable++;
5347 }
5348
5349 /* If we found more than one hoistable occurence of this
5350 expression, then note it in the bitmap of expressions to
5351 hoist. It makes no sense to hoist things which are computed
5352 in only one BB, and doing so tends to pessimize register
5353 allocation. One could increase this value to try harder
5354 to avoid any possible code expansion due to register
5355 allocation issues; however experiments have shown that
5356 the vast majority of hoistable expressions are only movable
5357 from two successors, so raising this threshhold is likely
5358 to nullify any benefit we get from code hoisting. */
5359 if (hoistable > 1)
5360 {
5361 SET_BIT (hoist_exprs[bb], i);
5362 found = 1;
5363 }
5364 }
5365 }
5366
5367 /* If we found nothing to hoist, then quit now. */
5368 if (! found)
5369 continue;
5370
5371 /* Loop over all the hoistable expressions. */
5372 for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5373 {
5374 /* We want to insert the expression into BB only once, so
5375 note when we've inserted it. */
5376 insn_inserted_p = 0;
5377
5378 /* These tests should be the same as the tests above. */
5379 if (TEST_BIT (hoist_vbeout[bb], i))
5380 {
5381 /* We've found a potentially hoistable expression, now
5382 we look at every block BB dominates to see if it
5383 computes the expression. */
5384 for (dominated = 0; dominated < n_basic_blocks; dominated++)
5385 {
5386 /* Ignore self dominance. */
5387 if (bb == dominated
5388 || ! TEST_BIT (dominators[dominated], bb))
5389 continue;
5390
5391 /* We've found a dominated block, now see if it computes
5392 the busy expression and whether or not moving that
5393 expression to the "beginning" of that block is safe. */
5394 if (!TEST_BIT (antloc[dominated], i))
5395 continue;
5396
5397 /* The expression is computed in the dominated block and
5398 it would be safe to compute it at the start of the
5399 dominated block. Now we have to determine if the
5400 expresion would reach the dominated block if it was
5401 placed at the end of BB. */
5402 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5403 {
5404 struct expr *expr = index_map[i];
5405 struct occr *occr = expr->antic_occr;
5406 rtx insn;
5407 rtx set;
5408
bb457bd9
JL
5409 /* Find the right occurence of this expression. */
5410 while (BLOCK_NUM (occr->insn) != dominated && occr)
5411 occr = occr->next;
5412
5413 /* Should never happen. */
5414 if (!occr)
5415 abort ();
5416
5417 insn = occr->insn;
5418
5419 set = single_set (insn);
5420 if (! set)
5421 abort ();
5422
5423 /* Create a pseudo-reg to store the result of reaching
5424 expressions into. Get the mode for the new pseudo
5425 from the mode of the original destination pseudo. */
5426 if (expr->reaching_reg == NULL)
5427 expr->reaching_reg
5428 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5429
5430 /* In theory this should never fail since we're creating
5431 a reg->reg copy.
5432
c4c81601
RK
5433 However, on the x86 some of the movXX patterns
5434 actually contain clobbers of scratch regs. This may
5435 cause the insn created by validate_change to not
5436 match any pattern and thus cause validate_change to
5437 fail. */
bb457bd9
JL
5438 if (validate_change (insn, &SET_SRC (set),
5439 expr->reaching_reg, 0))
5440 {
5441 occr->deleted_p = 1;
5442 if (!insn_inserted_p)
5443 {
5444 insert_insn_end_bb (index_map[i], bb, 0);
5445 insn_inserted_p = 1;
5446 }
5447 }
5448 }
5449 }
5450 }
5451 }
5452 }
c4c81601 5453
283a2545 5454 free (index_map);
bb457bd9
JL
5455}
5456
5457/* Top level routine to perform one code hoisting (aka unification) pass
5458
5459 Return non-zero if a change was made. */
5460
5461static int
5462one_code_hoisting_pass ()
5463{
5464 int changed = 0;
5465
5466 alloc_expr_hash_table (max_cuid);
5467 compute_expr_hash_table ();
5468 if (gcse_file)
5469 dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5470 expr_hash_table_size, n_exprs);
c4c81601 5471
bb457bd9
JL
5472 if (n_exprs > 0)
5473 {
5474 alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5475 compute_code_hoist_data ();
5476 hoist_code ();
5477 free_code_hoist_mem ();
5478 }
c4c81601 5479
bb457bd9
JL
5480 free_expr_hash_table ();
5481
5482 return changed;
5483}