1 /* Partial redundancy elimination / Hoisting for RTL.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
3 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* TODO
22 - reordering of memory allocation and freeing to be more space efficient
23 - do rough calc of how many regs are needed in each block, and a rough
24 calc of how many regs are available in each class and use that to
25 throttle back the code in cases where RTX_COST is minimal.
26 */
27
28 /* References searched while implementing this.
29
30 Compilers Principles, Techniques and Tools
31 Aho, Sethi, Ullman
32 Addison-Wesley, 1988
33
34 Global Optimization by Suppression of Partial Redundancies
35 E. Morel, C. Renvoise
36       Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
37
38 A Portable Machine-Independent Global Optimizer - Design and Measurements
39 Frederick Chow
40 Stanford Ph.D. thesis, Dec. 1983
41
42 A Fast Algorithm for Code Movement Optimization
43 D.M. Dhamdhere
44 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
45
46 A Solution to a Problem with Morel and Renvoise's
47 Global Optimization by Suppression of Partial Redundancies
48 K-H Drechsler, M.P. Stadel
49 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
50
51 Practical Adaptation of the Global Optimization
52 Algorithm of Morel and Renvoise
53 D.M. Dhamdhere
54 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
55
56 Efficiently Computing Static Single Assignment Form and the Control
57 Dependence Graph
58 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
59 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
60
61 Lazy Code Motion
62 J. Knoop, O. Ruthing, B. Steffen
63 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
64
65 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
66 Time for Reducible Flow Control
67 Thomas Ball
68 ACM Letters on Programming Languages and Systems,
69 Vol. 2, Num. 1-4, Mar-Dec 1993
70
71 An Efficient Representation for Sparse Sets
72 Preston Briggs, Linda Torczon
73 ACM Letters on Programming Languages and Systems,
74 Vol. 2, Num. 1-4, Mar-Dec 1993
75
76 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
77 K-H Drechsler, M.P. Stadel
78 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
79
80 Partial Dead Code Elimination
81 J. Knoop, O. Ruthing, B. Steffen
82 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
83
84 Effective Partial Redundancy Elimination
85 P. Briggs, K.D. Cooper
86 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
87
88 The Program Structure Tree: Computing Control Regions in Linear Time
89 R. Johnson, D. Pearson, K. Pingali
90 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
91
92 Optimal Code Motion: Theory and Practice
93 J. Knoop, O. Ruthing, B. Steffen
94 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
95
96 The power of assignment motion
97 J. Knoop, O. Ruthing, B. Steffen
98 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
99
100 Global code motion / global value numbering
101 C. Click
102 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
103
104 Value Driven Redundancy Elimination
105 L.T. Simpson
106 Rice University Ph.D. thesis, Apr. 1996
107
108 Value Numbering
109 L.T. Simpson
110 Massively Scalar Compiler Project, Rice University, Sep. 1996
111
112 High Performance Compilers for Parallel Computing
113 Michael Wolfe
114 Addison-Wesley, 1996
115
116 Advanced Compiler Design and Implementation
117 Steven Muchnick
118 Morgan Kaufmann, 1997
119
120 Building an Optimizing Compiler
121 Robert Morgan
122 Digital Press, 1998
123
124 People wishing to speed up the code here should read:
125 Elimination Algorithms for Data Flow Analysis
126 B.G. Ryder, M.C. Paull
127 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
128
129 How to Analyze Large Programs Efficiently and Informatively
130 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
131 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
132
133 People wishing to do something different can find various possibilities
134 in the above papers and elsewhere.
135 */
136
137 #include "config.h"
138 #include "system.h"
139 #include "coretypes.h"
140 #include "tm.h"
141 #include "diagnostic-core.h"
142 #include "toplev.h"
143
144 #include "rtl.h"
145 #include "tree.h"
146 #include "tm_p.h"
147 #include "regs.h"
148 #include "hard-reg-set.h"
149 #include "flags.h"
150 #include "insn-config.h"
151 #include "recog.h"
152 #include "basic-block.h"
153 #include "function.h"
154 #include "expr.h"
155 #include "except.h"
156 #include "ggc.h"
157 #include "params.h"
158 #include "cselib.h"
159 #include "intl.h"
160 #include "obstack.h"
161 #include "tree-pass.h"
162 #include "hashtab.h"
163 #include "df.h"
164 #include "dbgcnt.h"
165 #include "target.h"
166 #include "gcse.h"
167
168 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
169 are a superset of those done by classic GCSE.
170
171 Two passes of copy/constant propagation are done around PRE or hoisting
172 because the first one enables more GCSE and the second one helps to clean
173 up the copies that PRE and HOIST create. This is needed more for PRE than
174 for HOIST because code hoisting will try to use an existing register
175 containing the common subexpression rather than create a new one. This is
176 harder to do for PRE because of the code motion (which HOIST doesn't do).
177
178 Expressions we are interested in GCSE-ing are of the form
179 (set (pseudo-reg) (expression)).
180 Function want_to_gcse_p says what these are.
181
182 In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
183 This allows PRE to hoist expressions that are expressed in multiple insns,
184 such as complex address calculations (e.g. for PIC code, or loads with a
185 high part and a low part).
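   A rough, hypothetical sketch of such a case (PIC-style addressing on a
   RISC-like target):

       r1 = high (sym)
       r2 = r1 + low (sym)      ; REG_EQUAL note: sym

   The REG_EQUAL note on the second insn lets PRE treat the whole address
   SYM as a single candidate expression even though it is built by two
   insns.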
186
187 PRE handles moving invariant expressions out of loops (by treating them as
188 partially redundant).
189
190 **********************
191
192 We used to support multiple passes but there are diminishing returns in
193 doing so. The first pass usually makes 90% of the changes that are doable.
194 A second pass can make a few more changes made possible by the first pass.
195 Experiments show any further passes don't make enough changes to justify
196 the expense.
197
198 A study of spec92 using an unlimited number of passes:
199 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
200 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
201 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
202
203    It was found that doing copy propagation between each pass enables
204    further substitutions.
205
206 This study was done before expressions in REG_EQUAL notes were added as
207 candidate expressions for optimization, and before the GIMPLE optimizers
208    were added.  Multiple passes are probably even less efficient now than
209    they were at the time the study was conducted.
210
211 PRE is quite expensive in complicated functions because the DFA can take
212 a while to converge. Hence we only perform one pass.
213
214 **********************
215
216 The steps for PRE are:
217
218 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
219
220 2) Perform the data flow analysis for PRE.
221
222 3) Delete the redundant instructions
223
224 4) Insert the required copies [if any] that make the partially
225 redundant instructions fully redundant.
226
227 5) For other reaching expressions, insert an instruction to copy the value
228 to a newly created pseudo that will reach the redundant instruction.
229
230 The deletion is done first so that when we do insertions we
231 know which pseudo reg to use.
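
   As a rough, hypothetical source-level illustration of steps 3)-5), for an
   expression a + b that is computed on only one path into a join point:

       Before PRE:                   After PRE:
         if (cond)                     if (cond)
           x = a + b;                    t = a + b, x = t;
         else                          else
           other ();                     other (), t = a + b;
         y = a + b;                    y = t;

   The partially redundant computation of Y is deleted and replaced by a
   copy from the new pseudo T; a copy of a + b into T is inserted on the
   path where the expression was not available.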
232
233 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
234 argue it is not. The number of iterations for the algorithm to converge
235 is typically 2-4 so I don't view it as that expensive (relatively speaking).
236
237 PRE GCSE depends heavily on the second CPROP pass to clean up the copies
238 we create. To make an expression reach the place where it's redundant,
239 the result of the expression is copied to a new register, and the redundant
240 expression is deleted by replacing it with this new register. Classic GCSE
241    doesn't have this problem as much because it computes the reaching defs of
242 each register in each block and thus can try to use an existing
243 register. */
244 \f
245 /* GCSE global vars. */
246
247 struct target_gcse default_target_gcse;
248 #if SWITCHABLE_TARGET
249 struct target_gcse *this_target_gcse = &default_target_gcse;
250 #endif
251
252 /* Set to non-zero if CSE should run after all GCSE optimizations are done. */
253 int flag_rerun_cse_after_global_opts;
254
255 /* An obstack for our working variables. */
256 static struct obstack gcse_obstack;
257
258 struct reg_use {rtx reg_rtx; };
259
260 /* Hash table of expressions. */
261
262 struct expr
263 {
264 /* The expression. */
265 rtx expr;
266 /* Index in the available expression bitmaps. */
267 int bitmap_index;
268 /* Next entry with the same hash. */
269 struct expr *next_same_hash;
270 /* List of anticipatable occurrences in basic blocks in the function.
271 An "anticipatable occurrence" is one that is the first occurrence in the
272 basic block, the operands are not modified in the basic block prior
273 to the occurrence and the output is not used between the start of
274 the block and the occurrence. */
275 struct occr *antic_occr;
276   /* List of available occurrences in basic blocks in the function.
277 An "available occurrence" is one that is the last occurrence in the
278 basic block and the operands are not modified by following statements in
279 the basic block [including this insn]. */
280 struct occr *avail_occr;
281 /* Non-null if the computation is PRE redundant.
282 The value is the newly created pseudo-reg to record a copy of the
283 expression in all the places that reach the redundant copy. */
284 rtx reaching_reg;
285 /* Maximum distance in instructions this expression can travel.
286 We avoid moving simple expressions for more than a few instructions
287 to keep register pressure under control.
288 A value of "0" removes restrictions on how far the expression can
289 travel. */
290 int max_distance;
291 };
292
293 /* Occurrence of an expression.
294 There is one per basic block. If a pattern appears more than once the
295 last appearance is used [or first for anticipatable expressions]. */
296
297 struct occr
298 {
299 /* Next occurrence of this expression. */
300 struct occr *next;
301 /* The insn that computes the expression. */
302 rtx insn;
303 /* Nonzero if this [anticipatable] occurrence has been deleted. */
304 char deleted_p;
305 /* Nonzero if this [available] occurrence has been copied to
306 reaching_reg. */
307 /* ??? This is mutually exclusive with deleted_p, so they could share
308 the same byte. */
309 char copied_p;
310 };
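
/* A hypothetical example of the two occurrence kinds for an expression
   a + b within a single basic block:

       x = a + b;    <- anticipatable occurrence: first in the block,
                        operands unmodified before it
       a = c;
       y = a + b;    <- available occurrence: last in the block,
                        operands unmodified after it

   The first computation would be recorded on antic_occr, the second on
   avail_occr.  */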
311
312 typedef struct occr *occr_t;
313 DEF_VEC_P (occr_t);
314 DEF_VEC_ALLOC_P (occr_t, heap);
315
316 /* Expression hash tables.
317 Each hash table is an array of buckets.
318 ??? It is known that if it were an array of entries, structure elements
319 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
320 not clear whether in the final analysis a sufficient amount of memory would
321 be saved as the size of the available expression bitmaps would be larger
322 [one could build a mapping table without holes afterwards though].
323 Someday I'll perform the computation and figure it out. */
324
325 struct hash_table_d
326 {
327 /* The table itself.
328 This is an array of `expr_hash_table_size' elements. */
329 struct expr **table;
330
331 /* Size of the hash table, in elements. */
332 unsigned int size;
333
334 /* Number of hash table elements. */
335 unsigned int n_elems;
336 };
337
338 /* Expression hash table. */
339 static struct hash_table_d expr_hash_table;
340
341 /* This is a list of expressions which are MEMs and will be used by load
342 or store motion.
343    Load motion tracks MEMs which aren't killed by anything except themselves,
344    i.e. loads and stores to a single location.
345    We can then allow movement of these MEM refs with a little special
346    allowance.  (All stores copy the same value to the reaching reg used
347    for the loads.)  This means all values used to store into memory must have
348 no side effects so we can re-issue the setter value. */
349
350 struct ls_expr
351 {
352 struct expr * expr; /* Gcse expression reference for LM. */
353 rtx pattern; /* Pattern of this mem. */
354 rtx pattern_regs; /* List of registers mentioned by the mem. */
355 rtx loads; /* INSN list of loads seen. */
356 rtx stores; /* INSN list of stores seen. */
357 struct ls_expr * next; /* Next in the list. */
358 int invalid; /* Invalid for some reason. */
359   int index;                    /* Bitmap index this expression maps to, if any.  */
360 unsigned int hash_index; /* Index when in a hash table. */
361 rtx reaching_reg; /* Register to use when re-writing. */
362 };
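
/* A hypothetical sketch of the situation an ls_expr describes.  If the
   only references to *P in the function are

       r1 = *p;      <- recorded on LOADS
       *p = r2;      <- recorded on STORES

   then the loads can be rewritten to use a single reaching register, and
   each store also copies its value into that register, so the setter can
   be re-issued without side effects.  */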
363
364 /* Head of the list of load/store memory refs. */
365 static struct ls_expr * pre_ldst_mems = NULL;
366
367 /* Hashtable for the load/store memory refs. */
368 static htab_t pre_ldst_table = NULL;
369
370 /* Bitmap containing one bit for each register in the program.
371 Used when performing GCSE to track which registers have been set since
372 the start of the basic block. */
373 static regset reg_set_bitmap;
374
375 /* Array, indexed by basic block number for a list of insns which modify
376 memory within that block. */
377 static VEC (rtx,heap) **modify_mem_list;
378 static bitmap modify_mem_list_set;
379
380 typedef struct modify_pair_s
381 {
382 rtx dest; /* A MEM. */
383 rtx dest_addr; /* The canonical address of `dest'. */
384 } modify_pair;
385
386 DEF_VEC_O(modify_pair);
387 DEF_VEC_ALLOC_O(modify_pair,heap);
388
389 /* This array parallels modify_mem_list, except that it stores MEMs
390 being set and their canonicalized memory addresses. */
391 static VEC (modify_pair,heap) **canon_modify_mem_list;
392
393 /* Bitmap indexed by block numbers to record which blocks contain
394 function calls. */
395 static bitmap blocks_with_calls;
396
397 /* Various variables for statistics gathering. */
398
399 /* Memory used in a pass.
400 This isn't intended to be absolutely precise. Its intent is only
401 to keep an eye on memory usage. */
402 static int bytes_used;
403
404 /* GCSE substitutions made. */
405 static int gcse_subst_count;
406 /* Number of copy instructions created. */
407 static int gcse_create_count;
408 \f
409 /* Doing code hoisting. */
410 static bool doing_code_hoisting_p = false;
411 \f
412 /* For available exprs */
413 static sbitmap *ae_kill;
414 \f
415 static void compute_can_copy (void);
416 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
417 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
418 static void *gcse_alloc (unsigned long);
419 static void alloc_gcse_mem (void);
420 static void free_gcse_mem (void);
421 static void hash_scan_insn (rtx, struct hash_table_d *);
422 static void hash_scan_set (rtx, rtx, struct hash_table_d *);
423 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
424 static void hash_scan_call (rtx, rtx, struct hash_table_d *);
425 static int want_to_gcse_p (rtx, int *);
426 static int oprs_unchanged_p (const_rtx, const_rtx, int);
427 static int oprs_anticipatable_p (const_rtx, const_rtx);
428 static int oprs_available_p (const_rtx, const_rtx);
429 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
430 struct hash_table_d *);
431 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
432 static int expr_equiv_p (const_rtx, const_rtx);
433 static void record_last_reg_set_info (rtx, int);
434 static void record_last_mem_set_info (rtx);
435 static void record_last_set_info (rtx, const_rtx, void *);
436 static void compute_hash_table (struct hash_table_d *);
437 static void alloc_hash_table (struct hash_table_d *);
438 static void free_hash_table (struct hash_table_d *);
439 static void compute_hash_table_work (struct hash_table_d *);
440 static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
441 static void compute_transp (const_rtx, int, sbitmap *);
442 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
443 struct hash_table_d *);
444 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
445 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
446 static void canon_list_insert (rtx, const_rtx, void *);
447 static void alloc_pre_mem (int, int);
448 static void free_pre_mem (void);
449 static struct edge_list *compute_pre_data (void);
450 static int pre_expr_reaches_here_p (basic_block, struct expr *,
451 basic_block);
452 static void insert_insn_end_basic_block (struct expr *, basic_block);
453 static void pre_insert_copy_insn (struct expr *, rtx);
454 static void pre_insert_copies (void);
455 static int pre_delete (void);
456 static int pre_gcse (struct edge_list *);
457 static int one_pre_gcse_pass (void);
458 static void add_label_notes (rtx, rtx);
459 static void alloc_code_hoist_mem (int, int);
460 static void free_code_hoist_mem (void);
461 static void compute_code_hoist_vbeinout (void);
462 static void compute_code_hoist_data (void);
463 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
464 int, int *);
465 static int hoist_code (void);
466 static int one_code_hoisting_pass (void);
467 static rtx process_insert_insn (struct expr *);
468 static int pre_edge_insert (struct edge_list *, struct expr **);
469 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
470 basic_block, char *);
471 static struct ls_expr * ldst_entry (rtx);
472 static void free_ldst_entry (struct ls_expr *);
473 static void free_ld_motion_mems (void);
474 static void print_ldst_list (FILE *);
475 static struct ls_expr * find_rtx_in_ldst (rtx);
476 static int simple_mem (const_rtx);
477 static void invalidate_any_buried_refs (rtx);
478 static void compute_ld_motion_mems (void);
479 static void trim_ld_motion_mems (void);
480 static void update_ld_motion_stores (struct expr *);
481 static void clear_modify_mem_tables (void);
482 static void free_modify_mem_tables (void);
483 static rtx gcse_emit_move_after (rtx, rtx, rtx);
484 static bool is_too_expensive (const char *);
485
486 #define GNEW(T) ((T *) gmalloc (sizeof (T)))
487 #define GCNEW(T) ((T *) gcalloc (1, sizeof (T)))
488
489 #define GNEWVEC(T, N) ((T *) gmalloc (sizeof (T) * (N)))
490 #define GCNEWVEC(T, N) ((T *) gcalloc ((N), sizeof (T)))
491
492 #define GNEWVAR(T, S) ((T *) gmalloc ((S)))
493 #define GCNEWVAR(T, S) ((T *) gcalloc (1, (S)))
494
495 #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T)))
496 #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S)))
497 \f
498 /* Misc. utilities. */
499
500 #define can_copy \
501 (this_target_gcse->x_can_copy)
502 #define can_copy_init_p \
503 (this_target_gcse->x_can_copy_init_p)
504
505 /* Compute which modes support reg/reg copy operations. */
506
507 static void
508 compute_can_copy (void)
509 {
510 int i;
511 #ifndef AVOID_CCMODE_COPIES
512 rtx reg, insn;
513 #endif
514 memset (can_copy, 0, NUM_MACHINE_MODES);
515
516 start_sequence ();
517 for (i = 0; i < NUM_MACHINE_MODES; i++)
518 if (GET_MODE_CLASS (i) == MODE_CC)
519 {
520 #ifdef AVOID_CCMODE_COPIES
521 can_copy[i] = 0;
522 #else
523 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
524 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
525 if (recog (PATTERN (insn), insn, NULL) >= 0)
526 can_copy[i] = 1;
527 #endif
528 }
529 else
530 can_copy[i] = 1;
531
532 end_sequence ();
533 }
534
535 /* Returns whether the mode supports reg/reg copy operations. */
536
537 bool
538 can_copy_p (enum machine_mode mode)
539 {
540 if (! can_copy_init_p)
541 {
542 compute_can_copy ();
543 can_copy_init_p = true;
544 }
545
546 return can_copy[mode] != 0;
547 }
548 \f
549 /* Cover function to xmalloc to record bytes allocated. */
550
551 static void *
552 gmalloc (size_t size)
553 {
554 bytes_used += size;
555 return xmalloc (size);
556 }
557
558 /* Cover function to xcalloc to record bytes allocated. */
559
560 static void *
561 gcalloc (size_t nelem, size_t elsize)
562 {
563 bytes_used += nelem * elsize;
564 return xcalloc (nelem, elsize);
565 }
566
567 /* Cover function to obstack_alloc. */
568
569 static void *
570 gcse_alloc (unsigned long size)
571 {
572 bytes_used += size;
573 return obstack_alloc (&gcse_obstack, size);
574 }
575
576 /* Allocate memory for the reg/memory set tracking tables.
577 This is called at the start of each pass. */
578
579 static void
580 alloc_gcse_mem (void)
581 {
582 /* Allocate vars to track sets of regs. */
583 reg_set_bitmap = ALLOC_REG_SET (NULL);
584
585 /* Allocate array to keep a list of insns which modify memory in each
586 basic block. */
587 modify_mem_list = GCNEWVEC (VEC (rtx,heap) *, last_basic_block);
588 canon_modify_mem_list = GCNEWVEC (VEC (modify_pair,heap) *,
589 last_basic_block);
590 modify_mem_list_set = BITMAP_ALLOC (NULL);
591 blocks_with_calls = BITMAP_ALLOC (NULL);
592 }
593
594 /* Free memory allocated by alloc_gcse_mem. */
595
596 static void
597 free_gcse_mem (void)
598 {
599 FREE_REG_SET (reg_set_bitmap);
600
601 free_modify_mem_tables ();
602 BITMAP_FREE (modify_mem_list_set);
603 BITMAP_FREE (blocks_with_calls);
604 }
605 \f
606 /* Compute the local properties of each recorded expression.
607
608 Local properties are those that are defined by the block, irrespective of
609 other blocks.
610
611 An expression is transparent in a block if its operands are not modified
612 in the block.
613
614 An expression is computed (locally available) in a block if it is computed
615    at least once and the expression would contain the same value if the
616    computation were moved to the end of the block.
617
618 An expression is locally anticipatable in a block if it is computed at
619    least once and the expression would contain the same value if the
620    computation were moved to the beginning of the block.
621
622    We call this routine for PRE and code hoisting.  Both compute
623 basically the same information and thus can easily share this code.
624
625 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
626 properties. If NULL, then it is not necessary to compute or record that
627 particular property.
628
629 TABLE controls which hash table to look at. */
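
/* As a hypothetical example, for an expression a + b and a block containing

       a = c;
       x = a + b;

   the expression is not transparent (an operand is modified in the block),
   it is computed / locally available (the computation would yield the same
   value at the end of the block), and it is not locally anticipatable (an
   operand is modified before the first computation).  */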
630
631 static void
632 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
633 struct hash_table_d *table)
634 {
635 unsigned int i;
636
637 /* Initialize any bitmaps that were passed in. */
638 if (transp)
639 {
640 sbitmap_vector_ones (transp, last_basic_block);
641 }
642
643 if (comp)
644 sbitmap_vector_zero (comp, last_basic_block);
645 if (antloc)
646 sbitmap_vector_zero (antloc, last_basic_block);
647
648 for (i = 0; i < table->size; i++)
649 {
650 struct expr *expr;
651
652 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
653 {
654 int indx = expr->bitmap_index;
655 struct occr *occr;
656
657 /* The expression is transparent in this block if it is not killed.
658 We start by assuming all are transparent [none are killed], and
659 then reset the bits for those that are. */
660 if (transp)
661 compute_transp (expr->expr, indx, transp);
662
663 /* The occurrences recorded in antic_occr are exactly those that
664 we want to set to nonzero in ANTLOC. */
665 if (antloc)
666 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
667 {
668 SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
669
670 /* While we're scanning the table, this is a good place to
671 initialize this. */
672 occr->deleted_p = 0;
673 }
674
675 /* The occurrences recorded in avail_occr are exactly those that
676 we want to set to nonzero in COMP. */
677 if (comp)
678 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
679 {
680 SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
681
682 /* While we're scanning the table, this is a good place to
683 initialize this. */
684 occr->copied_p = 0;
685 }
686
687 /* While we're scanning the table, this is a good place to
688 initialize this. */
689 expr->reaching_reg = 0;
690 }
691 }
692 }
693 \f
694 /* Hash table support. */
695
696 struct reg_avail_info
697 {
698 basic_block last_bb;
699 int first_set;
700 int last_set;
701 };
702
703 static struct reg_avail_info *reg_avail_info;
704 static basic_block current_bb;
705
706 /* See whether X, the source of a set, is something we want to consider for
707 GCSE. */
708
709 static int
710 want_to_gcse_p (rtx x, int *max_distance_ptr)
711 {
712 #ifdef STACK_REGS
713 /* On register stack architectures, don't GCSE constants from the
714 constant pool, as the benefits are often swamped by the overhead
715 of shuffling the register stack between basic blocks. */
716 if (IS_STACK_MODE (GET_MODE (x)))
717 x = avoid_constant_pool_reference (x);
718 #endif
719
720 /* GCSE'ing constants:
721
722 We do not specifically distinguish between constant and non-constant
723 expressions in PRE and Hoist. We use set_src_cost below to limit
724 the maximum distance simple expressions can travel.
725
726 Nevertheless, constants are much easier to GCSE, and, hence,
727 it is easy to overdo the optimizations. Usually, excessive PRE and
728    hoisting of constants leads to increased register pressure.
729
730    RA can deal with this by rematerializing some of the constants.
731    Therefore, it is important that the back-end generates sets of constants
732    in a way that allows reload to rematerialize them under high register
733    pressure, i.e., a pseudo register with a REG_EQUAL note for a constant
734 is set only once. Failing to do so will result in IRA/reload
735 spilling such constants under high register pressure instead of
736 rematerializing them. */
737
738 switch (GET_CODE (x))
739 {
740 case REG:
741 case SUBREG:
742 case CALL:
743 return 0;
744
745 case CONST_INT:
746 case CONST_DOUBLE:
747 case CONST_FIXED:
748 case CONST_VECTOR:
749 if (!doing_code_hoisting_p)
750 /* Do not PRE constants. */
751 return 0;
752
753 /* FALLTHRU */
754
755 default:
756 if (doing_code_hoisting_p)
757 /* PRE doesn't implement max_distance restriction. */
758 {
759 int cost;
760 int max_distance;
761
762 gcc_assert (!optimize_function_for_speed_p (cfun)
763 && optimize_function_for_size_p (cfun));
764 cost = set_src_cost (x, 0);
765
766 if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
767 {
768 max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
769 if (max_distance == 0)
770 return 0;
771
772 gcc_assert (max_distance > 0);
773 }
774 else
775 max_distance = 0;
776
777 if (max_distance_ptr)
778 *max_distance_ptr = max_distance;
779 }
780
781 return can_assign_to_reg_without_clobbers_p (x);
782 }
783 }
784
785 /* Used internally by can_assign_to_reg_without_clobbers_p. */
786
787 static GTY(()) rtx test_insn;
788
789 /* Return true if we can assign X to a pseudo register such that the
790 resulting insn does not result in clobbering a hard register as a
791 side-effect.
792
793 Additionally, if the target requires it, check that the resulting insn
794 can be copied. If it cannot, this means that X is special and probably
795 has hidden side-effects we don't want to mess with.
796
797 This function is typically used by code motion passes, to verify
798 that it is safe to insert an insn without worrying about clobbering
799 maybe live hard regs. */
800
801 bool
802 can_assign_to_reg_without_clobbers_p (rtx x)
803 {
804 int num_clobbers = 0;
805 int icode;
806
807 /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
808 if (general_operand (x, GET_MODE (x)))
809 return 1;
810 else if (GET_MODE (x) == VOIDmode)
811 return 0;
812
813 /* Otherwise, check if we can make a valid insn from it. First initialize
814 our test insn if we haven't already. */
815 if (test_insn == 0)
816 {
817 test_insn
818 = make_insn_raw (gen_rtx_SET (VOIDmode,
819 gen_rtx_REG (word_mode,
820 FIRST_PSEUDO_REGISTER * 2),
821 const0_rtx));
822 NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
823 }
824
825 /* Now make an insn like the one we would make when GCSE'ing and see if
826 valid. */
827 PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
828 SET_SRC (PATTERN (test_insn)) = x;
829
830 icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
831 if (icode < 0)
832 return false;
833
834 if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
835 return false;
836
837 if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
838 return false;
839
840 return true;
841 }
842
843 /* Return nonzero if the operands of expression X are unchanged from the
844 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
845 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
846
847 static int
848 oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
849 {
850 int i, j;
851 enum rtx_code code;
852 const char *fmt;
853
854 if (x == 0)
855 return 1;
856
857 code = GET_CODE (x);
858 switch (code)
859 {
860 case REG:
861 {
862 struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
863
864 if (info->last_bb != current_bb)
865 return 1;
866 if (avail_p)
867 return info->last_set < DF_INSN_LUID (insn);
868 else
869 return info->first_set >= DF_INSN_LUID (insn);
870 }
871
872 case MEM:
873 if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
874 x, avail_p))
875 return 0;
876 else
877 return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
878
879 case PRE_DEC:
880 case PRE_INC:
881 case POST_DEC:
882 case POST_INC:
883 case PRE_MODIFY:
884 case POST_MODIFY:
885 return 0;
886
887 case PC:
888 case CC0: /*FIXME*/
889 case CONST:
890 case CONST_INT:
891 case CONST_DOUBLE:
892 case CONST_FIXED:
893 case CONST_VECTOR:
894 case SYMBOL_REF:
895 case LABEL_REF:
896 case ADDR_VEC:
897 case ADDR_DIFF_VEC:
898 return 1;
899
900 default:
901 break;
902 }
903
904 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
905 {
906 if (fmt[i] == 'e')
907 {
908 /* If we are about to do the last recursive call needed at this
909 level, change it into iteration. This function is called enough
910 to be worth it. */
911 if (i == 0)
912 return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
913
914 else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
915 return 0;
916 }
917 else if (fmt[i] == 'E')
918 for (j = 0; j < XVECLEN (x, i); j++)
919 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
920 return 0;
921 }
922
923 return 1;
924 }
925
926 /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p. */
927
928 struct mem_conflict_info
929 {
930   /* A memory reference for a load instruction; mems_conflict_for_gcse_p will
931 see if a memory store conflicts with this memory load. */
932 const_rtx mem;
933
934 /* True if mems_conflict_for_gcse_p finds a conflict between two memory
935 references. */
936 bool conflict;
937 };
938
939 /* DEST is the output of an instruction. If it is a memory reference and
940 possibly conflicts with the load found in DATA, then communicate this
941 information back through DATA. */
942
943 static void
944 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
945 void *data)
946 {
947 struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
948
949 while (GET_CODE (dest) == SUBREG
950 || GET_CODE (dest) == ZERO_EXTRACT
951 || GET_CODE (dest) == STRICT_LOW_PART)
952 dest = XEXP (dest, 0);
953
954 /* If DEST is not a MEM, then it will not conflict with the load. Note
955 that function calls are assumed to clobber memory, but are handled
956 elsewhere. */
957 if (! MEM_P (dest))
958 return;
959
960 /* If we are setting a MEM in our list of specially recognized MEMs,
961 don't mark as killed this time. */
962 if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
963 {
964 if (!find_rtx_in_ldst (dest))
965 mci->conflict = true;
966 return;
967 }
968
969 if (true_dependence (dest, GET_MODE (dest), mci->mem))
970 mci->conflict = true;
971 }
972
973 /* Return nonzero if the expression in X (a memory reference) is killed
974 in block BB before or after the insn with the LUID in UID_LIMIT.
975 AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
976 before UID_LIMIT.
977
978 To check the entire block, set UID_LIMIT to max_uid + 1 and
979 AVAIL_P to 0. */
980
981 static int
982 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
983 int avail_p)
984 {
985 VEC (rtx,heap) *list = modify_mem_list[bb->index];
986 rtx setter;
987 unsigned ix;
988
989   /* If this is a read-only MEM then we aren't going to be changing it.  */
990 if (MEM_READONLY_P (x))
991 return 0;
992
993 FOR_EACH_VEC_ELT_REVERSE (rtx, list, ix, setter)
994 {
995 struct mem_conflict_info mci;
996
997 /* Ignore entries in the list that do not apply. */
998 if ((avail_p
999 && DF_INSN_LUID (setter) < uid_limit)
1000 || (! avail_p
1001 && DF_INSN_LUID (setter) > uid_limit))
1002 continue;
1003
1004 /* If SETTER is a call everything is clobbered. Note that calls
1005 to pure functions are never put on the list, so we need not
1006 worry about them. */
1007 if (CALL_P (setter))
1008 return 1;
1009
1010 /* SETTER must be an INSN of some kind that sets memory. Call
1011 note_stores to examine each hunk of memory that is modified. */
1012 mci.mem = x;
1013 mci.conflict = false;
1014 note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci);
1015 if (mci.conflict)
1016 return 1;
1017 }
1018 return 0;
1019 }
1020
1021 /* Return nonzero if the operands of expression X are unchanged from
1022 the start of INSN's basic block up to but not including INSN. */
1023
1024 static int
1025 oprs_anticipatable_p (const_rtx x, const_rtx insn)
1026 {
1027 return oprs_unchanged_p (x, insn, 0);
1028 }
1029
1030 /* Return nonzero if the operands of expression X are unchanged from
1031 INSN to the end of INSN's basic block. */
1032
1033 static int
1034 oprs_available_p (const_rtx x, const_rtx insn)
1035 {
1036 return oprs_unchanged_p (x, insn, 1);
1037 }
1038
1039 /* Hash expression X.
1040
1041 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1042 indicating if a volatile operand is found or if the expression contains
1043 something we don't want to insert in the table. HASH_TABLE_SIZE is
1044 the current size of the hash table to be probed. */
1045
1046 static unsigned int
1047 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1048 int hash_table_size)
1049 {
1050 unsigned int hash;
1051
1052 *do_not_record_p = 0;
1053
1054 hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1055 return hash % hash_table_size;
1056 }
1057
1058 /* Return nonzero if exp1 is equivalent to exp2. */
1059
1060 static int
1061 expr_equiv_p (const_rtx x, const_rtx y)
1062 {
1063 return exp_equiv_p (x, y, 0, true);
1064 }
1065
1066 /* Insert expression X in INSN in the hash TABLE.
1067 If it is already present, record it as the last occurrence in INSN's
1068 basic block.
1069
1070 MODE is the mode of the value X is being stored into.
1071 It is only used if X is a CONST_INT.
1072
1073 ANTIC_P is nonzero if X is an anticipatable expression.
1074 AVAIL_P is nonzero if X is an available expression.
1075
1076 MAX_DISTANCE is the maximum distance in instructions this expression can
1077 be moved. */
1078
1079 static void
1080 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1081 int avail_p, int max_distance, struct hash_table_d *table)
1082 {
1083 int found, do_not_record_p;
1084 unsigned int hash;
1085 struct expr *cur_expr, *last_expr = NULL;
1086 struct occr *antic_occr, *avail_occr;
1087
1088 hash = hash_expr (x, mode, &do_not_record_p, table->size);
1089
1090 /* Do not insert expression in table if it contains volatile operands,
1091 or if hash_expr determines the expression is something we don't want
1092 to or can't handle. */
1093 if (do_not_record_p)
1094 return;
1095
1096 cur_expr = table->table[hash];
1097 found = 0;
1098
1099 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1100 {
1101 /* If the expression isn't found, save a pointer to the end of
1102 the list. */
1103 last_expr = cur_expr;
1104 cur_expr = cur_expr->next_same_hash;
1105 }
1106
1107 if (! found)
1108 {
1109 cur_expr = GOBNEW (struct expr);
1110 bytes_used += sizeof (struct expr);
1111 if (table->table[hash] == NULL)
1112 /* This is the first pattern that hashed to this index. */
1113 table->table[hash] = cur_expr;
1114 else
1115 /* Add EXPR to end of this hash chain. */
1116 last_expr->next_same_hash = cur_expr;
1117
1118 /* Set the fields of the expr element. */
1119 cur_expr->expr = x;
1120 cur_expr->bitmap_index = table->n_elems++;
1121 cur_expr->next_same_hash = NULL;
1122 cur_expr->antic_occr = NULL;
1123 cur_expr->avail_occr = NULL;
1124 gcc_assert (max_distance >= 0);
1125 cur_expr->max_distance = max_distance;
1126 }
1127 else
1128 gcc_assert (cur_expr->max_distance == max_distance);
1129
1130 /* Now record the occurrence(s). */
1131 if (antic_p)
1132 {
1133 antic_occr = cur_expr->antic_occr;
1134
1135 if (antic_occr
1136 && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1137 antic_occr = NULL;
1138
1139 if (antic_occr)
1140 /* Found another instance of the expression in the same basic block.
1141 Prefer the currently recorded one. We want the first one in the
1142 block and the block is scanned from start to end. */
1143 ; /* nothing to do */
1144 else
1145 {
1146 /* First occurrence of this expression in this basic block. */
1147 antic_occr = GOBNEW (struct occr);
1148 bytes_used += sizeof (struct occr);
1149 antic_occr->insn = insn;
1150 antic_occr->next = cur_expr->antic_occr;
1151 antic_occr->deleted_p = 0;
1152 cur_expr->antic_occr = antic_occr;
1153 }
1154 }
1155
1156 if (avail_p)
1157 {
1158 avail_occr = cur_expr->avail_occr;
1159
1160 if (avail_occr
1161 && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1162 {
1163 /* Found another instance of the expression in the same basic block.
1164 Prefer this occurrence to the currently recorded one. We want
1165 the last one in the block and the block is scanned from start
1166 to end. */
1167 avail_occr->insn = insn;
1168 }
1169 else
1170 {
1171 /* First occurrence of this expression in this basic block. */
1172 avail_occr = GOBNEW (struct occr);
1173 bytes_used += sizeof (struct occr);
1174 avail_occr->insn = insn;
1175 avail_occr->next = cur_expr->avail_occr;
1176 avail_occr->deleted_p = 0;
1177 cur_expr->avail_occr = avail_occr;
1178 }
1179 }
1180 }
1181
1182 /* Scan SET present in INSN and add an entry to the hash TABLE. */
1183
1184 static void
1185 hash_scan_set (rtx set, rtx insn, struct hash_table_d *table)
1186 {
1187 rtx src = SET_SRC (set);
1188 rtx dest = SET_DEST (set);
1189 rtx note;
1190
1191 if (GET_CODE (src) == CALL)
1192 hash_scan_call (src, insn, table);
1193
1194 else if (REG_P (dest))
1195 {
1196 unsigned int regno = REGNO (dest);
1197 int max_distance = 0;
1198
1199 /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1200
1201 This allows us to do a single GCSE pass and still eliminate
1202 redundant constants, addresses or other expressions that are
1203 constructed with multiple instructions.
1204
1205 However, keep the original SRC if INSN is a simple reg-reg move.
1206 In this case, there will almost always be a REG_EQUAL note on the
1207 insn that sets SRC. By recording the REG_EQUAL value here as SRC
1208 for INSN, we miss copy propagation opportunities and we perform the
1209 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1210 do more than one PRE GCSE pass.
1211
1212 Note that this does not impede profitable constant propagations. We
1213 "look through" reg-reg sets in lookup_avail_set. */
1214 note = find_reg_equal_equiv_note (insn);
1215 if (note != 0
1216 && REG_NOTE_KIND (note) == REG_EQUAL
1217 && !REG_P (src)
1218 && want_to_gcse_p (XEXP (note, 0), NULL))
1219 src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src);
1220
1221 /* Only record sets of pseudo-regs in the hash table. */
1222 if (regno >= FIRST_PSEUDO_REGISTER
1223 /* Don't GCSE something if we can't do a reg/reg copy. */
1224 && can_copy_p (GET_MODE (dest))
1225 /* GCSE commonly inserts instruction after the insn. We can't
1226 do that easily for EH edges so disable GCSE on these for now. */
1227 /* ??? We can now easily create new EH landing pads at the
1228 gimple level, for splitting edges; there's no reason we
1229 can't do the same thing at the rtl level. */
1230 && !can_throw_internal (insn)
1231 /* Is SET_SRC something we want to gcse? */
1232 && want_to_gcse_p (src, &max_distance)
1233 /* Don't CSE a nop. */
1234 && ! set_noop_p (set)
1235 /* Don't GCSE if it has attached REG_EQUIV note.
1236             At this point only function parameters should have
1237             REG_EQUIV notes, and if the argument slot is used somewhere
1238             explicitly, it means the address of the parameter has been taken,
1239 so we should not extend the lifetime of the pseudo. */
1240 && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1241 {
1242 /* An expression is not anticipatable if its operands are
1243 modified before this insn or if this is not the only SET in
1244 this insn. The latter condition does not have to mean that
1245 SRC itself is not anticipatable, but we just will not be
1246 able to handle code motion of insns with multiple sets. */
1247 int antic_p = oprs_anticipatable_p (src, insn)
1248 && !multiple_sets (insn);
1249 /* An expression is not available if its operands are
1250 subsequently modified, including this insn. It's also not
1251 available if this is a branch, because we can't insert
1252 a set after the branch. */
1253 int avail_p = (oprs_available_p (src, insn)
1254 && ! JUMP_P (insn));
1255
1256 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1257 max_distance, table);
1258 }
1259 }
1260   /* In the case of a store, we want to consider the memory value as available
1261      in the REG stored in that memory location.  This makes it possible to
1262      remove redundant loads due to stores to the same location.  */
1263 else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1264 {
1265 unsigned int regno = REGNO (src);
1266 int max_distance = 0;
1267
1268 /* Only record sets of pseudo-regs in the hash table. */
1269 if (regno >= FIRST_PSEUDO_REGISTER
1270 /* Don't GCSE something if we can't do a reg/reg copy. */
1271 && can_copy_p (GET_MODE (src))
1272 /* GCSE commonly inserts instruction after the insn. We can't
1273 do that easily for EH edges so disable GCSE on these for now. */
1274 && !can_throw_internal (insn)
1275 /* Is SET_DEST something we want to gcse? */
1276 && want_to_gcse_p (dest, &max_distance)
1277 /* Don't CSE a nop. */
1278 && ! set_noop_p (set)
1279 /* Don't GCSE if it has attached REG_EQUIV note.
1280             At this point only function parameters should have
1281             REG_EQUIV notes, and if the argument slot is used somewhere
1282             explicitly, it means the address of the parameter has been taken,
1283 so we should not extend the lifetime of the pseudo. */
1284 && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1285 || ! MEM_P (XEXP (note, 0))))
1286 {
1287 /* Stores are never anticipatable. */
1288 int antic_p = 0;
1289 /* An expression is not available if its operands are
1290 subsequently modified, including this insn. It's also not
1291 available if this is a branch, because we can't insert
1292 a set after the branch. */
1293 int avail_p = oprs_available_p (dest, insn)
1294 && ! JUMP_P (insn);
1295
1296 /* Record the memory expression (DEST) in the hash table. */
1297 insert_expr_in_table (dest, GET_MODE (dest), insn,
1298 antic_p, avail_p, max_distance, table);
1299 }
1300 }
1301 }
1302
1303 static void
1304 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1305 struct hash_table_d *table ATTRIBUTE_UNUSED)
1306 {
1307 /* Currently nothing to do. */
1308 }
1309
1310 static void
1311 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1312 struct hash_table_d *table ATTRIBUTE_UNUSED)
1313 {
1314 /* Currently nothing to do. */
1315 }
1316
1317 /* Process INSN and add hash table entries as appropriate. */
1318
1319 static void
1320 hash_scan_insn (rtx insn, struct hash_table_d *table)
1321 {
1322 rtx pat = PATTERN (insn);
1323 int i;
1324
1325 /* Pick out the sets of INSN and for other forms of instructions record
1326 what's been modified. */
1327
1328 if (GET_CODE (pat) == SET)
1329 hash_scan_set (pat, insn, table);
1330
1331 else if (GET_CODE (pat) == CLOBBER)
1332 hash_scan_clobber (pat, insn, table);
1333
1334 else if (GET_CODE (pat) == CALL)
1335 hash_scan_call (pat, insn, table);
1336
1337 else if (GET_CODE (pat) == PARALLEL)
1338 for (i = 0; i < XVECLEN (pat, 0); i++)
1339 {
1340 rtx x = XVECEXP (pat, 0, i);
1341
1342 if (GET_CODE (x) == SET)
1343 hash_scan_set (x, insn, table);
1344 else if (GET_CODE (x) == CLOBBER)
1345 hash_scan_clobber (x, insn, table);
1346 else if (GET_CODE (x) == CALL)
1347 hash_scan_call (x, insn, table);
1348 }
1349 }
1350
1351 /* Dump the hash table TABLE to file FILE under the name NAME. */
1352
1353 static void
1354 dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
1355 {
1356 int i;
1357 /* Flattened out table, so it's printed in proper order. */
1358 struct expr **flat_table;
1359 unsigned int *hash_val;
1360 struct expr *expr;
1361
1362 flat_table = XCNEWVEC (struct expr *, table->n_elems);
1363 hash_val = XNEWVEC (unsigned int, table->n_elems);
1364
1365 for (i = 0; i < (int) table->size; i++)
1366 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1367 {
1368 flat_table[expr->bitmap_index] = expr;
1369 hash_val[expr->bitmap_index] = i;
1370 }
1371
1372 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1373 name, table->size, table->n_elems);
1374
1375 for (i = 0; i < (int) table->n_elems; i++)
1376 if (flat_table[i] != 0)
1377 {
1378 expr = flat_table[i];
1379 fprintf (file, "Index %d (hash value %d; max distance %d)\n ",
1380 expr->bitmap_index, hash_val[i], expr->max_distance);
1381 print_rtl (file, expr->expr);
1382 fprintf (file, "\n");
1383 }
1384
1385 fprintf (file, "\n");
1386
1387 free (flat_table);
1388 free (hash_val);
1389 }
1390
1391 /* Record register first/last/block set information for REGNO in INSN.
1392
1393 first_set records the first place in the block where the register
1394 is set and is used to compute "anticipatability".
1395
1396 last_set records the last place in the block where the register
1397 is set and is used to compute "availability".
1398
1399 last_bb records the block for which first_set and last_set are
1400 valid, as a quick test to invalidate them. */
1401
1402 static void
1403 record_last_reg_set_info (rtx insn, int regno)
1404 {
1405 struct reg_avail_info *info = &reg_avail_info[regno];
1406 int luid = DF_INSN_LUID (insn);
1407
1408 info->last_set = luid;
1409 if (info->last_bb != current_bb)
1410 {
1411 info->last_bb = current_bb;
1412 info->first_set = luid;
1413 }
1414 }
1415
1416 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1417    Note that each list element records the MEM together with its
1418    canonicalized address as a modify_pair.  */
1419
1420 static void
1421 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx x ATTRIBUTE_UNUSED,
1422 void * v_insn)
1423 {
1424 rtx dest_addr, insn;
1425 int bb;
1426 modify_pair *pair;
1427
1428 while (GET_CODE (dest) == SUBREG
1429 || GET_CODE (dest) == ZERO_EXTRACT
1430 || GET_CODE (dest) == STRICT_LOW_PART)
1431 dest = XEXP (dest, 0);
1432
1433 /* If DEST is not a MEM, then it will not conflict with a load. Note
1434 that function calls are assumed to clobber memory, but are handled
1435 elsewhere. */
1436
1437 if (! MEM_P (dest))
1438 return;
1439
1440 dest_addr = get_addr (XEXP (dest, 0));
1441 dest_addr = canon_rtx (dest_addr);
1442 insn = (rtx) v_insn;
1443 bb = BLOCK_FOR_INSN (insn)->index;
1444
1445 pair = VEC_safe_push (modify_pair, heap, canon_modify_mem_list[bb], NULL);
1446 pair->dest = dest;
1447 pair->dest_addr = dest_addr;
1448 }
1449
1450 /* Record memory modification information for INSN. We do not actually care
1451 about the memory location(s) that are set, or even how they are set (consider
1452 a CALL_INSN). We merely need to record which insns modify memory. */
1453
1454 static void
1455 record_last_mem_set_info (rtx insn)
1456 {
1457 int bb = BLOCK_FOR_INSN (insn)->index;
1458
1459 /* load_killed_in_block_p will handle the case of calls clobbering
1460 everything. */
1461 VEC_safe_push (rtx, heap, modify_mem_list[bb], insn);
1462 bitmap_set_bit (modify_mem_list_set, bb);
1463
1464 if (CALL_P (insn))
1465 bitmap_set_bit (blocks_with_calls, bb);
1466 else
1467 note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1468 }
1469
1470 /* Called from compute_hash_table via note_stores to handle one
1471 SET or CLOBBER in an insn. DATA is really the instruction in which
1472 the SET is taking place. */
1473
1474 static void
1475 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1476 {
1477 rtx last_set_insn = (rtx) data;
1478
1479 if (GET_CODE (dest) == SUBREG)
1480 dest = SUBREG_REG (dest);
1481
1482 if (REG_P (dest))
1483 record_last_reg_set_info (last_set_insn, REGNO (dest));
1484 else if (MEM_P (dest)
1485 /* Ignore pushes, they clobber nothing. */
1486 && ! push_operand (dest, GET_MODE (dest)))
1487 record_last_mem_set_info (last_set_insn);
1488 }
1489
1490 /* Top level function to create an expression hash table.
1491
1492 Expression entries are placed in the hash table if
1493 - they are of the form (set (pseudo-reg) src),
1494 - src is something we want to perform GCSE on,
1495 - none of the operands are subsequently modified in the block
1496
1497 Currently src must be a pseudo-reg or a const_int.
1498
1499 TABLE is the table computed. */
1500
1501 static void
1502 compute_hash_table_work (struct hash_table_d *table)
1503 {
1504 int i;
1505
1506   /* Re-cache any INSN_LIST nodes we have allocated.  */
1507 clear_modify_mem_tables ();
1508 /* Some working arrays used to track first and last set in each block. */
1509 reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1510
1511 for (i = 0; i < max_reg_num (); ++i)
1512 reg_avail_info[i].last_bb = NULL;
1513
1514 FOR_EACH_BB (current_bb)
1515 {
1516 rtx insn;
1517 unsigned int regno;
1518
1519 /* First pass over the instructions records information used to
1520 determine when registers and memory are first and last set. */
1521 FOR_BB_INSNS (current_bb, insn)
1522 {
1523 if (!NONDEBUG_INSN_P (insn))
1524 continue;
1525
1526 if (CALL_P (insn))
1527 {
1528 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1529 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
1530 record_last_reg_set_info (insn, regno);
1531
1532 if (! RTL_CONST_OR_PURE_CALL_P (insn))
1533 record_last_mem_set_info (insn);
1534 }
1535
1536 note_stores (PATTERN (insn), record_last_set_info, insn);
1537 }
1538
1539 /* The next pass builds the hash table. */
1540 FOR_BB_INSNS (current_bb, insn)
1541 if (NONDEBUG_INSN_P (insn))
1542 hash_scan_insn (insn, table);
1543 }
1544
1545 free (reg_avail_info);
1546 reg_avail_info = NULL;
1547 }
1548
1549 /* Allocate space for the set/expr hash TABLE.
1550    The maximum insn count is used to determine the number of buckets to use.  */
1551
1552 static void
1553 alloc_hash_table (struct hash_table_d *table)
1554 {
1555 int n;
1556
1557 n = get_max_insn_count ();
1558
1559 table->size = n / 4;
1560 if (table->size < 11)
1561 table->size = 11;
1562
1563 /* Attempt to maintain efficient use of hash table.
1564 Making it an odd number is simplest for now.
1565 ??? Later take some measurements. */
1566 table->size |= 1;
1567 n = table->size * sizeof (struct expr *);
1568 table->table = GNEWVAR (struct expr *, n);
1569 }
1570
1571 /* Free things allocated by alloc_hash_table. */
1572
1573 static void
1574 free_hash_table (struct hash_table_d *table)
1575 {
1576 free (table->table);
1577 }
1578
1579 /* Compute the expression hash table TABLE. */
1580
1581 static void
1582 compute_hash_table (struct hash_table_d *table)
1583 {
1584 /* Initialize count of number of entries in hash table. */
1585 table->n_elems = 0;
1586 memset (table->table, 0, table->size * sizeof (struct expr *));
1587
1588 compute_hash_table_work (table);
1589 }
1590 \f
1591 /* Expression tracking support. */
1592
1593 /* Clear canon_modify_mem_list and modify_mem_list tables. */
1594 static void
1595 clear_modify_mem_tables (void)
1596 {
1597 unsigned i;
1598 bitmap_iterator bi;
1599
1600 EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1601 {
1602 VEC_free (rtx, heap, modify_mem_list[i]);
1603 VEC_free (modify_pair, heap, canon_modify_mem_list[i]);
1604 }
1605 bitmap_clear (modify_mem_list_set);
1606 bitmap_clear (blocks_with_calls);
1607 }
1608
1609 /* Release memory used by modify_mem_list_set. */
1610
1611 static void
1612 free_modify_mem_tables (void)
1613 {
1614 clear_modify_mem_tables ();
1615 free (modify_mem_list);
1616 free (canon_modify_mem_list);
1617 modify_mem_list = 0;
1618 canon_modify_mem_list = 0;
1619 }
1620 \f
1621 /* For each block, compute whether X is transparent. X is either an
1622 expression or an assignment [though we don't care which, for this context
1623 an assignment is treated as an expression]. For each block where an
1624 element of X is modified, reset the INDX bit in BMAP. */
1625
1626 static void
1627 compute_transp (const_rtx x, int indx, sbitmap *bmap)
1628 {
1629 int i, j;
1630 enum rtx_code code;
1631 const char *fmt;
1632
1633 /* repeat is used to turn tail-recursion into iteration since GCC
1634 can't do it when there's no return value. */
1635 repeat:
1636
1637 if (x == 0)
1638 return;
1639
1640 code = GET_CODE (x);
1641 switch (code)
1642 {
1643 case REG:
1644 {
1645 df_ref def;
1646 for (def = DF_REG_DEF_CHAIN (REGNO (x));
1647 def;
1648 def = DF_REF_NEXT_REG (def))
1649 RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
1650 }
1651
1652 return;
1653
1654 case MEM:
1655 if (! MEM_READONLY_P (x))
1656 {
1657 bitmap_iterator bi;
1658 unsigned bb_index;
1659
1660 /* First handle all the blocks with calls. We don't need to
1661 do any list walking for them. */
1662 EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
1663 {
1664 RESET_BIT (bmap[bb_index], indx);
1665 }
1666
1667 /* Now iterate over the blocks which have memory modifications
1668 but which do not have any calls. */
1669 EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
1670 blocks_with_calls,
1671 0, bb_index, bi)
1672 {
1673 VEC (modify_pair,heap) *list
1674 = canon_modify_mem_list[bb_index];
1675 modify_pair *pair;
1676 unsigned ix;
1677
1678 FOR_EACH_VEC_ELT_REVERSE (modify_pair, list, ix, pair)
1679 {
1680 rtx dest = pair->dest;
1681 rtx dest_addr = pair->dest_addr;
1682
1683 if (canon_true_dependence (dest, GET_MODE (dest),
1684 dest_addr, x, NULL_RTX))
1685 RESET_BIT (bmap[bb_index], indx);
1686 }
1687 }
1688 }
1689
1690 x = XEXP (x, 0);
1691 goto repeat;
1692
1693 case PC:
1694 case CC0: /*FIXME*/
1695 case CONST:
1696 case CONST_INT:
1697 case CONST_DOUBLE:
1698 case CONST_FIXED:
1699 case CONST_VECTOR:
1700 case SYMBOL_REF:
1701 case LABEL_REF:
1702 case ADDR_VEC:
1703 case ADDR_DIFF_VEC:
1704 return;
1705
1706 default:
1707 break;
1708 }
1709
1710 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1711 {
1712 if (fmt[i] == 'e')
1713 {
1714 /* If we are about to do the last recursive call
1715 needed at this level, change it into iteration.
1716 This function is called enough to be worth it. */
1717 if (i == 0)
1718 {
1719 x = XEXP (x, i);
1720 goto repeat;
1721 }
1722
1723 compute_transp (XEXP (x, i), indx, bmap);
1724 }
1725 else if (fmt[i] == 'E')
1726 for (j = 0; j < XVECLEN (x, i); j++)
1727 compute_transp (XVECEXP (x, i, j), indx, bmap);
1728 }
1729 }
1730 \f
1731 /* Compute PRE+LCM working variables. */
1732
1733 /* Local properties of expressions. */
1734
1735 /* Nonzero for expressions that are transparent in the block. */
1736 static sbitmap *transp;
1737
1738 /* Nonzero for expressions that are computed (available) in the block. */
1739 static sbitmap *comp;
1740
1741 /* Nonzero for expressions that are locally anticipatable in the block. */
1742 static sbitmap *antloc;
1743
1744 /* Nonzero for expressions where this block is an optimal computation
1745 point. */
1746 static sbitmap *pre_optimal;
1747
1748 /* Nonzero for expressions which are redundant in a particular block. */
1749 static sbitmap *pre_redundant;
1750
1751 /* Nonzero for expressions which should be inserted on a specific edge. */
1752 static sbitmap *pre_insert_map;
1753
1754 /* Nonzero for expressions which should be deleted in a specific block. */
1755 static sbitmap *pre_delete_map;
1756
1757 /* Allocate vars used for PRE analysis. */
1758
1759 static void
1760 alloc_pre_mem (int n_blocks, int n_exprs)
1761 {
1762 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1763 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1764 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1765
1766 pre_optimal = NULL;
1767 pre_redundant = NULL;
1768 pre_insert_map = NULL;
1769 pre_delete_map = NULL;
1770 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1771
1772 /* pre_insert and pre_delete are allocated later. */
1773 }
1774
1775 /* Free vars used for PRE analysis. */
1776
1777 static void
1778 free_pre_mem (void)
1779 {
1780 sbitmap_vector_free (transp);
1781 sbitmap_vector_free (comp);
1782
1783 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
1784
1785 if (pre_optimal)
1786 sbitmap_vector_free (pre_optimal);
1787 if (pre_redundant)
1788 sbitmap_vector_free (pre_redundant);
1789 if (pre_insert_map)
1790 sbitmap_vector_free (pre_insert_map);
1791 if (pre_delete_map)
1792 sbitmap_vector_free (pre_delete_map);
1793
1794 transp = comp = NULL;
1795 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1796 }
1797
1798 /* Remove certain expressions from anticipatable and transparent
1799 sets of basic blocks that have an incoming abnormal edge.
1800 For PRE remove potentially trapping expressions to avoid placing
1801 them on abnormal edges. For hoisting remove memory references that
1802 can be clobbered by calls. */
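/* A sketch of the idea (the RTL here is made up): a possibly-trapping
   expression such as (div:SI (reg:SI 65) (reg:SI 66)) is pruned so that
   PRE never has to materialize it on an abnormal edge, and for hoisting a
   non-constant MEM is pruned because it could be clobbered by the call
   that ends the predecessor block of such an edge.  */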
1803
1804 static void
1805 prune_expressions (bool pre_p)
1806 {
1807 sbitmap prune_exprs;
1808 struct expr *expr;
1809 unsigned int ui;
1810 basic_block bb;
1811
1812 prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
1813 sbitmap_zero (prune_exprs);
1814 for (ui = 0; ui < expr_hash_table.size; ui++)
1815 {
1816 for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1817 {
1818 /* Note potentially trapping expressions. */
1819 if (may_trap_p (expr->expr))
1820 {
1821 SET_BIT (prune_exprs, expr->bitmap_index);
1822 continue;
1823 }
1824
1825 if (!pre_p && MEM_P (expr->expr))
1826 /* Note memory references that can be clobbered by a call.
1827 We do not split abnormal edges in hoisting, so if
1828 a memory reference were hoisted along an abnormal edge,
1829 it would be placed /before/ the call. Therefore, only
1830 constant memory references can be hoisted along abnormal
1831 edges. */
1832 {
1833 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
1834 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
1835 continue;
1836
1837 if (MEM_READONLY_P (expr->expr)
1838 && !MEM_VOLATILE_P (expr->expr)
1839 && MEM_NOTRAP_P (expr->expr))
1840 /* Constant memory reference, e.g., a PIC address. */
1841 continue;
1842
1843 /* ??? Optimally, we would use interprocedural alias
1844 analysis to determine if this mem is actually killed
1845 by this call. */
1846
1847 SET_BIT (prune_exprs, expr->bitmap_index);
1848 }
1849 }
1850 }
1851
1852 FOR_EACH_BB (bb)
1853 {
1854 edge e;
1855 edge_iterator ei;
1856
1857 /* If the current block is the destination of an abnormal edge, we
1858 kill all trapping (for PRE) and memory (for hoist) expressions
1859 because we won't be able to properly place the instruction on
1860 the edge. So make them neither anticipatable nor transparent.
1861 This is fairly conservative.
1862
1863 ??? For hoisting it may be necessary to check for set-and-jump
1864 instructions here, not just for abnormal edges. The general problem
1865 is that when an expression cannot be placed right at the end of
1866 a basic block we should account for any side-effects of a subsequent
1867 jump instruction that could clobber the expression. It would
1868 be best to implement this check along the lines of
1869 hoist_expr_reaches_here_p where the target block is already known
1870 and, hence, there's no need to conservatively prune expressions on
1871 "intermediate" set-and-jump instructions. */
1872 FOR_EACH_EDGE (e, ei, bb->preds)
1873 if ((e->flags & EDGE_ABNORMAL)
1874 && (pre_p || CALL_P (BB_END (e->src))))
1875 {
1876 sbitmap_difference (antloc[bb->index],
1877 antloc[bb->index], prune_exprs);
1878 sbitmap_difference (transp[bb->index],
1879 transp[bb->index], prune_exprs);
1880 break;
1881 }
1882 }
1883
1884 sbitmap_free (prune_exprs);
1885 }
1886
1887 /* It may be necessary to insert a large number of insns on edges to
1888 make the existing occurrences of expressions fully redundant. This
1889 routine examines the set of insertions and deletions and if the ratio
1890 of insertions to deletions is too high for a particular expression, then
1891 the expression is removed from the insertion/deletion sets.
1892
1893 N_ELEMS is the number of elements in the hash table. */
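/* A worked example with made-up numbers: if an expression would have to be
   inserted on 50 edges to make a single computation redundant, and 50/1
   exceeds MAX_GCSE_INSERTION_RATIO, the expression is dropped from both
   PRE_INSERT_MAP and PRE_DELETE_MAP and the partial redundancy is simply
   left in place.  */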
1894
1895 static void
1896 prune_insertions_deletions (int n_elems)
1897 {
1898 sbitmap_iterator sbi;
1899 sbitmap prune_exprs;
1900
1901 /* We always use I to iterate over blocks/edges and J to iterate over
1902 expressions. */
1903 unsigned int i, j;
1904
1905 /* Counts for the number of times an expression needs to be inserted and
1906 number of times an expression can be removed as a result. */
1907 int *insertions = GCNEWVEC (int, n_elems);
1908 int *deletions = GCNEWVEC (int, n_elems);
1909
1910 /* Set of expressions which require too many insertions relative to
1911 the number of deletions achieved. We will prune these out of the
1912 insertion/deletion sets. */
1913 prune_exprs = sbitmap_alloc (n_elems);
1914 sbitmap_zero (prune_exprs);
1915
1916 /* Iterate over the edges counting the number of times each expression
1917 needs to be inserted. */
1918 for (i = 0; i < (unsigned) n_edges; i++)
1919 {
1920 EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi)
1921 insertions[j]++;
1922 }
1923
1924 /* Similarly for deletions, but those occur in blocks rather than on
1925 edges. */
1926 for (i = 0; i < (unsigned) last_basic_block; i++)
1927 {
1928 EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi)
1929 deletions[j]++;
1930 }
1931
1932 /* Now that we have accurate counts, iterate over the elements in the
1933 hash table and see if any need too many insertions relative to the
1934 number of evaluations that can be removed. If so, mark them in
1935 PRUNE_EXPRS. */
1936 for (j = 0; j < (unsigned) n_elems; j++)
1937 if (deletions[j]
1938 && ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
1939 SET_BIT (prune_exprs, j);
1940
1941 /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */
1942 EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi)
1943 {
1944 for (i = 0; i < (unsigned) n_edges; i++)
1945 RESET_BIT (pre_insert_map[i], j);
1946
1947 for (i = 0; i < (unsigned) last_basic_block; i++)
1948 RESET_BIT (pre_delete_map[i], j);
1949 }
1950
1951 sbitmap_free (prune_exprs);
1952 free (insertions);
1953 free (deletions);
1954 }
1955
1956 /* Top level routine to do the dataflow analysis needed by PRE. */
1957
1958 static struct edge_list *
1959 compute_pre_data (void)
1960 {
1961 struct edge_list *edge_list;
1962 basic_block bb;
1963
1964 compute_local_properties (transp, comp, antloc, &expr_hash_table);
1965 prune_expressions (true);
1966 sbitmap_vector_zero (ae_kill, last_basic_block);
1967
1968 /* Compute ae_kill for each basic block using:
1969
1970 ~(TRANSP | COMP)
1971 */
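/* That is, an expression is killed in BB exactly when BB neither
   preserves it (TRANSP) nor computes it (COMP).  */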
1972
1973 FOR_EACH_BB (bb)
1974 {
1975 sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1976 sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1977 }
1978
1979 edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1980 ae_kill, &pre_insert_map, &pre_delete_map);
1981 sbitmap_vector_free (antloc);
1982 antloc = NULL;
1983 sbitmap_vector_free (ae_kill);
1984 ae_kill = NULL;
1985
1986 prune_insertions_deletions (expr_hash_table.n_elems);
1987
1988 return edge_list;
1989 }
1990 \f
1991 /* PRE utilities */
1992
1993 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
1994 block BB.
1995
1996 VISITED is a pointer to a working buffer for tracking which BB's have
1997 been visited. It is NULL for the top-level call.
1998
1999 We treat reaching expressions that go through blocks containing the same
2000 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2001 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2002 2 as not reaching. The intent is to improve the probability of finding
2003 only one reaching expression and to reduce register lifetimes by picking
2004 the closest such expression. */
2005
2006 static int
2007 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr,
2008 basic_block bb, char *visited)
2009 {
2010 edge pred;
2011 edge_iterator ei;
2012
2013 FOR_EACH_EDGE (pred, ei, bb->preds)
2014 {
2015 basic_block pred_bb = pred->src;
2016
2017 if (pred->src == ENTRY_BLOCK_PTR
2018 /* Has this predecessor already been visited? */
2019 || visited[pred_bb->index])
2020 ;/* Nothing to do. */
2021
2022 /* Does this predecessor generate this expression? */
2023 else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
2024 {
2025 /* Is this the occurrence we're looking for?
2026 Note that there's only one generating occurrence per block
2027 so we just need to check the block number. */
2028 if (occr_bb == pred_bb)
2029 return 1;
2030
2031 visited[pred_bb->index] = 1;
2032 }
2033 /* Ignore this predecessor if it kills the expression. */
2034 else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
2035 visited[pred_bb->index] = 1;
2036
2037 /* Neither gen nor kill. */
2038 else
2039 {
2040 visited[pred_bb->index] = 1;
2041 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2042 return 1;
2043 }
2044 }
2045
2046 /* All paths have been checked. */
2047 return 0;
2048 }
2049
2050 /* The wrapper for pre_expr_reaches_here_p_work that ensures that any
2051 memory allocated for that function is freed. */
2052
2053 static int
2054 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
2055 {
2056 int rval;
2057 char *visited = XCNEWVEC (char, last_basic_block);
2058
2059 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2060
2061 free (visited);
2062 return rval;
2063 }
2064 \f
2065 /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */
2066
2067 static rtx
2068 process_insert_insn (struct expr *expr)
2069 {
2070 rtx reg = expr->reaching_reg;
2071 /* Copy the expression to make sure we don't have any sharing issues. */
2072 rtx exp = copy_rtx (expr->expr);
2073 rtx pat;
2074
2075 start_sequence ();
2076
2077 /* If the expression is something that's an operand, like a constant,
2078 just copy it to a register. */
2079 if (general_operand (exp, GET_MODE (reg)))
2080 emit_move_insn (reg, exp);
2081
2082 /* Otherwise, make a new insn to compute this expression and make sure the
2083 insn will be recognized (this also adds any needed CLOBBERs). */
2084 else
2085 {
2086 rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
2087
2088 if (insn_invalid_p (insn, false))
2089 gcc_unreachable ();
2090 }
2091
2092 pat = get_insns ();
2093 end_sequence ();
2094
2095 return pat;
2096 }
2097
2098 /* Add EXPR to the end of basic block BB.
2099
2100 This is used by both PRE and code hoisting. */
2101
2102 static void
2103 insert_insn_end_basic_block (struct expr *expr, basic_block bb)
2104 {
2105 rtx insn = BB_END (bb);
2106 rtx new_insn;
2107 rtx reg = expr->reaching_reg;
2108 int regno = REGNO (reg);
2109 rtx pat, pat_end;
2110
2111 pat = process_insert_insn (expr);
2112 gcc_assert (pat && INSN_P (pat));
2113
2114 pat_end = pat;
2115 while (NEXT_INSN (pat_end) != NULL_RTX)
2116 pat_end = NEXT_INSN (pat_end);
2117
2118 /* If the last insn is a jump, insert EXPR in front [taking care to
2119 handle cc0, etc. properly]. Similarly we need to take care of
2120 trapping instructions in the presence of non-call exceptions. */
2121
2122 if (JUMP_P (insn)
2123 || (NONJUMP_INSN_P (insn)
2124 && (!single_succ_p (bb)
2125 || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2126 {
2127 #ifdef HAVE_cc0
2128 rtx note;
2129 #endif
2130
2131 /* If this is a jump table, then we can't insert stuff here. Since
2132 we know the previous real insn must be the tablejump, we insert
2133 the new instruction just before the tablejump. */
2134 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
2135 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
2136 insn = prev_active_insn (insn);
2137
2138 #ifdef HAVE_cc0
2139 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
2140 if cc0 isn't set. */
2141 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
2142 if (note)
2143 insn = XEXP (note, 0);
2144 else
2145 {
2146 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
2147 if (maybe_cc0_setter
2148 && INSN_P (maybe_cc0_setter)
2149 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
2150 insn = maybe_cc0_setter;
2151 }
2152 #endif
2153 /* FIXME: What if something in cc0/jump uses value set in new insn? */
2154 new_insn = emit_insn_before_noloc (pat, insn, bb);
2155 }
2156
2157 /* Likewise if the last insn is a call, as will happen in the presence
2158 of exception handling. */
2159 else if (CALL_P (insn)
2160 && (!single_succ_p (bb)
2161 || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2162 {
2163 /* Keeping in mind targets with small register classes and parameters
2164 in registers, we search backward and place the instructions before
2165 the first parameter is loaded. Do this for all targets for consistency
2166 and on the presumption that we'll get better code elsewhere as well. */
2167
2168 /* Since different machines initialize their parameter registers
2169 in different orders, assume nothing. Collect the set of all
2170 parameter registers. */
2171 insn = find_first_parameter_load (insn, BB_HEAD (bb));
2172
2173 /* If we found all the parameter loads, then we want to insert
2174 before the first parameter load.
2175
2176 If we did not find all the parameter loads, then we might have
2177 stopped on the head of the block, which could be a CODE_LABEL.
2178 If we inserted before the CODE_LABEL, then we would be putting
2179 the insn in the wrong basic block. In that case, put the insn
2180 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
2181 while (LABEL_P (insn)
2182 || NOTE_INSN_BASIC_BLOCK_P (insn))
2183 insn = NEXT_INSN (insn);
2184
2185 new_insn = emit_insn_before_noloc (pat, insn, bb);
2186 }
2187 else
2188 new_insn = emit_insn_after_noloc (pat, insn, bb);
2189
2190 while (1)
2191 {
2192 if (INSN_P (pat))
2193 add_label_notes (PATTERN (pat), new_insn);
2194 if (pat == pat_end)
2195 break;
2196 pat = NEXT_INSN (pat);
2197 }
2198
2199 gcse_create_count++;
2200
2201 if (dump_file)
2202 {
2203 fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2204 bb->index, INSN_UID (new_insn));
2205 fprintf (dump_file, "copying expression %d to reg %d\n",
2206 expr->bitmap_index, regno);
2207 }
2208 }
2209
2210 /* Insert partially redundant expressions on edges in the CFG to make
2211 the expressions fully redundant. */
2212
2213 static int
2214 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
2215 {
2216 int e, i, j, num_edges, set_size, did_insert = 0;
2217 sbitmap *inserted;
2218
2219 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2220 if it reaches any of the deleted expressions. */
2221
2222 set_size = pre_insert_map[0]->size;
2223 num_edges = NUM_EDGES (edge_list);
2224 inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2225 sbitmap_vector_zero (inserted, num_edges);
2226
2227 for (e = 0; e < num_edges; e++)
2228 {
2229 int indx;
2230 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2231
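/* PRE_INSERT_MAP[e] is walked one SBITMAP_ELT_TYPE word at a time; INDX
   is the expression index of bit 0 of the current word, and the inner
   loop peels bits off INSERT from the low end.  */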
2232 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2233 {
2234 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2235
2236 for (j = indx;
2237 insert && j < (int) expr_hash_table.n_elems;
2238 j++, insert >>= 1)
2239 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2240 {
2241 struct expr *expr = index_map[j];
2242 struct occr *occr;
2243
2244 /* Now look at each deleted occurrence of this expression. */
2245 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2246 {
2247 if (! occr->deleted_p)
2248 continue;
2249
2250 /* Insert this expression on this edge if it would
2251 reach the deleted occurrence in BB. */
2252 if (!TEST_BIT (inserted[e], j))
2253 {
2254 rtx insn;
2255 edge eg = INDEX_EDGE (edge_list, e);
2256
2257 /* We can't insert anything on an abnormal and
2258 critical edge, so we insert the insn at the end of
2259 the previous block. There are several alternatives
2260 detailed in Morgan's book P277 (sec 10.5) for
2261 handling this situation. This one is easiest for
2262 now. */
2263
2264 if (eg->flags & EDGE_ABNORMAL)
2265 insert_insn_end_basic_block (index_map[j], bb);
2266 else
2267 {
2268 insn = process_insert_insn (index_map[j]);
2269 insert_insn_on_edge (insn, eg);
2270 }
2271
2272 if (dump_file)
2273 {
2274 fprintf (dump_file, "PRE: edge (%d,%d), ",
2275 bb->index,
2276 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2277 fprintf (dump_file, "copy expression %d\n",
2278 expr->bitmap_index);
2279 }
2280
2281 update_ld_motion_stores (expr);
2282 SET_BIT (inserted[e], j);
2283 did_insert = 1;
2284 gcse_create_count++;
2285 }
2286 }
2287 }
2288 }
2289 }
2290
2291 sbitmap_vector_free (inserted);
2292 return did_insert;
2293 }
2294
2295 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2296 Given "old_reg <- expr" (INSN), instead of adding after it
2297 reaching_reg <- old_reg
2298 it's better to do the following:
2299 reaching_reg <- expr
2300 old_reg <- reaching_reg
2301 because this way copy propagation can discover additional PRE
2302 opportunities. But if this fails, we try the old way.
2303 When "expr" is a store, i.e.
2304 given "MEM <- old_reg", instead of adding after it
2305 reaching_reg <- old_reg
2306 it's better to add it before as follows:
2307 reaching_reg <- old_reg
2308 MEM <- reaching_reg. */
2309
2310 static void
2311 pre_insert_copy_insn (struct expr *expr, rtx insn)
2312 {
2313 rtx reg = expr->reaching_reg;
2314 int regno = REGNO (reg);
2315 int indx = expr->bitmap_index;
2316 rtx pat = PATTERN (insn);
2317 rtx set, first_set, new_insn;
2318 rtx old_reg;
2319 int i;
2320
2321 /* This block matches the logic in hash_scan_insn. */
2322 switch (GET_CODE (pat))
2323 {
2324 case SET:
2325 set = pat;
2326 break;
2327
2328 case PARALLEL:
2329 /* Search through the parallel looking for the set whose
2330 source was the expression that we're interested in. */
2331 first_set = NULL_RTX;
2332 set = NULL_RTX;
2333 for (i = 0; i < XVECLEN (pat, 0); i++)
2334 {
2335 rtx x = XVECEXP (pat, 0, i);
2336 if (GET_CODE (x) == SET)
2337 {
2338 /* If the source was a REG_EQUAL or REG_EQUIV note, we
2339 may not find an equivalent expression, but in this
2340 case the PARALLEL will have a single set. */
2341 if (first_set == NULL_RTX)
2342 first_set = x;
2343 if (expr_equiv_p (SET_SRC (x), expr->expr))
2344 {
2345 set = x;
2346 break;
2347 }
2348 }
2349 }
2350
2351 gcc_assert (first_set);
2352 if (set == NULL_RTX)
2353 set = first_set;
2354 break;
2355
2356 default:
2357 gcc_unreachable ();
2358 }
2359
2360 if (REG_P (SET_DEST (set)))
2361 {
2362 old_reg = SET_DEST (set);
2363 /* Check if we can modify the set destination in the original insn. */
2364 if (validate_change (insn, &SET_DEST (set), reg, 0))
2365 {
2366 new_insn = gen_move_insn (old_reg, reg);
2367 new_insn = emit_insn_after (new_insn, insn);
2368 }
2369 else
2370 {
2371 new_insn = gen_move_insn (reg, old_reg);
2372 new_insn = emit_insn_after (new_insn, insn);
2373 }
2374 }
2375 else /* This is possible only in case of a store to memory. */
2376 {
2377 old_reg = SET_SRC (set);
2378 new_insn = gen_move_insn (reg, old_reg);
2379
2380 /* Check if we can modify the set source in the original insn. */
2381 if (validate_change (insn, &SET_SRC (set), reg, 0))
2382 new_insn = emit_insn_before (new_insn, insn);
2383 else
2384 new_insn = emit_insn_after (new_insn, insn);
2385 }
2386
2387 gcse_create_count++;
2388
2389 if (dump_file)
2390 fprintf (dump_file,
2391 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2392 BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2393 INSN_UID (insn), regno);
2394 }
2395
2396 /* Copy available expressions that reach the redundant expression
2397 to `reaching_reg'. */
2398
2399 static void
2400 pre_insert_copies (void)
2401 {
2402 unsigned int i, added_copy;
2403 struct expr *expr;
2404 struct occr *occr;
2405 struct occr *avail;
2406
2407 /* For each available expression in the table, copy the result to
2408 `reaching_reg' if the expression reaches a deleted one.
2409
2410 ??? The current algorithm is rather brute force.
2411 Need to do some profiling. */
2412
2413 for (i = 0; i < expr_hash_table.size; i++)
2414 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2415 {
2416 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
2417 we don't want to insert a copy here because the expression may not
2418 really be redundant. So only insert an insn if the expression was
2419 deleted. This test also avoids further processing if the
2420 expression wasn't deleted anywhere. */
2421 if (expr->reaching_reg == NULL)
2422 continue;
2423
2424 /* Set when we add a copy for that expression. */
2425 added_copy = 0;
2426
2427 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2428 {
2429 if (! occr->deleted_p)
2430 continue;
2431
2432 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2433 {
2434 rtx insn = avail->insn;
2435
2436 /* No need to handle this one if handled already. */
2437 if (avail->copied_p)
2438 continue;
2439
2440 /* Don't handle this one if it's a redundant one. */
2441 if (INSN_DELETED_P (insn))
2442 continue;
2443
2444 /* Or if the expression doesn't reach the deleted one. */
2445 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2446 expr,
2447 BLOCK_FOR_INSN (occr->insn)))
2448 continue;
2449
2450 added_copy = 1;
2451
2452 /* Copy the result of avail to reaching_reg. */
2453 pre_insert_copy_insn (expr, insn);
2454 avail->copied_p = 1;
2455 }
2456 }
2457
2458 if (added_copy)
2459 update_ld_motion_stores (expr);
2460 }
2461 }
2462
2463 /* Emit move from SRC to DEST noting the equivalence with expression computed
2464 in INSN. */
2465
2466 static rtx
2467 gcse_emit_move_after (rtx dest, rtx src, rtx insn)
2468 {
2469 rtx new_rtx;
2470 rtx set = single_set (insn), set2;
2471 rtx note;
2472 rtx eqv;
2473
2474 /* This should never fail since we're creating a reg->reg copy
2475 we've verified to be valid. */
2476
2477 new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2478
2479 /* Note the equivalence for local CSE pass. */
2480 set2 = single_set (new_rtx);
2481 if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2482 return new_rtx;
2483 if ((note = find_reg_equal_equiv_note (insn)))
2484 eqv = XEXP (note, 0);
2485 else
2486 eqv = SET_SRC (set);
2487
2488 set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2489
2490 return new_rtx;
2491 }
2492
2493 /* Delete redundant computations.
2494 Deletion is done by changing the insn to copy the `reaching_reg' of
2495 the expression into the result of the SET. It is left to later passes
2496 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
2497
2498 Return nonzero if a change is made. */
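/* For instance (a sketch with made-up register numbers): a redundant
   (set (reg 70) (plus (reg 65) (const_int 4))) is deleted and replaced by
   (set (reg 70) (reg 130)), where reg 130 is EXPR->reaching_reg; later
   passes can then propagate or eliminate the copy.  */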
2499
2500 static int
2501 pre_delete (void)
2502 {
2503 unsigned int i;
2504 int changed;
2505 struct expr *expr;
2506 struct occr *occr;
2507
2508 changed = 0;
2509 for (i = 0; i < expr_hash_table.size; i++)
2510 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2511 {
2512 int indx = expr->bitmap_index;
2513
2514 /* We only need to search antic_occr since we require ANTLOC != 0. */
2515 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2516 {
2517 rtx insn = occr->insn;
2518 rtx set;
2519 basic_block bb = BLOCK_FOR_INSN (insn);
2520
2521 /* We only delete insns that have a single_set. */
2522 if (TEST_BIT (pre_delete_map[bb->index], indx)
2523 && (set = single_set (insn)) != 0
2524 && dbg_cnt (pre_insn))
2525 {
2526 /* Create a pseudo-reg to store the result of reaching
2527 expressions into. Get the mode for the new pseudo from
2528 the mode of the original destination pseudo. */
2529 if (expr->reaching_reg == NULL)
2530 expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2531
2532 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
2533 delete_insn (insn);
2534 occr->deleted_p = 1;
2535 changed = 1;
2536 gcse_subst_count++;
2537
2538 if (dump_file)
2539 {
2540 fprintf (dump_file,
2541 "PRE: redundant insn %d (expression %d) in ",
2542 INSN_UID (insn), indx);
2543 fprintf (dump_file, "bb %d, reaching reg is %d\n",
2544 bb->index, REGNO (expr->reaching_reg));
2545 }
2546 }
2547 }
2548 }
2549
2550 return changed;
2551 }
2552
2553 /* Perform GCSE optimizations using PRE.
2554 This is called by one_pre_gcse_pass after all the dataflow analysis
2555 has been done.
2556
2557 This is based on the original Morel-Renvoise paper, Fred Chow's thesis, and
2558 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2559 Compiler Design and Implementation.
2560
2561 ??? A new pseudo reg is created to hold the reaching expression. The nice
2562 thing about the classical approach is that it would try to use an existing
2563 reg. If the register can't be adequately optimized [i.e. we introduce
2564 reload problems], one could add a pass here to propagate the new register
2565 through the block.
2566
2567 ??? We don't handle single sets in PARALLELs because we're [currently] not
2568 able to copy the rest of the parallel when we insert copies to create full
2569 redundancies from partial redundancies. However, there's no reason why we
2570 can't handle PARALLELs in the cases where there are no partial
2571 redundancies. */
2572
2573 static int
2574 pre_gcse (struct edge_list *edge_list)
2575 {
2576 unsigned int i;
2577 int did_insert, changed;
2578 struct expr **index_map;
2579 struct expr *expr;
2580
2581 /* Compute a mapping from expression number (`bitmap_index') to
2582 hash table entry. */
2583
2584 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2585 for (i = 0; i < expr_hash_table.size; i++)
2586 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2587 index_map[expr->bitmap_index] = expr;
2588
2589 /* Delete the redundant insns first so that
2590 - we know what register to use for the new insns and for the other
2591 ones with reaching expressions
2592 - we know which insns are redundant when we go to create copies */
2593
2594 changed = pre_delete ();
2595 did_insert = pre_edge_insert (edge_list, index_map);
2596
2597 /* In other places with reaching expressions, copy the expression to the
2598 specially allocated pseudo-reg that reaches the redundant expr. */
2599 pre_insert_copies ();
2600 if (did_insert)
2601 {
2602 commit_edge_insertions ();
2603 changed = 1;
2604 }
2605
2606 free (index_map);
2607 return changed;
2608 }
2609
2610 /* Top level routine to perform one PRE GCSE pass.
2611
2612 Return nonzero if a change was made. */
2613
2614 static int
2615 one_pre_gcse_pass (void)
2616 {
2617 int changed = 0;
2618
2619 gcse_subst_count = 0;
2620 gcse_create_count = 0;
2621
2622 /* Return if there's nothing to do, or it is too expensive. */
2623 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
2624 || is_too_expensive (_("PRE disabled")))
2625 return 0;
2626
2627 /* We need alias. */
2628 init_alias_analysis ();
2629
2630 bytes_used = 0;
2631 gcc_obstack_init (&gcse_obstack);
2632 alloc_gcse_mem ();
2633
2634 alloc_hash_table (&expr_hash_table);
2635 add_noreturn_fake_exit_edges ();
2636 if (flag_gcse_lm)
2637 compute_ld_motion_mems ();
2638
2639 compute_hash_table (&expr_hash_table);
2640 if (flag_gcse_lm)
2641 trim_ld_motion_mems ();
2642 if (dump_file)
2643 dump_hash_table (dump_file, "Expression", &expr_hash_table);
2644
2645 if (expr_hash_table.n_elems > 0)
2646 {
2647 struct edge_list *edge_list;
2648 alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
2649 edge_list = compute_pre_data ();
2650 changed |= pre_gcse (edge_list);
2651 free_edge_list (edge_list);
2652 free_pre_mem ();
2653 }
2654
2655 if (flag_gcse_lm)
2656 free_ld_motion_mems ();
2657 remove_fake_exit_edges ();
2658 free_hash_table (&expr_hash_table);
2659
2660 free_gcse_mem ();
2661 obstack_free (&gcse_obstack, NULL);
2662
2663 /* We are finished with alias. */
2664 end_alias_analysis ();
2665
2666 if (dump_file)
2667 {
2668 fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2669 current_function_name (), n_basic_blocks, bytes_used);
2670 fprintf (dump_file, "%d substs, %d insns created\n",
2671 gcse_subst_count, gcse_create_count);
2672 }
2673
2674 return changed;
2675 }
2676 \f
2677 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2678 to INSN. If such notes are added to an insn which references a
2679 CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
2680 that note, because the following loop optimization pass requires
2681 them. */
2682
2683 /* ??? If there was a jump optimization pass after gcse and before loop,
2684 then we would not need to do this here, because jump would add the
2685 necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
2686
2687 static void
2688 add_label_notes (rtx x, rtx insn)
2689 {
2690 enum rtx_code code = GET_CODE (x);
2691 int i, j;
2692 const char *fmt;
2693
2694 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2695 {
2696 /* This code used to ignore labels that referred to dispatch tables to
2697 avoid flow generating (slightly) worse code.
2698
2699 We no longer ignore such label references (see LABEL_REF handling in
2700 mark_jump_label for additional information). */
2701
2702 /* There's no reason for current users to emit jump-insns with
2703 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2704 notes. */
2705 gcc_assert (!JUMP_P (insn));
2706 add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
2707
2708 if (LABEL_P (XEXP (x, 0)))
2709 LABEL_NUSES (XEXP (x, 0))++;
2710
2711 return;
2712 }
2713
2714 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2715 {
2716 if (fmt[i] == 'e')
2717 add_label_notes (XEXP (x, i), insn);
2718 else if (fmt[i] == 'E')
2719 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2720 add_label_notes (XVECEXP (x, i, j), insn);
2721 }
2722 }
2723
2724 /* Code Hoisting variables and subroutines. */
2725
2726 /* Very busy expressions. */
2727 static sbitmap *hoist_vbein;
2728 static sbitmap *hoist_vbeout;
2729
2730 /* ??? We could compute post dominators and run this algorithm in
2731 reverse to perform tail merging; doing so would probably be
2732 more effective than the tail merging code in jump.c.
2733
2734 It's unclear if tail merging could be run in parallel with
2735 code hoisting. It would be nice. */
2736
2737 /* Allocate vars used for code hoisting analysis. */
2738
2739 static void
2740 alloc_code_hoist_mem (int n_blocks, int n_exprs)
2741 {
2742 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2743 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2744 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2745
2746 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2747 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2748 }
2749
2750 /* Free vars used for code hoisting analysis. */
2751
2752 static void
2753 free_code_hoist_mem (void)
2754 {
2755 sbitmap_vector_free (antloc);
2756 sbitmap_vector_free (transp);
2757 sbitmap_vector_free (comp);
2758
2759 sbitmap_vector_free (hoist_vbein);
2760 sbitmap_vector_free (hoist_vbeout);
2761
2762 free_dominance_info (CDI_DOMINATORS);
2763 }
2764
2765 /* Compute the very busy expressions at entry/exit from each block.
2766
2767 An expression is very busy if all paths from a given point
2768 compute the expression. */
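/* In the notation of the local properties above, the fixed point computed
   below is

      VBEOUT(bb) = COMP(bb) | (intersection of VBEIN over bb's successors)
      VBEIN(bb)  = ANTLOC(bb) | (VBEOUT(bb) & TRANSP(bb))

   with VBEOUT left empty for the block laid out just before the exit
   block.  */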
2769
2770 static void
2771 compute_code_hoist_vbeinout (void)
2772 {
2773 int changed, passes;
2774 basic_block bb;
2775
2776 sbitmap_vector_zero (hoist_vbeout, last_basic_block);
2777 sbitmap_vector_zero (hoist_vbein, last_basic_block);
2778
2779 passes = 0;
2780 changed = 1;
2781
2782 while (changed)
2783 {
2784 changed = 0;
2785
2786 /* We scan the blocks in reverse order to speed up
2787 the convergence. */
2788 FOR_EACH_BB_REVERSE (bb)
2789 {
2790 if (bb->next_bb != EXIT_BLOCK_PTR)
2791 {
2792 sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
2793 hoist_vbein, bb);
2794
2795 /* Include expressions in VBEout that are calculated
2796 in BB and available at its end. */
2797 sbitmap_a_or_b (hoist_vbeout[bb->index],
2798 hoist_vbeout[bb->index], comp[bb->index]);
2799 }
2800
2801 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
2802 antloc[bb->index],
2803 hoist_vbeout[bb->index],
2804 transp[bb->index]);
2805 }
2806
2807 passes++;
2808 }
2809
2810 if (dump_file)
2811 {
2812 fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2813
2814 FOR_EACH_BB (bb)
2815 {
2816 fprintf (dump_file, "vbein (%d): ", bb->index);
2817 dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
2818 fprintf (dump_file, "vbeout(%d): ", bb->index);
2819 dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
2820 }
2821 }
2822 }
2823
2824 /* Top level routine to do the dataflow analysis needed by code hoisting. */
2825
2826 static void
2827 compute_code_hoist_data (void)
2828 {
2829 compute_local_properties (transp, comp, antloc, &expr_hash_table);
2830 prune_expressions (false);
2831 compute_code_hoist_vbeinout ();
2832 calculate_dominance_info (CDI_DOMINATORS);
2833 if (dump_file)
2834 fprintf (dump_file, "\n");
2835 }
2836
2837 /* Determine if the expression identified by EXPR_INDEX would
2838 reach BB unimpaired if it was placed at the end of EXPR_BB.
2839 Stop the search if the expression would need to be moved more
2840 than DISTANCE instructions.
2841
2842 It's unclear exactly what Muchnick meant by "unimpaired". It seems
2843 to me that the expression must either be computed or transparent in
2844 *every* block in the path(s) from EXPR_BB to BB. Any other definition
2845 would allow the expression to be hoisted out of loops, even if
2846 the expression wasn't a loop invariant.
2847
2848 Contrast this to reachability for PRE where an expression is
2849 considered reachable if *any* path reaches instead of *all*
2850 paths. */
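/* Note on DISTANCE: a positive value is decremented by the size of each
   block the expression would travel through and terminates the search
   when exhausted; a value of zero is asserted below and never limits the
   walk.  */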
2851
2852 static int
2853 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
2854 char *visited, int distance, int *bb_size)
2855 {
2856 edge pred;
2857 edge_iterator ei;
2858 int visited_allocated_locally = 0;
2859
2860 /* Terminate the search if the distance EXPR is allowed to move
2861 has been exhausted. */
2862 if (distance > 0)
2863 {
2864 distance -= bb_size[bb->index];
2865
2866 if (distance <= 0)
2867 return 0;
2868 }
2869 else
2870 gcc_assert (distance == 0);
2871
2872 if (visited == NULL)
2873 {
2874 visited_allocated_locally = 1;
2875 visited = XCNEWVEC (char, last_basic_block);
2876 }
2877
2878 FOR_EACH_EDGE (pred, ei, bb->preds)
2879 {
2880 basic_block pred_bb = pred->src;
2881
2882 if (pred->src == ENTRY_BLOCK_PTR)
2883 break;
2884 else if (pred_bb == expr_bb)
2885 continue;
2886 else if (visited[pred_bb->index])
2887 continue;
2888
2889 else if (! TEST_BIT (transp[pred_bb->index], expr_index))
2890 break;
2891
2892 /* Not killed. */
2893 else
2894 {
2895 visited[pred_bb->index] = 1;
2896 if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
2897 visited, distance, bb_size))
2898 break;
2899 }
2900 }
2901 if (visited_allocated_locally)
2902 free (visited);
2903
2904 return (pred == NULL);
2905 }
2906 \f
2907 /* Find occurrence in BB. */
2908
2909 static struct occr *
2910 find_occr_in_bb (struct occr *occr, basic_block bb)
2911 {
2912 /* Find the right occurrence of this expression. */
2913 while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
2914 occr = occr->next;
2915
2916 return occr;
2917 }
2918
2919 /* Actually perform code hoisting. */
2920
2921 static int
2922 hoist_code (void)
2923 {
2924 basic_block bb, dominated;
2925 VEC (basic_block, heap) *dom_tree_walk;
2926 unsigned int dom_tree_walk_index;
2927 VEC (basic_block, heap) *domby;
2928 unsigned int i,j;
2929 struct expr **index_map;
2930 struct expr *expr;
2931 int *to_bb_head;
2932 int *bb_size;
2933 int changed = 0;
2934
2935 /* Compute a mapping from expression number (`bitmap_index') to
2936 hash table entry. */
2937
2938 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2939 for (i = 0; i < expr_hash_table.size; i++)
2940 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2941 index_map[expr->bitmap_index] = expr;
2942
2943 /* Calculate sizes of basic blocks and note how far
2944 each instruction is from the start of its block. We then use this
2945 data to restrict the distance an expression can travel. */
2946
2947 to_bb_head = XCNEWVEC (int, get_max_uid ());
2948 bb_size = XCNEWVEC (int, last_basic_block);
2949
2950 FOR_EACH_BB (bb)
2951 {
2952 rtx insn;
2953 int to_head;
2954
2955 to_head = 0;
2956 FOR_BB_INSNS (bb, insn)
2957 {
2958 /* Don't count debug instructions so that they do not affect
2959 the hoisting decisions. */
2960 if (NONDEBUG_INSN_P (insn))
2961 to_bb_head[INSN_UID (insn)] = to_head++;
2962 }
2963
2964 bb_size[bb->index] = to_head;
2965 }
2966
2967 gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
2968 && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
2969 == ENTRY_BLOCK_PTR->next_bb));
2970
2971 dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
2972 ENTRY_BLOCK_PTR->next_bb);
2973
2974 /* Walk over each basic block looking for potentially hoistable
2975 expressions; nothing gets hoisted from the entry block. */
2976 FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb)
2977 {
2978 domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
2979
2980 if (VEC_length (basic_block, domby) == 0)
2981 continue;
2982
2983 /* Examine each expression that is very busy at the exit of this
2984 block. These are the potentially hoistable expressions. */
2985 for (i = 0; i < SBITMAP_SIZE (hoist_vbeout[bb->index]); i++)
2986 {
2987 if (TEST_BIT (hoist_vbeout[bb->index], i))
2988 {
2989 /* Current expression. */
2990 struct expr *expr = index_map[i];
2991 /* Number of occurrences of EXPR that can be hoisted to BB. */
2992 int hoistable = 0;
2993 /* Basic blocks that have occurrences reachable from BB. */
2994 bitmap_head _from_bbs, *from_bbs = &_from_bbs;
2995 /* Occurrences reachable from BB. */
2996 VEC (occr_t, heap) *occrs_to_hoist = NULL;
2997 /* We want to insert the expression into BB only once, so
2998 note when we've inserted it. */
2999 int insn_inserted_p;
3000 occr_t occr;
3001
3002 bitmap_initialize (from_bbs, 0);
3003
3004 /* If an expression is computed in BB and is available at end of
3005 BB, hoist all occurrences dominated by BB to BB. */
3006 if (TEST_BIT (comp[bb->index], i))
3007 {
3008 occr = find_occr_in_bb (expr->antic_occr, bb);
3009
3010 if (occr)
3011 {
3012 /* An occurrence might've been already deleted
3013 while processing a dominator of BB. */
3014 if (!occr->deleted_p)
3015 {
3016 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3017 hoistable++;
3018 }
3019 }
3020 else
3021 hoistable++;
3022 }
3023
3024 /* We've found a potentially hoistable expression, now
3025 we look at every block BB dominates to see if it
3026 computes the expression. */
3027 FOR_EACH_VEC_ELT (basic_block, domby, j, dominated)
3028 {
3029 int max_distance;
3030
3031 /* Ignore self dominance. */
3032 if (bb == dominated)
3033 continue;
3034 /* We've found a dominated block, now see if it computes
3035 the busy expression and whether or not moving that
3036 expression to the "beginning" of that block is safe. */
3037 if (!TEST_BIT (antloc[dominated->index], i))
3038 continue;
3039
3040 occr = find_occr_in_bb (expr->antic_occr, dominated);
3041 gcc_assert (occr);
3042
3043 /* An occurrence might've been already deleted
3044 while processing a dominator of BB. */
3045 if (occr->deleted_p)
3046 continue;
3047 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3048
3049 max_distance = expr->max_distance;
3050 if (max_distance > 0)
3051 /* Adjust MAX_DISTANCE to account for the fact that
3052 OCCR won't have to travel all of DOMINATED, but
3053 only part of it. */
3054 max_distance += (bb_size[dominated->index]
3055 - to_bb_head[INSN_UID (occr->insn)]);
3056
3057 /* Note if the expression would reach the dominated block
3058 unimpaired if it was placed at the end of BB.
3059
3060 Keep track of how many times this expression is hoistable
3061 from a dominated block into BB. */
3062 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
3063 max_distance, bb_size))
3064 {
3065 hoistable++;
3066 VEC_safe_push (occr_t, heap,
3067 occrs_to_hoist, occr);
3068 bitmap_set_bit (from_bbs, dominated->index);
3069 }
3070 }
3071
3072 /* If we found more than one hoistable occurrence of this
3073 expression, then note it in the vector of expressions to
3074 hoist. It makes no sense to hoist things which are computed
3075 in only one BB, and doing so tends to pessimize register
3076 allocation. One could increase this value to try harder
3077 to avoid any possible code expansion due to register
3078 allocation issues; however experiments have shown that
3079 the vast majority of hoistable expressions are only movable
3080 from two successors, so raising this threshold is likely
3081 to nullify any benefit we get from code hoisting. */
3082 if (hoistable > 1 && dbg_cnt (hoist_insn))
3083 {
3084 /* If (hoistable != VEC_length), then there is
3085 an occurrence of EXPR in BB itself. Don't waste
3086 time looking for LCA in this case. */
3087 if ((unsigned) hoistable
3088 == VEC_length (occr_t, occrs_to_hoist))
3089 {
3090 basic_block lca;
3091
3092 lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3093 from_bbs);
3094 if (lca != bb)
3095 /* Punt, it's better to hoist these occurrences to
3096 LCA. */
3097 VEC_free (occr_t, heap, occrs_to_hoist);
3098 }
3099 }
3100 else
3101 /* Punt, no point hoisting a single occurrence. */
3102 VEC_free (occr_t, heap, occrs_to_hoist);
3103
3104 insn_inserted_p = 0;
3105
3106 /* Walk through occurrences of the I'th expression we want
3107 to hoist to BB and make the transformations. */
3108 FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr)
3109 {
3110 rtx insn;
3111 rtx set;
3112
3113 gcc_assert (!occr->deleted_p);
3114
3115 insn = occr->insn;
3116 set = single_set (insn);
3117 gcc_assert (set);
3118
3119 /* Create a pseudo-reg to store the result of reaching
3120 expressions into. Get the mode for the new pseudo
3121 from the mode of the original destination pseudo.
3122
3123 It is important to use new pseudos whenever we
3124 emit a set. This will allow reload to use
3125 rematerialization for such registers. */
3126 if (!insn_inserted_p)
3127 expr->reaching_reg
3128 = gen_reg_rtx_and_attrs (SET_DEST (set));
3129
3130 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3131 insn);
3132 delete_insn (insn);
3133 occr->deleted_p = 1;
3134 changed = 1;
3135 gcse_subst_count++;
3136
3137 if (!insn_inserted_p)
3138 {
3139 insert_insn_end_basic_block (expr, bb);
3140 insn_inserted_p = 1;
3141 }
3142 }
3143
3144 VEC_free (occr_t, heap, occrs_to_hoist);
3145 bitmap_clear (from_bbs);
3146 }
3147 }
3148 VEC_free (basic_block, heap, domby);
3149 }
3150
3151 VEC_free (basic_block, heap, dom_tree_walk);
3152 free (bb_size);
3153 free (to_bb_head);
3154 free (index_map);
3155
3156 return changed;
3157 }
3158
3159 /* Top level routine to perform one code hoisting (aka unification) pass.
3160
3161 Return nonzero if a change was made. */
3162
3163 static int
3164 one_code_hoisting_pass (void)
3165 {
3166 int changed = 0;
3167
3168 gcse_subst_count = 0;
3169 gcse_create_count = 0;
3170
3171 /* Return if there's nothing to do, or it is too expensive. */
3172 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
3173 || is_too_expensive (_("GCSE disabled")))
3174 return 0;
3175
3176 doing_code_hoisting_p = true;
3177
3178 /* We need alias. */
3179 init_alias_analysis ();
3180
3181 bytes_used = 0;
3182 gcc_obstack_init (&gcse_obstack);
3183 alloc_gcse_mem ();
3184
3185 alloc_hash_table (&expr_hash_table);
3186 compute_hash_table (&expr_hash_table);
3187 if (dump_file)
3188 dump_hash_table (dump_file, "Code Hoisting Expressions", &expr_hash_table);
3189
3190 if (expr_hash_table.n_elems > 0)
3191 {
3192 alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
3193 compute_code_hoist_data ();
3194 changed = hoist_code ();
3195 free_code_hoist_mem ();
3196 }
3197
3198 free_hash_table (&expr_hash_table);
3199 free_gcse_mem ();
3200 obstack_free (&gcse_obstack, NULL);
3201
3202 /* We are finished with alias. */
3203 end_alias_analysis ();
3204
3205 if (dump_file)
3206 {
3207 fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3208 current_function_name (), n_basic_blocks, bytes_used);
3209 fprintf (dump_file, "%d substs, %d insns created\n",
3210 gcse_subst_count, gcse_create_count);
3211 }
3212
3213 doing_code_hoisting_p = false;
3214
3215 return changed;
3216 }
3217 \f
3218 /* Here we provide the things required to do store motion towards the exit.
3219 In order for this to be effective, gcse also needed to be taught how to
3220 move a load when it is killed only by a store to itself.
3221
3222 int i;
3223 float a[10];
3224
3225 void foo(float scale)
3226 {
3227 for (i=0; i<10; i++)
3228 a[i] *= scale;
3229 }
3230
3231 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3232 the load out since it's live around the loop, and stored at the bottom
3233 of the loop.
3234
3235 The 'Load Motion' referred to and implemented in this file is
3236 an enhancement to gcse which when using edge based LCM, recognizes
3237 this situation and allows gcse to move the load out of the loop.
3238
3239 Once gcse has hoisted the load, store motion can then push this
3240 load towards the exit, and we end up with no loads or stores of 'i'
3241 in the loop. */
3242
3243 static hashval_t
3244 pre_ldst_expr_hash (const void *p)
3245 {
3246 int do_not_record_p = 0;
3247 const struct ls_expr *const x = (const struct ls_expr *) p;
3248 return
3249 hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
3250 }
3251
3252 static int
3253 pre_ldst_expr_eq (const void *p1, const void *p2)
3254 {
3255 const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
3256 *const ptr2 = (const struct ls_expr *) p2;
3257 return expr_equiv_p (ptr1->pattern, ptr2->pattern);
3258 }
3259
3260 /* This will search the ldst list for a matching expression. If it
3261 doesn't find one, we create one and initialize it. */
3262
3263 static struct ls_expr *
3264 ldst_entry (rtx x)
3265 {
3266 int do_not_record_p = 0;
3267 struct ls_expr * ptr;
3268 unsigned int hash;
3269 void **slot;
3270 struct ls_expr e;
3271
3272 hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3273 NULL, /*have_reg_qty=*/false);
3274
3275 e.pattern = x;
3276 slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
3277 if (*slot)
3278 return (struct ls_expr *)*slot;
3279
3280 ptr = XNEW (struct ls_expr);
3281
3282 ptr->next = pre_ldst_mems;
3283 ptr->expr = NULL;
3284 ptr->pattern = x;
3285 ptr->pattern_regs = NULL_RTX;
3286 ptr->loads = NULL_RTX;
3287 ptr->stores = NULL_RTX;
3288 ptr->reaching_reg = NULL_RTX;
3289 ptr->invalid = 0;
3290 ptr->index = 0;
3291 ptr->hash_index = hash;
3292 pre_ldst_mems = ptr;
3293 *slot = ptr;
3294
3295 return ptr;
3296 }
3297
3298 /* Free up an individual ldst entry. */
3299
3300 static void
3301 free_ldst_entry (struct ls_expr * ptr)
3302 {
3303 free_INSN_LIST_list (& ptr->loads);
3304 free_INSN_LIST_list (& ptr->stores);
3305
3306 free (ptr);
3307 }
3308
3309 /* Free up all memory associated with the ldst list. */
3310
3311 static void
3312 free_ld_motion_mems (void)
3313 {
3314 if (pre_ldst_table)
3315 htab_delete (pre_ldst_table);
3316 pre_ldst_table = NULL;
3317
3318 while (pre_ldst_mems)
3319 {
3320 struct ls_expr * tmp = pre_ldst_mems;
3321
3322 pre_ldst_mems = pre_ldst_mems->next;
3323
3324 free_ldst_entry (tmp);
3325 }
3326
3327 pre_ldst_mems = NULL;
3328 }
3329
3330 /* Dump debugging info about the ldst list. */
3331
3332 static void
3333 print_ldst_list (FILE * file)
3334 {
3335 struct ls_expr * ptr;
3336
3337 fprintf (file, "LDST list: \n");
3338
3339 for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3340 {
3341 fprintf (file, " Pattern (%3d): ", ptr->index);
3342
3343 print_rtl (file, ptr->pattern);
3344
3345 fprintf (file, "\n Loads : ");
3346
3347 if (ptr->loads)
3348 print_rtl (file, ptr->loads);
3349 else
3350 fprintf (file, "(nil)");
3351
3352 fprintf (file, "\n Stores : ");
3353
3354 if (ptr->stores)
3355 print_rtl (file, ptr->stores);
3356 else
3357 fprintf (file, "(nil)");
3358
3359 fprintf (file, "\n\n");
3360 }
3361
3362 fprintf (file, "\n");
3363 }
3364
3365 /* Return the ldst list entry for X if one exists and is valid, else NULL. */
3366
3367 static struct ls_expr *
3368 find_rtx_in_ldst (rtx x)
3369 {
3370 struct ls_expr e;
3371 void **slot;
3372 if (!pre_ldst_table)
3373 return NULL;
3374 e.pattern = x;
3375 slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
3376 if (!slot || ((struct ls_expr *)*slot)->invalid)
3377 return NULL;
3378 return (struct ls_expr *) *slot;
3379 }
3380 \f
3381 /* Load Motion for loads which only kill themselves. */
3382
3383 /* Return true if x, a MEM, is a simple access with no side effects.
3384 These are the types of loads we consider for the ld_motion list,
3385 otherwise we let the usual aliasing take care of it. */
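/* For instance (the symbol name is made up), a plain
   (mem:SI (symbol_ref "x")) qualifies, whereas a volatile or BLKmode MEM,
   one mentioning the stack pointer, or one that may trap when non-call
   exceptions are enabled does not.  */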
3386
3387 static int
3388 simple_mem (const_rtx x)
3389 {
3390 if (MEM_VOLATILE_P (x))
3391 return 0;
3392
3393 if (GET_MODE (x) == BLKmode)
3394 return 0;
3395
3396 /* If we are handling exceptions, we must be careful with memory references
3397 that may trap. If we are not, the behavior is undefined, so we may just
3398 continue. */
3399 if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3400 return 0;
3401
3402 if (side_effects_p (x))
3403 return 0;
3404
3405 /* Do not consider function arguments passed on stack. */
3406 if (reg_mentioned_p (stack_pointer_rtx, x))
3407 return 0;
3408
3409 if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3410 return 0;
3411
3412 return 1;
3413 }
3414
3415 /* Make sure there isn't a buried reference in this pattern anywhere.
3416 If there is, invalidate the entry for it since we're not capable
3417 of fixing it up just yet. We have to be sure we know about ALL
3418 loads since the aliasing code will allow each entry in the
3419 ld_motion list to not-alias itself. If we miss a load, we will get
3420 the wrong value since gcse might common it and we won't know to
3421 fix it up. */
3422
3423 static void
3424 invalidate_any_buried_refs (rtx x)
3425 {
3426 const char * fmt;
3427 int i, j;
3428 struct ls_expr * ptr;
3429
3430 /* Invalidate it in the list. */
3431 if (MEM_P (x) && simple_mem (x))
3432 {
3433 ptr = ldst_entry (x);
3434 ptr->invalid = 1;
3435 }
3436
3437 /* Recursively process the insn. */
3438 fmt = GET_RTX_FORMAT (GET_CODE (x));
3439
3440 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3441 {
3442 if (fmt[i] == 'e')
3443 invalidate_any_buried_refs (XEXP (x, i));
3444 else if (fmt[i] == 'E')
3445 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3446 invalidate_any_buried_refs (XVECEXP (x, i, j));
3447 }
3448 }
3449
3450 /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple
3451 being defined as MEM loads and stores to symbols, with no side effects
3452 and no registers in the expression. For a MEM destination, we also
3453 check that the insn is still valid if we replace the destination with a
3454 REG, as is done in update_ld_motion_stores. If there are any uses/defs
3455 which don't match these criteria, they are invalidated and trimmed out
3456 later. */
3457
3458 static void
3459 compute_ld_motion_mems (void)
3460 {
3461 struct ls_expr * ptr;
3462 basic_block bb;
3463 rtx insn;
3464
3465 pre_ldst_mems = NULL;
3466 pre_ldst_table
3467 = htab_create (13, pre_ldst_expr_hash, pre_ldst_expr_eq, NULL);
3468
3469 FOR_EACH_BB (bb)
3470 {
3471 FOR_BB_INSNS (bb, insn)
3472 {
3473 if (NONDEBUG_INSN_P (insn))
3474 {
3475 if (GET_CODE (PATTERN (insn)) == SET)
3476 {
3477 rtx src = SET_SRC (PATTERN (insn));
3478 rtx dest = SET_DEST (PATTERN (insn));
3479
3480 /* Check for a simple LOAD... */
3481 if (MEM_P (src) && simple_mem (src))
3482 {
3483 ptr = ldst_entry (src);
3484 if (REG_P (dest))
3485 ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
3486 else
3487 ptr->invalid = 1;
3488 }
3489 else
3490 {
3491 /* Make sure there isn't a buried load somewhere. */
3492 invalidate_any_buried_refs (src);
3493 }
3494
3495 /* Check for stores. Don't worry about aliased ones, they
3496 will block any movement we might do later. We only care
3497 about this exact pattern since those are the only
3498 circumstances in which we will ignore the aliasing info. */
3499 if (MEM_P (dest) && simple_mem (dest))
3500 {
3501 ptr = ldst_entry (dest);
3502
3503 if (! MEM_P (src)
3504 && GET_CODE (src) != ASM_OPERANDS
3505 /* Check for REG manually since want_to_gcse_p
3506 returns 0 for all REGs. */
3507 && can_assign_to_reg_without_clobbers_p (src))
3508 ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
3509 else
3510 ptr->invalid = 1;
3511 }
3512 }
3513 else
3514 invalidate_any_buried_refs (PATTERN (insn));
3515 }
3516 }
3517 }
3518 }
3519
3520 /* Remove any references that have been either invalidated or are not in the
3521 expression list for pre gcse. */
3522
3523 static void
3524 trim_ld_motion_mems (void)
3525 {
3526 struct ls_expr * * last = & pre_ldst_mems;
3527 struct ls_expr * ptr = pre_ldst_mems;
3528
3529 while (ptr != NULL)
3530 {
3531 struct expr * expr;
3532
3533 /* Delete if entry has been made invalid. */
3534 if (! ptr->invalid)
3535 {
3536 /* Delete if we cannot find this mem in the expression list. */
3537 unsigned int hash = ptr->hash_index % expr_hash_table.size;
3538
3539 for (expr = expr_hash_table.table[hash];
3540 expr != NULL;
3541 expr = expr->next_same_hash)
3542 if (expr_equiv_p (expr->expr, ptr->pattern))
3543 break;
3544 }
3545 else
3546 expr = (struct expr *) 0;
3547
3548 if (expr)
3549 {
3550 /* Set the expression field if we are keeping it. */
3551 ptr->expr = expr;
3552 last = & ptr->next;
3553 ptr = ptr->next;
3554 }
3555 else
3556 {
3557 *last = ptr->next;
3558 htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
3559 free_ldst_entry (ptr);
3560 ptr = * last;
3561 }
3562 }
3563
3564 /* Show the world what we've found. */
3565 if (dump_file && pre_ldst_mems != NULL)
3566 print_ldst_list (dump_file);
3567 }
3568
3569 /* This routine will take an expression which we are replacing with
3570 a reaching register, and update any stores that are needed if
3571 that expression is in the ld_motion list. Stores are updated by
3572 copying their SRC to the reaching register, and then storing
3573 the reaching register into the store location. This keeps the
3574 correct value in the reaching register for the loads. */
3575
3576 static void
3577 update_ld_motion_stores (struct expr * expr)
3578 {
3579 struct ls_expr * mem_ptr;
3580
3581 if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
3582 {
3583 /* We can try to find just the REACHED stores, but it shouldn't
3584 matter to set the reaching reg everywhere... some might be
3585 dead and should be eliminated later. */
3586
3587 /* We replace (set mem expr) with (set reg expr) (set mem reg)
3588 where reg is the reaching reg used in the load. We checked in
3589 compute_ld_motion_mems that we can replace (set mem expr) with
3590 (set reg expr) in that insn. */
3591 rtx list = mem_ptr->stores;
3592
3593 for ( ; list != NULL_RTX; list = XEXP (list, 1))
3594 {
3595 rtx insn = XEXP (list, 0);
3596 rtx pat = PATTERN (insn);
3597 rtx src = SET_SRC (pat);
3598 rtx reg = expr->reaching_reg;
3599 rtx copy;
3600
3601 /* If we've already copied it, continue. */
3602 if (expr->reaching_reg == src)
3603 continue;
3604
3605 if (dump_file)
3606 {
3607 fprintf (dump_file, "PRE: store updated with reaching reg ");
3608 print_rtl (dump_file, reg);
3609 fprintf (dump_file, ":\n ");
3610 print_inline_rtx (dump_file, insn, 8);
3611 fprintf (dump_file, "\n");
3612 }
3613
3614 copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
3615 emit_insn_before (copy, insn);
3616 SET_SRC (pat) = reg;
3617 df_insn_rescan (insn);
3618
3619 /* Un-recognize this pattern since it's probably different now. */
3620 INSN_CODE (insn) = -1;
3621 gcse_create_count++;
3622 }
3623 }
3624 }
3625 \f
3626 /* Return true if the graph is too expensive to optimize. PASS is the
3627 optimization about to be performed. */
3628
3629 static bool
3630 is_too_expensive (const char *pass)
3631 {
3632 /* Trying to perform global optimizations on flow graphs which have
3633 a high connectivity will take a long time and is unlikely to be
3634 particularly useful.
3635
3636 In normal circumstances a cfg should have about twice as many
3637 edges as blocks. But we do not want to punish small functions
3638 which have a couple of switch statements. Rather than simply
3639 thresholding the number of blocks, use something with a more
3640 graceful degradation. */
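/* Rough worked example (numbers are illustrative only): with the threshold
   below, a function with 1000 basic blocks is rejected only once it has
   more than 20000 + 4 * 1000 = 24000 edges, i.e. about 24 edges per block,
   far above the ~2 edges per block of a normal cfg. */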
3641 if (n_edges > 20000 + n_basic_blocks * 4)
3642 {
3643 warning (OPT_Wdisabled_optimization,
3644 "%s: %d basic blocks and %d edges/basic block",
3645 pass, n_basic_blocks, n_edges / n_basic_blocks);
3646
3647 return true;
3648 }
3649
3650 /* If allocating memory for the dataflow bitmaps would take up too much
3651 storage it's better just to disable the optimization. */
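/* Back-of-the-envelope estimate (assumption, not exact accounting): each
   block needs a bitmap with one bit per register, rounded up to whole
   SBITMAP_ELT_TYPE words, so the product below is roughly
   n_basic_blocks * max_reg_num () / 8 bytes. For, say, 10000 blocks and
   80000 registers that is on the order of 100 MB. */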
3652 if ((n_basic_blocks
3653 * SBITMAP_SET_SIZE (max_reg_num ())
3654 * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
3655 {
3656 warning (OPT_Wdisabled_optimization,
3657 "%s: %d basic blocks and %d registers",
3658 pass, n_basic_blocks, max_reg_num ());
3659
3660 return true;
3661 }
3662
3663 return false;
3664 }
3665 \f
3666 /* All the passes implemented in this file. Each pass has its
3667 own gate and execute function, and at the end of the file the
3668 pass definitions for passes.c.
3669
3670 We do not construct an accurate cfg in functions which call
3671 setjmp, so none of these passes runs if the function calls
3672 setjmp.
3673 FIXME: Should just handle setjmp via REG_SETJMP notes. */
3674
3675 static bool
3676 gate_rtl_pre (void)
3677 {
3678 return optimize > 0 && flag_gcse
3679 && !cfun->calls_setjmp
3680 && optimize_function_for_speed_p (cfun)
3681 && dbg_cnt (pre);
3682 }
3683
3684 static unsigned int
3685 execute_rtl_pre (void)
3686 {
3687 int changed;
3688 delete_unreachable_blocks ();
3689 df_analyze ();
3690 changed = one_pre_gcse_pass ();
3691 flag_rerun_cse_after_global_opts |= changed;
3692 if (changed)
3693 cleanup_cfg (0);
3694 return 0;
3695 }
3696
3697 static bool
3698 gate_rtl_hoist (void)
3699 {
3700 return optimize > 0 && flag_gcse
3701 && !cfun->calls_setjmp
3702 /* It does not make sense to run code hoisting unless we are optimizing
3703 for code size -- it rarely makes programs faster, and can make them
3704 bigger if we did PRE (when optimizing for space, we don't run PRE). */
3705 && optimize_function_for_size_p (cfun)
3706 && dbg_cnt (hoist);
3707 }
3708
3709 static unsigned int
3710 execute_rtl_hoist (void)
3711 {
3712 int changed;
3713 delete_unreachable_blocks ();
3714 df_analyze ();
3715 changed = one_code_hoisting_pass ();
3716 flag_rerun_cse_after_global_opts |= changed;
3717 if (changed)
3718 cleanup_cfg (0);
3719 return 0;
3720 }
3721
3722 struct rtl_opt_pass pass_rtl_pre =
3723 {
3724 {
3725 RTL_PASS,
3726 "rtl pre", /* name */
3727 gate_rtl_pre, /* gate */
3728 execute_rtl_pre, /* execute */
3729 NULL, /* sub */
3730 NULL, /* next */
3731 0, /* static_pass_number */
3732 TV_PRE, /* tv_id */
3733 PROP_cfglayout, /* properties_required */
3734 0, /* properties_provided */
3735 0, /* properties_destroyed */
3736 0, /* todo_flags_start */
3737 TODO_df_finish | TODO_verify_rtl_sharing |
3738 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
3739 }
3740 };
3741
3742 struct rtl_opt_pass pass_rtl_hoist =
3743 {
3744 {
3745 RTL_PASS,
3746 "hoist", /* name */
3747 gate_rtl_hoist, /* gate */
3748 execute_rtl_hoist, /* execute */
3749 NULL, /* sub */
3750 NULL, /* next */
3751 0, /* static_pass_number */
3752 TV_HOIST, /* tv_id */
3753 PROP_cfglayout, /* properties_required */
3754 0, /* properties_provided */
3755 0, /* properties_destroyed */
3756 0, /* todo_flags_start */
3757 TODO_df_finish | TODO_verify_rtl_sharing |
3758 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
3759 }
3760 };
3761
3762 #include "gt-gcse.h"