1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
7 This file is part of GNU CC.
9 GNU CC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GNU CC is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
25 /* Instruction scheduling pass.
27 This pass implements list scheduling within basic blocks. It is
28 run twice: (1) after flow analysis, but before register allocation,
29 and (2) after register allocation.
31 The first run performs interblock scheduling, moving insns between
32 different blocks in the same "region", and the second runs only
33 basic block scheduling.
35 Interblock motions performed are useful motions and speculative
36 motions, including speculative loads. Motions requiring code
37 duplication are not supported. The identification of motion type
38 and the check for validity of speculative motions requires
39 construction and analysis of the function's control flow graph.
40 The scheduler works as follows:
42 We compute insn priorities based on data dependencies. Flow
43 analysis only creates a fraction of the data-dependencies we must
44 observe: namely, only those dependencies which the combiner can be
45 expected to use. For this pass, we must therefore create the
46 remaining dependencies we need to observe: register dependencies,
47 memory dependencies, dependencies to keep function calls in order,
48 and the dependence between a conditional branch and the setting of
49 condition codes are all dealt with here.
51 The scheduler first traverses the data flow graph, starting with
52 the last instruction, and proceeding to the first, assigning values
53 to insn_priority as it goes. This sorts the instructions
54 topologically by data dependence.
56 Once priorities have been established, we order the insns using
57 list scheduling. This works as follows: starting with a list of
58 all the ready insns, and sorted according to priority number, we
59 schedule the insn from the end of the list by placing its
60 predecessors in the list according to their priority order. We
61 consider this insn scheduled by setting the pointer to the "end" of
62 the list to point to the previous insn. When an insn has no
63 predecessors, we either queue it until sufficient time has elapsed
64 or add it to the ready list. As the instructions are scheduled or
65 when stalls are introduced, the queue advances and dumps insns into
66 the ready list. When all insns down to the lowest priority have
67 been scheduled, the critical path of the basic block has been made
68 as short as possible. The remaining insns are then scheduled in
71 Function unit conflicts are resolved during forward list scheduling
72 by tracking the time when each insn is committed to the schedule
73 and from that, the time the function units it uses must be free.
74 As insns on the ready list are considered for scheduling, those
75 that would result in a blockage of the already committed insns are
76 queued until no blockage will result.
78 The following list shows the order in which we want to break ties
79 among insns in the ready list:
81 1. choose insn with the longest path to end of bb, ties
83 2. choose insn with least contribution to register pressure,
85 3. prefer in-block upon interblock motion, ties broken by
86 4. prefer useful upon speculative motion, ties broken by
87 5. choose insn with largest control flow probability, ties
89 6. choose insn with the least dependences upon the previously
90 scheduled insn, or finally
91 7. choose the insn which has the most insns dependent on it.
92 8. choose insn with lowest UID.
94 Memory references complicate matters. Only if we can be certain
95 that memory references are not part of the data dependency graph
96 (via true, anti, or output dependence), can we move operations past
97 memory references. To first approximation, reads can be done
98 independently, while writes introduce dependencies. Better
99 approximations will yield fewer dependencies.
101 Before reload, an extended analysis of interblock data dependences
102 is required for interblock scheduling. This is performed in
103 compute_block_backward_dependences ().
105 Dependencies set up by memory references are treated in exactly the
106 same way as other dependencies, by using LOG_LINKS backward
107 dependences. LOG_LINKS are translated into INSN_DEPEND forward
108 dependences for the purpose of forward list scheduling.
110 Having optimized the critical path, we may have also unduly
111 extended the lifetimes of some registers. If an operation requires
112 that constants be loaded into registers, it is certainly desirable
113 to load those constants as early as necessary, but no earlier.
114 I.e., it will not do to load up a bunch of registers at the
115 beginning of a basic block only to use them at the end, if they
116 could be loaded later, since this may result in excessive register
119 Note that since branches are never in basic blocks, but only end
120 basic blocks, this pass will not move branches. But that is ok,
121 since we can use GNU's delayed branch scheduling pass to take care
124 Also note that no further optimizations based on algebraic
125 identities are performed, so this pass would be a good one to
126 perform instruction splitting, such as breaking up a multiply
127 instruction into shifts and adds where that is profitable.
129 Given the memory aliasing analysis that this pass should perform,
130 it should be possible to remove redundant stores to memory, and to
131 load values from registers instead of hitting memory.
133 Before reload, speculative insns are moved only if a 'proof' exists
134 that no exception will be caused by this, and if no live registers
135 exist that inhibit the motion (live registers constraints are not
136 represented by data dependence edges).
138 This pass must update information that subsequent passes expect to
139 be correct. Namely: reg_n_refs, reg_n_sets, reg_n_deaths,
140 reg_n_calls_crossed, and reg_live_length. Also, BLOCK_HEAD,
143 The information in the line number notes is carefully retained by
144 this pass. Notes that refer to the starting and ending of
145 exception regions are also carefully retained by this pass. All
146 other NOTE insns are grouped in their same relative order at the
147 beginning of basic blocks and regions that have been scheduled.
149 The main entry point for this pass is schedule_insns(), called for
150 each function. The work of the scheduler is organized in three
151 levels: (1) function level: insns are subject to splitting,
152 control-flow-graph is constructed, regions are computed (after
153 reload, each region is of one block), (2) region level: control
154 flow graph attributes required for interblock scheduling are
155 computed (dominators, reachability, etc.), data dependences and
156 priorities are computed, and (3) block level: insns in the block
157 are actually scheduled. */
164 #include "basic-block.h"
166 #include "function.h"
167 #include "hard-reg-set.h"
169 #include "insn-config.h"
170 #include "insn-attr.h"
175 extern char *reg_known_equiv_p
;
176 extern rtx
*reg_known_value
;
178 #ifdef INSN_SCHEDULING
180 /* target_units bitmask has 1 for each unit in the cpu. It should be
181 possible to compute this variable from the machine description.
182 But currently it is computed by examining the insn list. Since
183 this is only needed for visualization, it seems an acceptable
184 solution. (For understanding the mapping of bits to units, see
185 definition of function_units[] in "insn-attrtab.c".) */
187 static int target_units
= 0;
189 /* issue_rate is the number of insns that can be scheduled in the same
190 machine cycle. It can be defined in the config/mach/mach.h file,
191 otherwise we set it to 1. */
193 static int issue_rate
;
199 /* sched-verbose controls the amount of debugging output the
200 scheduler prints. It is controlled by -fsched-verbose-N:
201 N>0 and no -DSR : the output is directed to stderr.
202 N>=10 will direct the printouts to stderr (regardless of -dSR).
204 N=2: bb's probabilities, detailed ready list info, unit/insn info.
205 N=3: rtl at abort point, control-flow, regions info.
206 N=5: dependences info. */
208 #define MAX_RGN_BLOCKS 10
209 #define MAX_RGN_INSNS 100
211 static int sched_verbose_param
= 0;
212 static int sched_verbose
= 0;
214 /* nr_inter/spec counts interblock/speculative motion for the function. */
215 static int nr_inter
, nr_spec
;
218 /* Debugging file. All printouts are sent to dump, which is always set,
219 either to stderr, or to the dump listing file (-dRS). */
220 static FILE *dump
= 0;
222 /* fix_sched_param() is called from toplev.c upon detection
223 of the -fsched-***-N options. */
226 fix_sched_param (param
, val
)
227 const char *param
, *val
;
229 if (!strcmp (param
, "verbose"))
230 sched_verbose_param
= atoi (val
);
232 warning ("fix_sched_param: unknown param: %s", param
);
235 /* Describe state of dependencies used during sched_analyze phase. */
238 /* The *_insns and *_mems are paired lists. Each pending memory operation
239 will have a pointer to the MEM rtx on one list and a pointer to the
240 containing insn on the other list in the same place in the list. */
242 /* We can't use add_dependence like the old code did, because a single insn
243 may have multiple memory accesses, and hence needs to be on the list
244 once for each memory access. Add_dependence won't let you add an insn
245 to a list more than once. */
247 /* An INSN_LIST containing all insns with pending read operations. */
248 rtx pending_read_insns
;
250 /* An EXPR_LIST containing all MEM rtx's which are pending reads. */
251 rtx pending_read_mems
;
253 /* An INSN_LIST containing all insns with pending write operations. */
254 rtx pending_write_insns
;
256 /* An EXPR_LIST containing all MEM rtx's which are pending writes. */
257 rtx pending_write_mems
;
259 /* Indicates the combined length of the two pending lists. We must prevent
260 these lists from ever growing too large since the number of dependencies
261 produced is at least O(N*N), and execution time is at least O(4*N*N), as
262 a function of the length of these pending lists. */
263 int pending_lists_length
;
265 /* The last insn upon which all memory references must depend.
266 This is an insn which flushed the pending lists, creating a dependency
267 between it and all previously pending memory references. This creates
268 a barrier (or a checkpoint) which no memory reference is allowed to cross.
270 This includes all non constant CALL_INSNs. When we do interprocedural
271 alias analysis, this restriction can be relaxed.
272 This may also be an INSN that writes memory if the pending lists grow
274 rtx last_pending_memory_flush
;
276 /* The last function call we have seen. All hard regs, and, of course,
277 the last function call, must depend on this. */
278 rtx last_function_call
;
280 /* The LOG_LINKS field of this is a list of insns which use a pseudo register
281 that does not already cross a call. We create dependencies between each
282 of those insn and the next call insn, to ensure that they won't cross a call
283 after scheduling is done. */
284 rtx sched_before_next_call
;
286 /* Element N is the next insn that sets (hard or pseudo) register
287 N within the current basic block; or zero, if there is no
288 such insn. Needed for new registers which may be introduced
289 by splitting insns. */
292 rtx
*reg_last_clobbers
;
295 static regset reg_pending_sets
;
296 static regset reg_pending_clobbers
;
297 static int reg_pending_sets_all
;
299 /* To speed up the test for duplicate dependency links we keep a record
300 of true dependencies created by add_dependence when the average number
301 of instructions in a basic block is very large.
303 Studies have shown that there is typically around 5 instructions between
304 branches for typical C code. So we can make a guess that the average
305 basic block is approximately 5 instructions long; we will choose 100X
306 the average size as a very large basic block.
308 Each insn has an associated bitmap for its dependencies. Each bitmap
309 has enough entries to represent a dependency on any other insn in the
311 static sbitmap
*true_dependency_cache
;
313 /* Indexed by INSN_UID, the collection of all data associated with
314 a single instruction. */
316 struct haifa_insn_data
318 /* A list of insns which depend on the instruction. Unlike LOG_LINKS,
319 it represents forward dependencies. */
322 /* The line number note in effect for each insn. For line number
323 notes, this indicates whether the note may be reused. */
326 /* Logical uid gives the original ordering of the insns. */
329 /* A priority for each insn. */
332 /* The number of incoming edges in the forward dependency graph.
333 As scheduling proceeds, counts are decreased. An insn moves to
334 the ready queue when its counter reaches zero. */
337 /* An encoding of the blockage range function. Both unit and range
339 unsigned int blockage
;
341 /* Number of instructions referring to this insn. */
344 /* The minimum clock tick at which the insn becomes ready. This is
345 used to note timing constraints for the insns in the pending list. */
350 /* An encoding of the function units used. */
353 /* This weight is an estimation of the insn's contribution to
354 register pressure. */
357 /* Some insns (e.g. call) are not allowed to move across blocks. */
358 unsigned int cant_move
: 1;
360 /* Set if there's DEF-USE dependence between some speculatively
361 moved load insn and this one. */
362 unsigned int fed_by_spec_load
: 1;
363 unsigned int is_load_insn
: 1;
366 static struct haifa_insn_data
*h_i_d
;
368 #define INSN_DEPEND(INSN) (h_i_d[INSN_UID (INSN)].depend)
369 #define INSN_LUID(INSN) (h_i_d[INSN_UID (INSN)].luid)
370 #define INSN_PRIORITY(INSN) (h_i_d[INSN_UID (INSN)].priority)
371 #define INSN_DEP_COUNT(INSN) (h_i_d[INSN_UID (INSN)].dep_count)
372 #define INSN_COST(INSN) (h_i_d[INSN_UID (INSN)].cost)
373 #define INSN_UNIT(INSN) (h_i_d[INSN_UID (INSN)].units)
374 #define INSN_REG_WEIGHT(INSN) (h_i_d[INSN_UID (INSN)].reg_weight)
376 #define INSN_BLOCKAGE(INSN) (h_i_d[INSN_UID (INSN)].blockage)
378 #define BLOCKAGE_MASK ((1 << BLOCKAGE_BITS) - 1)
379 #define ENCODE_BLOCKAGE(U, R) \
380 (((U) << BLOCKAGE_BITS \
381 | MIN_BLOCKAGE_COST (R)) << BLOCKAGE_BITS \
382 | MAX_BLOCKAGE_COST (R))
383 #define UNIT_BLOCKED(B) ((B) >> (2 * BLOCKAGE_BITS))
384 #define BLOCKAGE_RANGE(B) \
385 (((((B) >> BLOCKAGE_BITS) & BLOCKAGE_MASK) << (HOST_BITS_PER_INT / 2)) \
386 | ((B) & BLOCKAGE_MASK))
388 /* Encodings of the `<name>_unit_blockage_range' function. */
389 #define MIN_BLOCKAGE_COST(R) ((R) >> (HOST_BITS_PER_INT / 2))
390 #define MAX_BLOCKAGE_COST(R) ((R) & ((1 << (HOST_BITS_PER_INT / 2)) - 1))
392 #define DONE_PRIORITY -1
393 #define MAX_PRIORITY 0x7fffffff
394 #define TAIL_PRIORITY 0x7ffffffe
395 #define LAUNCH_PRIORITY 0x7f000001
396 #define DONE_PRIORITY_P(INSN) (INSN_PRIORITY (INSN) < 0)
397 #define LOW_PRIORITY_P(INSN) ((INSN_PRIORITY (INSN) & 0x7f000000) == 0)
399 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
400 #define LINE_NOTE(INSN) (h_i_d[INSN_UID (INSN)].line_note)
401 #define INSN_TICK(INSN) (h_i_d[INSN_UID (INSN)].tick)
402 #define CANT_MOVE(insn) (h_i_d[INSN_UID (insn)].cant_move)
403 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
404 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
406 /* Vector indexed by basic block number giving the starting line-number
407 for each basic block. */
408 static rtx
*line_note_head
;
410 /* List of important notes we must keep around. This is a pointer to the
411 last element in the list. */
412 static rtx note_list
;
416 /* An instruction is ready to be scheduled when all insns preceding it
417 have already been scheduled. It is important to ensure that all
418 insns which use its result will not be executed until its result
419 has been computed. An insn is maintained in one of four structures:
421 (P) the "Pending" set of insns which cannot be scheduled until
422 their dependencies have been satisfied.
423 (Q) the "Queued" set of insns that can be scheduled when sufficient
425 (R) the "Ready" list of unscheduled, uncommitted insns.
426 (S) the "Scheduled" list of insns.
428 Initially, all insns are either "Pending" or "Ready" depending on
429 whether their dependencies are satisfied.
431 Insns move from the "Ready" list to the "Scheduled" list as they
432 are committed to the schedule. As this occurs, the insns in the
433 "Pending" list have their dependencies satisfied and move to either
434 the "Ready" list or the "Queued" set depending on whether
435 sufficient time has passed to make them ready. As time passes,
436 insns move from the "Queued" set to the "Ready" list. Insns may
437 move from the "Ready" list to the "Queued" set if they are blocked
438 due to a function unit conflict.
440 The "Pending" list (P) are the insns in the INSN_DEPEND of the unscheduled
441 insns, i.e., those that are ready, queued, and pending.
442 The "Queued" set (Q) is implemented by the variable `insn_queue'.
443 The "Ready" list (R) is implemented by the variables `ready' and
445 The "Scheduled" list (S) is the new insn chain built by this pass.
447 The transition (R->S) is implemented in the scheduling loop in
448 `schedule_block' when the best insn to schedule is chosen.
449 The transition (R->Q) is implemented in `queue_insn' when an
450 insn is found to have a function unit conflict with the already
452 The transitions (P->R and P->Q) are implemented in `schedule_insn' as
453 insns move from the ready list to the scheduled list.
454 The transition (Q->R) is implemented in 'queue_to_insn' as time
455 passes or stalls are introduced. */
457 /* Implement a circular buffer to delay instructions until sufficient
458 time has passed. INSN_QUEUE_SIZE is a power of two larger than
459 MAX_BLOCKAGE and MAX_READY_COST computed by genattr.c. This is the
460 longest time an insn may be queued. */
461 static rtx insn_queue
[INSN_QUEUE_SIZE
];
462 static int q_ptr
= 0;
463 static int q_size
= 0;
464 #define NEXT_Q(X) (((X)+1) & (INSN_QUEUE_SIZE-1))
465 /* Queue slot C cycles after slot X, wrapping around the circular buffer.
   Parenthesize C so expressions like NEXT_Q_AFTER (q_ptr, a + b) group
   correctly inside the macro expansion.  */
465 #define NEXT_Q_AFTER(X, C) (((X)+(C)) & (INSN_QUEUE_SIZE-1))
467 /* Forward declarations. */
468 static void add_dependence
PARAMS ((rtx
, rtx
, enum reg_note
));
470 static void remove_dependence
PARAMS ((rtx
, rtx
));
472 static rtx find_insn_list
PARAMS ((rtx
, rtx
));
473 static int insn_unit
PARAMS ((rtx
));
474 static unsigned int blockage_range
PARAMS ((int, rtx
));
475 static void clear_units
PARAMS ((void));
476 static int actual_hazard_this_instance
PARAMS ((int, int, rtx
, int, int));
477 static void schedule_unit
PARAMS ((int, rtx
, int));
478 static int actual_hazard
PARAMS ((int, rtx
, int, int));
479 static int potential_hazard
PARAMS ((int, rtx
, int));
480 static int insn_cost
PARAMS ((rtx
, rtx
, rtx
));
481 static int priority
PARAMS ((rtx
));
482 static void free_pending_lists
PARAMS ((void));
483 static void add_insn_mem_dependence
PARAMS ((struct deps
*, rtx
*, rtx
*, rtx
,
485 static void flush_pending_lists
PARAMS ((struct deps
*, rtx
, int));
486 static void sched_analyze_1
PARAMS ((struct deps
*, rtx
, rtx
));
487 static void sched_analyze_2
PARAMS ((struct deps
*, rtx
, rtx
));
488 static void sched_analyze_insn
PARAMS ((struct deps
*, rtx
, rtx
, rtx
));
489 static void sched_analyze
PARAMS ((struct deps
*, rtx
, rtx
));
490 static int rank_for_schedule
PARAMS ((const PTR
, const PTR
));
491 static void swap_sort
PARAMS ((rtx
*, int));
492 static void queue_insn
PARAMS ((rtx
, int));
493 static int schedule_insn
PARAMS ((rtx
, rtx
*, int, int));
494 static void find_insn_reg_weight
PARAMS ((int));
495 static int schedule_block
PARAMS ((int, int));
496 static char *safe_concat
PARAMS ((char *, char *, const char *));
497 static int insn_issue_delay
PARAMS ((rtx
));
498 static void adjust_priority
PARAMS ((rtx
));
500 /* Control flow graph edges are kept in circular lists. */
509 static haifa_edge
*edge_table
;
511 #define NEXT_IN(edge) (edge_table[edge].next_in)
512 #define NEXT_OUT(edge) (edge_table[edge].next_out)
513 #define FROM_BLOCK(edge) (edge_table[edge].from_block)
514 #define TO_BLOCK(edge) (edge_table[edge].to_block)
516 /* Number of edges in the control flow graph. (In fact, larger than
517 that by 1, since edge 0 is unused.) */
520 /* Circular list of incoming/outgoing edges of a block. */
521 static int *in_edges
;
522 static int *out_edges
;
524 #define IN_EDGES(block) (in_edges[block])
525 #define OUT_EDGES(block) (out_edges[block])
529 static int is_cfg_nonregular
PARAMS ((void));
530 static int build_control_flow
PARAMS ((struct edge_list
*));
531 static void new_edge
PARAMS ((int, int));
534 /* A region is the main entity for interblock scheduling: insns
535 are allowed to move between blocks in the same region, along
536 control flow graph edges, in the 'up' direction. */
539 int rgn_nr_blocks
; /* Number of blocks in region. */
540 int rgn_blocks
; /* cblocks in the region (actually index in rgn_bb_table). */
544 /* Number of regions in the procedure. */
545 static int nr_regions
;
547 /* Table of region descriptions. */
548 static region
*rgn_table
;
550 /* Array of lists of regions' blocks. */
551 static int *rgn_bb_table
;
553 /* Topological order of blocks in the region (if b2 is reachable from
554 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
555 always referred to by either block or b, while its topological
556 order name (in the region) is referred to by bb. */
557 static int *block_to_bb
;
559 /* The number of the region containing a block. */
560 static int *containing_rgn
;
562 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
563 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
564 #define BLOCK_TO_BB(block) (block_to_bb[block])
565 #define CONTAINING_RGN(block) (containing_rgn[block])
567 void debug_regions
PARAMS ((void));
568 static void find_single_block_region
PARAMS ((void));
569 static void find_rgns
PARAMS ((struct edge_list
*, sbitmap
*));
570 static int too_large
PARAMS ((int, int *, int *));
572 extern void debug_live
PARAMS ((int, int));
574 /* Blocks of the current region being scheduled. */
575 static int current_nr_blocks
;
576 static int current_blocks
;
578 /* The mapping from bb to block. */
579 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
582 /* Bit vectors and bitset operations are needed for computations on
583 the control flow graph. */
585 typedef unsigned HOST_WIDE_INT
*bitset
;
588 int *first_member
; /* Pointer to the list start in bitlst_table. */
589 int nr_members
; /* The number of members of the bit list. */
593 static int bitlst_table_last
;
594 static int bitlst_table_size
;
595 static int *bitlst_table
;
597 static char bitset_member
PARAMS ((bitset
, int, int));
598 static void extract_bitlst
PARAMS ((bitset
, int, int, bitlst
*));
600 /* Target info declarations.
602 The block currently being scheduled is referred to as the "target" block,
603 while other blocks in the region from which insns can be moved to the
604 target are called "source" blocks. The candidate structure holds info
605 about such sources: are they valid? Speculative? Etc. */
606 typedef bitlst bblst
;
617 static candidate
*candidate_table
;
619 /* A speculative motion requires checking live information on the path
620 from 'source' to 'target'. The split blocks are those to be checked.
621 After a speculative motion, live information should be modified in
624 Lists of split and update blocks for each candidate of the current
625 target are in array bblst_table. */
626 static int *bblst_table
, bblst_size
, bblst_last
;
628 #define IS_VALID(src) ( candidate_table[src].is_valid )
629 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
630 #define SRC_PROB(src) ( candidate_table[src].src_prob )
632 /* The bb being currently scheduled. */
633 static int target_bb
;
636 typedef bitlst edgelst
;
638 /* Target info functions. */
639 static void split_edges
PARAMS ((int, int, edgelst
*));
640 static void compute_trg_info
PARAMS ((int));
641 void debug_candidate
PARAMS ((int));
642 void debug_candidates
PARAMS ((int));
645 /* Bit-set of bbs, where bit 'i' stands for bb 'i'. */
646 typedef bitset bbset
;
648 /* Number of words of the bbset. */
649 static int bbset_size
;
651 /* Dominators array: dom[i] contains the bbset of dominators of
652 bb i in the region. */
655 /* bb 0 is the only region entry. */
656 #define IS_RGN_ENTRY(bb) (!bb)
658 /* Is bb_src dominated by bb_trg. */
659 #define IS_DOMINATED(bb_src, bb_trg) \
660 ( bitset_member (dom[bb_src], bb_trg, bbset_size) )
662 /* Probability: Prob[i] is a float in [0, 1] which is the probability
663 of bb i relative to the region entry. */
666 /* The probability of bb_src, relative to bb_trg. Note, that while the
667 'prob[bb]' is a float in [0, 1], this macro returns an integer
669 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
672 /* Bit-set of edges, where bit i stands for edge i. */
673 typedef bitset edgeset
;
675 /* Number of edges in the region. */
676 static int rgn_nr_edges
;
678 /* Array of size rgn_nr_edges. */
679 static int *rgn_edges
;
681 /* Number of words in an edgeset. */
682 static int edgeset_size
;
684 /* Number of bits in an edgeset. */
685 static int edgeset_bitsize
;
687 /* Mapping from each edge in the graph to its number in the rgn. */
688 static int *edge_to_bit
;
689 #define EDGE_TO_BIT(edge) (edge_to_bit[edge])
691 /* The split edges of a source bb is different for each target
692 bb. In order to compute this efficiently, the 'potential-split edges'
693 are computed for each bb prior to scheduling a region. This is actually
694 the split edges of each bb relative to the region entry.
696 pot_split[bb] is the set of potential split edges of bb. */
697 static edgeset
*pot_split
;
699 /* For every bb, a set of its ancestor edges. */
700 static edgeset
*ancestor_edges
;
702 static void compute_dom_prob_ps
PARAMS ((int));
704 #define ABS_VALUE(x) (((x)<0)?(-(x)):(x))
705 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
706 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
707 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
709 /* Parameters affecting the decision of rank_for_schedule(). */
710 #define MIN_DIFF_PRIORITY 2
711 #define MIN_PROBABILITY 40
712 #define MIN_PROB_DIFF 10
714 /* Speculative scheduling functions. */
715 static int check_live_1
PARAMS ((int, rtx
));
716 static void update_live_1
PARAMS ((int, rtx
));
717 static int check_live
PARAMS ((rtx
, int));
718 static void update_live
PARAMS ((rtx
, int));
719 static void set_spec_fed
PARAMS ((rtx
));
720 static int is_pfree
PARAMS ((rtx
, int, int));
721 static int find_conditional_protection
PARAMS ((rtx
, int));
722 static int is_conditionally_protected
PARAMS ((rtx
, int, int));
723 static int may_trap_exp
PARAMS ((rtx
, int));
724 static int haifa_classify_insn
PARAMS ((rtx
));
725 static int is_prisky
PARAMS ((rtx
, int, int));
726 static int is_exception_free
PARAMS ((rtx
, int, int));
728 static char find_insn_mem_list
PARAMS ((rtx
, rtx
, rtx
, rtx
));
729 static void compute_block_forward_dependences
PARAMS ((int));
730 static void add_branch_dependences
PARAMS ((rtx
, rtx
));
731 static void compute_block_backward_dependences
PARAMS ((int));
732 void debug_dependencies
PARAMS ((void));
734 /* Notes handling mechanism:
735 =========================
736 Generally, NOTES are saved before scheduling and restored after scheduling.
737 The scheduler distinguishes between three types of notes:
739 (1) LINE_NUMBER notes, generated and used for debugging. Here,
740 before scheduling a region, a pointer to the LINE_NUMBER note is
741 added to the insn following it (in save_line_notes()), and the note
742 is removed (in rm_line_notes() and unlink_line_notes()). After
743 scheduling the region, this pointer is used for regeneration of
744 the LINE_NUMBER note (in restore_line_notes()).
746 (2) LOOP_BEGIN, LOOP_END, SETJMP, EHREGION_BEG, EHREGION_END notes:
747 Before scheduling a region, a pointer to the note is added to the insn
748 that follows or precedes it. (This happens as part of the data dependence
749 computation). After scheduling an insn, the pointer contained in it is
750 used for regenerating the corresponding note (in reemit_notes).
752 (3) All other notes (e.g. INSN_DELETED): Before scheduling a block,
753 these notes are put in a list (in rm_other_notes() and
754 unlink_other_notes ()). After scheduling the block, these notes are
755 inserted at the beginning of the block (in schedule_block()). */
757 static rtx unlink_other_notes
PARAMS ((rtx
, rtx
));
758 static rtx unlink_line_notes
PARAMS ((rtx
, rtx
));
759 static void rm_line_notes
PARAMS ((int));
760 static void save_line_notes
PARAMS ((int));
761 static void restore_line_notes
PARAMS ((int));
762 static void rm_redundant_line_notes
PARAMS ((void));
763 static void rm_other_notes
PARAMS ((rtx
, rtx
));
764 static rtx reemit_notes
PARAMS ((rtx
, rtx
));
766 static void get_block_head_tail
PARAMS ((int, rtx
*, rtx
*));
767 static void get_bb_head_tail
PARAMS ((int, rtx
*, rtx
*));
769 static int queue_to_ready
PARAMS ((rtx
[], int));
771 static void debug_ready_list
PARAMS ((rtx
[], int));
772 static void init_target_units
PARAMS ((void));
773 static void insn_print_units
PARAMS ((rtx
));
774 static int get_visual_tbl_length
PARAMS ((void));
775 static void init_block_visualization
PARAMS ((void));
776 static void print_block_visualization
PARAMS ((int, const char *));
777 static void visualize_scheduled_insns
PARAMS ((int, int));
778 static void visualize_no_unit
PARAMS ((rtx
));
779 static void visualize_stall_cycles
PARAMS ((int, int));
780 static void print_exp
PARAMS ((char *, rtx
, int));
781 static void print_value
PARAMS ((char *, rtx
, int));
782 static void print_pattern
PARAMS ((char *, rtx
, int));
783 static void print_insn
PARAMS ((char *, rtx
, int));
784 void debug_reg_vector
PARAMS ((regset
));
786 static rtx move_insn1
PARAMS ((rtx
, rtx
));
787 static rtx move_insn
PARAMS ((rtx
, rtx
));
788 static rtx group_leader
PARAMS ((rtx
));
789 static int set_priorities
PARAMS ((int));
790 static void init_deps
PARAMS ((struct deps
*));
791 static void schedule_region
PARAMS ((int));
792 static void propagate_deps
PARAMS ((int, struct deps
*, int));
794 #endif /* INSN_SCHEDULING */
796 #define SIZE_FOR_MODE(X) (GET_MODE_SIZE (GET_MODE (X)))
798 /* Add ELEM wrapped in an INSN_LIST with reg note kind DEP_TYPE to the
799 LOG_LINKS of INSN, if not already there. DEP_TYPE indicates the type
800 of dependence that this link represents. */
803 add_dependence (insn
, elem
, dep_type
)
806 enum reg_note dep_type
;
810 /* Don't depend an insn on itself. */
814 /* We can get a dependency on deleted insns due to optimizations in
815 the register allocation and reloading or due to splitting. Any
816 such dependency is useless and can be ignored. */
817 if (GET_CODE (elem
) == NOTE
)
820 /* If elem is part of a sequence that must be scheduled together, then
821 make the dependence point to the last insn of the sequence.
822 When HAVE_cc0, it is possible for NOTEs to exist between users and
823 setters of the condition codes, so we must skip past notes here.
824 Otherwise, NOTEs are impossible here. */
826 next
= NEXT_INSN (elem
);
829 while (next
&& GET_CODE (next
) == NOTE
)
830 next
= NEXT_INSN (next
);
833 if (next
&& SCHED_GROUP_P (next
)
834 && GET_CODE (next
) != CODE_LABEL
)
836 /* Notes will never intervene here though, so don't bother checking
838 /* We must reject CODE_LABELs, so that we don't get confused by one
839 that has LABEL_PRESERVE_P set, which is represented by the same
840 bit in the rtl as SCHED_GROUP_P. A CODE_LABEL can never be
842 while (NEXT_INSN (next
) && SCHED_GROUP_P (NEXT_INSN (next
))
843 && GET_CODE (NEXT_INSN (next
)) != CODE_LABEL
)
844 next
= NEXT_INSN (next
);
846 /* Again, don't depend an insn on itself. */
850 /* Make the dependence to NEXT, the last insn of the group, instead
851 of the original ELEM. */
855 #ifdef INSN_SCHEDULING
856 /* (This code is guarded by INSN_SCHEDULING, otherwise INSN_BB is undefined.)
857 No need for interblock dependences with calls, since
858 calls are not moved between blocks. Note: the edge where
859 elem is a CALL is still required. */
860 if (GET_CODE (insn
) == CALL_INSN
861 && (INSN_BB (elem
) != INSN_BB (insn
)))
865 /* If we already have a true dependency for ELEM, then we do not
866 need to do anything. Avoiding the list walk below can cut
867 compile times dramatically for some code. */
868 if (true_dependency_cache
869 && TEST_BIT (true_dependency_cache
[INSN_LUID (insn
)], INSN_LUID (elem
)))
873 /* Check that we don't already have this dependence. */
874 for (link
= LOG_LINKS (insn
); link
; link
= XEXP (link
, 1))
875 if (XEXP (link
, 0) == elem
)
877 /* If this is a more restrictive type of dependence than the existing
878 one, then change the existing dependence to this type. */
879 if ((int) dep_type
< (int) REG_NOTE_KIND (link
))
880 PUT_REG_NOTE_KIND (link
, dep_type
);
882 #ifdef INSN_SCHEDULING
883 /* If we are adding a true dependency to INSN's LOG_LINKs, then
884 note that in the bitmap cache of true dependency information. */
885 if ((int)dep_type
== 0 && true_dependency_cache
)
886 SET_BIT (true_dependency_cache
[INSN_LUID (insn
)], INSN_LUID (elem
));
890 /* Might want to check one level of transitivity to save conses. */
892 link
= alloc_INSN_LIST (elem
, LOG_LINKS (insn
));
893 LOG_LINKS (insn
) = link
;
895 /* Insn dependency, not data dependency. */
896 PUT_REG_NOTE_KIND (link
, dep_type
);
898 #ifdef INSN_SCHEDULING
899 /* If we are adding a true dependency to INSN's LOG_LINKs, then
900 note that in the bitmap cache of true dependency information. */
901 if ((int)dep_type
== 0 && true_dependency_cache
)
902 SET_BIT (true_dependency_cache
[INSN_LUID (insn
)], INSN_LUID (elem
));
907 /* Remove ELEM wrapped in an INSN_LIST from the LOG_LINKS
908 of INSN. Abort if not found. */
911 remove_dependence (insn
, elem
)
915 rtx prev
, link
, next
;
918 for (prev
= 0, link
= LOG_LINKS (insn
); link
; link
= next
)
920 next
= XEXP (link
, 1);
921 if (XEXP (link
, 0) == elem
)
924 XEXP (prev
, 1) = next
;
926 LOG_LINKS (insn
) = next
;
928 #ifdef INSN_SCHEDULING
929 /* If we are removing a true dependency from the LOG_LINKS list,
930 make sure to remove it from the cache too. */
931 if (REG_NOTE_KIND (link
) == 0 && true_dependency_cache
)
932 RESET_BIT (true_dependency_cache
[INSN_LUID (insn
)],
936 free_INSN_LIST_node (link
);
948 #endif /* HAVE_cc0 */
950 #ifndef INSN_SCHEDULING
952 schedule_insns (dump_file
)
953 FILE *dump_file ATTRIBUTE_UNUSED
;
962 #define HAIFA_INLINE __inline
965 /* Computation of memory dependencies. */
967 /* Data structures for the computation of data dependences in a region. We
968 keep one mem_deps structure for every basic block. Before analyzing the
969 data dependences for a bb, its variables are initialized as a function of
970 the variables of its predecessors. When the analysis for a bb completes,
971 we save the contents to the corresponding bb_mem_deps[bb] variable. */
973 static struct deps
*bb_deps
;
975 /* Pointer to the last instruction scheduled. Used by rank_for_schedule,
976 so that insns independent of the last scheduled insn will be preferred
977 over dependent instructions. */
979 static rtx last_scheduled_insn
;
981 /* Functions for construction of the control flow graph. */
983 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
985 We decide not to build the control flow graph if there is possibly more
986 than one entry to the function, if computed branches exist, or if we
987 have nonlocal gotos. */
996 /* If we have a label that could be the target of a nonlocal goto, then
997 the cfg is not well structured. */
998 if (nonlocal_goto_handler_labels
)
1001 /* If we have any forced labels, then the cfg is not well structured. */
1005 /* If this function has a computed jump, then we consider the cfg
1006 not well structured. */
1007 if (current_function_has_computed_jump
)
1010 /* If we have exception handlers, then we consider the cfg not well
1011 structured. ?!? We should be able to handle this now that flow.c
1012 computes an accurate cfg for EH. */
1013 if (exception_handler_labels
)
1016 /* If we have non-jumping insns which refer to labels, then we consider
1017 the cfg not well structured. */
1018 /* Check for labels referred to other than by jumps. */
1019 for (b
= 0; b
< n_basic_blocks
; b
++)
1020 for (insn
= BLOCK_HEAD (b
);; insn
= NEXT_INSN (insn
))
1022 code
= GET_CODE (insn
);
1023 if (GET_RTX_CLASS (code
) == 'i')
1027 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
1028 if (REG_NOTE_KIND (note
) == REG_LABEL
)
1032 if (insn
== BLOCK_END (b
))
1036 /* All the tests passed. Consider the cfg well structured. */
1040 /* Build the control flow graph and set nr_edges.
1042 Instead of trying to build a cfg ourselves, we rely on flow to
1043 do it for us. Stamp out useless code (and bug) duplication.
1045 Return nonzero if an irregularity in the cfg is found which would
1046 prevent cross block scheduling. */
1049 build_control_flow (edge_list
)
1050 struct edge_list
*edge_list
;
1052 int i
, unreachable
, num_edges
;
1054 /* This already accounts for entry/exit edges. */
1055 num_edges
= NUM_EDGES (edge_list
);
1057 /* Unreachable loops with more than one basic block are detected
1058 during the DFS traversal in find_rgns.
1060 Unreachable loops with a single block are detected here. This
1061 test is redundant with the one in find_rgns, but it's much
1062 cheaper to go ahead and catch the trivial case here. */
1064 for (i
= 0; i
< n_basic_blocks
; i
++)
1066 basic_block b
= BASIC_BLOCK (i
);
1069 || (b
->pred
->src
== b
1070 && b
->pred
->pred_next
== NULL
))
1074 /* ??? We can kill these soon. */
1075 in_edges
= (int *) xcalloc (n_basic_blocks
, sizeof (int));
1076 out_edges
= (int *) xcalloc (n_basic_blocks
, sizeof (int));
1077 edge_table
= (haifa_edge
*) xcalloc (num_edges
, sizeof (haifa_edge
));
1080 for (i
= 0; i
< num_edges
; i
++)
1082 edge e
= INDEX_EDGE (edge_list
, i
);
1084 if (e
->dest
!= EXIT_BLOCK_PTR
1085 && e
->src
!= ENTRY_BLOCK_PTR
)
1086 new_edge (e
->src
->index
, e
->dest
->index
);
1089 /* Increment by 1, since edge 0 is unused. */
1096 /* Record an edge in the control flow graph from SOURCE to TARGET.
1098 In theory, this is redundant with the s_succs computed above, but
1099 we have not converted all of haifa to use information from the
1103 new_edge (source
, target
)
1107 int curr_edge
, fst_edge
;
1109 /* Check for duplicates. */
1110 fst_edge
= curr_edge
= OUT_EDGES (source
);
1113 if (FROM_BLOCK (curr_edge
) == source
1114 && TO_BLOCK (curr_edge
) == target
)
1119 curr_edge
= NEXT_OUT (curr_edge
);
1121 if (fst_edge
== curr_edge
)
1127 FROM_BLOCK (e
) = source
;
1128 TO_BLOCK (e
) = target
;
1130 if (OUT_EDGES (source
))
1132 next_edge
= NEXT_OUT (OUT_EDGES (source
));
1133 NEXT_OUT (OUT_EDGES (source
)) = e
;
1134 NEXT_OUT (e
) = next_edge
;
1138 OUT_EDGES (source
) = e
;
1142 if (IN_EDGES (target
))
1144 next_edge
= NEXT_IN (IN_EDGES (target
));
1145 NEXT_IN (IN_EDGES (target
)) = e
;
1146 NEXT_IN (e
) = next_edge
;
1150 IN_EDGES (target
) = e
;
1156 /* BITSET macros for operations on the control flow graph. */
1158 /* Compute bitwise union of two bitsets. */
1159 #define BITSET_UNION(set1, set2, len) \
1160 do { register bitset tp = set1, sp = set2; \
1162 for (i = 0; i < len; i++) \
1163 *(tp++) |= *(sp++); } while (0)
1165 /* Compute bitwise intersection of two bitsets. */
1166 #define BITSET_INTER(set1, set2, len) \
1167 do { register bitset tp = set1, sp = set2; \
1169 for (i = 0; i < len; i++) \
1170 *(tp++) &= *(sp++); } while (0)
1172 /* Compute bitwise difference of two bitsets. */
1173 #define BITSET_DIFFER(set1, set2, len) \
1174 do { register bitset tp = set1, sp = set2; \
1176 for (i = 0; i < len; i++) \
1177 *(tp++) &= ~*(sp++); } while (0)
1179 /* Inverts every bit of bitset 'set'. */
1180 #define BITSET_INVERT(set, len) \
1181 do { register bitset tmpset = set; \
1183 for (i = 0; i < len; i++, tmpset++) \
1184 *tmpset = ~*tmpset; } while (0)
1186 /* Turn on the index'th bit in bitset set. */
1187 #define BITSET_ADD(set, index, len) \
1189 if (index >= HOST_BITS_PER_WIDE_INT * len) \
1192 set[index/HOST_BITS_PER_WIDE_INT] |= \
1193 1 << (index % HOST_BITS_PER_WIDE_INT); \
1196 /* Turn off the index'th bit in set. */
1197 #define BITSET_REMOVE(set, index, len) \
1199 if (index >= HOST_BITS_PER_WIDE_INT * len) \
1202 set[index/HOST_BITS_PER_WIDE_INT] &= \
1203 ~(1 << (index%HOST_BITS_PER_WIDE_INT)); \
1207 /* Check if the index'th bit in bitset set is on. */
1210 bitset_member (set
, index
, len
)
1214 if (index
>= HOST_BITS_PER_WIDE_INT
* len
)
1216 return (set
[index
/ HOST_BITS_PER_WIDE_INT
] &
1217 1 << (index
% HOST_BITS_PER_WIDE_INT
)) ? 1 : 0;
1221 /* Translate a bit-set SET to a list BL of the bit-set members. */
1224 extract_bitlst (set
, len
, bitlen
, bl
)
1231 unsigned HOST_WIDE_INT word
;
1233 /* bblst table space is reused in each call to extract_bitlst. */
1234 bitlst_table_last
= 0;
1236 bl
->first_member
= &bitlst_table
[bitlst_table_last
];
1239 /* Iterate over each word in the bitset. */
1240 for (i
= 0; i
< len
; i
++)
1243 offset
= i
* HOST_BITS_PER_WIDE_INT
;
1245 /* Iterate over each bit in the word, but do not
1246 go beyond the end of the defined bits. */
1247 for (j
= 0; offset
< bitlen
&& word
; j
++)
1251 bitlst_table
[bitlst_table_last
++] = offset
;
1262 /* Functions for the construction of regions. */
1264 /* Print the regions, for debugging purposes. Callable from debugger. */
1271 fprintf (dump
, "\n;; ------------ REGIONS ----------\n\n");
1272 for (rgn
= 0; rgn
< nr_regions
; rgn
++)
1274 fprintf (dump
, ";;\trgn %d nr_blocks %d:\n", rgn
,
1275 rgn_table
[rgn
].rgn_nr_blocks
);
1276 fprintf (dump
, ";;\tbb/block: ");
1278 for (bb
= 0; bb
< rgn_table
[rgn
].rgn_nr_blocks
; bb
++)
1280 current_blocks
= RGN_BLOCKS (rgn
);
1282 if (bb
!= BLOCK_TO_BB (BB_TO_BLOCK (bb
)))
1285 fprintf (dump
, " %d/%d ", bb
, BB_TO_BLOCK (bb
));
1288 fprintf (dump
, "\n\n");
1293 /* Build a single block region for each basic block in the function.
1294 This allows for using the same code for interblock and basic block
1298 find_single_block_region ()
1302 for (i
= 0; i
< n_basic_blocks
; i
++)
1304 rgn_bb_table
[i
] = i
;
1305 RGN_NR_BLOCKS (i
) = 1;
1307 CONTAINING_RGN (i
) = i
;
1308 BLOCK_TO_BB (i
) = 0;
1310 nr_regions
= n_basic_blocks
;
1314 /* Update number of blocks and the estimate for number of insns
1315 in the region. Return 1 if the region is "too large" for interblock
1316 scheduling (compile time considerations), otherwise return 0. */
1319 too_large (block
, num_bbs
, num_insns
)
1320 int block
, *num_bbs
, *num_insns
;
1323 (*num_insns
) += (INSN_LUID (BLOCK_END (block
)) -
1324 INSN_LUID (BLOCK_HEAD (block
)));
1325 if ((*num_bbs
> MAX_RGN_BLOCKS
) || (*num_insns
> MAX_RGN_INSNS
))
1332 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
1333 is still an inner loop. Put in max_hdr[blk] the header of the most inner
1334 loop containing blk. */
1335 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
1337 if (max_hdr[blk] == -1) \
1338 max_hdr[blk] = hdr; \
1339 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
1340 RESET_BIT (inner, hdr); \
1341 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
1343 RESET_BIT (inner,max_hdr[blk]); \
1344 max_hdr[blk] = hdr; \
1349 /* Find regions for interblock scheduling.
1351 A region for scheduling can be:
1353 * A loop-free procedure, or
1355 * A reducible inner loop, or
1357 * A basic block not contained in any other region.
1360 ?!? In theory we could build other regions based on extended basic
1361 blocks or reverse extended basic blocks. Is it worth the trouble?
1363 Loop blocks that form a region are put into the region's block list
1364 in topological order.
1366 This procedure stores its results into the following global (ick) variables
1375 We use dominator relationships to avoid making regions out of non-reducible
1378 This procedure needs to be converted to work on pred/succ lists instead
1379 of edge tables. That would simplify it somewhat. */
1382 find_rgns (edge_list
, dom
)
1383 struct edge_list
*edge_list
;
1386 int *max_hdr
, *dfs_nr
, *stack
, *degree
;
1388 int node
, child
, loop_head
, i
, head
, tail
;
1389 int count
= 0, sp
, idx
= 0, current_edge
= out_edges
[0];
1390 int num_bbs
, num_insns
, unreachable
;
1391 int too_large_failure
;
1393 /* Note if an edge has been passed. */
1396 /* Note if a block is a natural loop header. */
1399 /* Note if a block is a natural inner loop header. */
1402 /* Note if a block is in the block queue. */
1405 /* Note if a block is in the block queue. */
1408 int num_edges
= NUM_EDGES (edge_list
);
1410 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
1411 and a mapping from block to its loop header (if the block is contained
1412 in a loop, else -1).
1414 Store results in HEADER, INNER, and MAX_HDR respectively, these will
1415 be used as inputs to the second traversal.
1417 STACK, SP and DFS_NR are only used during the first traversal. */
1419 /* Allocate and initialize variables for the first traversal. */
1420 max_hdr
= (int *) xmalloc (n_basic_blocks
* sizeof (int));
1421 dfs_nr
= (int *) xcalloc (n_basic_blocks
, sizeof (int));
1422 stack
= (int *) xmalloc (nr_edges
* sizeof (int));
1424 inner
= sbitmap_alloc (n_basic_blocks
);
1425 sbitmap_ones (inner
);
1427 header
= sbitmap_alloc (n_basic_blocks
);
1428 sbitmap_zero (header
);
1430 passed
= sbitmap_alloc (nr_edges
);
1431 sbitmap_zero (passed
);
1433 in_queue
= sbitmap_alloc (n_basic_blocks
);
1434 sbitmap_zero (in_queue
);
1436 in_stack
= sbitmap_alloc (n_basic_blocks
);
1437 sbitmap_zero (in_stack
);
1439 for (i
= 0; i
< n_basic_blocks
; i
++)
1442 /* DFS traversal to find inner loops in the cfg. */
1447 if (current_edge
== 0 || TEST_BIT (passed
, current_edge
))
1449 /* We have reached a leaf node or a node that was already
1450 processed. Pop edges off the stack until we find
1451 an edge that has not yet been processed. */
1453 && (current_edge
== 0 || TEST_BIT (passed
, current_edge
)))
1455 /* Pop entry off the stack. */
1456 current_edge
= stack
[sp
--];
1457 node
= FROM_BLOCK (current_edge
);
1458 child
= TO_BLOCK (current_edge
);
1459 RESET_BIT (in_stack
, child
);
1460 if (max_hdr
[child
] >= 0 && TEST_BIT (in_stack
, max_hdr
[child
]))
1461 UPDATE_LOOP_RELATIONS (node
, max_hdr
[child
]);
1462 current_edge
= NEXT_OUT (current_edge
);
1465 /* See if have finished the DFS tree traversal. */
1466 if (sp
< 0 && TEST_BIT (passed
, current_edge
))
1469 /* Nope, continue the traversal with the popped node. */
1473 /* Process a node. */
1474 node
= FROM_BLOCK (current_edge
);
1475 child
= TO_BLOCK (current_edge
);
1476 SET_BIT (in_stack
, node
);
1477 dfs_nr
[node
] = ++count
;
1479 /* If the successor is in the stack, then we've found a loop.
1480 Mark the loop, if it is not a natural loop, then it will
1481 be rejected during the second traversal. */
1482 if (TEST_BIT (in_stack
, child
))
1485 SET_BIT (header
, child
);
1486 UPDATE_LOOP_RELATIONS (node
, child
);
1487 SET_BIT (passed
, current_edge
);
1488 current_edge
= NEXT_OUT (current_edge
);
1492 /* If the child was already visited, then there is no need to visit
1493 it again. Just update the loop relationships and restart
1497 if (max_hdr
[child
] >= 0 && TEST_BIT (in_stack
, max_hdr
[child
]))
1498 UPDATE_LOOP_RELATIONS (node
, max_hdr
[child
]);
1499 SET_BIT (passed
, current_edge
);
1500 current_edge
= NEXT_OUT (current_edge
);
1504 /* Push an entry on the stack and continue DFS traversal. */
1505 stack
[++sp
] = current_edge
;
1506 SET_BIT (passed
, current_edge
);
1507 current_edge
= OUT_EDGES (child
);
1509 /* This is temporary until haifa is converted to use rth's new
1510 cfg routines which have true entry/exit blocks and the
1511 appropriate edges from/to those blocks.
1513 Generally we update dfs_nr for a node when we process its
1514 out edge. However, if the node has no out edge then we will
1515 not set dfs_nr for that node. This can confuse the scheduler
1516 into thinking that we have unreachable blocks, which in turn
1517 disables cross block scheduling.
1519 So, if we have a node with no out edges, go ahead and mark it
1520 as reachable now. */
1521 if (current_edge
== 0)
1522 dfs_nr
[child
] = ++count
;
1525 /* Another check for unreachable blocks. The earlier test in
1526 is_cfg_nonregular only finds unreachable blocks that do not
1529 The DFS traversal will mark every block that is reachable from
1530 the entry node by placing a nonzero value in dfs_nr. Thus if
1531 dfs_nr is zero for any block, then it must be unreachable. */
1533 for (i
= 0; i
< n_basic_blocks
; i
++)
1540 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
1541 to hold degree counts. */
1544 for (i
= 0; i
< n_basic_blocks
; i
++)
1546 for (i
= 0; i
< num_edges
; i
++)
1548 edge e
= INDEX_EDGE (edge_list
, i
);
1550 if (e
->dest
!= EXIT_BLOCK_PTR
)
1551 degree
[e
->dest
->index
]++;
1554 /* Do not perform region scheduling if there are any unreachable
1561 SET_BIT (header
, 0);
1563 /* Second traversal: find reducible inner loops and topologically sort
1564 block of each region. */
1566 queue
= (int *) xmalloc (n_basic_blocks
* sizeof (int));
1568 /* Find blocks which are inner loop headers. We still have non-reducible
1569 loops to consider at this point. */
1570 for (i
= 0; i
< n_basic_blocks
; i
++)
1572 if (TEST_BIT (header
, i
) && TEST_BIT (inner
, i
))
1577 /* Now check that the loop is reducible. We do this separate
1578 from finding inner loops so that we do not find a reducible
1579 loop which contains an inner non-reducible loop.
1581 A simple way to find reducible/natural loops is to verify
1582 that each block in the loop is dominated by the loop
1585 If there exists a block that is not dominated by the loop
1586 header, then the block is reachable from outside the loop
1587 and thus the loop is not a natural loop. */
1588 for (j
= 0; j
< n_basic_blocks
; j
++)
1590 /* First identify blocks in the loop, except for the loop
1592 if (i
== max_hdr
[j
] && i
!= j
)
1594 /* Now verify that the block is dominated by the loop
1596 if (!TEST_BIT (dom
[j
], i
))
1601 /* If we exited the loop early, then I is the header of
1602 a non-reducible loop and we should quit processing it
1604 if (j
!= n_basic_blocks
)
1607 /* I is a header of an inner loop, or block 0 in a subroutine
1608 with no loops at all. */
1610 too_large_failure
= 0;
1611 loop_head
= max_hdr
[i
];
1613 /* Decrease degree of all I's successors for topological
1615 for (e
= BASIC_BLOCK (i
)->succ
; e
; e
= e
->succ_next
)
1616 if (e
->dest
!= EXIT_BLOCK_PTR
)
1617 --degree
[e
->dest
->index
];
1619 /* Estimate # insns, and count # blocks in the region. */
1621 num_insns
= (INSN_LUID (BLOCK_END (i
))
1622 - INSN_LUID (BLOCK_HEAD (i
)));
1625 /* Find all loop latches (blocks with back edges to the loop
1626 header) or all the leaf blocks if the cfg has no loops.
1628 Place those blocks into the queue. */
1631 for (j
= 0; j
< n_basic_blocks
; j
++)
1632 /* Leaf nodes have only a single successor which must
1634 if (BASIC_BLOCK (j
)->succ
1635 && BASIC_BLOCK (j
)->succ
->dest
== EXIT_BLOCK_PTR
1636 && BASIC_BLOCK (j
)->succ
->succ_next
== NULL
)
1639 SET_BIT (in_queue
, j
);
1641 if (too_large (j
, &num_bbs
, &num_insns
))
1643 too_large_failure
= 1;
1652 for (e
= BASIC_BLOCK (i
)->pred
; e
; e
= e
->pred_next
)
1654 if (e
->src
== ENTRY_BLOCK_PTR
)
1657 node
= e
->src
->index
;
1659 if (max_hdr
[node
] == loop_head
&& node
!= i
)
1661 /* This is a loop latch. */
1662 queue
[++tail
] = node
;
1663 SET_BIT (in_queue
, node
);
1665 if (too_large (node
, &num_bbs
, &num_insns
))
1667 too_large_failure
= 1;
1675 /* Now add all the blocks in the loop to the queue.
1677 We know the loop is a natural loop; however the algorithm
1678 above will not always mark certain blocks as being in the
1687 The algorithm in the DFS traversal may not mark B & D as part
1688 of the loop (ie they will not have max_hdr set to A).
1690 We know they can not be loop latches (else they would have
1691 had max_hdr set since they'd have a backedge to a dominator
1692 block). So we don't need them on the initial queue.
1694 We know they are part of the loop because they are dominated
1695 by the loop header and can be reached by a backwards walk of
1696 the edges starting with nodes on the initial queue.
1698 It is safe and desirable to include those nodes in the
1699 loop/scheduling region. To do so we would need to decrease
1700 the degree of a node if it is the target of a backedge
1701 within the loop itself as the node is placed in the queue.
1703 We do not do this because I'm not sure that the actual
1704 scheduling code will properly handle this case. ?!? */
1706 while (head
< tail
&& !too_large_failure
)
1709 child
= queue
[++head
];
1711 for (e
= BASIC_BLOCK (child
)->pred
; e
; e
= e
->pred_next
)
1713 node
= e
->src
->index
;
1715 /* See discussion above about nodes not marked as in
1716 this loop during the initial DFS traversal. */
1717 if (e
->src
== ENTRY_BLOCK_PTR
1718 || max_hdr
[node
] != loop_head
)
1723 else if (!TEST_BIT (in_queue
, node
) && node
!= i
)
1725 queue
[++tail
] = node
;
1726 SET_BIT (in_queue
, node
);
1728 if (too_large (node
, &num_bbs
, &num_insns
))
1730 too_large_failure
= 1;
1737 if (tail
>= 0 && !too_large_failure
)
1739 /* Place the loop header into list of region blocks. */
1741 rgn_bb_table
[idx
] = i
;
1742 RGN_NR_BLOCKS (nr_regions
) = num_bbs
;
1743 RGN_BLOCKS (nr_regions
) = idx
++;
1744 CONTAINING_RGN (i
) = nr_regions
;
1745 BLOCK_TO_BB (i
) = count
= 0;
1747 /* Remove blocks from queue[] when their in degree
1748 becomes zero. Repeat until no blocks are left on the
1749 list. This produces a topological list of blocks in
1755 child
= queue
[head
];
1756 if (degree
[child
] == 0)
1761 rgn_bb_table
[idx
++] = child
;
1762 BLOCK_TO_BB (child
) = ++count
;
1763 CONTAINING_RGN (child
) = nr_regions
;
1764 queue
[head
] = queue
[tail
--];
1766 for (e
= BASIC_BLOCK (child
)->succ
;
1769 if (e
->dest
!= EXIT_BLOCK_PTR
)
1770 --degree
[e
->dest
->index
];
1782 /* Any block that did not end up in a region is placed into a region
1784 for (i
= 0; i
< n_basic_blocks
; i
++)
1787 rgn_bb_table
[idx
] = i
;
1788 RGN_NR_BLOCKS (nr_regions
) = 1;
1789 RGN_BLOCKS (nr_regions
) = idx
++;
1790 CONTAINING_RGN (i
) = nr_regions
++;
1791 BLOCK_TO_BB (i
) = 0;
1805 /* Functions for regions scheduling information. */
1807 /* Compute dominators, probability, and potential-split-edges of bb.
1808 Assume that these values were already computed for bb's predecessors. */
1811 compute_dom_prob_ps (bb
)
1814 int nxt_in_edge
, fst_in_edge
, pred
;
1815 int fst_out_edge
, nxt_out_edge
, nr_out_edges
, nr_rgn_out_edges
;
1818 if (IS_RGN_ENTRY (bb
))
1820 BITSET_ADD (dom
[bb
], 0, bbset_size
);
1825 fst_in_edge
= nxt_in_edge
= IN_EDGES (BB_TO_BLOCK (bb
));
1827 /* Initialize dom[bb] to '111..1'. */
1828 BITSET_INVERT (dom
[bb
], bbset_size
);
1832 pred
= FROM_BLOCK (nxt_in_edge
);
1833 BITSET_INTER (dom
[bb
], dom
[BLOCK_TO_BB (pred
)], bbset_size
);
1835 BITSET_UNION (ancestor_edges
[bb
], ancestor_edges
[BLOCK_TO_BB (pred
)],
1838 BITSET_ADD (ancestor_edges
[bb
], EDGE_TO_BIT (nxt_in_edge
), edgeset_size
);
1841 nr_rgn_out_edges
= 0;
1842 fst_out_edge
= OUT_EDGES (pred
);
1843 nxt_out_edge
= NEXT_OUT (fst_out_edge
);
1844 BITSET_UNION (pot_split
[bb
], pot_split
[BLOCK_TO_BB (pred
)],
1847 BITSET_ADD (pot_split
[bb
], EDGE_TO_BIT (fst_out_edge
), edgeset_size
);
1849 /* The successor doesn't belong in the region? */
1850 if (CONTAINING_RGN (TO_BLOCK (fst_out_edge
)) !=
1851 CONTAINING_RGN (BB_TO_BLOCK (bb
)))
1854 while (fst_out_edge
!= nxt_out_edge
)
1857 /* The successor doesn't belong in the region? */
1858 if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge
)) !=
1859 CONTAINING_RGN (BB_TO_BLOCK (bb
)))
1861 BITSET_ADD (pot_split
[bb
], EDGE_TO_BIT (nxt_out_edge
), edgeset_size
);
1862 nxt_out_edge
= NEXT_OUT (nxt_out_edge
);
1866 /* Now nr_rgn_out_edges is the number of region-exit edges from
1867 pred, and nr_out_edges will be the number of pred out edges
1868 not leaving the region. */
1869 nr_out_edges
-= nr_rgn_out_edges
;
1870 if (nr_rgn_out_edges
> 0)
1871 prob
[bb
] += 0.9 * prob
[BLOCK_TO_BB (pred
)] / nr_out_edges
;
1873 prob
[bb
] += prob
[BLOCK_TO_BB (pred
)] / nr_out_edges
;
1874 nxt_in_edge
= NEXT_IN (nxt_in_edge
);
1876 while (fst_in_edge
!= nxt_in_edge
);
1878 BITSET_ADD (dom
[bb
], bb
, bbset_size
);
1879 BITSET_DIFFER (pot_split
[bb
], ancestor_edges
[bb
], edgeset_size
);
1881 if (sched_verbose
>= 2)
1882 fprintf (dump
, ";; bb_prob(%d, %d) = %3d\n", bb
, BB_TO_BLOCK (bb
), (int) (100.0 * prob
[bb
]));
1883 } /* compute_dom_prob_ps */
1885 /* Functions for target info. */
1887 /* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
1888 Note that bb_trg dominates bb_src. */
1891 split_edges (bb_src
, bb_trg
, bl
)
1896 int es
= edgeset_size
;
1897 edgeset src
= (edgeset
) xcalloc (es
, sizeof (HOST_WIDE_INT
));
1900 src
[es
] = (pot_split
[bb_src
])[es
];
1901 BITSET_DIFFER (src
, pot_split
[bb_trg
], edgeset_size
);
1902 extract_bitlst (src
, edgeset_size
, edgeset_bitsize
, bl
);
1907 /* Find the valid candidate-source-blocks for the target block TRG, compute
1908 their probability, and check if they are speculative or not.
1909 For speculative sources, compute their update-blocks and split-blocks. */
1912 compute_trg_info (trg
)
1915 register candidate
*sp
;
1917 int check_block
, update_idx
;
1918 int i
, j
, k
, fst_edge
, nxt_edge
;
1920 /* Define some of the fields for the target bb as well. */
1921 sp
= candidate_table
+ trg
;
1923 sp
->is_speculative
= 0;
1926 for (i
= trg
+ 1; i
< current_nr_blocks
; i
++)
1928 sp
= candidate_table
+ i
;
1930 sp
->is_valid
= IS_DOMINATED (i
, trg
);
1933 sp
->src_prob
= GET_SRC_PROB (i
, trg
);
1934 sp
->is_valid
= (sp
->src_prob
>= MIN_PROBABILITY
);
1939 split_edges (i
, trg
, &el
);
1940 sp
->is_speculative
= (el
.nr_members
) ? 1 : 0;
1941 if (sp
->is_speculative
&& !flag_schedule_speculative
)
1947 sp
->split_bbs
.first_member
= &bblst_table
[bblst_last
];
1948 sp
->split_bbs
.nr_members
= el
.nr_members
;
1949 for (j
= 0; j
< el
.nr_members
; bblst_last
++, j
++)
1950 bblst_table
[bblst_last
] =
1951 TO_BLOCK (rgn_edges
[el
.first_member
[j
]]);
1952 sp
->update_bbs
.first_member
= &bblst_table
[bblst_last
];
1954 for (j
= 0; j
< el
.nr_members
; j
++)
1956 check_block
= FROM_BLOCK (rgn_edges
[el
.first_member
[j
]]);
1957 fst_edge
= nxt_edge
= OUT_EDGES (check_block
);
1960 for (k
= 0; k
< el
.nr_members
; k
++)
1961 if (EDGE_TO_BIT (nxt_edge
) == el
.first_member
[k
])
1964 if (k
>= el
.nr_members
)
1966 bblst_table
[bblst_last
++] = TO_BLOCK (nxt_edge
);
1970 nxt_edge
= NEXT_OUT (nxt_edge
);
1972 while (fst_edge
!= nxt_edge
);
1974 sp
->update_bbs
.nr_members
= update_idx
;
1979 sp
->split_bbs
.nr_members
= sp
->update_bbs
.nr_members
= 0;
1981 sp
->is_speculative
= 0;
1985 } /* compute_trg_info */
1988 /* Print candidates info, for debugging purposes. Callable from debugger. */
1994 if (!candidate_table
[i
].is_valid
)
1997 if (candidate_table
[i
].is_speculative
)
2000 fprintf (dump
, "src b %d bb %d speculative \n", BB_TO_BLOCK (i
), i
);
2002 fprintf (dump
, "split path: ");
2003 for (j
= 0; j
< candidate_table
[i
].split_bbs
.nr_members
; j
++)
2005 int b
= candidate_table
[i
].split_bbs
.first_member
[j
];
2007 fprintf (dump
, " %d ", b
);
2009 fprintf (dump
, "\n");
2011 fprintf (dump
, "update path: ");
2012 for (j
= 0; j
< candidate_table
[i
].update_bbs
.nr_members
; j
++)
2014 int b
= candidate_table
[i
].update_bbs
.first_member
[j
];
2016 fprintf (dump
, " %d ", b
);
2018 fprintf (dump
, "\n");
2022 fprintf (dump
, " src %d equivalent\n", BB_TO_BLOCK (i
));
2027 /* Print candidates info, for debugging purposes. Callable from debugger. */
2030 debug_candidates (trg
)
2035 fprintf (dump
, "----------- candidate table: target: b=%d bb=%d ---\n",
2036 BB_TO_BLOCK (trg
), trg
);
2037 for (i
= trg
+ 1; i
< current_nr_blocks
; i
++)
2038 debug_candidate (i
);
2042 /* Functions for speculative scheduling. */
2044 /* Return 0 if x is a set of a register alive in the beginning of one
2045 of the split-blocks of src, otherwise return 1. */
2048 check_live_1 (src
, x
)
2054 register rtx reg
= SET_DEST (x
);
2059 while (GET_CODE (reg
) == SUBREG
|| GET_CODE (reg
) == ZERO_EXTRACT
2060 || GET_CODE (reg
) == SIGN_EXTRACT
2061 || GET_CODE (reg
) == STRICT_LOW_PART
)
2062 reg
= XEXP (reg
, 0);
2064 if (GET_CODE (reg
) == PARALLEL
2065 && GET_MODE (reg
) == BLKmode
)
2068 for (i
= XVECLEN (reg
, 0) - 1; i
>= 0; i
--)
2069 if (check_live_1 (src
, XVECEXP (reg
, 0, i
)))
2074 if (GET_CODE (reg
) != REG
)
2077 regno
= REGNO (reg
);
2079 if (regno
< FIRST_PSEUDO_REGISTER
&& global_regs
[regno
])
2081 /* Global registers are assumed live. */
2086 if (regno
< FIRST_PSEUDO_REGISTER
)
2088 /* Check for hard registers. */
2089 int j
= HARD_REGNO_NREGS (regno
, GET_MODE (reg
));
2092 for (i
= 0; i
< candidate_table
[src
].split_bbs
.nr_members
; i
++)
2094 int b
= candidate_table
[src
].split_bbs
.first_member
[i
];
2096 if (REGNO_REG_SET_P (BASIC_BLOCK (b
)->global_live_at_start
,
2106 /* Check for pseudo registers. */
2107 for (i
= 0; i
< candidate_table
[src
].split_bbs
.nr_members
; i
++)
2109 int b
= candidate_table
[src
].split_bbs
.first_member
[i
];
2111 if (REGNO_REG_SET_P (BASIC_BLOCK (b
)->global_live_at_start
, regno
))
2123 /* If x is a set of a register R, mark that R is alive in the beginning
2124 of every update-block of src. */
2127 update_live_1 (src
, x
)
2133 register rtx reg
= SET_DEST (x
);
2138 while (GET_CODE (reg
) == SUBREG
|| GET_CODE (reg
) == ZERO_EXTRACT
2139 || GET_CODE (reg
) == SIGN_EXTRACT
2140 || GET_CODE (reg
) == STRICT_LOW_PART
)
2141 reg
= XEXP (reg
, 0);
2143 if (GET_CODE (reg
) == PARALLEL
2144 && GET_MODE (reg
) == BLKmode
)
2147 for (i
= XVECLEN (reg
, 0) - 1; i
>= 0; i
--)
2148 update_live_1 (src
, XVECEXP (reg
, 0, i
));
2152 if (GET_CODE (reg
) != REG
)
2155 /* Global registers are always live, so the code below does not apply
2158 regno
= REGNO (reg
);
2160 if (regno
>= FIRST_PSEUDO_REGISTER
|| !global_regs
[regno
])
2162 if (regno
< FIRST_PSEUDO_REGISTER
)
2164 int j
= HARD_REGNO_NREGS (regno
, GET_MODE (reg
));
2167 for (i
= 0; i
< candidate_table
[src
].update_bbs
.nr_members
; i
++)
2169 int b
= candidate_table
[src
].update_bbs
.first_member
[i
];
2171 SET_REGNO_REG_SET (BASIC_BLOCK (b
)->global_live_at_start
,
2178 for (i
= 0; i
< candidate_table
[src
].update_bbs
.nr_members
; i
++)
2180 int b
= candidate_table
[src
].update_bbs
.first_member
[i
];
2182 SET_REGNO_REG_SET (BASIC_BLOCK (b
)->global_live_at_start
, regno
);
2189 /* Return 1 if insn can be speculatively moved from block src to trg,
2190 otherwise return 0. Called before first insertion of insn to
2191 ready-list or before the scheduling. */
2194 check_live (insn
, src
)
2198 /* Find the registers set by instruction. */
2199 if (GET_CODE (PATTERN (insn
)) == SET
2200 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
2201 return check_live_1 (src
, PATTERN (insn
));
2202 else if (GET_CODE (PATTERN (insn
)) == PARALLEL
)
2205 for (j
= XVECLEN (PATTERN (insn
), 0) - 1; j
>= 0; j
--)
2206 if ((GET_CODE (XVECEXP (PATTERN (insn
), 0, j
)) == SET
2207 || GET_CODE (XVECEXP (PATTERN (insn
), 0, j
)) == CLOBBER
)
2208 && !check_live_1 (src
, XVECEXP (PATTERN (insn
), 0, j
)))
2218 /* Update the live registers info after insn was moved speculatively from
2219 block src to trg. */
2222 update_live (insn
, src
)
2226 /* Find the registers set by instruction. */
2227 if (GET_CODE (PATTERN (insn
)) == SET
2228 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
2229 update_live_1 (src
, PATTERN (insn
));
2230 else if (GET_CODE (PATTERN (insn
)) == PARALLEL
)
2233 for (j
= XVECLEN (PATTERN (insn
), 0) - 1; j
>= 0; j
--)
2234 if (GET_CODE (XVECEXP (PATTERN (insn
), 0, j
)) == SET
2235 || GET_CODE (XVECEXP (PATTERN (insn
), 0, j
)) == CLOBBER
)
2236 update_live_1 (src
, XVECEXP (PATTERN (insn
), 0, j
));
2240 /* Exception Free Loads:
2242 We define five classes of speculative loads: IFREE, IRISKY,
2243 PFREE, PRISKY, and MFREE.
2245 IFREE loads are loads that are proved to be exception-free, just
2246 by examining the load insn. Examples for such loads are loads
2247 from TOC and loads of global data.
2249 IRISKY loads are loads that are proved to be exception-risky,
2250 just by examining the load insn. Examples for such loads are
2251 volatile loads and loads from shared memory.
2253 PFREE loads are loads for which we can prove, by examining other
2254 insns, that they are exception-free. Currently, this class consists
2255 of loads for which we are able to find a "similar load", either in
2256 the target block, or, if only one split-block exists, in that split
2257 block. Load2 is similar to load1 if both have same single base
2258 register. We identify only part of the similar loads, by finding
2259 an insn upon which both load1 and load2 have a DEF-USE dependence.
2261 PRISKY loads are loads for which we can prove, by examining other
2262 insns, that they are exception-risky. Currently we have two proofs for
2263 such loads. The first proof detects loads that are probably guarded by a
2264 test on the memory address. This proof is based on the
2265 backward and forward data dependence information for the region.
2266 Let load-insn be the examined load.
2267 Load-insn is PRISKY iff ALL the following hold:
2269 - insn1 is not in the same block as load-insn
2270 - there is a DEF-USE dependence chain (insn1, ..., load-insn)
2271 - test-insn is either a compare or a branch, not in the same block
2273 - load-insn is reachable from test-insn
2274 - there is a DEF-USE dependence chain (insn1, ..., test-insn)
2276 This proof might fail when the compare and the load are fed
2277 by an insn not in the region. To solve this, we will add to this
2278 group all loads that have no input DEF-USE dependence.
2280 The second proof detects loads that are directly or indirectly
2281 fed by a speculative load. This proof is affected by the
2282 scheduling process. We will use the flag fed_by_spec_load.
2283 Initially, all insns have this flag reset. After a speculative
2284 motion of an insn, if insn is either a load, or marked as
2285 fed_by_spec_load, we will also mark as fed_by_spec_load every
2286 insn1 for which a DEF-USE dependence (insn, insn1) exists. A
2287 load which is fed_by_spec_load is also PRISKY.
2289 MFREE (maybe-free) loads are all the remaining loads. They may be
2290 exception-free, but we cannot prove it.
2292 Now, all loads in IFREE and PFREE classes are considered
2293 exception-free, while all loads in IRISKY and PRISKY classes are
2294 considered exception-risky. As for loads in the MFREE class,
2295 these are considered either exception-free or exception-risky,
2296 depending on whether we are pessimistic or optimistic. We have
2297 to take the pessimistic approach to assure the safety of
2298 speculative scheduling, but we can take the optimistic approach
2299 by invoking the -fsched_spec_load_dangerous option. */
/* Trap classification of an insn, ordered from safest to riskiest so
   that WORST_CLASS can simply take the numeric maximum.  */
enum INSN_TRAP_CLASS
{
  TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2,
  PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5
};
/* The riskier of two trap classes (classes are ordered by risk).  */
#define WORST_CLASS(class1, class2) \
((class1 > class2) ? class1 : class2)

/* Non-zero if block bb_to is equal to, or reachable from block bb_from.  */
#define IS_REACHABLE(bb_from, bb_to)					\
(bb_from == bb_to							\
 || IS_RGN_ENTRY (bb_from)						\
 || (bitset_member (ancestor_edges[bb_to],				\
		    EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))),	\
		    edgeset_size)))

/* Non-zero iff the address is comprised from at most 1 register.  */
#define CONST_BASED_ADDRESS_P(x)			\
  (GET_CODE (x) == REG					\
   || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS	\
	|| (GET_CODE (x) == LO_SUM))			\
       && (GET_CODE (XEXP (x, 0)) == CONST_INT		\
	   || GET_CODE (XEXP (x, 1)) == CONST_INT)))
2326 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
2329 set_spec_fed (load_insn
)
2334 for (link
= INSN_DEPEND (load_insn
); link
; link
= XEXP (link
, 1))
2335 if (GET_MODE (link
) == VOIDmode
)
2336 FED_BY_SPEC_LOAD (XEXP (link
, 0)) = 1;
2337 } /* set_spec_fed */
2339 /* On the path from the insn to load_insn_bb, find a conditional
2340 branch depending on insn, that guards the speculative load. */
2343 find_conditional_protection (insn
, load_insn_bb
)
2349 /* Iterate through DEF-USE forward dependences. */
2350 for (link
= INSN_DEPEND (insn
); link
; link
= XEXP (link
, 1))
2352 rtx next
= XEXP (link
, 0);
2353 if ((CONTAINING_RGN (BLOCK_NUM (next
)) ==
2354 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb
)))
2355 && IS_REACHABLE (INSN_BB (next
), load_insn_bb
)
2356 && load_insn_bb
!= INSN_BB (next
)
2357 && GET_MODE (link
) == VOIDmode
2358 && (GET_CODE (next
) == JUMP_INSN
2359 || find_conditional_protection (next
, load_insn_bb
)))
2363 } /* find_conditional_protection */
2365 /* Returns 1 if the same insn1 that participates in the computation
2366 of load_insn's address is feeding a conditional branch that is
2367 guarding on load_insn. This is true if we find a the two DEF-USE
2369 insn1 -> ... -> conditional-branch
2370 insn1 -> ... -> load_insn,
2371 and if a flow path exist:
2372 insn1 -> ... -> conditional-branch -> ... -> load_insn,
2373 and if insn1 is on the path
2374 region-entry -> ... -> bb_trg -> ... load_insn.
2376 Locate insn1 by climbing on LOG_LINKS from load_insn.
2377 Locate the branch by following INSN_DEPEND from insn1. */
2380 is_conditionally_protected (load_insn
, bb_src
, bb_trg
)
2386 for (link
= LOG_LINKS (load_insn
); link
; link
= XEXP (link
, 1))
2388 rtx insn1
= XEXP (link
, 0);
2390 /* Must be a DEF-USE dependence upon non-branch. */
2391 if (GET_MODE (link
) != VOIDmode
2392 || GET_CODE (insn1
) == JUMP_INSN
)
2395 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
2396 if (INSN_BB (insn1
) == bb_src
2397 || (CONTAINING_RGN (BLOCK_NUM (insn1
))
2398 != CONTAINING_RGN (BB_TO_BLOCK (bb_src
)))
2399 || (!IS_REACHABLE (bb_trg
, INSN_BB (insn1
))
2400 && !IS_REACHABLE (INSN_BB (insn1
), bb_trg
)))
2403 /* Now search for the conditional-branch. */
2404 if (find_conditional_protection (insn1
, bb_src
))
2407 /* Recursive step: search another insn1, "above" current insn1. */
2408 return is_conditionally_protected (insn1
, bb_src
, bb_trg
);
2411 /* The chain does not exist. */
2413 } /* is_conditionally_protected */
2415 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
2416 load_insn can move speculatively from bb_src to bb_trg. All the
2417 following must hold:
2419 (1) both loads have 1 base register (PFREE_CANDIDATEs).
2420 (2) load_insn and load1 have a def-use dependence upon
2421 the same insn 'insn1'.
2422 (3) either load2 is in bb_trg, or:
2423 - there's only one split-block, and
2424 - load1 is on the escape path, and
2426 From all these we can conclude that the two loads access memory
2427 addresses that differ at most by a constant, and hence if moving
2428 load_insn would cause an exception, it would have been caused by
2432 is_pfree (load_insn
, bb_src
, bb_trg
)
2437 register candidate
*candp
= candidate_table
+ bb_src
;
2439 if (candp
->split_bbs
.nr_members
!= 1)
2440 /* Must have exactly one escape block. */
2443 for (back_link
= LOG_LINKS (load_insn
);
2444 back_link
; back_link
= XEXP (back_link
, 1))
2446 rtx insn1
= XEXP (back_link
, 0);
2448 if (GET_MODE (back_link
) == VOIDmode
)
2450 /* Found a DEF-USE dependence (insn1, load_insn). */
2453 for (fore_link
= INSN_DEPEND (insn1
);
2454 fore_link
; fore_link
= XEXP (fore_link
, 1))
2456 rtx insn2
= XEXP (fore_link
, 0);
2457 if (GET_MODE (fore_link
) == VOIDmode
)
2459 /* Found a DEF-USE dependence (insn1, insn2). */
2460 if (haifa_classify_insn (insn2
) != PFREE_CANDIDATE
)
2461 /* insn2 not guaranteed to be a 1 base reg load. */
2464 if (INSN_BB (insn2
) == bb_trg
)
2465 /* insn2 is the similar load, in the target block. */
2468 if (*(candp
->split_bbs
.first_member
) == BLOCK_NUM (insn2
))
2469 /* insn2 is a similar load, in a split-block. */
2476 /* Couldn't find a similar load. */
2480 /* Returns a class that insn with GET_DEST(insn)=x may belong to,
2481 as found by analyzing insn's expression. */
2484 may_trap_exp (x
, is_store
)
2492 code
= GET_CODE (x
);
2502 /* The insn uses memory: a volatile load. */
2503 if (MEM_VOLATILE_P (x
))
2505 /* An exception-free load. */
2506 if (!may_trap_p (x
))
2508 /* A load with 1 base register, to be further checked. */
2509 if (CONST_BASED_ADDRESS_P (XEXP (x
, 0)))
2510 return PFREE_CANDIDATE
;
2511 /* No info on the load, to be further checked. */
2512 return PRISKY_CANDIDATE
;
2517 int i
, insn_class
= TRAP_FREE
;
2519 /* Neither store nor load, check if it may cause a trap. */
2522 /* Recursive step: walk the insn... */
2523 fmt
= GET_RTX_FORMAT (code
);
2524 for (i
= GET_RTX_LENGTH (code
) - 1; i
>= 0; i
--)
2528 int tmp_class
= may_trap_exp (XEXP (x
, i
), is_store
);
2529 insn_class
= WORST_CLASS (insn_class
, tmp_class
);
2531 else if (fmt
[i
] == 'E')
2534 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
2536 int tmp_class
= may_trap_exp (XVECEXP (x
, i
, j
), is_store
);
2537 insn_class
= WORST_CLASS (insn_class
, tmp_class
);
2538 if (insn_class
== TRAP_RISKY
|| insn_class
== IRISKY
)
2542 if (insn_class
== TRAP_RISKY
|| insn_class
== IRISKY
)
2547 } /* may_trap_exp */
2550 /* Classifies insn for the purpose of verifying that it can be
2551 moved speculatively, by examining it's patterns, returning:
2552 TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
2553 TRAP_FREE: non-load insn.
2554 IFREE: load from a globaly safe location.
2555 IRISKY: volatile load.
2556 PFREE_CANDIDATE, PRISKY_CANDIDATE: load that need to be checked for
2557 being either PFREE or PRISKY. */
2560 haifa_classify_insn (insn
)
2563 rtx pat
= PATTERN (insn
);
2564 int tmp_class
= TRAP_FREE
;
2565 int insn_class
= TRAP_FREE
;
2568 if (GET_CODE (pat
) == PARALLEL
)
2570 int i
, len
= XVECLEN (pat
, 0);
2572 for (i
= len
- 1; i
>= 0; i
--)
2574 code
= GET_CODE (XVECEXP (pat
, 0, i
));
2578 /* Test if it is a 'store'. */
2579 tmp_class
= may_trap_exp (XEXP (XVECEXP (pat
, 0, i
), 0), 1);
2582 /* Test if it is a store. */
2583 tmp_class
= may_trap_exp (SET_DEST (XVECEXP (pat
, 0, i
)), 1);
2584 if (tmp_class
== TRAP_RISKY
)
2586 /* Test if it is a load. */
2588 WORST_CLASS (tmp_class
,
2589 may_trap_exp (SET_SRC (XVECEXP (pat
, 0, i
)), 0));
2592 tmp_class
= TRAP_RISKY
;
2596 insn_class
= WORST_CLASS (insn_class
, tmp_class
);
2597 if (insn_class
== TRAP_RISKY
|| insn_class
== IRISKY
)
2603 code
= GET_CODE (pat
);
2607 /* Test if it is a 'store'. */
2608 tmp_class
= may_trap_exp (XEXP (pat
, 0), 1);
2611 /* Test if it is a store. */
2612 tmp_class
= may_trap_exp (SET_DEST (pat
), 1);
2613 if (tmp_class
== TRAP_RISKY
)
2615 /* Test if it is a load. */
2617 WORST_CLASS (tmp_class
,
2618 may_trap_exp (SET_SRC (pat
), 0));
2621 tmp_class
= TRAP_RISKY
;
2625 insn_class
= tmp_class
;
2630 } /* haifa_classify_insn */
2632 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
2633 a load moved speculatively, or if load_insn is protected by
2634 a compare on load_insn's address). */
2637 is_prisky (load_insn
, bb_src
, bb_trg
)
2641 if (FED_BY_SPEC_LOAD (load_insn
))
2644 if (LOG_LINKS (load_insn
) == NULL
)
2645 /* Dependence may 'hide' out of the region. */
2648 if (is_conditionally_protected (load_insn
, bb_src
, bb_trg
))
2654 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
2655 Return 1 if insn is exception-free (and the motion is valid)
2659 is_exception_free (insn
, bb_src
, bb_trg
)
2663 int insn_class
= haifa_classify_insn (insn
);
2665 /* Handle non-load insns. */
2676 if (!flag_schedule_speculative_load
)
2678 IS_LOAD_INSN (insn
) = 1;
2685 case PFREE_CANDIDATE
:
2686 if (is_pfree (insn
, bb_src
, bb_trg
))
2688 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
2689 case PRISKY_CANDIDATE
:
2690 if (!flag_schedule_speculative_load_dangerous
2691 || is_prisky (insn
, bb_src
, bb_trg
))
2697 return flag_schedule_speculative_load_dangerous
;
2698 } /* is_exception_free */
2701 /* Process an insn's memory dependencies. There are four kinds of
2704 (0) read dependence: read follows read
2705 (1) true dependence: read follows write
2706 (2) anti dependence: write follows read
2707 (3) output dependence: write follows write
2709 We are careful to build only dependencies which actually exist, and
2710 use transitivity to avoid building too many links. */
2712 /* Return the INSN_LIST containing INSN in LIST, or NULL
2713 if LIST does not contain INSN. */
2715 HAIFA_INLINE
static rtx
2716 find_insn_list (insn
, list
)
2722 if (XEXP (list
, 0) == insn
)
2724 list
= XEXP (list
, 1);
2730 /* Return 1 if the pair (insn, x) is found in (LIST, LIST1), or 0
2733 HAIFA_INLINE
static char
2734 find_insn_mem_list (insn
, x
, list
, list1
)
2740 if (XEXP (list
, 0) == insn
2741 && XEXP (list1
, 0) == x
)
2743 list
= XEXP (list
, 1);
2744 list1
= XEXP (list1
, 1);
2750 /* Compute the function units used by INSN. This caches the value
2751 returned by function_units_used. A function unit is encoded as the
2752 unit number if the value is non-negative and the compliment of a
2753 mask if the value is negative. A function unit index is the
2754 non-negative encoding. */
2756 HAIFA_INLINE
static int
2760 register int unit
= INSN_UNIT (insn
);
2764 recog_memoized (insn
);
2766 /* A USE insn, or something else we don't need to understand.
2767 We can't pass these directly to function_units_used because it will
2768 trigger a fatal error for unrecognizable insns. */
2769 if (INSN_CODE (insn
) < 0)
2773 unit
= function_units_used (insn
);
2774 /* Increment non-negative values so we can cache zero. */
2778 /* We only cache 16 bits of the result, so if the value is out of
2779 range, don't cache it. */
2780 if (FUNCTION_UNITS_SIZE
< HOST_BITS_PER_SHORT
2782 || (unit
& ~((1 << (HOST_BITS_PER_SHORT
- 1)) - 1)) == 0)
2783 INSN_UNIT (insn
) = unit
;
2785 return (unit
> 0 ? unit
- 1 : unit
);
2788 /* Compute the blockage range for executing INSN on UNIT. This caches
2789 the value returned by the blockage_range_function for the unit.
2790 These values are encoded in an int where the upper half gives the
2791 minimum value and the lower half gives the maximum value. */
2793 HAIFA_INLINE
static unsigned int
2794 blockage_range (unit
, insn
)
2798 unsigned int blockage
= INSN_BLOCKAGE (insn
);
2801 if ((int) UNIT_BLOCKED (blockage
) != unit
+ 1)
2803 range
= function_units
[unit
].blockage_range_function (insn
);
2804 /* We only cache the blockage range for one unit and then only if
2806 if (HOST_BITS_PER_INT
>= UNIT_BITS
+ 2 * BLOCKAGE_BITS
)
2807 INSN_BLOCKAGE (insn
) = ENCODE_BLOCKAGE (unit
+ 1, range
);
2810 range
= BLOCKAGE_RANGE (blockage
);
2815 /* A vector indexed by function unit instance giving the last insn to use
2816 the unit. The value of the function unit instance index for unit U
2817 instance I is (U + I * FUNCTION_UNITS_SIZE). */
2818 static rtx unit_last_insn
[FUNCTION_UNITS_SIZE
* MAX_MULTIPLICITY
];
2820 /* A vector indexed by function unit instance giving the minimum time when
2821 the unit will unblock based on the maximum blockage cost. */
2822 static int unit_tick
[FUNCTION_UNITS_SIZE
* MAX_MULTIPLICITY
];
2824 /* A vector indexed by function unit number giving the number of insns
2825 that remain to use the unit. */
2826 static int unit_n_insns
[FUNCTION_UNITS_SIZE
];
2828 /* Reset the function unit state to the null state. */
2833 bzero ((char *) unit_last_insn
, sizeof (unit_last_insn
));
2834 bzero ((char *) unit_tick
, sizeof (unit_tick
));
2835 bzero ((char *) unit_n_insns
, sizeof (unit_n_insns
));
2838 /* Return the issue-delay of an insn. */
2840 HAIFA_INLINE
static int
2841 insn_issue_delay (insn
)
2845 int unit
= insn_unit (insn
);
2847 /* Efficiency note: in fact, we are working 'hard' to compute a
2848 value that was available in md file, and is not available in
2849 function_units[] structure. It would be nice to have this
2850 value there, too. */
2853 if (function_units
[unit
].blockage_range_function
&&
2854 function_units
[unit
].blockage_function
)
2855 delay
= function_units
[unit
].blockage_function (insn
, insn
);
2858 for (i
= 0, unit
= ~unit
; unit
; i
++, unit
>>= 1)
2859 if ((unit
& 1) != 0 && function_units
[i
].blockage_range_function
2860 && function_units
[i
].blockage_function
)
2861 delay
= MAX (delay
, function_units
[i
].blockage_function (insn
, insn
));
2866 /* Return the actual hazard cost of executing INSN on the unit UNIT,
2867 instance INSTANCE at time CLOCK if the previous actual hazard cost
2870 HAIFA_INLINE
static int
2871 actual_hazard_this_instance (unit
, instance
, insn
, clock
, cost
)
2872 int unit
, instance
, clock
, cost
;
2875 int tick
= unit_tick
[instance
]; /* Issue time of the last issued insn. */
2877 if (tick
- clock
> cost
)
2879 /* The scheduler is operating forward, so unit's last insn is the
2880 executing insn and INSN is the candidate insn. We want a
2881 more exact measure of the blockage if we execute INSN at CLOCK
2882 given when we committed the execution of the unit's last insn.
2884 The blockage value is given by either the unit's max blockage
2885 constant, blockage range function, or blockage function. Use
2886 the most exact form for the given unit. */
2888 if (function_units
[unit
].blockage_range_function
)
2890 if (function_units
[unit
].blockage_function
)
2891 tick
+= (function_units
[unit
].blockage_function
2892 (unit_last_insn
[instance
], insn
)
2893 - function_units
[unit
].max_blockage
);
2895 tick
+= ((int) MAX_BLOCKAGE_COST (blockage_range (unit
, insn
))
2896 - function_units
[unit
].max_blockage
);
2898 if (tick
- clock
> cost
)
2899 cost
= tick
- clock
;
2904 /* Record INSN as having begun execution on the units encoded by UNIT at
2907 HAIFA_INLINE
static void
2908 schedule_unit (unit
, insn
, clock
)
2916 int instance
= unit
;
2917 #if MAX_MULTIPLICITY > 1
2918 /* Find the first free instance of the function unit and use that
2919 one. We assume that one is free. */
2920 for (i
= function_units
[unit
].multiplicity
- 1; i
> 0; i
--)
2922 if (!actual_hazard_this_instance (unit
, instance
, insn
, clock
, 0))
2924 instance
+= FUNCTION_UNITS_SIZE
;
2927 unit_last_insn
[instance
] = insn
;
2928 unit_tick
[instance
] = (clock
+ function_units
[unit
].max_blockage
);
2931 for (i
= 0, unit
= ~unit
; unit
; i
++, unit
>>= 1)
2932 if ((unit
& 1) != 0)
2933 schedule_unit (i
, insn
, clock
);
2936 /* Return the actual hazard cost of executing INSN on the units encoded by
2937 UNIT at time CLOCK if the previous actual hazard cost was COST. */
2939 HAIFA_INLINE
static int
2940 actual_hazard (unit
, insn
, clock
, cost
)
2941 int unit
, clock
, cost
;
2948 /* Find the instance of the function unit with the minimum hazard. */
2949 int instance
= unit
;
2950 int best_cost
= actual_hazard_this_instance (unit
, instance
, insn
,
2952 #if MAX_MULTIPLICITY > 1
2955 if (best_cost
> cost
)
2957 for (i
= function_units
[unit
].multiplicity
- 1; i
> 0; i
--)
2959 instance
+= FUNCTION_UNITS_SIZE
;
2960 this_cost
= actual_hazard_this_instance (unit
, instance
, insn
,
2962 if (this_cost
< best_cost
)
2964 best_cost
= this_cost
;
2965 if (this_cost
<= cost
)
2971 cost
= MAX (cost
, best_cost
);
2974 for (i
= 0, unit
= ~unit
; unit
; i
++, unit
>>= 1)
2975 if ((unit
& 1) != 0)
2976 cost
= actual_hazard (i
, insn
, clock
, cost
);
2981 /* Return the potential hazard cost of executing an instruction on the
2982 units encoded by UNIT if the previous potential hazard cost was COST.
2983 An insn with a large blockage time is chosen in preference to one
2984 with a smaller time; an insn that uses a unit that is more likely
2985 to be used is chosen in preference to one with a unit that is less
2986 used. We are trying to minimize a subsequent actual hazard. */
2988 HAIFA_INLINE
static int
2989 potential_hazard (unit
, insn
, cost
)
2994 unsigned int minb
, maxb
;
2998 minb
= maxb
= function_units
[unit
].max_blockage
;
3001 if (function_units
[unit
].blockage_range_function
)
3003 maxb
= minb
= blockage_range (unit
, insn
);
3004 maxb
= MAX_BLOCKAGE_COST (maxb
);
3005 minb
= MIN_BLOCKAGE_COST (minb
);
3010 /* Make the number of instructions left dominate. Make the
3011 minimum delay dominate the maximum delay. If all these
3012 are the same, use the unit number to add an arbitrary
3013 ordering. Other terms can be added. */
3014 ncost
= minb
* 0x40 + maxb
;
3015 ncost
*= (unit_n_insns
[unit
] - 1) * 0x1000 + unit
;
3022 for (i
= 0, unit
= ~unit
; unit
; i
++, unit
>>= 1)
3023 if ((unit
& 1) != 0)
3024 cost
= potential_hazard (i
, insn
, cost
);
3029 /* Compute cost of executing INSN given the dependence LINK on the insn USED.
3030 This is the number of cycles between instruction issue and
3031 instruction results. */
3033 HAIFA_INLINE
static int
3034 insn_cost (insn
, link
, used
)
3035 rtx insn
, link
, used
;
3037 register int cost
= INSN_COST (insn
);
3041 recog_memoized (insn
);
3043 /* A USE insn, or something else we don't need to understand.
3044 We can't pass these directly to result_ready_cost because it will
3045 trigger a fatal error for unrecognizable insns. */
3046 if (INSN_CODE (insn
) < 0)
3048 INSN_COST (insn
) = 1;
3053 cost
= result_ready_cost (insn
);
3058 INSN_COST (insn
) = cost
;
3062 /* In this case estimate cost without caring how insn is used. */
3063 if (link
== 0 && used
== 0)
3066 /* A USE insn should never require the value used to be computed. This
3067 allows the computation of a function's result and parameter values to
3068 overlap the return and call. */
3069 recog_memoized (used
);
3070 if (INSN_CODE (used
) < 0)
3071 LINK_COST_FREE (link
) = 1;
3073 /* If some dependencies vary the cost, compute the adjustment. Most
3074 commonly, the adjustment is complete: either the cost is ignored
3075 (in the case of an output- or anti-dependence), or the cost is
3076 unchanged. These values are cached in the link as LINK_COST_FREE
3077 and LINK_COST_ZERO. */
3079 if (LINK_COST_FREE (link
))
3082 else if (!LINK_COST_ZERO (link
))
3086 ADJUST_COST (used
, link
, insn
, ncost
);
3089 LINK_COST_FREE (link
) = 1;
3093 LINK_COST_ZERO (link
) = 1;
3100 /* Compute the priority number for INSN. */
3109 if (GET_RTX_CLASS (GET_CODE (insn
)) != 'i')
3112 if ((this_priority
= INSN_PRIORITY (insn
)) == 0)
3114 if (INSN_DEPEND (insn
) == 0)
3115 this_priority
= insn_cost (insn
, 0, 0);
3117 for (link
= INSN_DEPEND (insn
); link
; link
= XEXP (link
, 1))
3122 if (RTX_INTEGRATED_P (link
))
3125 next
= XEXP (link
, 0);
3127 /* Critical path is meaningful in block boundaries only. */
3128 if (BLOCK_NUM (next
) != BLOCK_NUM (insn
))
3131 next_priority
= insn_cost (insn
, link
, next
) + priority (next
);
3132 if (next_priority
> this_priority
)
3133 this_priority
= next_priority
;
3135 INSN_PRIORITY (insn
) = this_priority
;
3137 return this_priority
;
3141 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
3142 them to the unused_*_list variables, so that they can be reused. */
3145 free_pending_lists ()
3149 for (bb
= 0; bb
< current_nr_blocks
; bb
++)
3151 free_INSN_LIST_list (&bb_deps
[bb
].pending_read_insns
);
3152 free_INSN_LIST_list (&bb_deps
[bb
].pending_write_insns
);
3153 free_EXPR_LIST_list (&bb_deps
[bb
].pending_read_mems
);
3154 free_EXPR_LIST_list (&bb_deps
[bb
].pending_write_mems
);
3158 /* Add an INSN and MEM reference pair to a pending INSN_LIST and MEM_LIST.
3159 The MEM is a memory reference contained within INSN, which we are saving
3160 so that we can do memory aliasing on it. */
3163 add_insn_mem_dependence (deps
, insn_list
, mem_list
, insn
, mem
)
3165 rtx
*insn_list
, *mem_list
, insn
, mem
;
3169 link
= alloc_INSN_LIST (insn
, *insn_list
);
3172 link
= alloc_EXPR_LIST (VOIDmode
, mem
, *mem_list
);
3175 deps
->pending_lists_length
++;
3178 /* Make a dependency between every memory reference on the pending lists
3179 and INSN, thus flushing the pending lists. If ONLY_WRITE, don't flush
3183 flush_pending_lists (deps
, insn
, only_write
)
3191 while (deps
->pending_read_insns
&& ! only_write
)
3193 add_dependence (insn
, XEXP (deps
->pending_read_insns
, 0),
3196 link
= deps
->pending_read_insns
;
3197 deps
->pending_read_insns
= XEXP (deps
->pending_read_insns
, 1);
3198 free_INSN_LIST_node (link
);
3200 link
= deps
->pending_read_mems
;
3201 deps
->pending_read_mems
= XEXP (deps
->pending_read_mems
, 1);
3202 free_EXPR_LIST_node (link
);
3204 while (deps
->pending_write_insns
)
3206 add_dependence (insn
, XEXP (deps
->pending_write_insns
, 0),
3209 link
= deps
->pending_write_insns
;
3210 deps
->pending_write_insns
= XEXP (deps
->pending_write_insns
, 1);
3211 free_INSN_LIST_node (link
);
3213 link
= deps
->pending_write_mems
;
3214 deps
->pending_write_mems
= XEXP (deps
->pending_write_mems
, 1);
3215 free_EXPR_LIST_node (link
);
3217 deps
->pending_lists_length
= 0;
3219 /* last_pending_memory_flush is now a list of insns. */
3220 for (u
= deps
->last_pending_memory_flush
; u
; u
= XEXP (u
, 1))
3221 add_dependence (insn
, XEXP (u
, 0), REG_DEP_ANTI
);
3223 free_INSN_LIST_list (&deps
->last_pending_memory_flush
);
3224 deps
->last_pending_memory_flush
= alloc_INSN_LIST (insn
, NULL_RTX
);
3227 /* Analyze a single SET, CLOBBER, PRE_DEC, POST_DEC, PRE_INC or POST_INC
3228 rtx, X, creating all dependencies generated by the write to the
3229 destination of X, and reads of everything mentioned. */
3232 sched_analyze_1 (deps
, x
, insn
)
3238 register rtx dest
= XEXP (x
, 0);
3239 enum rtx_code code
= GET_CODE (x
);
3244 if (GET_CODE (dest
) == PARALLEL
3245 && GET_MODE (dest
) == BLKmode
)
3248 for (i
= XVECLEN (dest
, 0) - 1; i
>= 0; i
--)
3249 sched_analyze_1 (deps
, XVECEXP (dest
, 0, i
), insn
);
3250 if (GET_CODE (x
) == SET
)
3251 sched_analyze_2 (deps
, SET_SRC (x
), insn
);
3255 while (GET_CODE (dest
) == STRICT_LOW_PART
|| GET_CODE (dest
) == SUBREG
3256 || GET_CODE (dest
) == ZERO_EXTRACT
|| GET_CODE (dest
) == SIGN_EXTRACT
)
3258 if (GET_CODE (dest
) == ZERO_EXTRACT
|| GET_CODE (dest
) == SIGN_EXTRACT
)
3260 /* The second and third arguments are values read by this insn. */
3261 sched_analyze_2 (deps
, XEXP (dest
, 1), insn
);
3262 sched_analyze_2 (deps
, XEXP (dest
, 2), insn
);
3264 dest
= XEXP (dest
, 0);
3267 if (GET_CODE (dest
) == REG
)
3271 regno
= REGNO (dest
);
3273 /* A hard reg in a wide mode may really be multiple registers.
3274 If so, mark all of them just like the first. */
3275 if (regno
< FIRST_PSEUDO_REGISTER
)
3277 i
= HARD_REGNO_NREGS (regno
, GET_MODE (dest
));
3283 for (u
= deps
->reg_last_uses
[r
]; u
; u
= XEXP (u
, 1))
3284 add_dependence (insn
, XEXP (u
, 0), REG_DEP_ANTI
);
3286 for (u
= deps
->reg_last_sets
[r
]; u
; u
= XEXP (u
, 1))
3287 add_dependence (insn
, XEXP (u
, 0), REG_DEP_OUTPUT
);
3289 /* Clobbers need not be ordered with respect to one
3290 another, but sets must be ordered with respect to a
3294 free_INSN_LIST_list (&deps
->reg_last_uses
[r
]);
3295 for (u
= deps
->reg_last_clobbers
[r
]; u
; u
= XEXP (u
, 1))
3296 add_dependence (insn
, XEXP (u
, 0), REG_DEP_OUTPUT
);
3297 SET_REGNO_REG_SET (reg_pending_sets
, r
);
3300 SET_REGNO_REG_SET (reg_pending_clobbers
, r
);
3302 /* Function calls clobber all call_used regs. */
3303 if (global_regs
[r
] || (code
== SET
&& call_used_regs
[r
]))
3304 for (u
= deps
->last_function_call
; u
; u
= XEXP (u
, 1))
3305 add_dependence (insn
, XEXP (u
, 0), REG_DEP_ANTI
);
3312 for (u
= deps
->reg_last_uses
[regno
]; u
; u
= XEXP (u
, 1))
3313 add_dependence (insn
, XEXP (u
, 0), REG_DEP_ANTI
);
3315 for (u
= deps
->reg_last_sets
[regno
]; u
; u
= XEXP (u
, 1))
3316 add_dependence (insn
, XEXP (u
, 0), REG_DEP_OUTPUT
);
3320 free_INSN_LIST_list (&deps
->reg_last_uses
[regno
]);
3321 for (u
= deps
->reg_last_clobbers
[regno
]; u
; u
= XEXP (u
, 1))
3322 add_dependence (insn
, XEXP (u
, 0), REG_DEP_OUTPUT
);
3323 SET_REGNO_REG_SET (reg_pending_sets
, regno
);
3326 SET_REGNO_REG_SET (reg_pending_clobbers
, regno
);
3328 /* Pseudos that are REG_EQUIV to something may be replaced
3329 by that during reloading. We need only add dependencies for
3330 the address in the REG_EQUIV note. */
3331 if (!reload_completed
3332 && reg_known_equiv_p
[regno
]
3333 && GET_CODE (reg_known_value
[regno
]) == MEM
)
3334 sched_analyze_2 (deps
, XEXP (reg_known_value
[regno
], 0), insn
);
3336 /* Don't let it cross a call after scheduling if it doesn't
3337 already cross one. */
3339 if (REG_N_CALLS_CROSSED (regno
) == 0)
3340 for (u
= deps
->last_function_call
; u
; u
= XEXP (u
, 1))
3341 add_dependence (insn
, XEXP (u
, 0), REG_DEP_ANTI
);
3344 else if (GET_CODE (dest
) == MEM
)
3346 /* Writing memory. */
3348 if (deps
->pending_lists_length
> 32)
3350 /* Flush all pending reads and writes to prevent the pending lists
3351 from getting any larger. Insn scheduling runs too slowly when
3352 these lists get long. The number 32 was chosen because it
3353 seems like a reasonable number. When compiling GCC with itself,
3354 this flush occurs 8 times for sparc, and 10 times for m88k using
3356 flush_pending_lists (deps
, insn
, 0);
3361 rtx pending
, pending_mem
;
3363 pending
= deps
->pending_read_insns
;
3364 pending_mem
= deps
->pending_read_mems
;
3367 if (anti_dependence (XEXP (pending_mem
, 0), dest
))
3368 add_dependence (insn
, XEXP (pending
, 0), REG_DEP_ANTI
);
3370 pending
= XEXP (pending
, 1);
3371 pending_mem
= XEXP (pending_mem
, 1);
3374 pending
= deps
->pending_write_insns
;
3375 pending_mem
= deps
->pending_write_mems
;
3378 if (output_dependence (XEXP (pending_mem
, 0), dest
))
3379 add_dependence (insn
, XEXP (pending
, 0), REG_DEP_OUTPUT
);
3381 pending
= XEXP (pending
, 1);
3382 pending_mem
= XEXP (pending_mem
, 1);
3385 for (u
= deps
->last_pending_memory_flush
; u
; u
= XEXP (u
, 1))
3386 add_dependence (insn
, XEXP (u
, 0), REG_DEP_ANTI
);
3388 add_insn_mem_dependence (deps
, &deps
->pending_write_insns
,
3389 &deps
->pending_write_mems
, insn
, dest
);
3391 sched_analyze_2 (deps
, XEXP (dest
, 0), insn
);
3394 /* Analyze reads. */
3395 if (GET_CODE (x
) == SET
)
3396 sched_analyze_2 (deps
, SET_SRC (x
), insn
);
3399 /* Analyze the uses of memory and registers in rtx X in INSN. */
3402 sched_analyze_2 (deps
, x
, insn
)
3409 register enum rtx_code code
;
3410 register const char *fmt
;
3415 code
= GET_CODE (x
);
3424 /* Ignore constants. Note that we must handle CONST_DOUBLE here
3425 because it may have a cc0_rtx in its CONST_DOUBLE_CHAIN field, but
3426 this does not mean that this insn is using cc0. */
3434 /* User of CC0 depends on immediately preceding insn. */
3435 SCHED_GROUP_P (insn
) = 1;
3437 /* There may be a note before this insn now, but all notes will
3438 be removed before we actually try to schedule the insns, so
3439 it won't cause a problem later. We must avoid it here though. */
3440 prev
= prev_nonnote_insn (insn
);
3442 /* Make a copy of all dependencies on the immediately previous insn,
3443 and add to this insn. This is so that all the dependencies will
3444 apply to the group. Remove an explicit dependence on this insn
3445 as SCHED_GROUP_P now represents it. */
3447 if (find_insn_list (prev
, LOG_LINKS (insn
)))
3448 remove_dependence (insn
, prev
);
3450 for (link
= LOG_LINKS (prev
); link
; link
= XEXP (link
, 1))
3451 add_dependence (insn
, XEXP (link
, 0), REG_NOTE_KIND (link
));
3460 int regno
= REGNO (x
);
3461 if (regno
< FIRST_PSEUDO_REGISTER
)
3465 i
= HARD_REGNO_NREGS (regno
, GET_MODE (x
));
3469 deps
->reg_last_uses
[r
]
3470 = alloc_INSN_LIST (insn
, deps
->reg_last_uses
[r
]);
3472 for (u
= deps
->reg_last_sets
[r
]; u
; u
= XEXP (u
, 1))
3473 add_dependence (insn
, XEXP (u
, 0), 0);
3475 /* ??? This should never happen. */
3476 for (u
= deps
->reg_last_clobbers
[r
]; u
; u
= XEXP (u
, 1))
3477 add_dependence (insn
, XEXP (u
, 0), 0);
3479 if (call_used_regs
[r
] || global_regs
[r
])
3480 /* Function calls clobber all call_used regs. */
3481 for (u
= deps
->last_function_call
; u
; u
= XEXP (u
, 1))
3482 add_dependence (insn
, XEXP (u
, 0), REG_DEP_ANTI
);
3487 deps
->reg_last_uses
[regno
]
3488 = alloc_INSN_LIST (insn
, deps
->reg_last_uses
[regno
]);
3490 for (u
= deps
->reg_last_sets
[regno
]; u
; u
= XEXP (u
, 1))
3491 add_dependence (insn
, XEXP (u
, 0), 0);
3493 /* ??? This should never happen. */
3494 for (u
= deps
->reg_last_clobbers
[regno
]; u
; u
= XEXP (u
, 1))
3495 add_dependence (insn
, XEXP (u
, 0), 0);
3497 /* Pseudos that are REG_EQUIV to something may be replaced
3498 by that during reloading. We need only add dependencies for
3499 the address in the REG_EQUIV note. */
3500 if (!reload_completed
3501 && reg_known_equiv_p
[regno
]
3502 && GET_CODE (reg_known_value
[regno
]) == MEM
)
3503 sched_analyze_2 (deps
, XEXP (reg_known_value
[regno
], 0), insn
);
3505 /* If the register does not already cross any calls, then add this
3506 insn to the sched_before_next_call list so that it will still
3507 not cross calls after scheduling. */
3508 if (REG_N_CALLS_CROSSED (regno
) == 0)
3509 add_dependence (deps
->sched_before_next_call
, insn
,
3517 /* Reading memory. */
3519 rtx pending
, pending_mem
;
3521 pending
= deps
->pending_read_insns
;
3522 pending_mem
= deps
->pending_read_mems
;
3525 if (read_dependence (XEXP (pending_mem
, 0), x
))
3526 add_dependence (insn
, XEXP (pending
, 0), REG_DEP_ANTI
);
3528 pending
= XEXP (pending
, 1);
3529 pending_mem
= XEXP (pending_mem
, 1);
3532 pending
= deps
->pending_write_insns
;
3533 pending_mem
= deps
->pending_write_mems
;
3536 if (true_dependence (XEXP (pending_mem
, 0), VOIDmode
,
3538 add_dependence (insn
, XEXP (pending
, 0), 0);
3540 pending
= XEXP (pending
, 1);
3541 pending_mem
= XEXP (pending_mem
, 1);
3544 for (u
= deps
->last_pending_memory_flush
; u
; u
= XEXP (u
, 1))
3545 add_dependence (insn
, XEXP (u
, 0), REG_DEP_ANTI
);
3547 /* Always add these dependencies to pending_reads, since
3548 this insn may be followed by a write. */
3549 add_insn_mem_dependence (deps
, &deps
->pending_read_insns
,
3550 &deps
->pending_read_mems
, insn
, x
);
3552 /* Take advantage of tail recursion here. */
3553 sched_analyze_2 (deps
, XEXP (x
, 0), insn
);
3557 /* Force pending stores to memory in case a trap handler needs them. */
3559 flush_pending_lists (deps
, insn
, 1);
3564 case UNSPEC_VOLATILE
:
3568 /* Traditional and volatile asm instructions must be considered to use
3569 and clobber all hard registers, all pseudo-registers and all of
3570 memory. So must TRAP_IF and UNSPEC_VOLATILE operations.
3572 Consider for instance a volatile asm that changes the fpu rounding
3573 mode. An insn should not be moved across this even if it only uses
3574 pseudo-regs because it might give an incorrectly rounded result. */
3575 if (code
!= ASM_OPERANDS
|| MEM_VOLATILE_P (x
))
3577 int max_reg
= max_reg_num ();
3578 for (i
= 0; i
< max_reg
; i
++)
3580 for (u
= deps
->reg_last_uses
[i
]; u
; u
= XEXP (u
, 1))
3581 add_dependence (insn
, XEXP (u
, 0), REG_DEP_ANTI
);
3582 free_INSN_LIST_list (&deps
->reg_last_uses
[i
]);
3584 for (u
= deps
->reg_last_sets
[i
]; u
; u
= XEXP (u
, 1))
3585 add_dependence (insn
, XEXP (u
, 0), 0);
3587 for (u
= deps
->reg_last_clobbers
[i
]; u
; u
= XEXP (u
, 1))
3588 add_dependence (insn
, XEXP (u
, 0), 0);
3590 reg_pending_sets_all
= 1;
3592 flush_pending_lists (deps
, insn
, 0);
3595 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
3596 We can not just fall through here since then we would be confused
3597 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
3598 traditional asms unlike their normal usage. */
3600 if (code
== ASM_OPERANDS
)
3602 for (j
= 0; j
< ASM_OPERANDS_INPUT_LENGTH (x
); j
++)
3603 sched_analyze_2 (deps
, ASM_OPERANDS_INPUT (x
, j
), insn
);
3613 /* These both read and modify the result. We must handle them as writes
3614 to get proper dependencies for following instructions. We must handle
3615 them as reads to get proper dependencies from this to previous
3616 instructions. Thus we need to pass them to both sched_analyze_1
3617 and sched_analyze_2. We must call sched_analyze_2 first in order
3618 to get the proper antecedent for the read. */
3619 sched_analyze_2 (deps
, XEXP (x
, 0), insn
);
3620 sched_analyze_1 (deps
, x
, insn
);
3627 /* Other cases: walk the insn. */
3628 fmt
= GET_RTX_FORMAT (code
);
3629 for (i
= GET_RTX_LENGTH (code
) - 1; i
>= 0; i
--)
3632 sched_analyze_2 (deps
, XEXP (x
, i
), insn
);
3633 else if (fmt
[i
] == 'E')
3634 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
3635 sched_analyze_2 (deps
, XVECEXP (x
, i
, j
), insn
);
3639 /* Analyze an INSN with pattern X to find all dependencies. */
3642 sched_analyze_insn (deps
, x
, insn
, loop_notes
)
3647 register RTX_CODE code
= GET_CODE (x
);
3649 int maxreg
= max_reg_num ();
3652 if (code
== SET
|| code
== CLOBBER
)
3653 sched_analyze_1 (deps
, x
, insn
);
3654 else if (code
== PARALLEL
)
3657 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; i
--)
3659 code
= GET_CODE (XVECEXP (x
, 0, i
));
3660 if (code
== SET
|| code
== CLOBBER
)
3661 sched_analyze_1 (deps
, XVECEXP (x
, 0, i
), insn
);
3663 sched_analyze_2 (deps
, XVECEXP (x
, 0, i
), insn
);
3667 sched_analyze_2 (deps
, x
, insn
);
3669 /* Mark registers CLOBBERED or used by called function. */
3670 if (GET_CODE (insn
) == CALL_INSN
)
3671 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
3673 if (GET_CODE (XEXP (link
, 0)) == CLOBBER
)
3674 sched_analyze_1 (deps
, XEXP (link
, 0), insn
);
3676 sched_analyze_2 (deps
, XEXP (link
, 0), insn
);
3679 /* If there is a {LOOP,EHREGION}_{BEG,END} note in the middle of a basic
3680 block, then we must be sure that no instructions are scheduled across it.
3681 Otherwise, the reg_n_refs info (which depends on loop_depth) would
3682 become incorrect. */
3686 int max_reg
= max_reg_num ();
3687 int schedule_barrier_found
= 0;
3690 /* Update loop_notes with any notes from this insn. Also determine
3691 if any of the notes on the list correspond to instruction scheduling
3692 barriers (loop, eh & setjmp notes, but not range notes. */
3694 while (XEXP (link
, 1))
3696 if (INTVAL (XEXP (link
, 0)) == NOTE_INSN_LOOP_BEG
3697 || INTVAL (XEXP (link
, 0)) == NOTE_INSN_LOOP_END
3698 || INTVAL (XEXP (link
, 0)) == NOTE_INSN_EH_REGION_BEG
3699 || INTVAL (XEXP (link
, 0)) == NOTE_INSN_EH_REGION_END
3700 || INTVAL (XEXP (link
, 0)) == NOTE_INSN_SETJMP
)
3701 schedule_barrier_found
= 1;
3703 link
= XEXP (link
, 1);
3705 XEXP (link
, 1) = REG_NOTES (insn
);
3706 REG_NOTES (insn
) = loop_notes
;
3708 /* Add dependencies if a scheduling barrier was found. */
3709 if (schedule_barrier_found
)
3711 for (i
= 0; i
< max_reg
; i
++)
3714 for (u
= deps
->reg_last_uses
[i
]; u
; u
= XEXP (u
, 1))
3715 add_dependence (insn
, XEXP (u
, 0), REG_DEP_ANTI
);
3716 free_INSN_LIST_list (&deps
->reg_last_uses
[i
]);
3718 for (u
= deps
->reg_last_sets
[i
]; u
; u
= XEXP (u
, 1))
3719 add_dependence (insn
, XEXP (u
, 0), 0);
3721 for (u
= deps
->reg_last_clobbers
[i
]; u
; u
= XEXP (u
, 1))
3722 add_dependence (insn
, XEXP (u
, 0), 0);
3724 reg_pending_sets_all
= 1;
3726 flush_pending_lists (deps
, insn
, 0);
3731 /* Accumulate clobbers until the next set so that it will be output dependent
3732 on all of them. At the next set we can clear the clobber list, since
3733 subsequent sets will be output dependent on it. */
3734 EXECUTE_IF_SET_IN_REG_SET
3735 (reg_pending_sets
, 0, i
,
3737 free_INSN_LIST_list (&deps
->reg_last_sets
[i
]);
3738 free_INSN_LIST_list (&deps
->reg_last_clobbers
[i
]);
3739 deps
->reg_last_sets
[i
] = alloc_INSN_LIST (insn
, NULL_RTX
);
3741 EXECUTE_IF_SET_IN_REG_SET
3742 (reg_pending_clobbers
, 0, i
,
3744 deps
->reg_last_clobbers
[i
]
3745 = alloc_INSN_LIST (insn
, deps
->reg_last_clobbers
[i
]);
3747 CLEAR_REG_SET (reg_pending_sets
);
3748 CLEAR_REG_SET (reg_pending_clobbers
);
3750 if (reg_pending_sets_all
)
3752 for (i
= 0; i
< maxreg
; i
++)
3754 free_INSN_LIST_list (&deps
->reg_last_sets
[i
]);
3755 free_INSN_LIST_list (&deps
->reg_last_clobbers
[i
]);
3756 deps
->reg_last_sets
[i
] = alloc_INSN_LIST (insn
, NULL_RTX
);
3759 reg_pending_sets_all
= 0;
3762 /* Handle function calls and function returns created by the epilogue
3764 if (GET_CODE (insn
) == CALL_INSN
|| GET_CODE (insn
) == JUMP_INSN
)
3769 /* When scheduling instructions, we make sure calls don't lose their
3770 accompanying USE insns by depending them one on another in order.
3772 Also, we must do the same thing for returns created by the epilogue
3773 threading code. Note this code works only in this special case,
3774 because other passes make no guarantee that they will never emit
3775 an instruction between a USE and a RETURN. There is such a guarantee
3776 for USE instructions immediately before a call. */
3778 prev_dep_insn
= insn
;
3779 dep_insn
= PREV_INSN (insn
);
3780 while (GET_CODE (dep_insn
) == INSN
3781 && GET_CODE (PATTERN (dep_insn
)) == USE
3782 && GET_CODE (XEXP (PATTERN (dep_insn
), 0)) == REG
)
3784 SCHED_GROUP_P (prev_dep_insn
) = 1;
3786 /* Make a copy of all dependencies on dep_insn, and add to insn.
3787 This is so that all of the dependencies will apply to the
3790 for (link
= LOG_LINKS (dep_insn
); link
; link
= XEXP (link
, 1))
3791 add_dependence (insn
, XEXP (link
, 0), REG_NOTE_KIND (link
));
3793 prev_dep_insn
= dep_insn
;
3794 dep_insn
= PREV_INSN (dep_insn
);
3799 /* Analyze every insn between HEAD and TAIL inclusive, creating LOG_LINKS
3800 for every dependency. */
3803 sched_analyze (deps
, head
, tail
)
3811 for (insn
= head
;; insn
= NEXT_INSN (insn
))
3813 if (GET_CODE (insn
) == INSN
|| GET_CODE (insn
) == JUMP_INSN
)
3815 /* Clear out the stale LOG_LINKS from flow. */
3816 free_INSN_LIST_list (&LOG_LINKS (insn
));
3818 /* Make each JUMP_INSN a scheduling barrier for memory
3820 if (GET_CODE (insn
) == JUMP_INSN
)
3821 deps
->last_pending_memory_flush
3822 = alloc_INSN_LIST (insn
, deps
->last_pending_memory_flush
);
3823 sched_analyze_insn (deps
, PATTERN (insn
), insn
, loop_notes
);
3826 else if (GET_CODE (insn
) == CALL_INSN
)
3831 CANT_MOVE (insn
) = 1;
3833 /* Clear out the stale LOG_LINKS from flow. */
3834 free_INSN_LIST_list (&LOG_LINKS (insn
));
3836 /* Any instruction using a hard register which may get clobbered
3837 by a call needs to be marked as dependent on this call.
3838 This prevents a use of a hard return reg from being moved
3839 past a void call (i.e. it does not explicitly set the hard
3842 /* If this call is followed by a NOTE_INSN_SETJMP, then assume that
3843 all registers, not just hard registers, may be clobbered by this
3846 /* Insn, being a CALL_INSN, magically depends on
3847 `last_function_call' already. */
3849 if (NEXT_INSN (insn
) && GET_CODE (NEXT_INSN (insn
)) == NOTE
3850 && NOTE_LINE_NUMBER (NEXT_INSN (insn
)) == NOTE_INSN_SETJMP
)
3852 int max_reg
= max_reg_num ();
3853 for (i
= 0; i
< max_reg
; i
++)
3855 for (u
= deps
->reg_last_uses
[i
]; u
; u
= XEXP (u
, 1))
3856 add_dependence (insn
, XEXP (u
, 0), REG_DEP_ANTI
);
3857 free_INSN_LIST_list (&deps
->reg_last_uses
[i
]);
3859 for (u
= deps
->reg_last_sets
[i
]; u
; u
= XEXP (u
, 1))
3860 add_dependence (insn
, XEXP (u
, 0), 0);
3862 for (u
= deps
->reg_last_clobbers
[i
]; u
; u
= XEXP (u
, 1))
3863 add_dependence (insn
, XEXP (u
, 0), 0);
3865 reg_pending_sets_all
= 1;
3867 /* Add a pair of REG_SAVE_NOTEs which we will later
3868 convert back into a NOTE_INSN_SETJMP note. See
3869 reemit_notes for why we use a pair of NOTEs. */
3870 REG_NOTES (insn
) = alloc_EXPR_LIST (REG_SAVE_NOTE
,
3873 REG_NOTES (insn
) = alloc_EXPR_LIST (REG_SAVE_NOTE
,
3874 GEN_INT (NOTE_INSN_SETJMP
),
3879 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3880 if (call_used_regs
[i
] || global_regs
[i
])
3882 for (u
= deps
->reg_last_uses
[i
]; u
; u
= XEXP (u
, 1))
3883 add_dependence (insn
, XEXP (u
, 0), REG_DEP_ANTI
);
3885 for (u
= deps
->reg_last_sets
[i
]; u
; u
= XEXP (u
, 1))
3886 add_dependence (insn
, XEXP (u
, 0), REG_DEP_ANTI
);
3888 SET_REGNO_REG_SET (reg_pending_clobbers
, i
);
3892 /* For each insn which shouldn't cross a call, add a dependence
3893 between that insn and this call insn. */
3894 x
= LOG_LINKS (deps
->sched_before_next_call
);
3897 add_dependence (insn
, XEXP (x
, 0), REG_DEP_ANTI
);
3900 free_INSN_LIST_list (&LOG_LINKS (deps
->sched_before_next_call
));
3902 sched_analyze_insn (deps
, PATTERN (insn
), insn
, loop_notes
);
3905 /* In the absence of interprocedural alias analysis, we must flush
3906 all pending reads and writes, and start new dependencies starting
3907 from here. But only flush writes for constant calls (which may
3908 be passed a pointer to something we haven't written yet). */
3909 flush_pending_lists (deps
, insn
, CONST_CALL_P (insn
));
3911 /* Depend this function call (actually, the user of this
3912 function call) on all hard register clobberage. */
3914 /* last_function_call is now a list of insns. */
3915 free_INSN_LIST_list (&deps
->last_function_call
);
3916 deps
->last_function_call
= alloc_INSN_LIST (insn
, NULL_RTX
);
3919 /* See comments on reemit_notes as to why we do this.
3920 ??? Actually, the reemit_notes just say what is done, not why. */
3922 else if (GET_CODE (insn
) == NOTE
3923 && (NOTE_LINE_NUMBER (insn
) == NOTE_INSN_RANGE_START
3924 || NOTE_LINE_NUMBER (insn
) == NOTE_INSN_RANGE_END
))
3926 loop_notes
= alloc_EXPR_LIST (REG_SAVE_NOTE
, NOTE_RANGE_INFO (insn
),
3928 loop_notes
= alloc_EXPR_LIST (REG_SAVE_NOTE
,
3929 GEN_INT (NOTE_LINE_NUMBER (insn
)),
3932 else if (GET_CODE (insn
) == NOTE
3933 && (NOTE_LINE_NUMBER (insn
) == NOTE_INSN_LOOP_BEG
3934 || NOTE_LINE_NUMBER (insn
) == NOTE_INSN_LOOP_END
3935 || NOTE_LINE_NUMBER (insn
) == NOTE_INSN_EH_REGION_BEG
3936 || NOTE_LINE_NUMBER (insn
) == NOTE_INSN_EH_REGION_END
3937 || (NOTE_LINE_NUMBER (insn
) == NOTE_INSN_SETJMP
3938 && GET_CODE (PREV_INSN (insn
)) != CALL_INSN
)))
3942 if (NOTE_LINE_NUMBER (insn
) == NOTE_INSN_EH_REGION_BEG
3943 || NOTE_LINE_NUMBER (insn
) == NOTE_INSN_EH_REGION_END
)
3944 rtx_region
= GEN_INT (NOTE_EH_HANDLER (insn
));
3946 rtx_region
= GEN_INT (0);
3948 loop_notes
= alloc_EXPR_LIST (REG_SAVE_NOTE
,
3951 loop_notes
= alloc_EXPR_LIST (REG_SAVE_NOTE
,
3952 GEN_INT (NOTE_LINE_NUMBER (insn
)),
3954 CONST_CALL_P (loop_notes
) = CONST_CALL_P (insn
);
3963 /* Macros and functions for keeping the priority queue sorted, and
3964 dealing with queueing and dequeueing of instructions. */
3966 #define SCHED_SORT(READY, N_READY) \
3967 do { if ((N_READY) == 2) \
3968 swap_sort (READY, N_READY); \
3969 else if ((N_READY) > 2) \
3970 qsort (READY, N_READY, sizeof (rtx), rank_for_schedule); } \
/* NOTE(review): qsort comparator for the scheduler's ready list.  The
   fragments below are a partial extraction -- several original lines
   (braces, a few `if` guards, class assignments) are missing between
   them; do not edit logic without consulting the full haifa-sched.c.  */
3973 /* Returns a positive value if x is preferred; returns a negative value if
3974 y is preferred. Should never return 0, since that will make the sort
3978 rank_for_schedule (x
, y
)
/* x/y are const void* elements of an rtx array; note the swap below:
   tmp is taken from Y and tmp2 from X, so that "prefer tmp2" sorts the
   preferred insn toward the end of the array.  */
3982 rtx tmp
= *(const rtx
*)y
;
3983 rtx tmp2
= *(const rtx
*)x
;
3985 int tmp_class
, tmp2_class
, depend_count1
, depend_count2
;
3986 int val
, priority_val
, spec_val
, prob_val
, weight_val
;
3989 /* Prefer insn with higher priority. */
3990 priority_val
= INSN_PRIORITY (tmp2
) - INSN_PRIORITY (tmp
);
/* NOTE(review): the guard `if (priority_val)` preceding this return was
   dropped by the extraction -- presumably only a nonzero difference
   returns here; confirm against the original source.  */
3992 return priority_val
;
3994 /* Prefer an insn with smaller contribution to registers-pressure. */
3995 if (!reload_completed
&&
3996 (weight_val
= INSN_REG_WEIGHT (tmp
) - INSN_REG_WEIGHT (tmp2
)))
3997 return (weight_val
);
3999 /* Some comparison make sense in interblock scheduling only. */
4000 if (INSN_BB (tmp
) != INSN_BB (tmp2
))
4002 /* Prefer an inblock motion on an interblock motion. */
4003 if ((INSN_BB (tmp2
) == target_bb
) && (INSN_BB (tmp
) != target_bb
))
4005 if ((INSN_BB (tmp
) == target_bb
) && (INSN_BB (tmp2
) != target_bb
))
4008 /* Prefer a useful motion on a speculative one. */
4009 if ((spec_val
= IS_SPECULATIVE_INSN (tmp
) - IS_SPECULATIVE_INSN (tmp2
)))
4012 /* Prefer a more probable (speculative) insn. */
4013 prob_val
= INSN_PROBABILITY (tmp2
) - INSN_PROBABILITY (tmp
);
4018 /* Compare insns based on their relation to the last-scheduled-insn. */
4019 if (last_scheduled_insn
)
4021 /* Classify the instructions into three classes:
4022 1) Data dependent on last schedule insn.
4023 2) Anti/Output dependent on last scheduled insn.
4024 3) Independent of last scheduled insn, or has latency of one.
4025 Choose the insn from the highest numbered class if different. */
4026 link
= find_insn_list (tmp
, INSN_DEPEND (last_scheduled_insn
));
4027 if (link
== 0 || insn_cost (last_scheduled_insn
, link
, tmp
) == 1)
4029 else if (REG_NOTE_KIND (link
) == 0) /* Data dependence. */
/* NOTE(review): the tmp_class/tmp2_class assignments for each branch
   were dropped by the extraction; only the classification tests remain. */
4034 link
= find_insn_list (tmp2
, INSN_DEPEND (last_scheduled_insn
));
4035 if (link
== 0 || insn_cost (last_scheduled_insn
, link
, tmp2
) == 1)
4037 else if (REG_NOTE_KIND (link
) == 0) /* Data dependence. */
4042 if ((val
= tmp2_class
- tmp_class
))
4046 /* Prefer the insn which has more later insns that depend on it.
4047 This gives the scheduler more freedom when scheduling later
4048 instructions at the expense of added register pressure. */
4050 for (link
= INSN_DEPEND (tmp
); link
; link
= XEXP (link
, 1))
4054 for (link
= INSN_DEPEND (tmp2
); link
; link
= XEXP (link
, 1))
4057 val
= depend_count2
- depend_count1
;
4061 /* If insns are equally good, sort by INSN_LUID (original insn order),
4062 so that we make the sort stable. This minimizes instruction movement,
4063 thus minimizing sched's effect on debugging and cross-jumping. */
4064 return INSN_LUID (tmp
) - INSN_LUID (tmp2
);
4067 /* Resort the array A in which only element at index N may be out of order. */
4069 HAIFA_INLINE
static void
4074 rtx insn
= a
[n
- 1];
4077 while (i
>= 0 && rank_for_schedule (a
+ i
, &insn
) >= 0)
/* Highest priority seen so far; used by adjust_priority (see below).  */
4085 static int max_priority
;
4087 /* Add INSN to the insn queue so that it can be executed at least
4088 N_CYCLES after the currently executing insn. Preserve insns
4089 chain for debugging purposes. */
4091 HAIFA_INLINE
static void
4092 queue_insn (insn
, n_cycles
)
/* NOTE(review): parameter declarations (K&R style: rtx insn; int
   n_cycles;) and the opening brace were dropped by the extraction.  */
/* Prepend INSN onto the queue bucket N_CYCLES slots ahead of q_ptr.  */
4096 int next_q
= NEXT_Q_AFTER (q_ptr
, n_cycles
);
4097 rtx link
= alloc_INSN_LIST (insn
, insn_queue
[next_q
]);
4098 insn_queue
[next_q
] = link
;
/* Verbose dump of the queueing decision (debug output only).  */
4101 if (sched_verbose
>= 2)
4103 fprintf (dump
, ";;\t\tReady-->Q: insn %d: ", INSN_UID (insn
));
4105 if (INSN_BB (insn
) != target_bb
)
4106 fprintf (dump
, "(b%d) ", BLOCK_NUM (insn
));
4108 fprintf (dump
, "queued for %d cycles.\n", n_cycles
);
4113 /* PREV is an insn that is ready to execute. Adjust its priority if that
4114 will help shorten or lengthen register lifetimes as appropriate. Also
4115 provide a hook for the target to tweek itself. */
4117 HAIFA_INLINE
static void
4118 adjust_priority (prev
)
4119 rtx prev ATTRIBUTE_UNUSED
;
4121 /* ??? There used to be code here to try and estimate how an insn
4122 affected register lifetimes, but it did it by looking at REG_DEAD
4123 notes, which we removed in schedule_region. Nor did it try to
4124 take into account register pressure or anything useful like that.
4126 Revisit when we have a machine model to work with and not before. */
/* Delegate to the target's ADJUST_PRIORITY macro when the port defines
   one; otherwise this function is a no-op (PREV is unused).  */
4128 #ifdef ADJUST_PRIORITY
4129 ADJUST_PRIORITY (prev
);
4133 /* Clock at which the previous instruction was issued. */
4134 static int last_clock_var
;
4136 /* INSN is the "currently executing insn". Launch each insn which was
4137 waiting on INSN. READY is a vector of insns which are ready to fire.
4138 N_READY is the number of elements in READY. CLOCK is the current
4142 schedule_insn (insn
, ready
, n_ready
, clock
)
4151 unit
= insn_unit (insn
);
4153 if (sched_verbose
>= 2)
4155 fprintf (dump
, ";;\t\t--> scheduling insn <<<%d>>> on unit ",
4157 insn_print_units (insn
);
4158 fprintf (dump
, "\n");
4161 if (sched_verbose
&& unit
== -1)
4162 visualize_no_unit (insn
);
4164 if (MAX_BLOCKAGE
> 1 || issue_rate
> 1 || sched_verbose
)
4165 schedule_unit (unit
, insn
, clock
);
4167 if (INSN_DEPEND (insn
) == 0)
4170 /* This is used by the function adjust_priority above. */
4172 max_priority
= MAX (INSN_PRIORITY (ready
[0]), INSN_PRIORITY (insn
));
4174 max_priority
= INSN_PRIORITY (insn
);
4176 for (link
= INSN_DEPEND (insn
); link
!= 0; link
= XEXP (link
, 1))
4178 rtx next
= XEXP (link
, 0);
4179 int cost
= insn_cost (insn
, link
, next
);
4181 INSN_TICK (next
) = MAX (INSN_TICK (next
), clock
+ cost
);
4183 if ((INSN_DEP_COUNT (next
) -= 1) == 0)
4185 int effective_cost
= INSN_TICK (next
) - clock
;
4187 /* For speculative insns, before inserting to ready/queue,
4188 check live, exception-free, and issue-delay. */
4189 if (INSN_BB (next
) != target_bb
4190 && (!IS_VALID (INSN_BB (next
))
4192 || (IS_SPECULATIVE_INSN (next
)
4193 && (insn_issue_delay (next
) > 3
4194 || !check_live (next
, INSN_BB (next
))
4195 || !is_exception_free (next
, INSN_BB (next
), target_bb
)))))
4198 if (sched_verbose
>= 2)
4200 fprintf (dump
, ";;\t\tdependences resolved: insn %d ",
4203 if (current_nr_blocks
> 1 && INSN_BB (next
) != target_bb
)
4204 fprintf (dump
, "/b%d ", BLOCK_NUM (next
));
4206 if (effective_cost
< 1)
4207 fprintf (dump
, "into ready\n");
4209 fprintf (dump
, "into queue with cost=%d\n", effective_cost
);
4212 /* Adjust the priority of NEXT and either put it on the ready
4213 list or queue it. */
4214 adjust_priority (next
);
4215 if (effective_cost
< 1)
4216 ready
[n_ready
++] = next
;
4218 queue_insn (next
, effective_cost
);
4222 /* Annotate the instruction with issue information -- TImode
4223 indicates that the instruction is expected not to be able
4224 to issue on the same cycle as the previous insn. A machine
4225 may use this information to decide how the instruction should
4227 if (reload_completed
&& issue_rate
> 1)
4229 PUT_MODE (insn
, clock
> last_clock_var
? TImode
: VOIDmode
);
4230 last_clock_var
= clock
;
4236 /* Functions for handling of notes. */
4238 /* Delete notes beginning with INSN and put them in the chain
4239 of notes ended by NOTE_LIST.
4240 Returns the insn following the notes. */
4243 unlink_other_notes (insn
, tail
)
/* NOTE(review): return type / parameter declarations were dropped by
   the extraction; per the comment above, this returns an rtx.  */
4246 rtx prev
= PREV_INSN (insn
);
/* Walk forward over consecutive NOTE insns, unlinking each one.  */
4248 while (insn
!= tail
&& GET_CODE (insn
) == NOTE
)
4250 rtx next
= NEXT_INSN (insn
);
4251 /* Delete the note from its current position. */
4253 NEXT_INSN (prev
) = next
;
4255 PREV_INSN (next
) = prev
;
4257 /* See sched_analyze to see how these are handled. */
/* Scheduling-barrier notes (setjmp, loop/EH-region/range boundaries)
   are excluded here -- they are reemitted via REG_SAVE_NOTEs instead
   of being saved on note_list.  */
4258 if (NOTE_LINE_NUMBER (insn
) != NOTE_INSN_SETJMP
4259 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_LOOP_BEG
4260 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_LOOP_END
4261 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_RANGE_START
4262 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_RANGE_END
4263 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_EH_REGION_BEG
4264 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_EH_REGION_END
)
4266 /* Insert the note at the end of the notes list. */
4267 PREV_INSN (insn
) = note_list
;
4269 NEXT_INSN (note_list
) = insn
;
4278 /* Delete line notes beginning with INSN. Record line-number notes so
4279 they can be reused. Returns the insn following the notes. */
4282 unlink_line_notes (insn
, tail
)
/* NOTE(review): return type / parameter declarations were dropped by
   the extraction; per the comment above, this returns an rtx.  */
4285 rtx prev
= PREV_INSN (insn
);
/* Walk forward over consecutive NOTE insns.  */
4287 while (insn
!= tail
&& GET_CODE (insn
) == NOTE
)
4289 rtx next
= NEXT_INSN (insn
);
/* Only true line-number notes (line number > 0) are processed, and
   only when debug info is being emitted.  */
4291 if (write_symbols
!= NO_DEBUG
&& NOTE_LINE_NUMBER (insn
) > 0)
4293 /* Delete the note from its current position. */
4295 NEXT_INSN (prev
) = next
;
4297 PREV_INSN (next
) = prev
;
4299 /* Record line-number notes so they can be reused. */
4300 LINE_NOTE (insn
) = insn
;
4310 /* Return the head and tail pointers of BB. */
4312 HAIFA_INLINE
static void
4313 get_block_head_tail (b
, headp
, tailp
)
/* NOTE(review): parameter declarations and the local head/tail
   declarations were dropped by the extraction; results are presumably
   stored through *headp / *tailp below this fragment.  */
4322 /* HEAD and TAIL delimit the basic block being scheduled. */
4323 head
= BLOCK_HEAD (b
);
4324 tail
= BLOCK_END (b
);
4326 /* Don't include any notes or labels at the beginning of the
4327 basic block, or notes at the ends of basic blocks. */
/* Shrink [head, tail] from both ends until both point at real insns.  */
4328 while (head
!= tail
)
4330 if (GET_CODE (head
) == NOTE
)
4331 head
= NEXT_INSN (head
);
4332 else if (GET_CODE (tail
) == NOTE
)
4333 tail
= PREV_INSN (tail
);
4334 else if (GET_CODE (head
) == CODE_LABEL
)
4335 head
= NEXT_INSN (head
);
/* Convenience wrapper: like get_block_head_tail, but takes a scheduler
   region-relative block index BB and maps it to a real basic block
   number with BB_TO_BLOCK.  */
4344 HAIFA_INLINE
static void
4345 get_bb_head_tail (bb
, headp
, tailp
)
4350 get_block_head_tail (BB_TO_BLOCK (bb
), headp
, tailp
);
4353 /* Delete line notes from bb. Save them so they can be later restored
4354 (in restore_line_notes ()). */
4365 get_bb_head_tail (bb
, &head
, &tail
);
4368 && (GET_RTX_CLASS (GET_CODE (head
)) != 'i'))
4371 next_tail
= NEXT_INSN (tail
);
4372 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
4376 /* Farm out notes, and maybe save them in NOTE_LIST.
4377 This is needed to keep the debugger from
4378 getting completely deranged. */
4379 if (GET_CODE (insn
) == NOTE
)
4382 insn
= unlink_line_notes (insn
, next_tail
);
4388 if (insn
== next_tail
)
4394 /* Save line number notes for each insn in bb. */
4397 save_line_notes (bb
)
4403 /* We must use the true line number for the first insn in the block
4404 that was computed and saved at the start of this pass. We can't
4405 use the current line number, because scheduling of the previous
4406 block may have changed the current line number. */
/* line_note_head[] was filled in at pass start; it holds the line note
   in effect at the head of each block.  */
4408 rtx line
= line_note_head
[BB_TO_BLOCK (bb
)];
4411 get_bb_head_tail (bb
, &head
, &tail
);
4412 next_tail
= NEXT_INSN (tail
);
/* Walk the block; track the most recent line note in LINE and stamp it
   onto each insn's LINE_NOTE field.  (The extraction dropped the loop's
   termination condition and the `line = insn;` / LINE_NOTE updates in
   between -- see the original source.)  */
4414 for (insn
= BLOCK_HEAD (BB_TO_BLOCK (bb
));
4416 insn
= NEXT_INSN (insn
))
4417 if (GET_CODE (insn
) == NOTE
&& NOTE_LINE_NUMBER (insn
) > 0)
4420 LINE_NOTE (insn
) = line
;
4424 /* After bb was scheduled, insert line notes into the insns list. */
4427 restore_line_notes (bb
)
4430 rtx line
, note
, prev
, new;
4431 int added_notes
= 0;
4433 rtx head
, next_tail
, insn
;
4435 b
= BB_TO_BLOCK (bb
);
4437 head
= BLOCK_HEAD (b
);
4438 next_tail
= NEXT_INSN (BLOCK_END (b
));
4440 /* Determine the current line-number. We want to know the current
4441 line number of the first insn of the block here, in case it is
4442 different from the true line number that was saved earlier. If
4443 different, then we need a line number note before the first insn
4444 of this block. If it happens to be the same, then we don't want to
4445 emit another line number note here. */
4446 for (line
= head
; line
; line
= PREV_INSN (line
))
4447 if (GET_CODE (line
) == NOTE
&& NOTE_LINE_NUMBER (line
) > 0)
4450 /* Walk the insns keeping track of the current line-number and inserting
4451 the line-number notes as needed. */
4452 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
4453 if (GET_CODE (insn
) == NOTE
&& NOTE_LINE_NUMBER (insn
) > 0)
4455 /* This used to emit line number notes before every non-deleted note.
4456 However, this confuses a debugger, because line notes not separated
4457 by real instructions all end up at the same address. I can find no
4458 use for line number notes before other notes, so none are emitted. */
4459 else if (GET_CODE (insn
) != NOTE
4460 && (note
= LINE_NOTE (insn
)) != 0
4463 || NOTE_LINE_NUMBER (note
) != NOTE_LINE_NUMBER (line
)
4464 || NOTE_SOURCE_FILE (note
) != NOTE_SOURCE_FILE (line
)))
4467 prev
= PREV_INSN (insn
);
4468 if (LINE_NOTE (note
))
4470 /* Re-use the original line-number note. */
4471 LINE_NOTE (note
) = 0;
4472 PREV_INSN (note
) = prev
;
4473 NEXT_INSN (prev
) = note
;
4474 PREV_INSN (insn
) = note
;
4475 NEXT_INSN (note
) = insn
;
4480 new = emit_note_after (NOTE_LINE_NUMBER (note
), prev
);
4481 NOTE_SOURCE_FILE (new) = NOTE_SOURCE_FILE (note
);
4482 RTX_INTEGRATED_P (new) = RTX_INTEGRATED_P (note
);
4485 if (sched_verbose
&& added_notes
)
4486 fprintf (dump
, ";; added %d line-number notes\n", added_notes
);
4489 /* After scheduling the function, delete redundant line notes from the
4493 rm_redundant_line_notes ()
/* NOTE(review): the extraction dropped the local `rtx line` and `int
   notes` declarations plus several braces; this walks the whole insn
   chain once, backwards.  */
4496 rtx insn
= get_insns ();
4497 int active_insn
= 0;
4500 /* Walk the insns deleting redundant line-number notes. Many of these
4501 are already present. The remainder tend to occur at basic
4502 block boundaries. */
4503 for (insn
= get_last_insn (); insn
; insn
= PREV_INSN (insn
))
4504 if (GET_CODE (insn
) == NOTE
&& NOTE_LINE_NUMBER (insn
) > 0)
4506 /* If there are no active insns following, INSN is redundant. */
4507 if (active_insn
== 0)
/* A note is "deleted" in place by clearing its file and renumbering
   it to NOTE_INSN_DELETED rather than by unlinking it.  */
4510 NOTE_SOURCE_FILE (insn
) = 0;
4511 NOTE_LINE_NUMBER (insn
) = NOTE_INSN_DELETED
;
4513 /* If the line number is unchanged, LINE is redundant. */
4515 && NOTE_LINE_NUMBER (line
) == NOTE_LINE_NUMBER (insn
)
4516 && NOTE_SOURCE_FILE (line
) == NOTE_SOURCE_FILE (insn
))
4519 NOTE_SOURCE_FILE (line
) = 0;
4520 NOTE_LINE_NUMBER (line
) = NOTE_INSN_DELETED
;
/* Anything that is not a deleted note and not a USE/CLOBBER counts as
   an "active" insn for the redundancy test above.  */
4527 else if (!((GET_CODE (insn
) == NOTE
4528 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED
)
4529 || (GET_CODE (insn
) == INSN
4530 && (GET_CODE (PATTERN (insn
)) == USE
4531 || GET_CODE (PATTERN (insn
)) == CLOBBER
))))
4534 if (sched_verbose
&& notes
)
4535 fprintf (dump
, ";; deleted %d line-number notes\n", notes
);
4538 /* Delete notes between head and tail and put them in the chain
4539 of notes ended by NOTE_LIST. */
4542 rm_other_notes (head
, tail
)
/* Early-exit guard: bail when the range is degenerate / HEAD is not a
   real insn.  (The first half of the condition was dropped by the
   extraction.)  */
4550 && (GET_RTX_CLASS (GET_CODE (head
)) != 'i'))
4553 next_tail
= NEXT_INSN (tail
);
4554 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
4558 /* Farm out notes, and maybe save them in NOTE_LIST.
4559 This is needed to keep the debugger from
4560 getting completely deranged. */
4561 if (GET_CODE (insn
) == NOTE
)
/* unlink_other_notes returns the first non-note insn after the run.  */
4565 insn
= unlink_other_notes (insn
, next_tail
);
4571 if (insn
== next_tail
)
4577 /* Functions for computation of registers live/usage info. */
4579 /* Calculate INSN_REG_WEIGHT for all insns of a block. */
4582 find_insn_reg_weight (b
)
/* NOTE(review): the extraction dropped the declarations of x, j and
   reg_weight as well as the weight increments/decrements themselves;
   only the classification tests survive below.  */
4585 rtx insn
, next_tail
, head
, tail
;
4587 get_block_head_tail (b
, &head
, &tail
);
4588 next_tail
= NEXT_INSN (tail
);
4590 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
4595 /* Handle register life information. */
/* Skip anything that is not a real insn (notes, labels, ...).  */
4596 if (GET_RTX_CLASS (GET_CODE (insn
)) != 'i')
4599 /* Increment weight for each register born here. */
4601 if ((GET_CODE (x
) == SET
|| GET_CODE (x
) == CLOBBER
)
4602 && register_operand (SET_DEST (x
), VOIDmode
))
/* A PARALLEL pattern is scanned element by element for SET/CLOBBER
   of registers.  */
4604 else if (GET_CODE (x
) == PARALLEL
)
4607 for (j
= XVECLEN (x
, 0) - 1; j
>= 0; j
--)
4609 x
= XVECEXP (PATTERN (insn
), 0, j
);
4610 if ((GET_CODE (x
) == SET
|| GET_CODE (x
) == CLOBBER
)
4611 && register_operand (SET_DEST (x
), VOIDmode
))
4616 /* Decrement weight for each register that dies here. */
4617 for (x
= REG_NOTES (insn
); x
; x
= XEXP (x
, 1))
4619 if (REG_NOTE_KIND (x
) == REG_DEAD
4620 || REG_NOTE_KIND (x
) == REG_UNUSED
)
/* Net births-minus-deaths becomes the insn's register-pressure weight. */
4624 INSN_REG_WEIGHT (insn
) = reg_weight
;
4628 /* Scheduling clock, modified in schedule_block() and queue_to_ready (). */
4629 static int clock_var
;
4631 /* Move insns that became ready to fire from queue to ready list. */
4634 queue_to_ready (ready
, n_ready
)
4641 q_ptr
= NEXT_Q (q_ptr
);
4643 /* Add all pending insns that can be scheduled without stalls to the
4645 for (link
= insn_queue
[q_ptr
]; link
; link
= XEXP (link
, 1))
4648 insn
= XEXP (link
, 0);
4651 if (sched_verbose
>= 2)
4652 fprintf (dump
, ";;\t\tQ-->Ready: insn %d: ", INSN_UID (insn
));
4654 if (sched_verbose
>= 2 && INSN_BB (insn
) != target_bb
)
4655 fprintf (dump
, "(b%d) ", BLOCK_NUM (insn
));
4657 ready
[n_ready
++] = insn
;
4658 if (sched_verbose
>= 2)
4659 fprintf (dump
, "moving to ready without stalls\n");
4661 insn_queue
[q_ptr
] = 0;
4663 /* If there are no ready insns, stall until one is ready and add all
4664 of the pending insns at that point to the ready list. */
4667 register int stalls
;
4669 for (stalls
= 1; stalls
< INSN_QUEUE_SIZE
; stalls
++)
4671 if ((link
= insn_queue
[NEXT_Q_AFTER (q_ptr
, stalls
)]))
4673 for (; link
; link
= XEXP (link
, 1))
4675 insn
= XEXP (link
, 0);
4678 if (sched_verbose
>= 2)
4679 fprintf (dump
, ";;\t\tQ-->Ready: insn %d: ", INSN_UID (insn
));
4681 if (sched_verbose
>= 2 && INSN_BB (insn
) != target_bb
)
4682 fprintf (dump
, "(b%d) ", BLOCK_NUM (insn
));
4684 ready
[n_ready
++] = insn
;
4685 if (sched_verbose
>= 2)
4686 fprintf (dump
, "moving to ready with %d stalls\n", stalls
);
4688 insn_queue
[NEXT_Q_AFTER (q_ptr
, stalls
)] = 0;
4695 if (sched_verbose
&& stalls
)
4696 visualize_stall_cycles (BB_TO_BLOCK (target_bb
), stalls
);
4697 q_ptr
= NEXT_Q_AFTER (q_ptr
, stalls
);
4698 clock_var
+= stalls
;
4703 /* Print the ready list for debugging purposes. Callable from debugger. */
4706 debug_ready_list (ready
, n_ready
)
4712 for (i
= 0; i
< n_ready
; i
++)
4714 fprintf (dump
, " %d", INSN_UID (ready
[i
]));
4715 if (current_nr_blocks
> 1 && INSN_BB (ready
[i
]) != target_bb
)
4716 fprintf (dump
, "/b%d", BLOCK_NUM (ready
[i
]));
4718 fprintf (dump
, "\n");
4721 /* Print names of units on which insn can/should execute, for debugging. */
4724 insn_print_units (insn
)
4728 int unit
= insn_unit (insn
);
4731 fprintf (dump
, "none");
4733 fprintf (dump
, "%s", function_units
[unit
].name
);
4736 fprintf (dump
, "[");
4737 for (i
= 0, unit
= ~unit
; unit
; i
++, unit
>>= 1)
4740 fprintf (dump
, "%s", function_units
[i
].name
);
4742 fprintf (dump
, " ");
4744 fprintf (dump
, "]");
4748 /* MAX_VISUAL_LINES is the maximum number of lines in visualization table
4749 of a basic block. If more lines are needed, table is splitted to two.
4750 n_visual_lines is the number of lines printed so far for a block.
4751 visual_tbl contains the block visualization info.
4752 vis_no_unit holds insns in a cycle that are not mapped to any unit. */
4753 #define MAX_VISUAL_LINES 100
4758 rtx vis_no_unit
[10];
4760 /* Finds units that are in use in this fuction. Required only
4761 for visualization. */
4764 init_target_units ()
4769 for (insn
= get_last_insn (); insn
; insn
= PREV_INSN (insn
))
4771 if (GET_RTX_CLASS (GET_CODE (insn
)) != 'i')
4774 unit
= insn_unit (insn
);
4777 target_units
|= ~unit
;
4779 target_units
|= (1 << unit
);
4783 /* Return the length of the visualization table. */
4786 get_visual_tbl_length ()
4792 /* Compute length of one field in line. */
4793 s
= (char *) alloca (INSN_LEN
+ 6);
4794 sprintf (s
, " %33s", "uname");
4797 /* Compute length of one line. */
4800 for (unit
= 0; unit
< FUNCTION_UNITS_SIZE
; unit
++)
4801 if (function_units
[unit
].bitmask
& target_units
)
4802 for (i
= 0; i
< function_units
[unit
].multiplicity
; i
++)
4805 n
+= strlen ("\n") + 2;
4807 /* Compute length of visualization string. */
4808 return (MAX_VISUAL_LINES
* n
);
4811 /* Init block visualization debugging info. */
4814 init_block_visualization ()
4816 strcpy (visual_tbl
, "");
4821 #define BUF_LEN 2048
4824 safe_concat (buf
, cur
, str
)
4829 char *end
= buf
+ BUF_LEN
- 2; /* Leave room for null. */
4838 while (cur
< end
&& (c
= *str
++) != '\0')
4845 /* This recognizes rtx, I classified as expressions. These are always
4846 represent some action on values or results of other expression, that
4847 may be stored in objects representing values. */
4850 print_exp (buf
, x
, verbose
)
4858 const char *fun
= (char *)0;
4863 for (i
= 0; i
< 4; i
++)
4869 switch (GET_CODE (x
))
4872 op
[0] = XEXP (x
, 0);
4873 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
4874 && INTVAL (XEXP (x
, 1)) < 0)
4877 op
[1] = GEN_INT (-INTVAL (XEXP (x
, 1)));
4882 op
[1] = XEXP (x
, 1);
4886 op
[0] = XEXP (x
, 0);
4888 op
[1] = XEXP (x
, 1);
4892 op
[0] = XEXP (x
, 0);
4894 op
[1] = XEXP (x
, 1);
4898 op
[0] = XEXP (x
, 0);
4899 op
[1] = XEXP (x
, 1);
4903 op
[0] = XEXP (x
, 0);
4906 op
[0] = XEXP (x
, 0);
4908 op
[1] = XEXP (x
, 1);
4911 op
[0] = XEXP (x
, 0);
4913 op
[1] = XEXP (x
, 1);
4917 op
[0] = XEXP (x
, 0);
4918 op
[1] = XEXP (x
, 1);
4921 op
[0] = XEXP (x
, 0);
4923 op
[1] = XEXP (x
, 1);
4927 op
[0] = XEXP (x
, 0);
4928 op
[1] = XEXP (x
, 1);
4932 op
[0] = XEXP (x
, 0);
4933 op
[1] = XEXP (x
, 1);
4937 op
[0] = XEXP (x
, 0);
4938 op
[1] = XEXP (x
, 1);
4942 op
[0] = XEXP (x
, 0);
4943 op
[1] = XEXP (x
, 1);
4947 op
[0] = XEXP (x
, 0);
4948 op
[1] = XEXP (x
, 1);
4952 op
[0] = XEXP (x
, 0);
4955 op
[0] = XEXP (x
, 0);
4957 op
[1] = XEXP (x
, 1);
4960 op
[0] = XEXP (x
, 0);
4962 op
[1] = XEXP (x
, 1);
4965 op
[0] = XEXP (x
, 0);
4967 op
[1] = XEXP (x
, 1);
4970 op
[0] = XEXP (x
, 0);
4972 op
[1] = XEXP (x
, 1);
4975 op
[0] = XEXP (x
, 0);
4977 op
[1] = XEXP (x
, 1);
4980 op
[0] = XEXP (x
, 0);
4982 op
[1] = XEXP (x
, 1);
4985 op
[0] = XEXP (x
, 0);
4987 op
[1] = XEXP (x
, 1);
4990 op
[0] = XEXP (x
, 0);
4992 op
[1] = XEXP (x
, 1);
4996 op
[0] = XEXP (x
, 0);
5000 op
[0] = XEXP (x
, 0);
5004 op
[0] = XEXP (x
, 0);
5007 op
[0] = XEXP (x
, 0);
5009 op
[1] = XEXP (x
, 1);
5012 op
[0] = XEXP (x
, 0);
5014 op
[1] = XEXP (x
, 1);
5017 op
[0] = XEXP (x
, 0);
5019 op
[1] = XEXP (x
, 1);
5023 op
[0] = XEXP (x
, 0);
5024 op
[1] = XEXP (x
, 1);
5027 op
[0] = XEXP (x
, 0);
5029 op
[1] = XEXP (x
, 1);
5033 op
[0] = XEXP (x
, 0);
5034 op
[1] = XEXP (x
, 1);
5037 op
[0] = XEXP (x
, 0);
5039 op
[1] = XEXP (x
, 1);
5043 op
[0] = XEXP (x
, 0);
5044 op
[1] = XEXP (x
, 1);
5047 op
[0] = XEXP (x
, 0);
5049 op
[1] = XEXP (x
, 1);
5053 op
[0] = XEXP (x
, 0);
5054 op
[1] = XEXP (x
, 1);
5057 fun
= (verbose
) ? "sign_extract" : "sxt";
5058 op
[0] = XEXP (x
, 0);
5059 op
[1] = XEXP (x
, 1);
5060 op
[2] = XEXP (x
, 2);
5063 fun
= (verbose
) ? "zero_extract" : "zxt";
5064 op
[0] = XEXP (x
, 0);
5065 op
[1] = XEXP (x
, 1);
5066 op
[2] = XEXP (x
, 2);
5069 fun
= (verbose
) ? "sign_extend" : "sxn";
5070 op
[0] = XEXP (x
, 0);
5073 fun
= (verbose
) ? "zero_extend" : "zxn";
5074 op
[0] = XEXP (x
, 0);
5077 fun
= (verbose
) ? "float_extend" : "fxn";
5078 op
[0] = XEXP (x
, 0);
5081 fun
= (verbose
) ? "trunc" : "trn";
5082 op
[0] = XEXP (x
, 0);
5084 case FLOAT_TRUNCATE
:
5085 fun
= (verbose
) ? "float_trunc" : "ftr";
5086 op
[0] = XEXP (x
, 0);
5089 fun
= (verbose
) ? "float" : "flt";
5090 op
[0] = XEXP (x
, 0);
5092 case UNSIGNED_FLOAT
:
5093 fun
= (verbose
) ? "uns_float" : "ufl";
5094 op
[0] = XEXP (x
, 0);
5098 op
[0] = XEXP (x
, 0);
5101 fun
= (verbose
) ? "uns_fix" : "ufx";
5102 op
[0] = XEXP (x
, 0);
5106 op
[0] = XEXP (x
, 0);
5110 op
[0] = XEXP (x
, 0);
5113 op
[0] = XEXP (x
, 0);
5117 op
[0] = XEXP (x
, 0);
5122 op
[0] = XEXP (x
, 0);
5126 op
[1] = XEXP (x
, 1);
5131 op
[0] = XEXP (x
, 0);
5133 op
[1] = XEXP (x
, 1);
5135 op
[2] = XEXP (x
, 2);
5140 op
[0] = TRAP_CONDITION (x
);
5143 case UNSPEC_VOLATILE
:
5145 cur
= safe_concat (buf
, cur
, "unspec");
5146 if (GET_CODE (x
) == UNSPEC_VOLATILE
)
5147 cur
= safe_concat (buf
, cur
, "/v");
5148 cur
= safe_concat (buf
, cur
, "[");
5150 for (i
= 0; i
< XVECLEN (x
, 0); i
++)
5152 print_pattern (tmp
, XVECEXP (x
, 0, i
), verbose
);
5153 cur
= safe_concat (buf
, cur
, sep
);
5154 cur
= safe_concat (buf
, cur
, tmp
);
5157 cur
= safe_concat (buf
, cur
, "] ");
5158 sprintf (tmp
, "%d", XINT (x
, 1));
5159 cur
= safe_concat (buf
, cur
, tmp
);
5163 /* If (verbose) debug_rtx (x); */
5164 st
[0] = GET_RTX_NAME (GET_CODE (x
));
5168 /* Print this as a function? */
5171 cur
= safe_concat (buf
, cur
, fun
);
5172 cur
= safe_concat (buf
, cur
, "(");
5175 for (i
= 0; i
< 4; i
++)
5178 cur
= safe_concat (buf
, cur
, st
[i
]);
5183 cur
= safe_concat (buf
, cur
, ",");
5185 print_value (tmp
, op
[i
], verbose
);
5186 cur
= safe_concat (buf
, cur
, tmp
);
5191 cur
= safe_concat (buf
, cur
, ")");
5194 /* Prints rtxes, I customly classified as values. They're constants,
5195 registers, labels, symbols and memory accesses. */
5198 print_value (buf
, x
, verbose
)
5206 switch (GET_CODE (x
))
5209 sprintf (t
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
5210 cur
= safe_concat (buf
, cur
, t
);
5213 sprintf (t
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
5214 cur
= safe_concat (buf
, cur
, t
);
5217 cur
= safe_concat (buf
, cur
, "\"");
5218 cur
= safe_concat (buf
, cur
, XSTR (x
, 0));
5219 cur
= safe_concat (buf
, cur
, "\"");
5222 cur
= safe_concat (buf
, cur
, "`");
5223 cur
= safe_concat (buf
, cur
, XSTR (x
, 0));
5224 cur
= safe_concat (buf
, cur
, "'");
5227 sprintf (t
, "L%d", INSN_UID (XEXP (x
, 0)));
5228 cur
= safe_concat (buf
, cur
, t
);
5231 print_value (t
, XEXP (x
, 0), verbose
);
5232 cur
= safe_concat (buf
, cur
, "const(");
5233 cur
= safe_concat (buf
, cur
, t
);
5234 cur
= safe_concat (buf
, cur
, ")");
5237 print_value (t
, XEXP (x
, 0), verbose
);
5238 cur
= safe_concat (buf
, cur
, "high(");
5239 cur
= safe_concat (buf
, cur
, t
);
5240 cur
= safe_concat (buf
, cur
, ")");
5243 if (REGNO (x
) < FIRST_PSEUDO_REGISTER
)
5245 int c
= reg_names
[ REGNO (x
) ][0];
5246 if (c
>= '0' && c
<= '9')
5247 cur
= safe_concat (buf
, cur
, "%");
5249 cur
= safe_concat (buf
, cur
, reg_names
[ REGNO (x
) ]);
5253 sprintf (t
, "r%d", REGNO (x
));
5254 cur
= safe_concat (buf
, cur
, t
);
5258 print_value (t
, SUBREG_REG (x
), verbose
);
5259 cur
= safe_concat (buf
, cur
, t
);
5260 sprintf (t
, "#%d", SUBREG_WORD (x
));
5261 cur
= safe_concat (buf
, cur
, t
);
5264 cur
= safe_concat (buf
, cur
, "scratch");
5267 cur
= safe_concat (buf
, cur
, "cc0");
5270 cur
= safe_concat (buf
, cur
, "pc");
5273 print_value (t
, XEXP (x
, 0), verbose
);
5274 cur
= safe_concat (buf
, cur
, "[");
5275 cur
= safe_concat (buf
, cur
, t
);
5276 cur
= safe_concat (buf
, cur
, "]");
5279 print_exp (t
, x
, verbose
);
5280 cur
= safe_concat (buf
, cur
, t
);
5285 /* The next step in insn detalization, its pattern recognition. */
5288 print_pattern (buf
, x
, verbose
)
5293 char t1
[BUF_LEN
], t2
[BUF_LEN
], t3
[BUF_LEN
];
5295 switch (GET_CODE (x
))
5298 print_value (t1
, SET_DEST (x
), verbose
);
5299 print_value (t2
, SET_SRC (x
), verbose
);
5300 sprintf (buf
, "%s=%s", t1
, t2
);
5303 sprintf (buf
, "return");
5306 print_exp (buf
, x
, verbose
);
5309 print_value (t1
, XEXP (x
, 0), verbose
);
5310 sprintf (buf
, "clobber %s", t1
);
5313 print_value (t1
, XEXP (x
, 0), verbose
);
5314 sprintf (buf
, "use %s", t1
);
5321 for (i
= 0; i
< XVECLEN (x
, 0); i
++)
5323 print_pattern (t2
, XVECEXP (x
, 0, i
), verbose
);
5324 sprintf (t3
, "%s%s;", t1
, t2
);
5327 sprintf (buf
, "%s}", t1
);
5334 sprintf (t1
, "%%{");
5335 for (i
= 0; i
< XVECLEN (x
, 0); i
++)
5337 print_insn (t2
, XVECEXP (x
, 0, i
), verbose
);
5338 sprintf (t3
, "%s%s;", t1
, t2
);
5341 sprintf (buf
, "%s%%}", t1
);
5345 sprintf (buf
, "asm {%s}", XSTR (x
, 0));
5350 print_value (buf
, XEXP (x
, 0), verbose
);
5353 print_value (t1
, TRAP_CONDITION (x
), verbose
);
5354 sprintf (buf
, "trap_if %s", t1
);
5360 sprintf (t1
, "unspec{");
5361 for (i
= 0; i
< XVECLEN (x
, 0); i
++)
5363 print_pattern (t2
, XVECEXP (x
, 0, i
), verbose
);
5364 sprintf (t3
, "%s%s;", t1
, t2
);
5367 sprintf (buf
, "%s}", t1
);
5370 case UNSPEC_VOLATILE
:
5374 sprintf (t1
, "unspec/v{");
5375 for (i
= 0; i
< XVECLEN (x
, 0); i
++)
5377 print_pattern (t2
, XVECEXP (x
, 0, i
), verbose
);
5378 sprintf (t3
, "%s%s;", t1
, t2
);
5381 sprintf (buf
, "%s}", t1
);
5385 print_value (buf
, x
, verbose
);
5387 } /* print_pattern */
5389 /* This is the main function in rtl visualization mechanism. It
5390 accepts an rtx and tries to recognize it as an insn, then prints it
5391 properly in human readable form, resembling assembler mnemonics.
5392 For every insn it prints its UID and BB the insn belongs too.
5393 (Probably the last "option" should be extended somehow, since it
5394 depends now on sched.c inner variables ...) */
5397 print_insn (buf
, x
, verbose
)
5405 switch (GET_CODE (x
))
5408 print_pattern (t
, PATTERN (x
), verbose
);
5410 sprintf (buf
, "b%d: i% 4d: %s", INSN_BB (x
),
5413 sprintf (buf
, "%-4d %s", INSN_UID (x
), t
);
5416 print_pattern (t
, PATTERN (x
), verbose
);
5418 sprintf (buf
, "b%d: i% 4d: jump %s", INSN_BB (x
),
5421 sprintf (buf
, "%-4d %s", INSN_UID (x
), t
);
5425 if (GET_CODE (x
) == PARALLEL
)
5427 x
= XVECEXP (x
, 0, 0);
5428 print_pattern (t
, x
, verbose
);
5431 strcpy (t
, "call <...>");
5433 sprintf (buf
, "b%d: i% 4d: %s", INSN_BB (insn
),
5434 INSN_UID (insn
), t
);
5436 sprintf (buf
, "%-4d %s", INSN_UID (insn
), t
);
5439 sprintf (buf
, "L%d:", INSN_UID (x
));
5442 sprintf (buf
, "i% 4d: barrier", INSN_UID (x
));
5445 if (NOTE_LINE_NUMBER (x
) > 0)
5446 sprintf (buf
, "%4d note \"%s\" %d", INSN_UID (x
),
5447 NOTE_SOURCE_FILE (x
), NOTE_LINE_NUMBER (x
));
5449 sprintf (buf
, "%4d %s", INSN_UID (x
),
5450 GET_NOTE_INSN_NAME (NOTE_LINE_NUMBER (x
)));
5455 sprintf (buf
, "Not an INSN at all\n");
5459 sprintf (buf
, "i%-4d <What?>", INSN_UID (x
));
5463 /* Print visualization debugging info. */
5466 print_block_visualization (b
, s
)
5473 fprintf (dump
, "\n;; ==================== scheduling visualization for block %d %s \n", b
, s
);
5475 /* Print names of units. */
5476 fprintf (dump
, ";; %-8s", "clock");
5477 for (unit
= 0; unit
< FUNCTION_UNITS_SIZE
; unit
++)
5478 if (function_units
[unit
].bitmask
& target_units
)
5479 for (i
= 0; i
< function_units
[unit
].multiplicity
; i
++)
5480 fprintf (dump
, " %-33s", function_units
[unit
].name
);
5481 fprintf (dump
, " %-8s\n", "no-unit");
5483 fprintf (dump
, ";; %-8s", "=====");
5484 for (unit
= 0; unit
< FUNCTION_UNITS_SIZE
; unit
++)
5485 if (function_units
[unit
].bitmask
& target_units
)
5486 for (i
= 0; i
< function_units
[unit
].multiplicity
; i
++)
5487 fprintf (dump
, " %-33s", "==============================");
5488 fprintf (dump
, " %-8s\n", "=======");
5490 /* Print insns in each cycle. */
5491 fprintf (dump
, "%s\n", visual_tbl
);
5494 /* Print insns in the 'no_unit' column of visualization. */
5497 visualize_no_unit (insn
)
5500 vis_no_unit
[n_vis_no_unit
] = insn
;
5504 /* Print insns scheduled in clock, for visualization. */
5507 visualize_scheduled_insns (b
, clock
)
5512 /* If no more room, split table into two. */
5513 if (n_visual_lines
>= MAX_VISUAL_LINES
)
5515 print_block_visualization (b
, "(incomplete)");
5516 init_block_visualization ();
5521 sprintf (visual_tbl
+ strlen (visual_tbl
), ";; %-8d", clock
);
5522 for (unit
= 0; unit
< FUNCTION_UNITS_SIZE
; unit
++)
5523 if (function_units
[unit
].bitmask
& target_units
)
5524 for (i
= 0; i
< function_units
[unit
].multiplicity
; i
++)
5526 int instance
= unit
+ i
* FUNCTION_UNITS_SIZE
;
5527 rtx insn
= unit_last_insn
[instance
];
5529 /* Print insns that still keep the unit busy. */
5531 actual_hazard_this_instance (unit
, instance
, insn
, clock
, 0))
5534 print_insn (str
, insn
, 0);
5535 str
[INSN_LEN
] = '\0';
5536 sprintf (visual_tbl
+ strlen (visual_tbl
), " %-33s", str
);
5539 sprintf (visual_tbl
+ strlen (visual_tbl
), " %-33s", "------------------------------");
5542 /* Print insns that are not assigned to any unit. */
5543 for (i
= 0; i
< n_vis_no_unit
; i
++)
5544 sprintf (visual_tbl
+ strlen (visual_tbl
), " %-8d",
5545 INSN_UID (vis_no_unit
[i
]));
5548 sprintf (visual_tbl
+ strlen (visual_tbl
), "\n");
5551 /* Print stalled cycles. */
5554 visualize_stall_cycles (b
, stalls
)
5559 /* If no more room, split table into two. */
5560 if (n_visual_lines
>= MAX_VISUAL_LINES
)
5562 print_block_visualization (b
, "(incomplete)");
5563 init_block_visualization ();
5568 sprintf (visual_tbl
+ strlen (visual_tbl
), ";; ");
5569 for (i
= 0; i
< stalls
; i
++)
5570 sprintf (visual_tbl
+ strlen (visual_tbl
), ".");
5571 sprintf (visual_tbl
+ strlen (visual_tbl
), "\n");
5574 /* move_insn1: Remove INSN from insn chain, and link it after LAST insn. */
5577 move_insn1 (insn
, last
)
5580 NEXT_INSN (PREV_INSN (insn
)) = NEXT_INSN (insn
);
5581 PREV_INSN (NEXT_INSN (insn
)) = PREV_INSN (insn
);
5583 NEXT_INSN (insn
) = NEXT_INSN (last
);
5584 PREV_INSN (NEXT_INSN (last
)) = insn
;
5586 NEXT_INSN (last
) = insn
;
5587 PREV_INSN (insn
) = last
;
5592 /* Search INSN for REG_SAVE_NOTE note pairs for NOTE_INSN_SETJMP,
5593 NOTE_INSN_{LOOP,EHREGION}_{BEG,END}; and convert them back into
5594 NOTEs. The REG_SAVE_NOTE note following first one is contains the
5595 saved value for NOTE_BLOCK_NUMBER which is useful for
5596 NOTE_INSN_EH_REGION_{BEG,END} NOTEs. LAST is the last instruction
5597 output by the instruction scheduler. Return the new value of LAST. */
5600 reemit_notes (insn
, last
)
5607 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
5609 if (REG_NOTE_KIND (note
) == REG_SAVE_NOTE
)
5611 int note_type
= INTVAL (XEXP (note
, 0));
5612 if (note_type
== NOTE_INSN_SETJMP
)
5614 retval
= emit_note_after (NOTE_INSN_SETJMP
, insn
);
5615 CONST_CALL_P (retval
) = CONST_CALL_P (note
);
5616 remove_note (insn
, note
);
5617 note
= XEXP (note
, 1);
5619 else if (note_type
== NOTE_INSN_RANGE_START
5620 || note_type
== NOTE_INSN_RANGE_END
)
5622 last
= emit_note_before (note_type
, last
);
5623 remove_note (insn
, note
);
5624 note
= XEXP (note
, 1);
5625 NOTE_RANGE_INFO (last
) = XEXP (note
, 0);
5629 last
= emit_note_before (note_type
, last
);
5630 remove_note (insn
, note
);
5631 note
= XEXP (note
, 1);
5632 if (note_type
== NOTE_INSN_EH_REGION_BEG
5633 || note_type
== NOTE_INSN_EH_REGION_END
)
5634 NOTE_EH_HANDLER (last
) = INTVAL (XEXP (note
, 0));
5636 remove_note (insn
, note
);
5642 /* Move INSN, and all insns which should be issued before it,
5643 due to SCHED_GROUP_P flag. Reemit notes if needed.
5645 Return the last insn emitted by the scheduler, which is the
5646 return value from the first call to reemit_notes. */
5649 move_insn (insn
, last
)
5654 /* If INSN has SCHED_GROUP_P set, then issue it and any other
5655 insns with SCHED_GROUP_P set first. */
5656 while (SCHED_GROUP_P (insn
))
5658 rtx prev
= PREV_INSN (insn
);
5660 /* Move a SCHED_GROUP_P insn. */
5661 move_insn1 (insn
, last
);
5662 /* If this is the first call to reemit_notes, then record
5663 its return value. */
5664 if (retval
== NULL_RTX
)
5665 retval
= reemit_notes (insn
, insn
);
5667 reemit_notes (insn
, insn
);
5671 /* Now move the first non SCHED_GROUP_P insn. */
5672 move_insn1 (insn
, last
);
5674 /* If this is the first call to reemit_notes, then record
5675 its return value. */
5676 if (retval
== NULL_RTX
)
5677 retval
= reemit_notes (insn
, insn
);
5679 reemit_notes (insn
, insn
);
5684 /* Return an insn which represents a SCHED_GROUP, which is
5685 the last insn in the group. */
5696 insn
= next_nonnote_insn (insn
);
5698 while (insn
&& SCHED_GROUP_P (insn
) && (GET_CODE (insn
) != CODE_LABEL
));
5703 /* Use forward list scheduling to rearrange insns of block BB in region RGN,
5704 possibly bringing insns from subsequent blocks in the same region.
5705 Return number of insns scheduled. */
5708 schedule_block (bb
, rgn_n_insns
)
5712 /* Local variables. */
5718 /* Flow block of this bb. */
5719 int b
= BB_TO_BLOCK (bb
);
5721 /* target_n_insns == number of insns in b before scheduling starts.
5722 sched_target_n_insns == how many of b's insns were scheduled.
5723 sched_n_insns == how many insns were scheduled in b. */
5724 int target_n_insns
= 0;
5725 int sched_target_n_insns
= 0;
5726 int sched_n_insns
= 0;
5728 #define NEED_NOTHING 0
5733 /* Head/tail info for this block. */
5740 /* We used to have code to avoid getting parameters moved from hard
5741 argument registers into pseudos.
5743 However, it was removed when it proved to be of marginal benefit
5744 and caused problems because schedule_block and compute_forward_dependences
5745 had different notions of what the "head" insn was. */
5746 get_bb_head_tail (bb
, &head
, &tail
);
5748 /* rm_other_notes only removes notes which are _inside_ the
5749 block---that is, it won't remove notes before the first real insn
5750 or after the last real insn of the block. So if the first insn
5751 has a REG_SAVE_NOTE which would otherwise be emitted before the
5752 insn, it is redundant with the note before the start of the
5753 block, and so we have to take it out.
5755 FIXME: Probably the same thing should be done with REG_SAVE_NOTEs
5756 referencing NOTE_INSN_SETJMP at the end of the block. */
5757 if (GET_RTX_CLASS (GET_CODE (head
)) == 'i')
5761 for (note
= REG_NOTES (head
); note
; note
= XEXP (note
, 1))
5762 if (REG_NOTE_KIND (note
) == REG_SAVE_NOTE
)
5764 if (INTVAL (XEXP (note
, 0)) != NOTE_INSN_SETJMP
)
5766 remove_note (head
, note
);
5767 note
= XEXP (note
, 1);
5768 remove_note (head
, note
);
5771 note
= XEXP (note
, 1);
5775 next_tail
= NEXT_INSN (tail
);
5776 prev_head
= PREV_INSN (head
);
5778 /* If the only insn left is a NOTE or a CODE_LABEL, then there is no need
5779 to schedule this block. */
5781 && (GET_RTX_CLASS (GET_CODE (head
)) != 'i'))
5782 return (sched_n_insns
);
5787 fprintf (dump
, ";; ======================================================\n");
5789 ";; -- basic block %d from %d to %d -- %s reload\n",
5790 b
, INSN_UID (BLOCK_HEAD (b
)), INSN_UID (BLOCK_END (b
)),
5791 (reload_completed
? "after" : "before"));
5792 fprintf (dump
, ";; ======================================================\n");
5793 fprintf (dump
, "\n");
5795 visual_tbl
= (char *) alloca (get_visual_tbl_length ());
5796 init_block_visualization ();
5799 /* Remove remaining note insns from the block, save them in
5800 note_list. These notes are restored at the end of
5801 schedule_block (). */
5803 rm_other_notes (head
, tail
);
5807 /* Prepare current target block info. */
5808 if (current_nr_blocks
> 1)
5810 candidate_table
= (candidate
*) xmalloc (current_nr_blocks
5811 * sizeof (candidate
));
5814 /* ??? It is not clear why bblst_size is computed this way. The original
5815 number was clearly too small as it resulted in compiler failures.
5816 Multiplying by the original number by 2 (to account for update_bbs
5817 members) seems to be a reasonable solution. */
5818 /* ??? Or perhaps there is a bug somewhere else in this file? */
5819 bblst_size
= (current_nr_blocks
- bb
) * rgn_nr_edges
* 2;
5820 bblst_table
= (int *) xmalloc (bblst_size
* sizeof (int));
5822 bitlst_table_last
= 0;
5823 bitlst_table_size
= rgn_nr_edges
;
5824 bitlst_table
= (int *) xmalloc (rgn_nr_edges
* sizeof (int));
5826 compute_trg_info (bb
);
5831 /* Allocate the ready list. */
5832 ready
= (rtx
*) xmalloc ((rgn_n_insns
+ 1) * sizeof (rtx
));
5834 /* Print debugging information. */
5835 if (sched_verbose
>= 5)
5836 debug_dependencies ();
5839 /* Initialize ready list with all 'ready' insns in target block.
5840 Count number of insns in the target block being scheduled. */
5842 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
5846 if (GET_RTX_CLASS (GET_CODE (insn
)) != 'i')
5848 next
= NEXT_INSN (insn
);
5850 if (INSN_DEP_COUNT (insn
) == 0
5851 && (SCHED_GROUP_P (next
) == 0 || GET_RTX_CLASS (GET_CODE (next
)) != 'i'))
5852 ready
[n_ready
++] = insn
;
5853 if (!(SCHED_GROUP_P (insn
)))
5857 /* Add to ready list all 'ready' insns in valid source blocks.
5858 For speculative insns, check-live, exception-free, and
5860 for (bb_src
= bb
+ 1; bb_src
< current_nr_blocks
; bb_src
++)
5861 if (IS_VALID (bb_src
))
5867 get_bb_head_tail (bb_src
, &head
, &tail
);
5868 src_next_tail
= NEXT_INSN (tail
);
5872 && (GET_RTX_CLASS (GET_CODE (head
)) != 'i'))
5875 for (insn
= src_head
; insn
!= src_next_tail
; insn
= NEXT_INSN (insn
))
5877 if (GET_RTX_CLASS (GET_CODE (insn
)) != 'i')
5880 if (!CANT_MOVE (insn
)
5881 && (!IS_SPECULATIVE_INSN (insn
)
5882 || (insn_issue_delay (insn
) <= 3
5883 && check_live (insn
, bb_src
)
5884 && is_exception_free (insn
, bb_src
, target_bb
))))
5888 /* Note that we havn't squirrled away the notes for
5889 blocks other than the current. So if this is a
5890 speculative insn, NEXT might otherwise be a note. */
5891 next
= next_nonnote_insn (insn
);
5892 if (INSN_DEP_COUNT (insn
) == 0
5894 || SCHED_GROUP_P (next
) == 0
5895 || GET_RTX_CLASS (GET_CODE (next
)) != 'i'))
5896 ready
[n_ready
++] = insn
;
5901 #ifdef MD_SCHED_INIT
5902 MD_SCHED_INIT (dump
, sched_verbose
);
5905 /* No insns scheduled in this block yet. */
5906 last_scheduled_insn
= 0;
5908 /* Q_SIZE is the total number of insns in the queue. */
5912 bzero ((char *) insn_queue
, sizeof (insn_queue
));
5914 /* Start just before the beginning of time. */
5917 /* We start inserting insns after PREV_HEAD. */
5920 /* Initialize INSN_QUEUE, LIST and NEW_NEEDS. */
5921 new_needs
= (NEXT_INSN (prev_head
) == BLOCK_HEAD (b
)
5922 ? NEED_HEAD
: NEED_NOTHING
);
5923 if (PREV_INSN (next_tail
) == BLOCK_END (b
))
5924 new_needs
|= NEED_TAIL
;
5926 /* Loop until all the insns in BB are scheduled. */
5927 while (sched_target_n_insns
< target_n_insns
)
5931 /* Add to the ready list all pending insns that can be issued now.
5932 If there are no ready insns, increment clock until one
5933 is ready and add all pending insns at that point to the ready
5935 n_ready
= queue_to_ready (ready
, n_ready
);
5940 if (sched_verbose
>= 2)
5942 fprintf (dump
, ";;\t\tReady list after queue_to_ready: ");
5943 debug_ready_list (ready
, n_ready
);
5946 /* Sort the ready list based on priority. */
5947 SCHED_SORT (ready
, n_ready
);
5949 /* Allow the target to reorder the list, typically for
5950 better instruction bundling. */
5951 #ifdef MD_SCHED_REORDER
5952 MD_SCHED_REORDER (dump
, sched_verbose
, ready
, n_ready
, clock_var
,
5955 can_issue_more
= issue_rate
;
5960 fprintf (dump
, "\n;;\tReady list (t =%3d): ", clock_var
);
5961 debug_ready_list (ready
, n_ready
);
5964 /* Issue insns from ready list. */
5965 while (n_ready
!= 0 && can_issue_more
)
5967 /* Select and remove the insn from the ready list. */
5968 rtx insn
= ready
[--n_ready
];
5969 int cost
= actual_hazard (insn_unit (insn
), insn
, clock_var
, 0);
5973 queue_insn (insn
, cost
);
5977 /* An interblock motion? */
5978 if (INSN_BB (insn
) != target_bb
)
5983 if (IS_SPECULATIVE_INSN (insn
))
5985 if (!check_live (insn
, INSN_BB (insn
)))
5987 update_live (insn
, INSN_BB (insn
));
5989 /* For speculative load, mark insns fed by it. */
5990 if (IS_LOAD_INSN (insn
) || FED_BY_SPEC_LOAD (insn
))
5991 set_spec_fed (insn
);
5997 /* Find the beginning of the scheduling group. */
5998 /* ??? Ought to update basic block here, but later bits of
5999 schedule_block assumes the original insn block is
6003 while (SCHED_GROUP_P (temp
))
6004 temp
= PREV_INSN (temp
);
6006 /* Update source block boundaries. */
6007 b1
= BLOCK_FOR_INSN (temp
);
6008 if (temp
== b1
->head
&& insn
== b1
->end
)
6010 /* We moved all the insns in the basic block.
6011 Emit a note after the last insn and update the
6012 begin/end boundaries to point to the note. */
6013 rtx note
= emit_note_after (NOTE_INSN_DELETED
, insn
);
6017 else if (insn
== b1
->end
)
6019 /* We took insns from the end of the basic block,
6020 so update the end of block boundary so that it
6021 points to the first insn we did not move. */
6022 b1
->end
= PREV_INSN (temp
);
6024 else if (temp
== b1
->head
)
6026 /* We took insns from the start of the basic block,
6027 so update the start of block boundary so that
6028 it points to the first insn we did not move. */
6029 b1
->head
= NEXT_INSN (insn
);
6034 /* In block motion. */
6035 sched_target_n_insns
++;
6038 last_scheduled_insn
= insn
;
6039 last
= move_insn (insn
, last
);
6042 #ifdef MD_SCHED_VARIABLE_ISSUE
6043 MD_SCHED_VARIABLE_ISSUE (dump
, sched_verbose
, insn
,
6049 n_ready
= schedule_insn (insn
, ready
, n_ready
, clock_var
);
6051 /* Close this block after scheduling its jump. */
6052 if (GET_CODE (last_scheduled_insn
) == JUMP_INSN
)
6058 visualize_scheduled_insns (b
, clock_var
);
6064 fprintf (dump
, ";;\tReady list (final): ");
6065 debug_ready_list (ready
, n_ready
);
6066 print_block_visualization (b
, "");
6069 /* Sanity check -- queue must be empty now. Meaningless if region has
6071 if (current_nr_blocks
> 1)
6072 if (!flag_schedule_interblock
&& q_size
!= 0)
6075 /* Update head/tail boundaries. */
6076 head
= NEXT_INSN (prev_head
);
6079 /* Restore-other-notes: NOTE_LIST is the end of a chain of notes
6080 previously found among the insns. Insert them at the beginning
6084 rtx note_head
= note_list
;
6086 while (PREV_INSN (note_head
))
6088 note_head
= PREV_INSN (note_head
);
6091 PREV_INSN (note_head
) = PREV_INSN (head
);
6092 NEXT_INSN (PREV_INSN (head
)) = note_head
;
6093 PREV_INSN (head
) = note_list
;
6094 NEXT_INSN (note_list
) = head
;
6098 /* Update target block boundaries. */
6099 if (new_needs
& NEED_HEAD
)
6100 BLOCK_HEAD (b
) = head
;
6102 if (new_needs
& NEED_TAIL
)
6103 BLOCK_END (b
) = tail
;
6108 fprintf (dump
, ";; total time = %d\n;; new basic block head = %d\n",
6109 clock_var
, INSN_UID (BLOCK_HEAD (b
)));
6110 fprintf (dump
, ";; new basic block end = %d\n\n",
6111 INSN_UID (BLOCK_END (b
)));
6115 if (current_nr_blocks
> 1)
6117 free (candidate_table
);
6119 free (bitlst_table
);
6123 return (sched_n_insns
);
6124 } /* schedule_block () */
6127 /* Print the bit-set of registers, S, callable from debugger. */
6130 debug_reg_vector (s
)
6135 EXECUTE_IF_SET_IN_REG_SET (s
, 0, regno
,
6137 fprintf (dump
, " %d", regno
);
6140 fprintf (dump
, "\n");
6143 /* Use the backward dependences from LOG_LINKS to build
6144 forward dependences in INSN_DEPEND. */
6147 compute_block_forward_dependences (bb
)
6153 enum reg_note dep_type
;
6155 get_bb_head_tail (bb
, &head
, &tail
);
6156 next_tail
= NEXT_INSN (tail
);
6157 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
6159 if (GET_RTX_CLASS (GET_CODE (insn
)) != 'i')
6162 insn
= group_leader (insn
);
6164 for (link
= LOG_LINKS (insn
); link
; link
= XEXP (link
, 1))
6166 rtx x
= group_leader (XEXP (link
, 0));
6169 if (x
!= XEXP (link
, 0))
6172 #ifdef ENABLE_CHECKING
6173 /* If add_dependence is working properly there should never
6174 be notes, deleted insns or duplicates in the backward
6175 links. Thus we need not check for them here.
6177 However, if we have enabled checking we might as well go
6178 ahead and verify that add_dependence worked properly. */
6179 if (GET_CODE (x
) == NOTE
6180 || INSN_DELETED_P (x
)
6181 || find_insn_list (insn
, INSN_DEPEND (x
)))
6185 new_link
= alloc_INSN_LIST (insn
, INSN_DEPEND (x
));
6187 dep_type
= REG_NOTE_KIND (link
);
6188 PUT_REG_NOTE_KIND (new_link
, dep_type
);
6190 INSN_DEPEND (x
) = new_link
;
6191 INSN_DEP_COUNT (insn
) += 1;
6196 /* Initialize variables for region data dependence analysis.
6197 n_bbs is the number of region blocks. */
6203 int maxreg
= max_reg_num ();
6204 deps
->reg_last_uses
= (rtx
*) xcalloc (maxreg
, sizeof (rtx
));
6205 deps
->reg_last_sets
= (rtx
*) xcalloc (maxreg
, sizeof (rtx
));
6206 deps
->reg_last_clobbers
= (rtx
*) xcalloc (maxreg
, sizeof (rtx
));
6208 deps
->pending_read_insns
= 0;
6209 deps
->pending_read_mems
= 0;
6210 deps
->pending_write_insns
= 0;
6211 deps
->pending_write_mems
= 0;
6212 deps
->pending_lists_length
= 0;
6213 deps
->last_pending_memory_flush
= 0;
6214 deps
->last_function_call
= 0;
6216 deps
->sched_before_next_call
6217 = gen_rtx_INSN (VOIDmode
, 0, NULL_RTX
, NULL_RTX
,
6218 NULL_RTX
, 0, NULL_RTX
, NULL_RTX
);
6219 LOG_LINKS (deps
->sched_before_next_call
) = 0;
6222 /* Add dependences so that branches are scheduled to run last in their
6226 add_branch_dependences (head
, tail
)
6231 /* For all branches, calls, uses, clobbers, and cc0 setters, force them
6232 to remain in order at the end of the block by adding dependencies and
6233 giving the last a high priority. There may be notes present, and
6234 prev_head may also be a note.
6236 Branches must obviously remain at the end. Calls should remain at the
6237 end since moving them results in worse register allocation. Uses remain
6238 at the end to ensure proper register allocation. cc0 setters remaim
6239 at the end because they can't be moved away from their cc0 user. */
6242 while (GET_CODE (insn
) == CALL_INSN
6243 || GET_CODE (insn
) == JUMP_INSN
6244 || (GET_CODE (insn
) == INSN
6245 && (GET_CODE (PATTERN (insn
)) == USE
6246 || GET_CODE (PATTERN (insn
)) == CLOBBER
6248 || sets_cc0_p (PATTERN (insn
))
6251 || GET_CODE (insn
) == NOTE
)
6253 if (GET_CODE (insn
) != NOTE
)
6256 && !find_insn_list (insn
, LOG_LINKS (last
)))
6258 add_dependence (last
, insn
, REG_DEP_ANTI
);
6259 INSN_REF_COUNT (insn
)++;
6262 CANT_MOVE (insn
) = 1;
6265 /* Skip over insns that are part of a group.
6266 Make each insn explicitly depend on the previous insn.
6267 This ensures that only the group header will ever enter
6268 the ready queue (and, when scheduled, will automatically
6269 schedule the SCHED_GROUP_P block). */
6270 while (SCHED_GROUP_P (insn
))
6272 rtx temp
= prev_nonnote_insn (insn
);
6273 add_dependence (insn
, temp
, REG_DEP_ANTI
);
6278 /* Don't overrun the bounds of the basic block. */
6282 insn
= PREV_INSN (insn
);
6285 /* Make sure these insns are scheduled last in their block. */
6288 while (insn
!= head
)
6290 insn
= prev_nonnote_insn (insn
);
6292 if (INSN_REF_COUNT (insn
) != 0)
6295 add_dependence (last
, insn
, REG_DEP_ANTI
);
6296 INSN_REF_COUNT (insn
) = 1;
6298 /* Skip over insns that are part of a group. */
6299 while (SCHED_GROUP_P (insn
))
6300 insn
= prev_nonnote_insn (insn
);
6304 /* After computing the dependencies for block BB, propagate the dependencies
6305 found in TMP_DEPS to the successors of the block. MAX_REG is the number
6308 propagate_deps (bb
, tmp_deps
, max_reg
)
6310 struct deps
*tmp_deps
;
6313 int b
= BB_TO_BLOCK (bb
);
6316 rtx link_insn
, link_mem
;
6319 /* These lists should point to the right place, for correct
6321 bb_deps
[bb
].pending_read_insns
= tmp_deps
->pending_read_insns
;
6322 bb_deps
[bb
].pending_read_mems
= tmp_deps
->pending_read_mems
;
6323 bb_deps
[bb
].pending_write_insns
= tmp_deps
->pending_write_insns
;
6324 bb_deps
[bb
].pending_write_mems
= tmp_deps
->pending_write_mems
;
6326 /* bb's structures are inherited by its successors. */
6327 first_edge
= e
= OUT_EDGES (b
);
6334 int b_succ
= TO_BLOCK (e
);
6335 int bb_succ
= BLOCK_TO_BB (b_succ
);
6336 struct deps
*succ_deps
= bb_deps
+ bb_succ
;
6338 /* Only bbs "below" bb, in the same region, are interesting. */
6339 if (CONTAINING_RGN (b
) != CONTAINING_RGN (b_succ
)
6346 for (reg
= 0; reg
< max_reg
; reg
++)
6348 /* reg-last-uses lists are inherited by bb_succ. */
6349 for (u
= tmp_deps
->reg_last_uses
[reg
]; u
; u
= XEXP (u
, 1))
6351 if (find_insn_list (XEXP (u
, 0),
6352 succ_deps
->reg_last_uses
[reg
]))
6355 succ_deps
->reg_last_uses
[reg
]
6356 = alloc_INSN_LIST (XEXP (u
, 0),
6357 succ_deps
->reg_last_uses
[reg
]);
6360 /* reg-last-defs lists are inherited by bb_succ. */
6361 for (u
= tmp_deps
->reg_last_sets
[reg
]; u
; u
= XEXP (u
, 1))
6363 if (find_insn_list (XEXP (u
, 0),
6364 succ_deps
->reg_last_sets
[reg
]))
6367 succ_deps
->reg_last_sets
[reg
]
6368 = alloc_INSN_LIST (XEXP (u
, 0),
6369 succ_deps
->reg_last_sets
[reg
]);
6372 for (u
= tmp_deps
->reg_last_clobbers
[reg
]; u
; u
= XEXP (u
, 1))
6374 if (find_insn_list (XEXP (u
, 0),
6375 succ_deps
->reg_last_clobbers
[reg
]))
6378 succ_deps
->reg_last_clobbers
[reg
]
6379 = alloc_INSN_LIST (XEXP (u
, 0),
6380 succ_deps
->reg_last_clobbers
[reg
]);
6384 /* Mem read/write lists are inherited by bb_succ. */
6385 link_insn
= tmp_deps
->pending_read_insns
;
6386 link_mem
= tmp_deps
->pending_read_mems
;
6389 if (!(find_insn_mem_list (XEXP (link_insn
, 0),
6391 succ_deps
->pending_read_insns
,
6392 succ_deps
->pending_read_mems
)))
6393 add_insn_mem_dependence (succ_deps
, &succ_deps
->pending_read_insns
,
6394 &succ_deps
->pending_read_mems
,
6395 XEXP (link_insn
, 0), XEXP (link_mem
, 0));
6396 link_insn
= XEXP (link_insn
, 1);
6397 link_mem
= XEXP (link_mem
, 1);
6400 link_insn
= tmp_deps
->pending_write_insns
;
6401 link_mem
= tmp_deps
->pending_write_mems
;
6404 if (!(find_insn_mem_list (XEXP (link_insn
, 0),
6406 succ_deps
->pending_write_insns
,
6407 succ_deps
->pending_write_mems
)))
6408 add_insn_mem_dependence (succ_deps
,
6409 &succ_deps
->pending_write_insns
,
6410 &succ_deps
->pending_write_mems
,
6411 XEXP (link_insn
, 0), XEXP (link_mem
, 0));
6413 link_insn
= XEXP (link_insn
, 1);
6414 link_mem
= XEXP (link_mem
, 1);
6417 /* last_function_call is inherited by bb_succ. */
6418 for (u
= tmp_deps
->last_function_call
; u
; u
= XEXP (u
, 1))
6420 if (find_insn_list (XEXP (u
, 0),
6421 succ_deps
->last_function_call
))
6424 succ_deps
->last_function_call
6425 = alloc_INSN_LIST (XEXP (u
, 0),
6426 succ_deps
->last_function_call
);
6429 /* last_pending_memory_flush is inherited by bb_succ. */
6430 for (u
= tmp_deps
->last_pending_memory_flush
; u
; u
= XEXP (u
, 1))
6432 if (find_insn_list (XEXP (u
, 0),
6433 succ_deps
->last_pending_memory_flush
))
6436 succ_deps
->last_pending_memory_flush
6437 = alloc_INSN_LIST (XEXP (u
, 0),
6438 succ_deps
->last_pending_memory_flush
);
6441 /* sched_before_next_call is inherited by bb_succ. */
6442 x
= LOG_LINKS (tmp_deps
->sched_before_next_call
);
6443 for (; x
; x
= XEXP (x
, 1))
6444 add_dependence (succ_deps
->sched_before_next_call
,
6445 XEXP (x
, 0), REG_DEP_ANTI
);
6449 while (e
!= first_edge
);
6452 /* Compute backward dependences inside bb. In a multiple blocks region:
6453 (1) a bb is analyzed after its predecessors, and (2) the lists in
6454 effect at the end of bb (after analyzing for bb) are inherited by
6457 Specifically for reg-reg data dependences, the block insns are
6458 scanned by sched_analyze () top-to-bottom. Two lists are
6459 maintained by sched_analyze (): reg_last_sets[] for register DEFs,
6460 and reg_last_uses[] for register USEs.
6462 When analysis is completed for bb, we update for its successors:
6463 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
6464 ; - USES[succ] = Union (USES [succ], DEFS [bb])
6466 The mechanism for computing mem-mem data dependence is very
6467 similar, and the result is interblock dependences in the region. */
6470 compute_block_backward_dependences (bb
)
6475 int max_reg
= max_reg_num ();
6476 struct deps tmp_deps
;
6478 tmp_deps
= bb_deps
[bb
];
6480 /* Do the analysis for this block. */
6481 get_bb_head_tail (bb
, &head
, &tail
);
6482 sched_analyze (&tmp_deps
, head
, tail
);
6483 add_branch_dependences (head
, tail
);
6485 if (current_nr_blocks
> 1)
6486 propagate_deps (bb
, &tmp_deps
, max_reg
);
6488 /* Free up the INSN_LISTs.
6490 Note this loop is executed max_reg * nr_regions times. It's first
6491 implementation accounted for over 90% of the calls to free_INSN_LIST_list.
6492 The list was empty for the vast majority of those calls. On the PA, not
6493 calling free_INSN_LIST_list in those cases improves -O2 compile times by
6495 for (i
= 0; i
< max_reg
; ++i
)
6497 if (tmp_deps
.reg_last_clobbers
[i
])
6498 free_INSN_LIST_list (&tmp_deps
.reg_last_clobbers
[i
]);
6499 if (tmp_deps
.reg_last_sets
[i
])
6500 free_INSN_LIST_list (&tmp_deps
.reg_last_sets
[i
]);
6501 if (tmp_deps
.reg_last_uses
[i
])
6502 free_INSN_LIST_list (&tmp_deps
.reg_last_uses
[i
]);
6505 /* Assert that we won't need bb_reg_last_* for this block anymore. */
6506 free (bb_deps
[bb
].reg_last_uses
);
6507 free (bb_deps
[bb
].reg_last_sets
);
6508 free (bb_deps
[bb
].reg_last_clobbers
);
6509 bb_deps
[bb
].reg_last_uses
= 0;
6510 bb_deps
[bb
].reg_last_sets
= 0;
6511 bb_deps
[bb
].reg_last_clobbers
= 0;
6514 /* Print dependences for debugging, callable from debugger. */
6517 debug_dependencies ()
6521 fprintf (dump
, ";; --------------- forward dependences: ------------ \n");
6522 for (bb
= 0; bb
< current_nr_blocks
; bb
++)
6530 get_bb_head_tail (bb
, &head
, &tail
);
6531 next_tail
= NEXT_INSN (tail
);
6532 fprintf (dump
, "\n;; --- Region Dependences --- b %d bb %d \n",
6533 BB_TO_BLOCK (bb
), bb
);
6535 fprintf (dump
, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
6536 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
6537 fprintf (dump
, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
6538 "----", "----", "--", "---", "----", "----", "--------", "-----");
6539 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
6544 if (GET_RTX_CLASS (GET_CODE (insn
)) != 'i')
6547 fprintf (dump
, ";; %6d ", INSN_UID (insn
));
6548 if (GET_CODE (insn
) == NOTE
)
6550 n
= NOTE_LINE_NUMBER (insn
);
6552 fprintf (dump
, "%s\n", GET_NOTE_INSN_NAME (n
));
6554 fprintf (dump
, "line %d, file %s\n", n
,
6555 NOTE_SOURCE_FILE (insn
));
6558 fprintf (dump
, " {%s}\n", GET_RTX_NAME (GET_CODE (insn
)));
6562 unit
= insn_unit (insn
);
6564 || function_units
[unit
].blockage_range_function
== 0) ? 0 :
6565 function_units
[unit
].blockage_range_function (insn
);
6567 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
6568 (SCHED_GROUP_P (insn
) ? "+" : " "),
6572 INSN_DEP_COUNT (insn
),
6573 INSN_PRIORITY (insn
),
6574 insn_cost (insn
, 0, 0),
6575 (int) MIN_BLOCKAGE_COST (range
),
6576 (int) MAX_BLOCKAGE_COST (range
));
6577 insn_print_units (insn
);
6578 fprintf (dump
, "\t: ");
6579 for (link
= INSN_DEPEND (insn
); link
; link
= XEXP (link
, 1))
6580 fprintf (dump
, "%d ", INSN_UID (XEXP (link
, 0)));
6581 fprintf (dump
, "\n");
6585 fprintf (dump
, "\n");
6588 /* Set_priorities: compute priority of each insn in the block. */
6601 get_bb_head_tail (bb
, &head
, &tail
);
6602 prev_head
= PREV_INSN (head
);
6605 && (GET_RTX_CLASS (GET_CODE (head
)) != 'i'))
6609 for (insn
= tail
; insn
!= prev_head
; insn
= PREV_INSN (insn
))
6612 if (GET_CODE (insn
) == NOTE
)
6615 if (!(SCHED_GROUP_P (insn
)))
6617 (void) priority (insn
);
6623 /* Schedule a region. A region is either an inner loop, a loop-free
6624 subroutine, or a single basic block. Each bb in the region is
6625 scheduled after its flow predecessors. */
6628 schedule_region (rgn
)
6632 int rgn_n_insns
= 0;
6633 int sched_rgn_n_insns
= 0;
6635 /* Set variables for the current region. */
6636 current_nr_blocks
= RGN_NR_BLOCKS (rgn
);
6637 current_blocks
= RGN_BLOCKS (rgn
);
6639 reg_pending_sets
= ALLOCA_REG_SET ();
6640 reg_pending_clobbers
= ALLOCA_REG_SET ();
6641 reg_pending_sets_all
= 0;
6643 /* Initializations for region data dependence analyisis. */
6644 bb_deps
= (struct deps
*) xmalloc (sizeof (struct deps
) * current_nr_blocks
);
6645 for (bb
= 0; bb
< current_nr_blocks
; bb
++)
6646 init_deps (bb_deps
+ bb
);
6648 /* Compute LOG_LINKS. */
6649 for (bb
= 0; bb
< current_nr_blocks
; bb
++)
6650 compute_block_backward_dependences (bb
);
6652 /* Compute INSN_DEPEND. */
6653 for (bb
= current_nr_blocks
- 1; bb
>= 0; bb
--)
6654 compute_block_forward_dependences (bb
);
6656 /* Delete line notes and set priorities. */
6657 for (bb
= 0; bb
< current_nr_blocks
; bb
++)
6659 if (write_symbols
!= NO_DEBUG
)
6661 save_line_notes (bb
);
6665 rgn_n_insns
+= set_priorities (bb
);
6668 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
6669 if (current_nr_blocks
> 1)
6673 prob
= (float *) xmalloc ((current_nr_blocks
) * sizeof (float));
6675 bbset_size
= current_nr_blocks
/ HOST_BITS_PER_WIDE_INT
+ 1;
6676 dom
= (bbset
*) xmalloc (current_nr_blocks
* sizeof (bbset
));
6677 for (i
= 0; i
< current_nr_blocks
; i
++)
6678 dom
[i
] = (bbset
) xcalloc (bbset_size
, sizeof (HOST_WIDE_INT
));
6682 edge_to_bit
= (int *) xmalloc (nr_edges
* sizeof (int));
6683 for (i
= 1; i
< nr_edges
; i
++)
6684 if (CONTAINING_RGN (FROM_BLOCK (i
)) == rgn
)
6685 EDGE_TO_BIT (i
) = rgn_nr_edges
++;
6686 rgn_edges
= (int *) xmalloc (rgn_nr_edges
* sizeof (int));
6689 for (i
= 1; i
< nr_edges
; i
++)
6690 if (CONTAINING_RGN (FROM_BLOCK (i
)) == (rgn
))
6691 rgn_edges
[rgn_nr_edges
++] = i
;
6694 edgeset_size
= rgn_nr_edges
/ HOST_BITS_PER_WIDE_INT
+ 1;
6695 edgeset_bitsize
= rgn_nr_edges
;
6696 pot_split
= (edgeset
*) xmalloc (current_nr_blocks
* sizeof (edgeset
));
6698 = (edgeset
*) xmalloc (current_nr_blocks
* sizeof (edgeset
));
6699 for (i
= 0; i
< current_nr_blocks
; i
++)
6702 (edgeset
) xcalloc (edgeset_size
, sizeof (HOST_WIDE_INT
));
6704 (edgeset
) xcalloc (edgeset_size
, sizeof (HOST_WIDE_INT
));
6707 /* Compute probabilities, dominators, split_edges. */
6708 for (bb
= 0; bb
< current_nr_blocks
; bb
++)
6709 compute_dom_prob_ps (bb
);
6712 /* Now we can schedule all blocks. */
6713 for (bb
= 0; bb
< current_nr_blocks
; bb
++)
6714 sched_rgn_n_insns
+= schedule_block (bb
, rgn_n_insns
);
6716 /* Sanity check: verify that all region insns were scheduled. */
6717 if (sched_rgn_n_insns
!= rgn_n_insns
)
6720 /* Restore line notes. */
6721 if (write_symbols
!= NO_DEBUG
)
6723 for (bb
= 0; bb
< current_nr_blocks
; bb
++)
6724 restore_line_notes (bb
);
6727 /* Done with this region. */
6728 free_pending_lists ();
6730 FREE_REG_SET (reg_pending_sets
);
6731 FREE_REG_SET (reg_pending_clobbers
);
6735 if (current_nr_blocks
> 1)
6740 for (i
= 0; i
< current_nr_blocks
; ++i
)
6743 free (pot_split
[i
]);
6744 free (ancestor_edges
[i
]);
6750 free (ancestor_edges
);
6754 /* The one entry point in this file. DUMP_FILE is the dump file for
6758 schedule_insns (dump_file
)
6761 int *deaths_in_region
;
6762 sbitmap blocks
, large_region_blocks
;
6768 int any_large_regions
;
6770 /* Disable speculative loads in their presence if cc0 defined. */
6772 flag_schedule_speculative_load
= 0;
6775 /* Taking care of this degenerate case makes the rest of
6776 this code simpler. */
6777 if (n_basic_blocks
== 0)
6780 /* Set dump and sched_verbose for the desired debugging output. If no
6781 dump-file was specified, but -fsched-verbose-N (any N), print to stderr.
6782 For -fsched-verbose-N, N>=10, print everything to stderr. */
6783 sched_verbose
= sched_verbose_param
;
6784 if (sched_verbose_param
== 0 && dump_file
)
6786 dump
= ((sched_verbose_param
>= 10 || !dump_file
) ? stderr
: dump_file
);
6791 /* Initialize issue_rate. */
6792 issue_rate
= ISSUE_RATE
;
6794 split_all_insns (1);
6796 /* We use LUID 0 for the fake insn (UID 0) which holds dependencies for
6797 pseudos which do not cross calls. */
6798 max_uid
= get_max_uid () + 1;
6800 h_i_d
= (struct haifa_insn_data
*) xcalloc (max_uid
, sizeof (*h_i_d
));
6804 for (b
= 0; b
< n_basic_blocks
; b
++)
6805 for (insn
= BLOCK_HEAD (b
);; insn
= NEXT_INSN (insn
))
6807 INSN_LUID (insn
) = luid
;
6809 /* Increment the next luid, unless this is a note. We don't
6810 really need separate IDs for notes and we don't want to
6811 schedule differently depending on whether or not there are
6812 line-number notes, i.e., depending on whether or not we're
6813 generating debugging information. */
6814 if (GET_CODE (insn
) != NOTE
)
6817 if (insn
== BLOCK_END (b
))
6821 /* ?!? We could save some memory by computing a per-region luid mapping
6822 which could reduce both the number of vectors in the cache and the size
6823 of each vector. Instead we just avoid the cache entirely unless the
6824 average number of instructions in a basic block is very high. See
6825 the comment before the declaration of true_dependency_cache for
6826 what we consider "very high". */
6827 if (luid
/ n_basic_blocks
> 100 * 5)
6829 true_dependency_cache
= sbitmap_vector_alloc (luid
, luid
);
6830 sbitmap_vector_zero (true_dependency_cache
, luid
);
6834 rgn_table
= (region
*) xmalloc ((n_basic_blocks
) * sizeof (region
));
6835 rgn_bb_table
= (int *) xmalloc ((n_basic_blocks
) * sizeof (int));
6836 block_to_bb
= (int *) xmalloc ((n_basic_blocks
) * sizeof (int));
6837 containing_rgn
= (int *) xmalloc ((n_basic_blocks
) * sizeof (int));
6839 blocks
= sbitmap_alloc (n_basic_blocks
);
6840 large_region_blocks
= sbitmap_alloc (n_basic_blocks
);
6842 compute_bb_for_insn (max_uid
);
6844 /* Compute regions for scheduling. */
6845 if (reload_completed
6846 || n_basic_blocks
== 1
6847 || !flag_schedule_interblock
)
6849 find_single_block_region ();
6853 /* Verify that a 'good' control flow graph can be built. */
6854 if (is_cfg_nonregular ())
6856 find_single_block_region ();
6861 struct edge_list
*edge_list
;
6863 dom
= sbitmap_vector_alloc (n_basic_blocks
, n_basic_blocks
);
6865 /* The scheduler runs after flow; therefore, we can't blindly call
6866 back into find_basic_blocks since doing so could invalidate the
6867 info in global_live_at_start.
6869 Consider a block consisting entirely of dead stores; after life
6870 analysis it would be a block of NOTE_INSN_DELETED notes. If
6871 we call find_basic_blocks again, then the block would be removed
6872 entirely and invalidate our the register live information.
6874 We could (should?) recompute register live information. Doing
6875 so may even be beneficial. */
6876 edge_list
= create_edge_list ();
6878 /* Compute the dominators and post dominators. We don't
6879 currently use post dominators, but we should for
6880 speculative motion analysis. */
6881 compute_flow_dominators (dom
, NULL
);
6883 /* build_control_flow will return nonzero if it detects unreachable
6884 blocks or any other irregularity with the cfg which prevents
6885 cross block scheduling. */
6886 if (build_control_flow (edge_list
) != 0)
6887 find_single_block_region ();
6889 find_rgns (edge_list
, dom
);
6891 if (sched_verbose
>= 3)
6894 /* For now. This will move as more and more of haifa is converted
6895 to using the cfg code in flow.c. */
6900 deaths_in_region
= (int *) xmalloc (sizeof(int) * nr_regions
);
6902 init_alias_analysis ();
6904 if (write_symbols
!= NO_DEBUG
)
6908 line_note_head
= (rtx
*) xcalloc (n_basic_blocks
, sizeof (rtx
));
6910 /* Save-line-note-head:
6911 Determine the line-number at the start of each basic block.
6912 This must be computed and saved now, because after a basic block's
6913 predecessor has been scheduled, it is impossible to accurately
6914 determine the correct line number for the first insn of the block. */
6916 for (b
= 0; b
< n_basic_blocks
; b
++)
6917 for (line
= BLOCK_HEAD (b
); line
; line
= PREV_INSN (line
))
6918 if (GET_CODE (line
) == NOTE
&& NOTE_LINE_NUMBER (line
) > 0)
6920 line_note_head
[b
] = line
;
6925 /* Find units used in this fuction, for visualization. */
6927 init_target_units ();
6929 /* ??? Add a NOTE after the last insn of the last basic block. It is not
6930 known why this is done. */
6932 insn
= BLOCK_END (n_basic_blocks
- 1);
6933 if (NEXT_INSN (insn
) == 0
6934 || (GET_CODE (insn
) != NOTE
6935 && GET_CODE (insn
) != CODE_LABEL
6936 /* Don't emit a NOTE if it would end up between an unconditional
6937 jump and a BARRIER. */
6938 && !(GET_CODE (insn
) == JUMP_INSN
6939 && GET_CODE (NEXT_INSN (insn
)) == BARRIER
)))
6940 emit_note_after (NOTE_INSN_DELETED
, BLOCK_END (n_basic_blocks
- 1));
6942 /* Compute INSN_REG_WEIGHT for all blocks. We must do this before
6943 removing death notes. */
6944 for (b
= n_basic_blocks
- 1; b
>= 0; b
--)
6945 find_insn_reg_weight (b
);
6947 /* Remove all death notes from the subroutine. */
6948 for (rgn
= 0; rgn
< nr_regions
; rgn
++)
6950 sbitmap_zero (blocks
);
6951 for (b
= RGN_NR_BLOCKS (rgn
) - 1; b
>= 0; --b
)
6952 SET_BIT (blocks
, rgn_bb_table
[RGN_BLOCKS (rgn
) + b
]);
6954 deaths_in_region
[rgn
] = count_or_remove_death_notes (blocks
, 1);
6957 /* Schedule every region in the subroutine. */
6958 for (rgn
= 0; rgn
< nr_regions
; rgn
++)
6959 schedule_region (rgn
);
6961 /* Update life analysis for the subroutine. Do single block regions
6962 first so that we can verify that live_at_start didn't change. Then
6963 do all other blocks. */
6964 /* ??? There is an outside possibility that update_life_info, or more
6965 to the point propagate_block, could get called with non-zero flags
6966 more than once for one basic block. This would be kinda bad if it
6967 were to happen, since REG_INFO would be accumulated twice for the
6968 block, and we'd have twice the REG_DEAD notes.
6970 I'm fairly certain that this _shouldn't_ happen, since I don't think
6971 that live_at_start should change at region heads. Not sure what the
6972 best way to test for this kind of thing... */
6974 allocate_reg_life_data ();
6975 compute_bb_for_insn (max_uid
);
6977 any_large_regions
= 0;
6978 sbitmap_ones (large_region_blocks
);
6980 for (rgn
= 0; rgn
< nr_regions
; rgn
++)
6981 if (RGN_NR_BLOCKS (rgn
) > 1)
6982 any_large_regions
= 1;
6985 sbitmap_zero (blocks
);
6986 SET_BIT (blocks
, rgn_bb_table
[RGN_BLOCKS (rgn
)]);
6987 RESET_BIT (large_region_blocks
, rgn_bb_table
[RGN_BLOCKS (rgn
)]);
6989 /* Don't update reg info after reload, since that affects
6990 regs_ever_live, which should not change after reload. */
6991 update_life_info (blocks
, UPDATE_LIFE_LOCAL
,
6992 (reload_completed
? PROP_DEATH_NOTES
6993 : PROP_DEATH_NOTES
| PROP_REG_INFO
));
6995 /* In the single block case, the count of registers that died should
6996 not have changed during the schedule. */
6997 if (count_or_remove_death_notes (blocks
, 0) != deaths_in_region
[rgn
])
7001 if (any_large_regions
)
7003 update_life_info (large_region_blocks
, UPDATE_LIFE_GLOBAL
,
7004 PROP_DEATH_NOTES
| PROP_REG_INFO
);
7007 /* Reposition the prologue and epilogue notes in case we moved the
7008 prologue/epilogue insns. */
7009 if (reload_completed
)
7010 reposition_prologue_and_epilogue_notes (get_insns ());
7012 /* Delete redundant line notes. */
7013 if (write_symbols
!= NO_DEBUG
)
7014 rm_redundant_line_notes ();
7018 if (reload_completed
== 0 && flag_schedule_interblock
)
7020 fprintf (dump
, "\n;; Procedure interblock/speculative motions == %d/%d \n",
7028 fprintf (dump
, "\n\n");
7032 end_alias_analysis ();
7034 if (true_dependency_cache
)
7036 free (true_dependency_cache
);
7037 true_dependency_cache
= NULL
;
7040 free (rgn_bb_table
);
7042 free (containing_rgn
);
7046 if (write_symbols
!= NO_DEBUG
)
7047 free (line_note_head
);
7066 sbitmap_free (blocks
);
7067 sbitmap_free (large_region_blocks
);
7069 free (deaths_in_region
);
7072 #endif /* INSN_SCHEDULING */