1/* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 2, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING. If not, write to the Free
21Software Foundation, 59 Temple Place - Suite 330, Boston, MA
2202111-1307, USA. */
23
24/* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
27
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
31
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions requires
36 construction and analysis of the function's control flow graph.
37
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
47\f
48#include "config.h"
49#include "system.h"
50#include "toplev.h"
51#include "rtl.h"
52#include "tm_p.h"
53#include "hard-reg-set.h"
54#include "basic-block.h"
55#include "regs.h"
56#include "function.h"
57#include "flags.h"
58#include "insn-config.h"
59#include "insn-attr.h"
60#include "except.h"
61#include "toplev.h"
62#include "recog.h"
63#include "cfglayout.h"
64#include "sched-int.h"
65#include "target.h"
66
67/* Define when we want to count REG_DEAD notes before and after scheduling
68 for sanity checking. We can't do that when conditional execution is used,
69 as REG_DEAD notes exist only for unconditional deaths. */
70
71#if !defined (HAVE_conditional_execution) && defined (ENABLE_CHECKING)
72#define CHECK_DEAD_NOTES 1
73#else
74#define CHECK_DEAD_NOTES 0
75#endif
76
77
78#ifdef INSN_SCHEDULING
79/* Some accessor macros for h_i_d members only used within this file. */
80#define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
81#define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
82#define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
83
84#define MAX_RGN_BLOCKS 10
85#define MAX_RGN_INSNS 100
86
87/* nr_inter/spec counts interblock/speculative motion for the function. */
88static int nr_inter, nr_spec;
89
90/* Control flow graph edges are kept in circular lists. */
91typedef struct
92{
93 int from_block;
94 int to_block;
95 int next_in;
96 int next_out;
97}
98haifa_edge;
99static haifa_edge *edge_table;
100
101#define NEXT_IN(edge) (edge_table[edge].next_in)
102#define NEXT_OUT(edge) (edge_table[edge].next_out)
103#define FROM_BLOCK(edge) (edge_table[edge].from_block)
104#define TO_BLOCK(edge) (edge_table[edge].to_block)
105
106/* Number of edges in the control flow graph. (In fact, larger than
107 that by 1, since edge 0 is unused.) */
108static int nr_edges;
109
110/* Circular list of incoming/outgoing edges of a block. */
111static int *in_edges;
112static int *out_edges;
113
114#define IN_EDGES(block) (in_edges[block])
115#define OUT_EDGES(block) (out_edges[block])
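/* For illustration, the circular out-edge list of a block is walked with
   NEXT_OUT until the walk comes back to the first edge, roughly:

     int fst_edge, nxt_edge;
     fst_edge = nxt_edge = OUT_EDGES (block);
     do
       {
         ... use FROM_BLOCK (nxt_edge) / TO_BLOCK (nxt_edge) ...
         nxt_edge = NEXT_OUT (nxt_edge);
       }
     while (fst_edge != nxt_edge);

   This is the idiom used below (e.g. in compute_trg_info and
   compute_dom_prob_ps); edge index 0 means "no edge", which is why edge 0
   is never placed on these lists.  */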
116
117static int is_cfg_nonregular PARAMS ((void));
118static int build_control_flow PARAMS ((struct edge_list *));
119static void new_edge PARAMS ((int, int));
120
121/* A region is the main entity for interblock scheduling: insns
122 are allowed to move between blocks in the same region, along
123 control flow graph edges, in the 'up' direction. */
124typedef struct
125{
126 int rgn_nr_blocks; /* Number of blocks in region. */
127 int rgn_blocks; /* Blocks in the region (actually index in rgn_bb_table). */
128}
129region;
130
131/* Number of regions in the procedure. */
132static int nr_regions;
133
134/* Table of region descriptions. */
135static region *rgn_table;
136
137/* Array of lists of regions' blocks. */
138static int *rgn_bb_table;
139
140/* Topological order of blocks in the region (if b2 is reachable from
141 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
142 always referred to by either block or b, while its topological
143 order name (in the region) is referred to by bb. */
144static int *block_to_bb;
145
146/* The number of the region containing a block. */
147static int *containing_rgn;
148
149#define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
150#define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
151#define BLOCK_TO_BB(block) (block_to_bb[block])
152#define CONTAINING_RGN(block) (containing_rgn[block])
153
154void debug_regions PARAMS ((void));
155static void find_single_block_region PARAMS ((void));
156static void find_rgns PARAMS ((struct edge_list *, dominance_info));
157static int too_large PARAMS ((int, int *, int *));
158
159extern void debug_live PARAMS ((int, int));
160
161/* Blocks of the current region being scheduled. */
162static int current_nr_blocks;
163static int current_blocks;
164
165/* The mapping from bb to block. */
166#define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
167
168typedef struct
169{
170 int *first_member; /* Pointer to the list start in bitlst_table. */
171 int nr_members; /* The number of members of the bit list. */
172}
173bitlst;
174
175static int bitlst_table_last;
176static int bitlst_table_size;
177static int *bitlst_table;
178
179static void extract_bitlst PARAMS ((sbitmap, bitlst *));
180
181/* Target info declarations.
182
183 The block currently being scheduled is referred to as the "target" block,
184 while other blocks in the region from which insns can be moved to the
185 target are called "source" blocks. The candidate structure holds info
186 about such sources: are they valid? Speculative? Etc. */
187typedef bitlst bblst;
188typedef struct
189{
190 char is_valid;
191 char is_speculative;
192 int src_prob;
193 bblst split_bbs;
194 bblst update_bbs;
195}
196candidate;
197
198static candidate *candidate_table;
199
200/* A speculative motion requires checking live information on the path
201 from 'source' to 'target'. The split blocks are those to be checked.
202 After a speculative motion, live information should be modified in
203 the 'update' blocks.
204
205 Lists of split and update blocks for each candidate of the current
206 target are in array bblst_table. */
207static int *bblst_table, bblst_size, bblst_last;
208
209#define IS_VALID(src) ( candidate_table[src].is_valid )
210#define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
211#define SRC_PROB(src) ( candidate_table[src].src_prob )
212
213/* The bb being currently scheduled. */
214static int target_bb;
215
216/* List of edges. */
217typedef bitlst edgelst;
218
219/* Target info functions. */
220static void split_edges PARAMS ((int, int, edgelst *));
221static void compute_trg_info PARAMS ((int));
222void debug_candidate PARAMS ((int));
223void debug_candidates PARAMS ((int));
224
225/* Dominators array: dom[i] contains the sbitmap of dominators of
226 bb i in the region. */
227static sbitmap *dom;
228
229/* bb 0 is the only region entry. */
230#define IS_RGN_ENTRY(bb) (!bb)
231
232/* Is bb_src dominated by bb_trg. */
233#define IS_DOMINATED(bb_src, bb_trg) \
234( TEST_BIT (dom[bb_src], bb_trg) )
235
236/* Probability: Prob[i] is a float in [0, 1] which is the probability
237 of bb i relative to the region entry. */
238static float *prob;
239
240/* The probability of bb_src, relative to bb_trg. Note that while the
241 'prob[bb]' is a float in [0, 1], this macro returns an integer
242 in [0, 100]. */
243#define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
244 prob[bb_trg])))
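/* For example, with prob[bb_src] == 0.3 and prob[bb_trg] == 0.6,
   GET_SRC_PROB (bb_src, bb_trg) evaluates to (int) (100.0 * 0.5) == 50,
   i.e. bb_src is reached on about half of the paths through bb_trg.  */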
245
246/* Bit-set of edges, where bit i stands for edge i. */
247typedef sbitmap edgeset;
248
249/* Number of edges in the region. */
250static int rgn_nr_edges;
251
252/* Array of size rgn_nr_edges. */
253static int *rgn_edges;
254
255
256/* Mapping from each edge in the graph to its number in the rgn. */
257static int *edge_to_bit;
258#define EDGE_TO_BIT(edge) (edge_to_bit[edge])
259
260/* The split edges of a source bb is different for each target
261 bb. In order to compute this efficiently, the 'potential-split edges'
262 are computed for each bb prior to scheduling a region. This is actually
263 the split edges of each bb relative to the region entry.
264
265 pot_split[bb] is the set of potential split edges of bb. */
266static edgeset *pot_split;
267
268/* For every bb, a set of its ancestor edges. */
269static edgeset *ancestor_edges;
270
271static void compute_dom_prob_ps PARAMS ((int));
272
273#define ABS_VALUE(x) (((x)<0)?(-(x)):(x))
274#define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
275#define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
276#define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
277
278/* Parameters affecting the decision of rank_for_schedule().
279 ??? Nope. But MIN_PROBABILITY is used in compute_trg_info. */
280#define MIN_DIFF_PRIORITY 2
281#define MIN_PROBABILITY 40
282#define MIN_PROB_DIFF 10
283
284/* Speculative scheduling functions. */
285static int check_live_1 PARAMS ((int, rtx));
286static void update_live_1 PARAMS ((int, rtx));
287static int check_live PARAMS ((rtx, int));
288static void update_live PARAMS ((rtx, int));
289static void set_spec_fed PARAMS ((rtx));
290static int is_pfree PARAMS ((rtx, int, int));
291static int find_conditional_protection PARAMS ((rtx, int));
292static int is_conditionally_protected PARAMS ((rtx, int, int));
293static int may_trap_exp PARAMS ((rtx, int));
294static int haifa_classify_insn PARAMS ((rtx));
295static int is_prisky PARAMS ((rtx, int, int));
296static int is_exception_free PARAMS ((rtx, int, int));
297
298static bool sets_likely_spilled PARAMS ((rtx));
299static void sets_likely_spilled_1 PARAMS ((rtx, rtx, void *));
300static void add_branch_dependences PARAMS ((rtx, rtx));
301static void compute_block_backward_dependences PARAMS ((int));
302void debug_dependencies PARAMS ((void));
303
304static void init_regions PARAMS ((void));
305static void schedule_region PARAMS ((int));
306static rtx concat_INSN_LIST PARAMS ((rtx, rtx));
307static void concat_insn_mem_list PARAMS ((rtx, rtx, rtx *, rtx *));
308static void propagate_deps PARAMS ((int, struct deps *));
309static void free_pending_lists PARAMS ((void));
310
311/* Functions for construction of the control flow graph. */
312
313/* Return 1 if control flow graph should not be constructed, 0 otherwise.
314
315 We decide not to build the control flow graph if there is possibly more
316 than one entry to the function, if computed branches exist, or if we
317 have nonlocal gotos. */
318
319static int
320is_cfg_nonregular ()
321{
322 basic_block b;
323 rtx insn;
324 RTX_CODE code;
325
326 /* If we have a label that could be the target of a nonlocal goto, then
327 the cfg is not well structured. */
328 if (nonlocal_goto_handler_labels)
329 return 1;
330
331 /* If we have any forced labels, then the cfg is not well structured. */
332 if (forced_labels)
333 return 1;
334
335 /* If this function has a computed jump, then we consider the cfg
336 not well structured. */
337 if (current_function_has_computed_jump)
338 return 1;
339
340 /* If we have exception handlers, then we consider the cfg not well
341 structured. ?!? We should be able to handle this now that flow.c
342 computes an accurate cfg for EH. */
343 if (current_function_has_exception_handlers ())
344 return 1;
345
346 /* If we have non-jumping insns which refer to labels, then we consider
347 the cfg not well structured. */
348 /* Check for labels referred to other than by jumps. */
349 FOR_EACH_BB (b)
350 for (insn = b->head;; insn = NEXT_INSN (insn))
351 {
352 code = GET_CODE (insn);
353 if (GET_RTX_CLASS (code) == 'i' && code != JUMP_INSN)
354 {
355 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
356
357 if (note
358 && ! (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
359 && find_reg_note (NEXT_INSN (insn), REG_LABEL,
360 XEXP (note, 0))))
361 return 1;
362 }
363
364 if (insn == b->end)
365 break;
366 }
367
368 /* All the tests passed. Consider the cfg well structured. */
369 return 0;
370}
371
372/* Build the control flow graph and set nr_edges.
373
374 Instead of trying to build a cfg ourselves, we rely on flow to
375 do it for us. Stamp out useless code (and bug) duplication.
376
377 Return nonzero if an irregularity in the cfg is found which would
378 prevent cross block scheduling. */
379
380static int
381build_control_flow (edge_list)
382 struct edge_list *edge_list;
383{
384 int i, unreachable, num_edges;
385 basic_block b;
386
387 /* This already accounts for entry/exit edges. */
388 num_edges = NUM_EDGES (edge_list);
389
390 /* Unreachable loops with more than one basic block are detected
391 during the DFS traversal in find_rgns.
392
393 Unreachable loops with a single block are detected here. This
394 test is redundant with the one in find_rgns, but it's much
395 cheaper to go ahead and catch the trivial case here. */
396 unreachable = 0;
397 FOR_EACH_BB (b)
398 {
399 if (b->pred == NULL
400 || (b->pred->src == b
401 && b->pred->pred_next == NULL))
402 unreachable = 1;
403 }
404
405 /* ??? We can kill these soon. */
406 in_edges = (int *) xcalloc (last_basic_block, sizeof (int));
407 out_edges = (int *) xcalloc (last_basic_block, sizeof (int));
408 edge_table = (haifa_edge *) xcalloc (num_edges, sizeof (haifa_edge));
409
410 nr_edges = 0;
411 for (i = 0; i < num_edges; i++)
412 {
413 edge e = INDEX_EDGE (edge_list, i);
414
415 if (e->dest != EXIT_BLOCK_PTR
416 && e->src != ENTRY_BLOCK_PTR)
417 new_edge (e->src->index, e->dest->index);
418 }
419
420 /* Increment by 1, since edge 0 is unused. */
421 nr_edges++;
422
423 return unreachable;
424}
425
426/* Record an edge in the control flow graph from SOURCE to TARGET.
427
428 In theory, this is redundant with the s_succs computed above, but
429 we have not converted all of haifa to use information from the
430 integer lists. */
431
432static void
433new_edge (source, target)
434 int source, target;
435{
436 int e, next_edge;
437 int curr_edge, fst_edge;
438
439 /* Check for duplicates. */
440 fst_edge = curr_edge = OUT_EDGES (source);
441 while (curr_edge)
442 {
443 if (FROM_BLOCK (curr_edge) == source
444 && TO_BLOCK (curr_edge) == target)
445 {
446 return;
447 }
448
449 curr_edge = NEXT_OUT (curr_edge);
450
451 if (fst_edge == curr_edge)
452 break;
453 }
454
455 e = ++nr_edges;
456
457 FROM_BLOCK (e) = source;
458 TO_BLOCK (e) = target;
459
460 if (OUT_EDGES (source))
461 {
462 next_edge = NEXT_OUT (OUT_EDGES (source));
463 NEXT_OUT (OUT_EDGES (source)) = e;
464 NEXT_OUT (e) = next_edge;
465 }
466 else
467 {
468 OUT_EDGES (source) = e;
469 NEXT_OUT (e) = e;
470 }
471
472 if (IN_EDGES (target))
473 {
474 next_edge = NEXT_IN (IN_EDGES (target));
475 NEXT_IN (IN_EDGES (target)) = e;
476 NEXT_IN (e) = next_edge;
477 }
478 else
479 {
480 IN_EDGES (target) = e;
481 NEXT_IN (e) = e;
482 }
483}
484
485/* Translate a bit-set SET to a list BL of the bit-set members. */
486
487static void
488extract_bitlst (set, bl)
489 sbitmap set;
490 bitlst *bl;
491{
492 int i;
493
494 /* bblst table space is reused in each call to extract_bitlst. */
495 bitlst_table_last = 0;
496
497 bl->first_member = &bitlst_table[bitlst_table_last];
498 bl->nr_members = 0;
499
500 /* Iterate over each word in the bitset. */
501 EXECUTE_IF_SET_IN_SBITMAP (set, 0, i,
502 {
503 bitlst_table[bitlst_table_last++] = i;
504 (bl->nr_members)++;
505 });
506
507}
508
509/* Functions for the construction of regions. */
510
511/* Print the regions, for debugging purposes. Callable from debugger. */
512
513void
514debug_regions ()
515{
516 int rgn, bb;
517
518 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
519 for (rgn = 0; rgn < nr_regions; rgn++)
520 {
521 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
522 rgn_table[rgn].rgn_nr_blocks);
523 fprintf (sched_dump, ";;\tbb/block: ");
524
525 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
526 {
527 current_blocks = RGN_BLOCKS (rgn);
528
529 if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
530 abort ();
531
532 fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
533 }
534
535 fprintf (sched_dump, "\n\n");
536 }
537}
538
539/* Build a single block region for each basic block in the function.
540 This allows for using the same code for interblock and basic block
541 scheduling. */
542
543static void
544find_single_block_region ()
545{
546 basic_block bb;
547
548 nr_regions = 0;
549
550 FOR_EACH_BB (bb)
551 {
552 rgn_bb_table[nr_regions] = bb->index;
553 RGN_NR_BLOCKS (nr_regions) = 1;
554 RGN_BLOCKS (nr_regions) = nr_regions;
555 CONTAINING_RGN (bb->index) = nr_regions;
556 BLOCK_TO_BB (bb->index) = 0;
557 nr_regions++;
558 }
559}
560
561/* Update number of blocks and the estimate for number of insns
562 in the region. Return 1 if the region is "too large" for interblock
563 scheduling (compile time considerations), otherwise return 0. */
564
565static int
566too_large (block, num_bbs, num_insns)
567 int block, *num_bbs, *num_insns;
568{
569 (*num_bbs)++;
570 (*num_insns) += (INSN_LUID (BLOCK_END (block)) -
571 INSN_LUID (BLOCK_HEAD (block)));
572 if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
573 return 1;
574 else
575 return 0;
576}
577
578/* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
579 is still an inner loop. Put in max_hdr[blk] the header of the innermost
580 loop containing blk. */
581#define UPDATE_LOOP_RELATIONS(blk, hdr) \
582{ \
583 if (max_hdr[blk] == -1) \
584 max_hdr[blk] = hdr; \
585 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
586 RESET_BIT (inner, hdr); \
587 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
588 { \
589 RESET_BIT (inner,max_hdr[blk]); \
590 max_hdr[blk] = hdr; \
591 } \
592}
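/* Roughly: blocks visited later in the DFS get larger dfs_nr values, so of
   two candidate headers the one with the larger dfs_nr is the more deeply
   nested.  The macro keeps the deeper header in max_hdr[blk] and clears the
   INNER bit of the enclosing one, since a loop that contains another loop
   header cannot itself be an inner loop.  */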
593
594/* Find regions for interblock scheduling.
595
596 A region for scheduling can be:
597
598 * A loop-free procedure, or
599
600 * A reducible inner loop, or
601
602 * A basic block not contained in any other region.
603
604 ?!? In theory we could build other regions based on extended basic
605 blocks or reverse extended basic blocks. Is it worth the trouble?
606
607 Loop blocks that form a region are put into the region's block list
608 in topological order.
609
610 This procedure stores its results into the following global (ick) variables
611
612 * rgn_nr
613 * rgn_table
614 * rgn_bb_table
615 * block_to_bb
616 * containing region
617
618 We use dominator relationships to avoid making regions out of non-reducible
619 loops.
620
621 This procedure needs to be converted to work on pred/succ lists instead
622 of edge tables. That would simplify it somewhat. */
623
624static void
625find_rgns (edge_list, dom)
626 struct edge_list *edge_list;
627 dominance_info dom;
628{
629 int *max_hdr, *dfs_nr, *stack, *degree;
630 char no_loops = 1;
631 int node, child, loop_head, i, head, tail;
632 int count = 0, sp, idx = 0, current_edge = out_edges[0];
633 int num_bbs, num_insns, unreachable;
634 int too_large_failure;
635 basic_block bb;
636
637 /* Note if an edge has been passed. */
638 sbitmap passed;
639
640 /* Note if a block is a natural loop header. */
641 sbitmap header;
642
643 /* Note if a block is a natural inner loop header. */
644 sbitmap inner;
645
646 /* Note if a block is in the block queue. */
647 sbitmap in_queue;
648
649 /* Note if a block is on the DFS stack. */
650 sbitmap in_stack;
651
652 int num_edges = NUM_EDGES (edge_list);
653
654 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
655 and a mapping from block to its loop header (if the block is contained
656 in a loop, else -1).
657
658 Store results in HEADER, INNER, and MAX_HDR respectively, these will
659 be used as inputs to the second traversal.
660
661 STACK, SP and DFS_NR are only used during the first traversal. */
662
663 /* Allocate and initialize variables for the first traversal. */
664 max_hdr = (int *) xmalloc (last_basic_block * sizeof (int));
665 dfs_nr = (int *) xcalloc (last_basic_block, sizeof (int));
666 stack = (int *) xmalloc (nr_edges * sizeof (int));
667
668 inner = sbitmap_alloc (last_basic_block);
669 sbitmap_ones (inner);
670
671 header = sbitmap_alloc (last_basic_block);
672 sbitmap_zero (header);
673
674 passed = sbitmap_alloc (nr_edges);
675 sbitmap_zero (passed);
676
677 in_queue = sbitmap_alloc (last_basic_block);
678 sbitmap_zero (in_queue);
679
680 in_stack = sbitmap_alloc (last_basic_block);
681 sbitmap_zero (in_stack);
682
683 for (i = 0; i < last_basic_block; i++)
684 max_hdr[i] = -1;
685
686 /* DFS traversal to find inner loops in the cfg. */
687
688 sp = -1;
689 while (1)
690 {
691 if (current_edge == 0 || TEST_BIT (passed, current_edge))
692 {
693 /* We have reached a leaf node or a node that was already
694 processed. Pop edges off the stack until we find
695 an edge that has not yet been processed. */
696 while (sp >= 0
697 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
698 {
699 /* Pop entry off the stack. */
700 current_edge = stack[sp--];
701 node = FROM_BLOCK (current_edge);
702 child = TO_BLOCK (current_edge);
703 RESET_BIT (in_stack, child);
704 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
705 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
706 current_edge = NEXT_OUT (current_edge);
707 }
708
709 /* See if we have finished the DFS tree traversal. */
710 if (sp < 0 && TEST_BIT (passed, current_edge))
711 break;
712
713 /* Nope, continue the traversal with the popped node. */
714 continue;
715 }
716
717 /* Process a node. */
718 node = FROM_BLOCK (current_edge);
719 child = TO_BLOCK (current_edge);
720 SET_BIT (in_stack, node);
721 dfs_nr[node] = ++count;
722
723 /* If the successor is in the stack, then we've found a loop.
724 Mark the loop, if it is not a natural loop, then it will
725 be rejected during the second traversal. */
726 if (TEST_BIT (in_stack, child))
727 {
728 no_loops = 0;
729 SET_BIT (header, child);
730 UPDATE_LOOP_RELATIONS (node, child);
731 SET_BIT (passed, current_edge);
732 current_edge = NEXT_OUT (current_edge);
733 continue;
734 }
735
736 /* If the child was already visited, then there is no need to visit
737 it again. Just update the loop relationships and restart
738 with a new edge. */
739 if (dfs_nr[child])
740 {
741 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
742 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
743 SET_BIT (passed, current_edge);
744 current_edge = NEXT_OUT (current_edge);
745 continue;
746 }
747
748 /* Push an entry on the stack and continue DFS traversal. */
749 stack[++sp] = current_edge;
750 SET_BIT (passed, current_edge);
751 current_edge = OUT_EDGES (child);
752
753 /* This is temporary until haifa is converted to use rth's new
754 cfg routines which have true entry/exit blocks and the
755 appropriate edges from/to those blocks.
756
757 Generally we update dfs_nr for a node when we process its
758 out edge. However, if the node has no out edge then we will
759 not set dfs_nr for that node. This can confuse the scheduler
760 into thinking that we have unreachable blocks, which in turn
761 disables cross block scheduling.
762
763 So, if we have a node with no out edges, go ahead and mark it
764 as reachable now. */
765 if (current_edge == 0)
766 dfs_nr[child] = ++count;
767 }
768
769 /* Another check for unreachable blocks. The earlier test in
770 is_cfg_nonregular only finds unreachable blocks that do not
771 form a loop.
772
773 The DFS traversal will mark every block that is reachable from
774 the entry node by placing a nonzero value in dfs_nr. Thus if
775 dfs_nr is zero for any block, then it must be unreachable. */
776 unreachable = 0;
777 FOR_EACH_BB (bb)
778 if (dfs_nr[bb->index] == 0)
779 {
780 unreachable = 1;
781 break;
782 }
783
784 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
785 to hold degree counts. */
786 degree = dfs_nr;
787
788 FOR_EACH_BB (bb)
789 degree[bb->index] = 0;
790 for (i = 0; i < num_edges; i++)
791 {
792 edge e = INDEX_EDGE (edge_list, i);
793
794 if (e->dest != EXIT_BLOCK_PTR)
795 degree[e->dest->index]++;
796 }
797
798 /* Do not perform region scheduling if there are any unreachable
799 blocks. */
800 if (!unreachable)
801 {
802 int *queue;
803
804 if (no_loops)
805 SET_BIT (header, 0);
806
807 /* Second traversal: find reducible inner loops and topologically sort
808 block of each region. */
809
810 queue = (int *) xmalloc (n_basic_blocks * sizeof (int));
811
812 /* Find blocks which are inner loop headers. We still have non-reducible
813 loops to consider at this point. */
814 FOR_EACH_BB (bb)
815 {
816 if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
817 {
818 edge e;
819 basic_block jbb;
820
821 /* Now check that the loop is reducible. We do this separately
822 from finding inner loops so that we do not find a reducible
823 loop which contains an inner non-reducible loop.
824
825 A simple way to find reducible/natural loops is to verify
826 that each block in the loop is dominated by the loop
827 header.
828
829 If there exists a block that is not dominated by the loop
830 header, then the block is reachable from outside the loop
831 and thus the loop is not a natural loop. */
832 FOR_EACH_BB (jbb)
833 {
834 /* First identify blocks in the loop, except for the loop
835 entry block. */
836 if (bb->index == max_hdr[jbb->index] && bb != jbb)
837 {
838 /* Now verify that the block is dominated by the loop
839 header. */
840 if (!dominated_by_p (dom, jbb, bb))
841 break;
842 }
843 }
844
845 /* If we exited the loop early, then I is the header of
846 a non-reducible loop and we should quit processing it
847 now. */
848 if (jbb != EXIT_BLOCK_PTR)
849 continue;
850
851 /* I is a header of an inner loop, or block 0 in a subroutine
852 with no loops at all. */
853 head = tail = -1;
854 too_large_failure = 0;
855 loop_head = max_hdr[bb->index];
856
857 /* Decrease degree of all I's successors for topological
858 ordering. */
859 for (e = bb->succ; e; e = e->succ_next)
860 if (e->dest != EXIT_BLOCK_PTR)
861 --degree[e->dest->index];
862
863 /* Estimate # insns, and count # blocks in the region. */
864 num_bbs = 1;
865 num_insns = (INSN_LUID (bb->end)
866 - INSN_LUID (bb->head));
867
868 /* Find all loop latches (blocks with back edges to the loop
869 header), or all the leaf blocks if the cfg has no loops.
870
871 Place those blocks into the queue. */
872 if (no_loops)
873 {
874 FOR_EACH_BB (jbb)
875 /* Leaf nodes have only a single successor which must
876 be EXIT_BLOCK. */
877 if (jbb->succ
878 && jbb->succ->dest == EXIT_BLOCK_PTR
879 && jbb->succ->succ_next == NULL)
880 {
881 queue[++tail] = jbb->index;
882 SET_BIT (in_queue, jbb->index);
883
884 if (too_large (jbb->index, &num_bbs, &num_insns))
885 {
886 too_large_failure = 1;
887 break;
888 }
889 }
890 }
891 else
892 {
893 edge e;
894
895 for (e = bb->pred; e; e = e->pred_next)
896 {
897 if (e->src == ENTRY_BLOCK_PTR)
898 continue;
899
900 node = e->src->index;
901
902 if (max_hdr[node] == loop_head && node != bb->index)
903 {
904 /* This is a loop latch. */
905 queue[++tail] = node;
906 SET_BIT (in_queue, node);
907
908 if (too_large (node, &num_bbs, &num_insns))
909 {
910 too_large_failure = 1;
911 break;
912 }
913 }
914 }
915 }
916
917 /* Now add all the blocks in the loop to the queue.
918
919 We know the loop is a natural loop; however the algorithm
920 above will not always mark certain blocks as being in the
921 loop. Consider:
922 node children
923 a b,c
924 b c
925 c a,d
926 d b
927
928 The algorithm in the DFS traversal may not mark B & D as part
929 of the loop (ie they will not have max_hdr set to A).
930
931 We know they can not be loop latches (else they would have
932 had max_hdr set since they'd have a backedge to a dominator
933 block). So we don't need them on the initial queue.
934
935 We know they are part of the loop because they are dominated
936 by the loop header and can be reached by a backwards walk of
937 the edges starting with nodes on the initial queue.
938
939 It is safe and desirable to include those nodes in the
940 loop/scheduling region. To do so we would need to decrease
941 the degree of a node if it is the target of a backedge
942 within the loop itself as the node is placed in the queue.
943
944 We do not do this because I'm not sure that the actual
945 scheduling code will properly handle this case. ?!? */
946
947 while (head < tail && !too_large_failure)
948 {
949 edge e;
950 child = queue[++head];
951
952 for (e = BASIC_BLOCK (child)->pred; e; e = e->pred_next)
953 {
954 node = e->src->index;
955
956 /* See discussion above about nodes not marked as in
957 this loop during the initial DFS traversal. */
958 if (e->src == ENTRY_BLOCK_PTR
959 || max_hdr[node] != loop_head)
960 {
961 tail = -1;
962 break;
963 }
964 else if (!TEST_BIT (in_queue, node) && node != bb->index)
965 {
966 queue[++tail] = node;
967 SET_BIT (in_queue, node);
968
969 if (too_large (node, &num_bbs, &num_insns))
970 {
971 too_large_failure = 1;
972 break;
973 }
974 }
975 }
976 }
977
978 if (tail >= 0 && !too_large_failure)
979 {
980 /* Place the loop header into list of region blocks. */
981 degree[bb->index] = -1;
982 rgn_bb_table[idx] = bb->index;
983 RGN_NR_BLOCKS (nr_regions) = num_bbs;
984 RGN_BLOCKS (nr_regions) = idx++;
985 CONTAINING_RGN (bb->index) = nr_regions;
986 BLOCK_TO_BB (bb->index) = count = 0;
987
988 /* Remove blocks from queue[] when their in degree
989 becomes zero. Repeat until no blocks are left on the
990 list. This produces a topological list of blocks in
991 the region. */
992 while (tail >= 0)
993 {
994 if (head < 0)
995 head = tail;
996 child = queue[head];
997 if (degree[child] == 0)
998 {
999 edge e;
1000
1001 degree[child] = -1;
1002 rgn_bb_table[idx++] = child;
1003 BLOCK_TO_BB (child) = ++count;
1004 CONTAINING_RGN (child) = nr_regions;
1005 queue[head] = queue[tail--];
1006
1007 for (e = BASIC_BLOCK (child)->succ;
1008 e;
1009 e = e->succ_next)
1010 if (e->dest != EXIT_BLOCK_PTR)
1011 --degree[e->dest->index];
1012 }
1013 else
1014 --head;
1015 }
1016 ++nr_regions;
1017 }
1018 }
1019 }
1020 free (queue);
1021 }
1022
1023 /* Any block that did not end up in a region is placed into a region
1024 by itself. */
1025 FOR_EACH_BB (bb)
1026 if (degree[bb->index] >= 0)
1027 {
1028 rgn_bb_table[idx] = bb->index;
1029 RGN_NR_BLOCKS (nr_regions) = 1;
1030 RGN_BLOCKS (nr_regions) = idx++;
1031 CONTAINING_RGN (bb->index) = nr_regions++;
1032 BLOCK_TO_BB (bb->index) = 0;
1033 }
1034
1035 free (max_hdr);
1036 free (dfs_nr);
1037 free (stack);
1038 sbitmap_free (passed);
1039 sbitmap_free (header);
1040 sbitmap_free (inner);
1041 sbitmap_free (in_queue);
1042 sbitmap_free (in_stack);
1043}
1044
1045/* Functions for regions scheduling information. */
1046
1047/* Compute dominators, probability, and potential-split-edges of bb.
1048 Assume that these values were already computed for bb's predecessors. */
1049
1050static void
1051compute_dom_prob_ps (bb)
1052 int bb;
1053{
1054 int nxt_in_edge, fst_in_edge, pred;
1055 int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;
1056
1057 prob[bb] = 0.0;
1058 if (IS_RGN_ENTRY (bb))
1059 {
1060 SET_BIT (dom[bb], 0);
1061 prob[bb] = 1.0;
1062 return;
1063 }
1064
1065 fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));
1066
1067 /* Initialize dom[bb] to '111..1'. */
1068 sbitmap_ones (dom[bb]);
1069
1070 do
1071 {
1072 pred = FROM_BLOCK (nxt_in_edge);
1073 sbitmap_a_and_b (dom[bb], dom[bb], dom[BLOCK_TO_BB (pred)]);
1074 sbitmap_a_or_b (ancestor_edges[bb], ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)]);
1075
1076 SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge));
1077
1078 nr_out_edges = 1;
1079 nr_rgn_out_edges = 0;
1080 fst_out_edge = OUT_EDGES (pred);
1081 nxt_out_edge = NEXT_OUT (fst_out_edge);
1082
1083 sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[BLOCK_TO_BB (pred)]);
1084
1085 SET_BIT (pot_split[bb], EDGE_TO_BIT (fst_out_edge));
1086
1087 /* The successor doesn't belong in the region? */
1088 if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
1089 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1090 ++nr_rgn_out_edges;
1091
1092 while (fst_out_edge != nxt_out_edge)
1093 {
1094 ++nr_out_edges;
1095 /* The successor doesn't belong in the region? */
1096 if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
1097 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1098 ++nr_rgn_out_edges;
1099 SET_BIT (pot_split[bb], EDGE_TO_BIT (nxt_out_edge));
1100 nxt_out_edge = NEXT_OUT (nxt_out_edge);
1101
1102 }
1103
1104 /* Now nr_rgn_out_edges is the number of region-exit edges from
1105 pred, and nr_out_edges will be the number of pred out edges
1106 not leaving the region. */
1107 nr_out_edges -= nr_rgn_out_edges;
1108 if (nr_rgn_out_edges > 0)
1109 prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1110 else
1111 prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1112 nxt_in_edge = NEXT_IN (nxt_in_edge);
1113 }
1114 while (fst_in_edge != nxt_in_edge);
1115
1116 SET_BIT (dom[bb], bb);
1117 sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
1118
1119 if (sched_verbose >= 2)
1120 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1121 (int) (100.0 * prob[bb]));
1122}
1123
1124/* Functions for target info. */
1125
1126/* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
1127 Note that bb_trg dominates bb_src. */
1128
1129static void
1130split_edges (bb_src, bb_trg, bl)
1131 int bb_src;
1132 int bb_trg;
1133 edgelst *bl;
1134{
1135 sbitmap src = (edgeset) sbitmap_alloc (pot_split[bb_src]->n_bits);
1136 sbitmap_copy (src, pot_split[bb_src]);
1137
1138 sbitmap_difference (src, src, pot_split[bb_trg]);
1139 extract_bitlst (src, bl);
1140 sbitmap_free (src);
1141}
1142
1143/* Find the valid candidate-source-blocks for the target block TRG, compute
1144 their probability, and check if they are speculative or not.
1145 For speculative sources, compute their update-blocks and split-blocks. */
1146
1147static void
1148compute_trg_info (trg)
1149 int trg;
1150{
1151 candidate *sp;
1152 edgelst el;
1153 int check_block, update_idx;
1154 int i, j, k, fst_edge, nxt_edge;
1155
1156 /* Define some of the fields for the target bb as well. */
1157 sp = candidate_table + trg;
1158 sp->is_valid = 1;
1159 sp->is_speculative = 0;
1160 sp->src_prob = 100;
1161
1162 for (i = trg + 1; i < current_nr_blocks; i++)
1163 {
1164 sp = candidate_table + i;
1165
1166 sp->is_valid = IS_DOMINATED (i, trg);
1167 if (sp->is_valid)
1168 {
1169 sp->src_prob = GET_SRC_PROB (i, trg);
1170 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1171 }
1172
1173 if (sp->is_valid)
1174 {
1175 split_edges (i, trg, &el);
1176 sp->is_speculative = (el.nr_members) ? 1 : 0;
1177 if (sp->is_speculative && !flag_schedule_speculative)
1178 sp->is_valid = 0;
1179 }
1180
1181 if (sp->is_valid)
1182 {
1183 char *update_blocks;
1184
1185 /* Compute split blocks and store them in bblst_table.
1186 The TO block of every split edge is a split block. */
1187 sp->split_bbs.first_member = &bblst_table[bblst_last];
1188 sp->split_bbs.nr_members = el.nr_members;
1189 for (j = 0; j < el.nr_members; bblst_last++, j++)
1190 bblst_table[bblst_last] =
1191 TO_BLOCK (rgn_edges[el.first_member[j]]);
1192 sp->update_bbs.first_member = &bblst_table[bblst_last];
1193
1194 /* Compute update blocks and store them in bblst_table.
1195 For every split edge, look at the FROM block, and check
1196 all out edges. For each out edge that is not a split edge,
1197 add the TO block to the update block list. This list can end
1198 up with a lot of duplicates. We need to weed them out to avoid
1199 overrunning the end of the bblst_table. */
1200 update_blocks = (char *) alloca (last_basic_block);
1201 memset (update_blocks, 0, last_basic_block);
1202
1203 update_idx = 0;
1204 for (j = 0; j < el.nr_members; j++)
1205 {
1206 check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
1207 fst_edge = nxt_edge = OUT_EDGES (check_block);
1208 do
1209 {
1210 if (! update_blocks[TO_BLOCK (nxt_edge)])
1211 {
1212 for (k = 0; k < el.nr_members; k++)
1213 if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
1214 break;
1215
1216 if (k >= el.nr_members)
1217 {
1218 bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
1219 update_blocks[TO_BLOCK (nxt_edge)] = 1;
1220 update_idx++;
1221 }
1222 }
1223
1224 nxt_edge = NEXT_OUT (nxt_edge);
1225 }
1226 while (fst_edge != nxt_edge);
1227 }
1228 sp->update_bbs.nr_members = update_idx;
1229
1230 /* Make sure we didn't overrun the end of bblst_table. */
1231 if (bblst_last > bblst_size)
1232 abort ();
1233 }
1234 else
1235 {
1236 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1237
1238 sp->is_speculative = 0;
1239 sp->src_prob = 0;
1240 }
1241 }
1242}
1243
1244/* Print candidates info, for debugging purposes. Callable from debugger. */
1245
1246void
1247debug_candidate (i)
1248 int i;
1249{
1250 if (!candidate_table[i].is_valid)
1251 return;
1252
1253 if (candidate_table[i].is_speculative)
1254 {
1255 int j;
1256 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1257
1258 fprintf (sched_dump, "split path: ");
1259 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1260 {
1261 int b = candidate_table[i].split_bbs.first_member[j];
1262
1263 fprintf (sched_dump, " %d ", b);
1264 }
1265 fprintf (sched_dump, "\n");
1266
1267 fprintf (sched_dump, "update path: ");
1268 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1269 {
1270 int b = candidate_table[i].update_bbs.first_member[j];
1271
1272 fprintf (sched_dump, " %d ", b);
1273 }
1274 fprintf (sched_dump, "\n");
1275 }
1276 else
1277 {
1278 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1279 }
1280}
1281
1282/* Print candidates info, for debugging purposes. Callable from debugger. */
1283
1284void
1285debug_candidates (trg)
1286 int trg;
1287{
1288 int i;
1289
1290 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1291 BB_TO_BLOCK (trg), trg);
1292 for (i = trg + 1; i < current_nr_blocks; i++)
1293 debug_candidate (i);
1294}
1295
1296/* Functions for speculative scheduling. */
1297
1298/* Return 0 if x is a set of a register alive in the beginning of one
1299 of the split-blocks of src, otherwise return 1. */
1300
1301static int
1302check_live_1 (src, x)
1303 int src;
1304 rtx x;
1305{
1306 int i;
1307 int regno;
1308 rtx reg = SET_DEST (x);
1309
1310 if (reg == 0)
1311 return 1;
1312
1313 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1314 || GET_CODE (reg) == SIGN_EXTRACT
1315 || GET_CODE (reg) == STRICT_LOW_PART)
1316 reg = XEXP (reg, 0);
1317
1318 if (GET_CODE (reg) == PARALLEL)
1319 {
1320 int i;
1321
1322 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1323 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1324 if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1325 return 1;
1326
1327 return 0;
1328 }
1329
1330 if (GET_CODE (reg) != REG)
1331 return 1;
1332
1333 regno = REGNO (reg);
1334
1335 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1336 {
1337 /* Global registers are assumed live. */
1338 return 0;
1339 }
1340 else
1341 {
1342 if (regno < FIRST_PSEUDO_REGISTER)
1343 {
1344 /* Check for hard registers. */
1345 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1346 while (--j >= 0)
1347 {
1348 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1349 {
1350 int b = candidate_table[src].split_bbs.first_member[i];
1351
1352 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
1353 regno + j))
1354 {
1355 return 0;
1356 }
1357 }
1358 }
1359 }
1360 else
1361 {
1362 /* Check for pseudo registers. */
1363 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1364 {
1365 int b = candidate_table[src].split_bbs.first_member[i];
1366
1367 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
1368 {
1369 return 0;
1370 }
1371 }
1372 }
1373 }
1374
1375 return 1;
1376}
1377
1378/* If x is a set of a register R, mark that R is alive in the beginning
1379 of every update-block of src. */
1380
1381static void
1382update_live_1 (src, x)
1383 int src;
1384 rtx x;
1385{
1386 int i;
1387 int regno;
1388 rtx reg = SET_DEST (x);
1389
1390 if (reg == 0)
1391 return;
1392
1393 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1394 || GET_CODE (reg) == SIGN_EXTRACT
1395 || GET_CODE (reg) == STRICT_LOW_PART)
1396 reg = XEXP (reg, 0);
1397
1398 if (GET_CODE (reg) == PARALLEL)
1399 {
1400 int i;
1401
1402 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1403 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1404 update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1405
1406 return;
1407 }
1408
1409 if (GET_CODE (reg) != REG)
1410 return;
1411
1412 /* Global registers are always live, so the code below does not apply
1413 to them. */
1414
1415 regno = REGNO (reg);
1416
1417 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1418 {
1419 if (regno < FIRST_PSEUDO_REGISTER)
1420 {
1421 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1422 while (--j >= 0)
1423 {
1424 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1425 {
1426 int b = candidate_table[src].update_bbs.first_member[i];
1427
1428 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
1429 regno + j);
1430 }
1431 }
1432 }
1433 else
1434 {
1435 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1436 {
1437 int b = candidate_table[src].update_bbs.first_member[i];
1438
1439 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
1440 }
1441 }
1442 }
1443}
1444
1445/* Return 1 if insn can be speculatively moved from block src to trg,
1446 otherwise return 0. Called before first insertion of insn to
1447 ready-list or before the scheduling. */
1448
1449static int
1450check_live (insn, src)
1451 rtx insn;
1452 int src;
1453{
1454 /* Find the registers set by instruction. */
1455 if (GET_CODE (PATTERN (insn)) == SET
1456 || GET_CODE (PATTERN (insn)) == CLOBBER)
1457 return check_live_1 (src, PATTERN (insn));
1458 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1459 {
1460 int j;
1461 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1462 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1463 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1464 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1465 return 0;
1466
1467 return 1;
1468 }
1469
1470 return 1;
1471}
1472
1473/* Update the live registers info after insn was moved speculatively from
1474 block src to trg. */
1475
1476static void
1477update_live (insn, src)
1478 rtx insn;
1479 int src;
1480{
1481 /* Find the registers set by instruction. */
1482 if (GET_CODE (PATTERN (insn)) == SET
1483 || GET_CODE (PATTERN (insn)) == CLOBBER)
1484 update_live_1 (src, PATTERN (insn));
1485 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1486 {
1487 int j;
1488 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1489 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1490 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1491 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1492 }
1493}
1494
1495/* Exception Free Loads:
1496
1497 We define five classes of speculative loads: IFREE, IRISKY,
1498 PFREE, PRISKY, and MFREE.
1499
1500 IFREE loads are loads that are proved to be exception-free, just
1501 by examining the load insn. Examples for such loads are loads
1502 from TOC and loads of global data.
1503
1504 IRISKY loads are loads that are proved to be exception-risky,
1505 just by examining the load insn. Examples for such loads are
1506 volatile loads and loads from shared memory.
1507
1508 PFREE loads are loads for which we can prove, by examining other
1509 insns, that they are exception-free. Currently, this class consists
1510 of loads for which we are able to find a "similar load", either in
1511 the target block, or, if only one split-block exists, in that split
1512 block. Load2 is similar to load1 if both have same single base
1513 register. We identify only part of the similar loads, by finding
1514 an insn upon which both load1 and load2 have a DEF-USE dependence.
1515
1516 PRISKY loads are loads for which we can prove, by examining other
1517 insns, that they are exception-risky. Currently we have two proofs for
1518 such loads. The first proof detects loads that are probably guarded by a
1519 test on the memory address. This proof is based on the
1520 backward and forward data dependence information for the region.
1521 Let load-insn be the examined load.
1522 Load-insn is PRISKY iff ALL the following hold:
1523
1524 - insn1 is not in the same block as load-insn
1525 - there is a DEF-USE dependence chain (insn1, ..., load-insn)
1526 - test-insn is either a compare or a branch, not in the same block
1527 as load-insn
1528 - load-insn is reachable from test-insn
1529 - there is a DEF-USE dependence chain (insn1, ..., test-insn)
1530
1531 This proof might fail when the compare and the load are fed
1532 by an insn not in the region. To solve this, we will add to this
1533 group all loads that have no input DEF-USE dependence.
1534
1535 The second proof detects loads that are directly or indirectly
1536 fed by a speculative load. This proof is affected by the
1537 scheduling process. We will use the flag fed_by_spec_load.
1538 Initially, all insns have this flag reset. After a speculative
1539 motion of an insn, if insn is either a load, or marked as
1540 fed_by_spec_load, we will also mark as fed_by_spec_load every
1541 insn1 for which a DEF-USE dependence (insn, insn1) exists. A
1542 load which is fed_by_spec_load is also PRISKY.
1543
1544 MFREE (maybe-free) loads are all the remaining loads. They may be
1545 exception-free, but we cannot prove it.
1546
1547 Now, all loads in IFREE and PFREE classes are considered
1548 exception-free, while all loads in IRISKY and PRISKY classes are
1549 considered exception-risky. As for loads in the MFREE class,
1550 these are considered either exception-free or exception-risky,
1551 depending on whether we are pessimistic or optimistic. We have
1552 to take the pessimistic approach to assure the safety of
1553 speculative scheduling, but we can take the optimistic approach
1554 by invoking the -fsched_spec_load_dangerous option. */
1555
1556enum INSN_TRAP_CLASS
1557{
1558 TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2,
1559 PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5
1560};
1561
1562#define WORST_CLASS(class1, class2) \
1563((class1 > class2) ? class1 : class2)
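/* WORST_CLASS relies on the numeric ordering of the classes above; for
   example WORST_CLASS (PFREE_CANDIDATE, IRISKY) is IRISKY (2 vs. 4), so
   combining a risky sub-expression with anything else keeps the riskier
   classification.  */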
1564
1565/* Non-zero if block bb_to is equal to, or reachable from block bb_from. */
1566#define IS_REACHABLE(bb_from, bb_to) \
1567 (bb_from == bb_to \
1568 || IS_RGN_ENTRY (bb_from) \
1569 || (TEST_BIT (ancestor_edges[bb_to], \
1570 EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))))))
1571
1572/* Non-zero iff the address is composed of at most 1 register. */
1573#define CONST_BASED_ADDRESS_P(x) \
1574 (GET_CODE (x) == REG \
1575 || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS \
1576 || (GET_CODE (x) == LO_SUM)) \
1577 && (CONSTANT_P (XEXP (x, 0)) \
1578 || CONSTANT_P (XEXP (x, 1)))))
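/* For example, addresses of the form (reg), (plus (reg) (const_int 8)) or
   (lo_sum (reg) (symbol_ref ...)) satisfy CONST_BASED_ADDRESS_P, while
   (plus (reg) (reg)) does not, since neither operand is constant.  */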
1579
1580/* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1581
1582static void
1583set_spec_fed (load_insn)
1584 rtx load_insn;
1585{
1586 rtx link;
1587
1588 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
1589 if (GET_MODE (link) == VOIDmode)
1590 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
1591} /* set_spec_fed */
1592
1593/* On the path from the insn to load_insn_bb, find a conditional
1594branch depending on insn, that guards the speculative load. */
1595
1596static int
1597find_conditional_protection (insn, load_insn_bb)
1598 rtx insn;
1599 int load_insn_bb;
1600{
1601 rtx link;
1602
1603 /* Iterate through DEF-USE forward dependences. */
1604 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
1605 {
1606 rtx next = XEXP (link, 0);
1607 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1608 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1609 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1610 && load_insn_bb != INSN_BB (next)
1611 && GET_MODE (link) == VOIDmode
1612 && (GET_CODE (next) == JUMP_INSN
1613 || find_conditional_protection (next, load_insn_bb)))
1614 return 1;
1615 }
1616 return 0;
1617} /* find_conditional_protection */
1618
1619/* Returns 1 if the same insn1 that participates in the computation
1620 of load_insn's address is feeding a conditional branch that is
1621 guarding on load_insn. This is true if we find the two DEF-USE
1622 chains:
1623 insn1 -> ... -> conditional-branch
1624 insn1 -> ... -> load_insn,
1625 and if a flow path exists:
1626 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1627 and if insn1 is on the path
1628 region-entry -> ... -> bb_trg -> ... load_insn.
1629
1630 Locate insn1 by climbing on LOG_LINKS from load_insn.
1631 Locate the branch by following INSN_DEPEND from insn1. */
1632
1633static int
1634is_conditionally_protected (load_insn, bb_src, bb_trg)
1635 rtx load_insn;
1636 int bb_src, bb_trg;
1637{
1638 rtx link;
1639
1640 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
1641 {
1642 rtx insn1 = XEXP (link, 0);
1643
1644 /* Must be a DEF-USE dependence upon non-branch. */
1645 if (GET_MODE (link) != VOIDmode
1646 || GET_CODE (insn1) == JUMP_INSN)
1647 continue;
1648
1649 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1650 if (INSN_BB (insn1) == bb_src
1651 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1652 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1653 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1654 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1655 continue;
1656
1657 /* Now search for the conditional-branch. */
1658 if (find_conditional_protection (insn1, bb_src))
1659 return 1;
1660
1661 /* Recursive step: search another insn1, "above" current insn1. */
1662 return is_conditionally_protected (insn1, bb_src, bb_trg);
1663 }
1664
1665 /* The chain does not exist. */
1666 return 0;
1667} /* is_conditionally_protected */
1668
1669/* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1670 load_insn can move speculatively from bb_src to bb_trg. All the
1671 following must hold:
1672
1673 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1674 (2) load_insn and load1 have a def-use dependence upon
1675 the same insn 'insn1'.
1676 (3) either load2 is in bb_trg, or:
1677 - there's only one split-block, and
1678 - load1 is on the escape path, and
1679
1680 From all these we can conclude that the two loads access memory
1681 addresses that differ at most by a constant, and hence if moving
1682 load_insn would cause an exception, it would have been caused by
1683 load2 anyhow. */
1684
1685static int
1686is_pfree (load_insn, bb_src, bb_trg)
1687 rtx load_insn;
1688 int bb_src, bb_trg;
1689{
1690 rtx back_link;
1691 candidate *candp = candidate_table + bb_src;
1692
1693 if (candp->split_bbs.nr_members != 1)
1694 /* Must have exactly one escape block. */
1695 return 0;
1696
1697 for (back_link = LOG_LINKS (load_insn);
1698 back_link; back_link = XEXP (back_link, 1))
1699 {
1700 rtx insn1 = XEXP (back_link, 0);
1701
1702 if (GET_MODE (back_link) == VOIDmode)
1703 {
1704 /* Found a DEF-USE dependence (insn1, load_insn). */
1705 rtx fore_link;
1706
1707 for (fore_link = INSN_DEPEND (insn1);
1708 fore_link; fore_link = XEXP (fore_link, 1))
1709 {
1710 rtx insn2 = XEXP (fore_link, 0);
1711 if (GET_MODE (fore_link) == VOIDmode)
1712 {
1713 /* Found a DEF-USE dependence (insn1, insn2). */
1714 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1715 /* insn2 not guaranteed to be a 1 base reg load. */
1716 continue;
1717
1718 if (INSN_BB (insn2) == bb_trg)
1719 /* insn2 is the similar load, in the target block. */
1720 return 1;
1721
1722 if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
1723 /* insn2 is a similar load, in a split-block. */
1724 return 1;
1725 }
1726 }
1727 }
1728 }
1729
1730 /* Couldn't find a similar load. */
1731 return 0;
1732} /* is_pfree */
1733
1734/* Returns a class that insn with GET_DEST(insn)=x may belong to,
1735 as found by analyzing insn's expression. */
1736
1737static int
1738may_trap_exp (x, is_store)
1739 rtx x;
1740 int is_store;
1741{
1742 enum rtx_code code;
1743
1744 if (x == 0)
1745 return TRAP_FREE;
1746 code = GET_CODE (x);
1747 if (is_store)
1748 {
1749 if (code == MEM && may_trap_p (x))
1750 return TRAP_RISKY;
1751 else
1752 return TRAP_FREE;
1753 }
1754 if (code == MEM)
1755 {
1756 /* The insn uses memory: a volatile load. */
1757 if (MEM_VOLATILE_P (x))
1758 return IRISKY;
1759 /* An exception-free load. */
1760 if (!may_trap_p (x))
1761 return IFREE;
1762 /* A load with 1 base register, to be further checked. */
1763 if (CONST_BASED_ADDRESS_P (XEXP (x, 0)))
1764 return PFREE_CANDIDATE;
1765 /* No info on the load, to be further checked. */
1766 return PRISKY_CANDIDATE;
1767 }
1768 else
1769 {
1770 const char *fmt;
1771 int i, insn_class = TRAP_FREE;
1772
1773 /* Neither store nor load, check if it may cause a trap. */
1774 if (may_trap_p (x))
1775 return TRAP_RISKY;
1776 /* Recursive step: walk the insn... */
1777 fmt = GET_RTX_FORMAT (code);
1778 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1779 {
1780 if (fmt[i] == 'e')
1781 {
1782 int tmp_class = may_trap_exp (XEXP (x, i), is_store);
1783 insn_class = WORST_CLASS (insn_class, tmp_class);
1784 }
1785 else if (fmt[i] == 'E')
1786 {
1787 int j;
1788 for (j = 0; j < XVECLEN (x, i); j++)
1789 {
1790 int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store);
1791 insn_class = WORST_CLASS (insn_class, tmp_class);
1792 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
1793 break;
1794 }
1795 }
1796 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
1797 break;
1798 }
1799 return insn_class;
1800 }
1801}
1802
1803/* Classifies insn for the purpose of verifying that it can be
 1804 moved speculatively, by examining its pattern, returning:
 1805 TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
 1806 TRAP_FREE: non-load insn.
 1807 IFREE: load from a globally safe location.
 1808 IRISKY: volatile load.
 1809 PFREE_CANDIDATE, PRISKY_CANDIDATE: loads that need to be checked for
1810 being either PFREE or PRISKY. */
1811
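/* Note that for a PARALLEL the classes of the individual SETs and CLOBBERs
   are combined with WORST_CLASS, which keeps the riskier of its two
   arguments; once the running class reaches IRISKY or TRAP_RISKY no later
   element can improve it, so the loop stops early.  E.g. a PARALLEL
   containing an exception-free load together with a store through a
   possibly-trapping address is classified TRAP_RISKY as a whole.  */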
1812static int
1813haifa_classify_insn (insn)
1814 rtx insn;
1815{
1816 rtx pat = PATTERN (insn);
1817 int tmp_class = TRAP_FREE;
1818 int insn_class = TRAP_FREE;
1819 enum rtx_code code;
1820
1821 if (GET_CODE (pat) == PARALLEL)
1822 {
1823 int i, len = XVECLEN (pat, 0);
1824
1825 for (i = len - 1; i >= 0; i--)
1826 {
1827 code = GET_CODE (XVECEXP (pat, 0, i));
1828 switch (code)
1829 {
1830 case CLOBBER:
1831 /* Test if it is a 'store'. */
1832 tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1);
1833 break;
1834 case SET:
1835 /* Test if it is a store. */
1836 tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1);
1837 if (tmp_class == TRAP_RISKY)
1838 break;
1839 /* Test if it is a load. */
1840 tmp_class
1841 = WORST_CLASS (tmp_class,
1842 may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)),
1843 0));
1844 break;
1845 case COND_EXEC:
1846 case TRAP_IF:
1847 tmp_class = TRAP_RISKY;
1848 break;
1849 default:
1850 ;
1851 }
1852 insn_class = WORST_CLASS (insn_class, tmp_class);
1853 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
1854 break;
1855 }
1856 }
1857 else
1858 {
1859 code = GET_CODE (pat);
1860 switch (code)
1861 {
1862 case CLOBBER:
1863 /* Test if it is a 'store'. */
1864 tmp_class = may_trap_exp (XEXP (pat, 0), 1);
1865 break;
1866 case SET:
1867 /* Test if it is a store. */
1868 tmp_class = may_trap_exp (SET_DEST (pat), 1);
1869 if (tmp_class == TRAP_RISKY)
1870 break;
1871 /* Test if it is a load. */
1872 tmp_class =
1873 WORST_CLASS (tmp_class,
1874 may_trap_exp (SET_SRC (pat), 0));
1875 break;
1876 case COND_EXEC:
1877 case TRAP_IF:
1878 tmp_class = TRAP_RISKY;
1879 break;
1880 default:;
1881 }
1882 insn_class = tmp_class;
1883 }
1884
1885 return insn_class;
1886}
1887
1888/* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1889 a load moved speculatively, or if load_insn is protected by
1890 a compare on load_insn's address). */
1891
1892static int
1893is_prisky (load_insn, bb_src, bb_trg)
1894 rtx load_insn;
1895 int bb_src, bb_trg;
1896{
1897 if (FED_BY_SPEC_LOAD (load_insn))
1898 return 1;
1899
1900 if (LOG_LINKS (load_insn) == NULL)
1901 /* Dependence may 'hide' out of the region. */
1902 return 1;
1903
1904 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
1905 return 1;
1906
1907 return 0;
1908}
1909
1910/* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1911 Return 1 if insn is exception-free (and the motion is valid)
1912 and 0 otherwise. */
1913
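/* Roughly: TRAP_FREE insns are always accepted and TRAP_RISKY ones never
   are.  Loads are considered only when flag_schedule_speculative_load is
   set: IFREE loads are accepted, IRISKY ones rejected, a PFREE_CANDIDATE
   is accepted when is_pfree holds (otherwise it falls through and is
   treated as a PRISKY candidate), and PRISKY candidates are accepted only
   when flag_schedule_speculative_load_dangerous is set and is_prisky does
   not hold.  */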
1914static int
1915is_exception_free (insn, bb_src, bb_trg)
1916 rtx insn;
1917 int bb_src, bb_trg;
1918{
1919 int insn_class = haifa_classify_insn (insn);
1920
1921 /* Handle non-load insns. */
1922 switch (insn_class)
1923 {
1924 case TRAP_FREE:
1925 return 1;
1926 case TRAP_RISKY:
1927 return 0;
1928 default:;
1929 }
1930
1931 /* Handle loads. */
1932 if (!flag_schedule_speculative_load)
1933 return 0;
1934 IS_LOAD_INSN (insn) = 1;
1935 switch (insn_class)
1936 {
1937 case IFREE:
1938 return (1);
1939 case IRISKY:
1940 return 0;
1941 case PFREE_CANDIDATE:
1942 if (is_pfree (insn, bb_src, bb_trg))
1943 return 1;
1944 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
1945 case PRISKY_CANDIDATE:
1946 if (!flag_schedule_speculative_load_dangerous
1947 || is_prisky (insn, bb_src, bb_trg))
1948 return 0;
1949 break;
1950 default:;
1951 }
1952
1953 return flag_schedule_speculative_load_dangerous;
1954}
1955\f
1956/* The number of insns from the current block scheduled so far. */
1957static int sched_target_n_insns;
1958/* The number of insns from the current block to be scheduled in total. */
1959static int target_n_insns;
1960/* The number of insns from the entire region scheduled so far. */
1961static int sched_n_insns;
1962/* Nonzero if the last scheduled insn was a jump. */
1963static int last_was_jump;
1964
1965/* Implementations of the sched_info functions for region scheduling. */
1966static void init_ready_list PARAMS ((struct ready_list *));
1967static int can_schedule_ready_p PARAMS ((rtx));
1968static int new_ready PARAMS ((rtx));
1969static int schedule_more_p PARAMS ((void));
1970static const char *rgn_print_insn PARAMS ((rtx, int));
1971static int rgn_rank PARAMS ((rtx, rtx));
1972static int contributes_to_priority PARAMS ((rtx, rtx));
1973static void compute_jump_reg_dependencies PARAMS ((rtx, regset));
1974
1975/* Return nonzero if there are more insns that should be scheduled. */
1976
1977static int
1978schedule_more_p ()
1979{
79c2ffde 1980 return ! last_was_jump && sched_target_n_insns < target_n_insns;
1981}
1982
1983/* Add all insns that are initially ready to the ready list READY. Called
1984 once before scheduling a set of insns. */
1985
1986static void
1987init_ready_list (ready)
1988 struct ready_list *ready;
1989{
1990 rtx prev_head = current_sched_info->prev_head;
1991 rtx next_tail = current_sched_info->next_tail;
1992 int bb_src;
1993 rtx insn;
1994
1995 target_n_insns = 0;
1996 sched_target_n_insns = 0;
1997 sched_n_insns = 0;
79c2ffde 1998 last_was_jump = 0;
1999
2000 /* Print debugging information. */
2001 if (sched_verbose >= 5)
2002 debug_dependencies ();
2003
2004 /* Prepare current target block info. */
2005 if (current_nr_blocks > 1)
2006 {
2007 candidate_table = (candidate *) xmalloc (current_nr_blocks
2008 * sizeof (candidate));
2009
2010 bblst_last = 0;
2011 /* bblst_table holds split blocks and update blocks for each block after
 2012 the current one in the region. Split blocks and update blocks are
2013 the TO blocks of region edges, so there can be at most rgn_nr_edges
2014 of them. */
2015 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
2016 bblst_table = (int *) xmalloc (bblst_size * sizeof (int));
2017
2018 bitlst_table_last = 0;
2019 bitlst_table_size = rgn_nr_edges;
2020 bitlst_table = (int *) xmalloc (rgn_nr_edges * sizeof (int));
2021
2022 compute_trg_info (target_bb);
2023 }
2024
2025 /* Initialize ready list with all 'ready' insns in target block.
2026 Count number of insns in the target block being scheduled. */
2027 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
2028 {
2029 rtx next;
2030
2031 if (! INSN_P (insn))
2032 continue;
2033 next = NEXT_INSN (insn);
2034
2035 if (INSN_DEP_COUNT (insn) == 0
be202ec2 2036 && (! INSN_P (next) || SCHED_GROUP_P (next) == 0))
2037 ready_add (ready, insn);
2038 if (!(SCHED_GROUP_P (insn)))
2039 target_n_insns++;
2040 }
2041
2042 /* Add to ready list all 'ready' insns in valid source blocks.
 2043 For speculative insns, check liveness, exception freedom, and
 2044 issue delay. */
2045 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
2046 if (IS_VALID (bb_src))
2047 {
2048 rtx src_head;
2049 rtx src_next_tail;
2050 rtx tail, head;
2051
2052 get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
2053 src_next_tail = NEXT_INSN (tail);
2054 src_head = head;
2055
2056 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
2057 {
2058 if (! INSN_P (insn))
2059 continue;
2060
2061 if (!CANT_MOVE (insn)
2062 && (!IS_SPECULATIVE_INSN (insn)
2063 || ((((!targetm.sched.use_dfa_pipeline_interface
2064 || !(*targetm.sched.use_dfa_pipeline_interface) ())
2065 && insn_issue_delay (insn) <= 3)
2066 || (targetm.sched.use_dfa_pipeline_interface
2067 && (*targetm.sched.use_dfa_pipeline_interface) ()
2068 && (recog_memoized (insn) < 0
2069 || min_insn_conflict_delay (curr_state,
2070 insn, insn) <= 3)))
2071 && check_live (insn, bb_src)
2072 && is_exception_free (insn, bb_src, target_bb))))
2073 {
2074 rtx next;
2075
a1f300c0 2076 /* Note that we haven't squirreled away the notes for
2077 blocks other than the current. So if this is a
2078 speculative insn, NEXT might otherwise be a note. */
2079 next = next_nonnote_insn (insn);
2080 if (INSN_DEP_COUNT (insn) == 0
2081 && (! next
2082 || ! INSN_P (next)
2083 || SCHED_GROUP_P (next) == 0))
2084 ready_add (ready, insn);
2085 }
2086 }
2087 }
2088}
2089
2090/* Called after taking INSN from the ready list. Returns nonzero if this
 2091 insn can be scheduled, or zero if we should silently discard it. */
2092
2093static int
2094can_schedule_ready_p (insn)
2095 rtx insn;
2096{
2097 if (GET_CODE (insn) == JUMP_INSN)
2098 last_was_jump = 1;
2099
2100 /* An interblock motion? */
2101 if (INSN_BB (insn) != target_bb)
2102 {
2103 rtx temp;
2104 basic_block b1;
2105
2106 if (IS_SPECULATIVE_INSN (insn))
2107 {
2108 if (!check_live (insn, INSN_BB (insn)))
2109 return 0;
2110 update_live (insn, INSN_BB (insn));
2111
2112 /* For speculative load, mark insns fed by it. */
2113 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
2114 set_spec_fed (insn);
2115
2116 nr_spec++;
2117 }
2118 nr_inter++;
2119
2120 /* Find the beginning of the scheduling group. */
2121 /* ??? Ought to update basic block here, but later bits of
 2122 schedule_block assume the original insn block is
2123 still intact. */
2124
2125 temp = insn;
2126 while (SCHED_GROUP_P (temp))
2127 temp = PREV_INSN (temp);
2128
6d2f8887 2129 /* Update source block boundaries. */
2130 b1 = BLOCK_FOR_INSN (temp);
2131 if (temp == b1->head && insn == b1->end)
2132 {
2133 /* We moved all the insns in the basic block.
2134 Emit a note after the last insn and update the
2135 begin/end boundaries to point to the note. */
2136 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
2137 b1->head = note;
2138 b1->end = note;
2139 }
2140 else if (insn == b1->end)
2141 {
2142 /* We took insns from the end of the basic block,
2143 so update the end of block boundary so that it
2144 points to the first insn we did not move. */
2145 b1->end = PREV_INSN (temp);
2146 }
2147 else if (temp == b1->head)
2148 {
2149 /* We took insns from the start of the basic block,
2150 so update the start of block boundary so that
2151 it points to the first insn we did not move. */
2152 b1->head = NEXT_INSN (insn);
2153 }
2154 }
2155 else
2156 {
2157 /* In block motion. */
2158 sched_target_n_insns++;
2159 }
2160 sched_n_insns++;
2161
2162 return 1;
2163}
2164
2165/* Called after INSN has all its dependencies resolved. Return nonzero
2166 if it should be moved to the ready list or the queue, or zero if we
2167 should silently discard it. */
2168static int
2169new_ready (next)
2170 rtx next;
2171{
2172 /* For speculative insns, before inserting to ready/queue,
2173 check live, exception-free, and issue-delay. */
2174 if (INSN_BB (next) != target_bb
2175 && (!IS_VALID (INSN_BB (next))
2176 || CANT_MOVE (next)
2177 || (IS_SPECULATIVE_INSN (next)
2178 && (0
2179 || (targetm.sched.use_dfa_pipeline_interface
2180 && (*targetm.sched.use_dfa_pipeline_interface) ()
2181 && recog_memoized (next) >= 0
2182 && min_insn_conflict_delay (curr_state, next,
2183 next) > 3)
2184 || ((!targetm.sched.use_dfa_pipeline_interface
2185 || !(*targetm.sched.use_dfa_pipeline_interface) ())
2186 && insn_issue_delay (next) > 3)
2187 || !check_live (next, INSN_BB (next))
2188 || !is_exception_free (next, INSN_BB (next), target_bb)))))
2189 return 0;
2190 return 1;
2191}
2192
2193/* Return a string that contains the insn uid and optionally anything else
2194 necessary to identify this insn in an output. It's valid to use a
2195 static buffer for this. The ALIGNED parameter should cause the string
2196 to be formatted so that multiple output lines will line up nicely. */
2197
2198static const char *
2199rgn_print_insn (insn, aligned)
2200 rtx insn;
2201 int aligned;
2202{
2203 static char tmp[80];
2204
2205 if (aligned)
2206 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
2207 else
2208 {
b4ead7d4 2209 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
2210 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
2211 else
2212 sprintf (tmp, "%d", INSN_UID (insn));
2213 }
2214 return tmp;
2215}
2216
2217/* Compare priority of two insns. Return a positive number if the second
2218 insn is to be preferred for scheduling, and a negative one if the first
2219 is to be preferred. Zero if they are equally good. */
2220
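/* For example, when the two insns come from different source blocks and
   neither is in the target block, the probability comparison below prefers
   the insn whose source block is more likely to be reached from the target
   block (the higher INSN_PROBABILITY).  */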
2221static int
2222rgn_rank (insn1, insn2)
2223 rtx insn1, insn2;
2224{
 2225 /* Some comparisons make sense only in interblock scheduling. */
2226 if (INSN_BB (insn1) != INSN_BB (insn2))
2227 {
2228 int spec_val, prob_val;
2229
 2230 /* Prefer an inblock motion over an interblock motion. */
2231 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
2232 return 1;
2233 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
2234 return -1;
2235
 2236 /* Prefer a useful motion over a speculative one. */
2237 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
2238 if (spec_val)
2239 return spec_val;
2240
2241 /* Prefer a more probable (speculative) insn. */
2242 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
2243 if (prob_val)
2244 return prob_val;
2245 }
2246 return 0;
2247}
2248
2249/* NEXT is an instruction that depends on INSN (a backward dependence);
2250 return nonzero if we should include this dependence in priority
2251 calculations. */
2252
2253static int
2254contributes_to_priority (next, insn)
2255 rtx next, insn;
2256{
2257 return BLOCK_NUM (next) == BLOCK_NUM (insn);
2258}
2259
2260/* INSN is a JUMP_INSN. Store the set of registers that must be considered
2261 to be set by this jump in SET. */
2262
2263static void
2264compute_jump_reg_dependencies (insn, set)
2265 rtx insn ATTRIBUTE_UNUSED;
2266 regset set ATTRIBUTE_UNUSED;
2267{
2268 /* Nothing to do here, since we postprocess jumps in
2269 add_branch_dependences. */
2270}
2271
2272/* Used in schedule_insns to initialize current_sched_info for scheduling
2273 regions (or single basic blocks). */
2274
2275static struct sched_info region_sched_info =
2276{
2277 init_ready_list,
2278 can_schedule_ready_p,
2279 schedule_more_p,
2280 new_ready,
2281 rgn_rank,
2282 rgn_print_insn,
2283 contributes_to_priority,
2284 compute_jump_reg_dependencies,
2285
2286 NULL, NULL,
2287 NULL, NULL,
4b6c5340 2288 0, 0
2289};
2290
2291/* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register. */
2292
2293static bool
2294sets_likely_spilled (pat)
2295 rtx pat;
2296{
2297 bool ret = false;
2298 note_stores (pat, sets_likely_spilled_1, &ret);
2299 return ret;
2300}
2301
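/* Helper for sets_likely_spilled, called via note_stores.  DATA points to
   the result flag; set it when X is a hard register in a
   CLASS_LIKELY_SPILLED_P class and PAT is a SET of that register.  */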
2302static void
2303sets_likely_spilled_1 (x, pat, data)
2304 rtx x, pat;
2305 void *data;
2306{
2307 bool *ret = (bool *) data;
2308
2309 if (GET_CODE (pat) == SET
2310 && REG_P (x)
2311 && REGNO (x) < FIRST_PSEUDO_REGISTER
2312 && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
2313 *ret = true;
2314}
2315
2316/* Add dependences so that branches are scheduled to run last in their
2317 block. */
2318
2319static void
2320add_branch_dependences (head, tail)
2321 rtx head, tail;
2322{
2323 rtx insn, last;
2324
2325 /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
2326 that can throw exceptions, force them to remain in order at the end of
2327 the block by adding dependencies and giving the last a high priority.
2328 There may be notes present, and prev_head may also be a note.
2329
2330 Branches must obviously remain at the end. Calls should remain at the
2331 end since moving them results in worse register allocation. Uses remain
2332 at the end to ensure proper register allocation.
2333
 2334 cc0 setters remain at the end because they can't be moved away from
2335 their cc0 user.
2336
2337 Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
2338 are not moved before reload because we can wind up with register
2339 allocation failures. */
2340
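/* As an illustration (r0 purely illustrative): for a block ending in
   ...; use r0; jump  the USE and the JUMP are both marked CANT_MOVE and
   the JUMP gets an anti-dependence on the USE, so the pair keeps its
   original order at the very end of the block; the second loop further
   below then makes the first insn of that trailing group depend on every
   remaining insn in the block, so the whole group is scheduled last.  */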
2341 insn = tail;
2342 last = 0;
2343 while (GET_CODE (insn) == CALL_INSN
2344 || GET_CODE (insn) == JUMP_INSN
2345 || (GET_CODE (insn) == INSN
2346 && (GET_CODE (PATTERN (insn)) == USE
2347 || GET_CODE (PATTERN (insn)) == CLOBBER
8d8a083e 2348 || can_throw_internal (insn)
2349#ifdef HAVE_cc0
2350 || sets_cc0_p (PATTERN (insn))
2351#endif
2352 || (!reload_completed
2353 && sets_likely_spilled (PATTERN (insn)))))
2354 || GET_CODE (insn) == NOTE)
2355 {
2356 if (GET_CODE (insn) != NOTE)
2357 {
37a0f8a5 2358 if (last != 0 && !find_insn_list (insn, LOG_LINKS (last)))
2359 {
2360 add_dependence (last, insn, REG_DEP_ANTI);
2361 INSN_REF_COUNT (insn)++;
2362 }
2363
2364 CANT_MOVE (insn) = 1;
2365
2366 last = insn;
2367 /* Skip over insns that are part of a group.
2368 Make each insn explicitly depend on the previous insn.
2369 This ensures that only the group header will ever enter
2370 the ready queue (and, when scheduled, will automatically
2371 schedule the SCHED_GROUP_P block). */
2372 while (SCHED_GROUP_P (insn))
2373 {
2374 rtx temp = prev_nonnote_insn (insn);
2375 add_dependence (insn, temp, REG_DEP_ANTI);
2376 insn = temp;
2377 }
2378 }
2379
2380 /* Don't overrun the bounds of the basic block. */
2381 if (insn == head)
2382 break;
2383
2384 insn = PREV_INSN (insn);
2385 }
2386
2387 /* Make sure these insns are scheduled last in their block. */
2388 insn = last;
2389 if (insn != 0)
2390 while (insn != head)
2391 {
2392 insn = prev_nonnote_insn (insn);
2393
2394 if (INSN_REF_COUNT (insn) != 0)
2395 continue;
2396
2397 add_dependence (last, insn, REG_DEP_ANTI);
2398 INSN_REF_COUNT (insn) = 1;
2399
2400 /* Skip over insns that are part of a group. */
2401 while (SCHED_GROUP_P (insn))
2402 insn = prev_nonnote_insn (insn);
2403 }
2404}
2405
 2406/* Data structures for the computation of data dependences in a region. We
2407 keep one `deps' structure for every basic block. Before analyzing the
2408 data dependences for a bb, its variables are initialized as a function of
2409 the variables of its predecessors. When the analysis for a bb completes,
2410 we save the contents to the corresponding bb_deps[bb] variable. */
2411
2412static struct deps *bb_deps;
2413
2414/* Duplicate the INSN_LIST elements of COPY and prepend them to OLD. */
2415
2416static rtx
2417concat_INSN_LIST (copy, old)
2418 rtx copy, old;
2419{
2420 rtx new = old;
2421 for (; copy ; copy = XEXP (copy, 1))
2422 new = alloc_INSN_LIST (XEXP (copy, 0), new);
2423 return new;
2424}
2425
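/* Prepend the elements of the parallel lists COPY_INSNS (an INSN_LIST) and
   COPY_MEMS (an EXPR_LIST) onto *OLD_INSNS_P and *OLD_MEMS_P respectively.
   The two input lists are walked in lockstep and are expected to have the
   same length.  */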
2426static void
2427concat_insn_mem_list (copy_insns, copy_mems, old_insns_p, old_mems_p)
2428 rtx copy_insns, copy_mems;
2429 rtx *old_insns_p, *old_mems_p;
2430{
2431 rtx new_insns = *old_insns_p;
2432 rtx new_mems = *old_mems_p;
2433
2434 while (copy_insns)
2435 {
2436 new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
2437 new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
2438 copy_insns = XEXP (copy_insns, 1);
2439 copy_mems = XEXP (copy_mems, 1);
2440 }
2441
2442 *old_insns_p = new_insns;
2443 *old_mems_p = new_mems;
2444}
2445
b4ead7d4 2446/* After computing the dependencies for block BB, propagate the dependencies
4ba478b8 2447 found in PRED_DEPS to the successors of the block. */
b4ead7d4 2448static void
37a0f8a5 2449propagate_deps (bb, pred_deps)
b4ead7d4 2450 int bb;
37a0f8a5 2451 struct deps *pred_deps;
2452{
2453 int b = BB_TO_BLOCK (bb);
2454 int e, first_edge;
2455
2456 /* bb's structures are inherited by its successors. */
2457 first_edge = e = OUT_EDGES (b);
2458 if (e > 0)
2459 do
2460 {
2461 int b_succ = TO_BLOCK (e);
2462 int bb_succ = BLOCK_TO_BB (b_succ);
2463 struct deps *succ_deps = bb_deps + bb_succ;
2464 int reg;
2465
2466 /* Only bbs "below" bb, in the same region, are interesting. */
2467 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
2468 || bb_succ <= bb)
2469 {
2470 e = NEXT_OUT (e);
2471 continue;
2472 }
b4ead7d4 2473
2474 /* The reg_last lists are inherited by bb_succ. */
2475 EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg,
2476 {
2477 struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2478 struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2479
2480 succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2481 succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2482 succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2483 succ_rl->clobbers);
2484 succ_rl->uses_length += pred_rl->uses_length;
2485 succ_rl->clobbers_length += pred_rl->clobbers_length;
2486 });
2487 IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2488
2489 /* Mem read/write lists are inherited by bb_succ. */
2490 concat_insn_mem_list (pred_deps->pending_read_insns,
2491 pred_deps->pending_read_mems,
2492 &succ_deps->pending_read_insns,
2493 &succ_deps->pending_read_mems);
2494 concat_insn_mem_list (pred_deps->pending_write_insns,
2495 pred_deps->pending_write_mems,
2496 &succ_deps->pending_write_insns,
2497 &succ_deps->pending_write_mems);
2498
2499 succ_deps->last_pending_memory_flush
2500 = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2501 succ_deps->last_pending_memory_flush);
786de7eb 2502
2503 succ_deps->pending_lists_length += pred_deps->pending_lists_length;
2504 succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2505
2506 /* last_function_call is inherited by bb_succ. */
2507 succ_deps->last_function_call
2508 = concat_INSN_LIST (pred_deps->last_function_call,
2509 succ_deps->last_function_call);
2510
2511 /* sched_before_next_call is inherited by bb_succ. */
2512 succ_deps->sched_before_next_call
2513 = concat_INSN_LIST (pred_deps->sched_before_next_call,
2514 succ_deps->sched_before_next_call);
2515
2516 e = NEXT_OUT (e);
2517 }
2518 while (e != first_edge);
b4ead7d4 2519
2520 /* These lists should point to the right place, for correct
2521 freeing later. */
2522 bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2523 bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2524 bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2525 bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2526
2527 /* Can't allow these to be freed twice. */
2528 pred_deps->pending_read_insns = 0;
2529 pred_deps->pending_read_mems = 0;
2530 pred_deps->pending_write_insns = 0;
2531 pred_deps->pending_write_mems = 0;
2532}
2533
 2534/* Compute backward dependences inside bb. In a multi-block region:
 2535 (1) a bb is analyzed after its predecessors, and (2) the lists in
 2536 effect at the end of bb (after analyzing for bb) are inherited by
 2537 bb's successors.
2538
2539 Specifically for reg-reg data dependences, the block insns are
2540 scanned by sched_analyze () top-to-bottom. Two lists are
2541 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2542 and reg_last[].uses for register USEs.
2543
2544 When analysis is completed for bb, we update for its successors:
2545 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
 2546 ; - USES[succ] = Union (USES [succ], USES [bb])
2547
2548 The mechanism for computing mem-mem data dependence is very
2549 similar, and the result is interblock dependences in the region. */
2550
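/* For instance (register number purely illustrative): if a block sets r5
   and a later block of the same region uses r5, then by the time the later
   block is analyzed its inherited reg_last[5].sets list still contains the
   setter, so sched_analyze records a cross-block dependence and interblock
   motion cannot reorder the use before the def.  */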
2551static void
2552compute_block_backward_dependences (bb)
2553 int bb;
2554{
2555 rtx head, tail;
2556 struct deps tmp_deps;
2557
2558 tmp_deps = bb_deps[bb];
2559
2560 /* Do the analysis for this block. */
2561 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2562 sched_analyze (&tmp_deps, head, tail);
2563 add_branch_dependences (head, tail);
2564
2565 if (current_nr_blocks > 1)
4ba478b8 2566 propagate_deps (bb, &tmp_deps);
2567
2568 /* Free up the INSN_LISTs. */
2569 free_deps (&tmp_deps);
b4ead7d4 2570}
4ba478b8 2571
2572/* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2573 them to the unused_*_list variables, so that they can be reused. */
2574
2575static void
2576free_pending_lists ()
2577{
2578 int bb;
2579
2580 for (bb = 0; bb < current_nr_blocks; bb++)
2581 {
2582 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2583 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2584 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2585 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2586 }
2587}
2588\f
2589/* Print dependences for debugging, callable from debugger. */
2590
2591void
2592debug_dependencies ()
2593{
2594 int bb;
2595
2596 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2597 for (bb = 0; bb < current_nr_blocks; bb++)
2598 {
2599 if (1)
2600 {
2601 rtx head, tail;
2602 rtx next_tail;
2603 rtx insn;
2604
2605 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2606 next_tail = NEXT_INSN (tail);
2607 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2608 BB_TO_BLOCK (bb), bb);
2609
2610 if (targetm.sched.use_dfa_pipeline_interface
2611 && (*targetm.sched.use_dfa_pipeline_interface) ())
2612 {
2613 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2614 "insn", "code", "bb", "dep", "prio", "cost",
2615 "reservation");
2616 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2617 "----", "----", "--", "---", "----", "----",
2618 "-----------");
2619 }
2620 else
2621 {
2622 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2623 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
2624 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2625 "----", "----", "--", "---", "----", "----", "--------", "-----");
2626 }
2627
2628 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2629 {
2630 rtx link;
2631
2632 if (! INSN_P (insn))
2633 {
2634 int n;
2635 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2636 if (GET_CODE (insn) == NOTE)
2637 {
2638 n = NOTE_LINE_NUMBER (insn);
2639 if (n < 0)
2640 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2641 else
2642 fprintf (sched_dump, "line %d, file %s\n", n,
2643 NOTE_SOURCE_FILE (insn));
2644 }
2645 else
2646 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2647 continue;
2648 }
2649
2650 if (targetm.sched.use_dfa_pipeline_interface
2651 && (*targetm.sched.use_dfa_pipeline_interface) ())
2652 {
2653 fprintf (sched_dump,
2654 ";; %s%5d%6d%6d%6d%6d%6d ",
2655 (SCHED_GROUP_P (insn) ? "+" : " "),
2656 INSN_UID (insn),
2657 INSN_CODE (insn),
2658 INSN_BB (insn),
2659 INSN_DEP_COUNT (insn),
2660 INSN_PRIORITY (insn),
2661 insn_cost (insn, 0, 0));
786de7eb 2662
2663 if (recog_memoized (insn) < 0)
2664 fprintf (sched_dump, "nothing");
2665 else
2666 print_reservation (sched_dump, insn);
2667 }
2668 else
2669 {
2670 int unit = insn_unit (insn);
2671 int range
2672 = (unit < 0
2673 || function_units[unit].blockage_range_function == 0
2674 ? 0
2675 : function_units[unit].blockage_range_function (insn));
2676 fprintf (sched_dump,
2677 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
2678 (SCHED_GROUP_P (insn) ? "+" : " "),
2679 INSN_UID (insn),
2680 INSN_CODE (insn),
2681 INSN_BB (insn),
2682 INSN_DEP_COUNT (insn),
2683 INSN_PRIORITY (insn),
2684 insn_cost (insn, 0, 0),
2685 (int) MIN_BLOCKAGE_COST (range),
2686 (int) MAX_BLOCKAGE_COST (range));
2687 insn_print_units (insn);
2688 }
2689
2690 fprintf (sched_dump, "\t: ");
2691 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2692 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2693 fprintf (sched_dump, "\n");
2694 }
2695 }
2696 }
2697 fprintf (sched_dump, "\n");
2698}
2699\f
2700/* Schedule a region. A region is either an inner loop, a loop-free
2701 subroutine, or a single basic block. Each bb in the region is
2702 scheduled after its flow predecessors. */
2703
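/* In outline: (1) compute backward dependences (LOG_LINKS) block by block,
   propagating them to the blocks that follow in the region; (2) convert
   them into forward dependences (INSN_DEPEND); (3) set insn priorities;
   (4) for a multi-block region, compute probabilities, dominators and
   split edges for the interblock checks; (5) schedule each block with
   schedule_block, fixing up block boundaries afterwards; (6) release the
   per-region data.  */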
2704static void
2705schedule_region (rgn)
2706 int rgn;
2707{
2708 int bb;
2709 int rgn_n_insns = 0;
2710 int sched_rgn_n_insns = 0;
2711
2712 /* Set variables for the current region. */
2713 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2714 current_blocks = RGN_BLOCKS (rgn);
2715
2716 init_deps_global ();
2717
 2718 /* Initializations for region data dependence analysis. */
2719 bb_deps = (struct deps *) xmalloc (sizeof (struct deps) * current_nr_blocks);
2720 for (bb = 0; bb < current_nr_blocks; bb++)
2721 init_deps (bb_deps + bb);
2722
2723 /* Compute LOG_LINKS. */
2724 for (bb = 0; bb < current_nr_blocks; bb++)
2725 compute_block_backward_dependences (bb);
2726
2727 /* Compute INSN_DEPEND. */
2728 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2729 {
2730 rtx head, tail;
2731 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2732
2733 compute_forward_dependences (head, tail);
2734 }
2735
2736 /* Set priorities. */
2737 for (bb = 0; bb < current_nr_blocks; bb++)
2738 {
2739 rtx head, tail;
2740 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2741
2742 rgn_n_insns += set_priorities (head, tail);
2743 }
2744
2745 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2746 if (current_nr_blocks > 1)
2747 {
2748 int i;
2749
2750 prob = (float *) xmalloc ((current_nr_blocks) * sizeof (float));
2751
2752 dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
2753 sbitmap_vector_zero (dom, current_nr_blocks);
2754 /* Edge to bit. */
2755 rgn_nr_edges = 0;
2756 edge_to_bit = (int *) xmalloc (nr_edges * sizeof (int));
2757 for (i = 1; i < nr_edges; i++)
2758 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
2759 EDGE_TO_BIT (i) = rgn_nr_edges++;
2760 rgn_edges = (int *) xmalloc (rgn_nr_edges * sizeof (int));
2761
2762 rgn_nr_edges = 0;
2763 for (i = 1; i < nr_edges; i++)
2764 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
2765 rgn_edges[rgn_nr_edges++] = i;
2766
2767 /* Split edges. */
2768 pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2769 sbitmap_vector_zero (pot_split, current_nr_blocks);
2770 ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2771 sbitmap_vector_zero (ancestor_edges, current_nr_blocks);
2772
2773 /* Compute probabilities, dominators, split_edges. */
2774 for (bb = 0; bb < current_nr_blocks; bb++)
2775 compute_dom_prob_ps (bb);
2776 }
2777
2778 /* Now we can schedule all blocks. */
2779 for (bb = 0; bb < current_nr_blocks; bb++)
2780 {
2781 rtx head, tail;
2782 int b = BB_TO_BLOCK (bb);
2783
2784 get_block_head_tail (b, &head, &tail);
2785
2786 if (no_real_insns_p (head, tail))
2787 continue;
2788
2789 current_sched_info->prev_head = PREV_INSN (head);
2790 current_sched_info->next_tail = NEXT_INSN (tail);
2791
2792 if (write_symbols != NO_DEBUG)
2793 {
2794 save_line_notes (b, head, tail);
2795 rm_line_notes (head, tail);
2796 }
2797
2798 /* rm_other_notes only removes notes which are _inside_ the
2799 block---that is, it won't remove notes before the first real insn
2800 or after the last real insn of the block. So if the first insn
2801 has a REG_SAVE_NOTE which would otherwise be emitted before the
2802 insn, it is redundant with the note before the start of the
570a98eb 2803 block, and so we have to take it out. */
2804 if (INSN_P (head))
2805 {
2806 rtx note;
2807
2808 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2809 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2810 {
2811 remove_note (head, note);
2812 note = XEXP (note, 1);
2813 remove_note (head, note);
2814 }
2815 }
2816
2817 /* Remove remaining note insns from the block, save them in
2818 note_list. These notes are restored at the end of
2819 schedule_block (). */
2820 rm_other_notes (head, tail);
2821
2822 target_bb = bb;
2823
2824 current_sched_info->queue_must_finish_empty
2825 = current_nr_blocks > 1 && !flag_schedule_interblock;
2826
2827 schedule_block (b, rgn_n_insns);
2828 sched_rgn_n_insns += sched_n_insns;
2829
2830 /* Update target block boundaries. */
2831 if (head == BLOCK_HEAD (b))
2832 BLOCK_HEAD (b) = current_sched_info->head;
2833 if (tail == BLOCK_END (b))
2834 BLOCK_END (b) = current_sched_info->tail;
2835
2836 /* Clean up. */
2837 if (current_nr_blocks > 1)
2838 {
2839 free (candidate_table);
2840 free (bblst_table);
2841 free (bitlst_table);
2842 }
2843 }
2844
2845 /* Sanity check: verify that all region insns were scheduled. */
2846 if (sched_rgn_n_insns != rgn_n_insns)
2847 abort ();
2848
2849 /* Restore line notes. */
2850 if (write_symbols != NO_DEBUG)
2851 {
2852 for (bb = 0; bb < current_nr_blocks; bb++)
2853 {
2854 rtx head, tail;
2855 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
14052b68 2856 restore_line_notes (head, tail);
79c2ffde 2857 }
2858 }
2859
2860 /* Done with this region. */
2861 free_pending_lists ();
2862
2863 finish_deps_global ();
2864
2865 free (bb_deps);
2866
2867 if (current_nr_blocks > 1)
2868 {
b4ead7d4 2869 free (prob);
2870 sbitmap_vector_free (dom);
2871 sbitmap_vector_free (pot_split);
2872 sbitmap_vector_free (ancestor_edges);
2873 free (edge_to_bit);
2874 free (rgn_edges);
2875 }
2876}
2877
2878/* Indexed by region, holds the number of death notes found in that region.
2879 Used for consistency checks. */
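/* When CHECK_DEAD_NOTES is enabled, init_regions records how many REG_DEAD
   notes each region contained while removing them; after scheduling,
   schedule_insns recreates the notes via update_life_info and verifies
   that every single-block region ends up with the same count again.  */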
2880static int *deaths_in_region;
2881
2882/* Initialize data structures for region scheduling. */
2883
2884static void
2885init_regions ()
2886{
2887 sbitmap blocks;
2888 int rgn;
2889
2890 nr_regions = 0;
2891 rgn_table = (region *) xmalloc ((n_basic_blocks) * sizeof (region));
2892 rgn_bb_table = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2893 block_to_bb = (int *) xmalloc ((last_basic_block) * sizeof (int));
2894 containing_rgn = (int *) xmalloc ((last_basic_block) * sizeof (int));
b4ead7d4 2895
2896 /* Compute regions for scheduling. */
2897 if (reload_completed
0b17ab2f 2898 || n_basic_blocks == 1
2899 || !flag_schedule_interblock)
2900 {
2901 find_single_block_region ();
2902 }
2903 else
2904 {
2905 /* Verify that a 'good' control flow graph can be built. */
2906 if (is_cfg_nonregular ())
2907 {
2908 find_single_block_region ();
2909 }
2910 else
2911 {
355be0dc 2912 dominance_info dom;
2913 struct edge_list *edge_list;
2914
2915 /* The scheduler runs after flow; therefore, we can't blindly call
2916 back into find_basic_blocks since doing so could invalidate the
2917 info in global_live_at_start.
2918
2919 Consider a block consisting entirely of dead stores; after life
2920 analysis it would be a block of NOTE_INSN_DELETED notes. If
2921 we call find_basic_blocks again, then the block would be removed
 2922 entirely and invalidate our register live information.
2923
2924 We could (should?) recompute register live information. Doing
2925 so may even be beneficial. */
2926 edge_list = create_edge_list ();
2927
2928 /* Compute the dominators and post dominators. */
355be0dc 2929 dom = calculate_dominance_info (CDI_DOMINATORS);
2930
2931 /* build_control_flow will return nonzero if it detects unreachable
2932 blocks or any other irregularity with the cfg which prevents
2933 cross block scheduling. */
2934 if (build_control_flow (edge_list) != 0)
2935 find_single_block_region ();
2936 else
2937 find_rgns (edge_list, dom);
2938
2939 if (sched_verbose >= 3)
2940 debug_regions ();
2941
2942 /* We are done with flow's edge list. */
2943 free_edge_list (edge_list);
2944
2945 /* For now. This will move as more and more of haifa is converted
2946 to using the cfg code in flow.c. */
355be0dc 2947 free_dominance_info (dom);
2948 }
2949 }
2950
b4ead7d4 2951
73991d6a 2952 if (CHECK_DEAD_NOTES)
b4ead7d4 2953 {
d55bc081 2954 blocks = sbitmap_alloc (last_basic_block);
2955 deaths_in_region = (int *) xmalloc (sizeof (int) * nr_regions);
2956 /* Remove all death notes from the subroutine. */
2957 for (rgn = 0; rgn < nr_regions; rgn++)
2958 {
2959 int b;
b4ead7d4 2960
2961 sbitmap_zero (blocks);
2962 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
2963 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
b4ead7d4 2964
2965 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
2966 }
2967 sbitmap_free (blocks);
b4ead7d4 2968 }
2969 else
2970 count_or_remove_death_notes (NULL, 1);
2971}
2972
2973/* The one entry point in this file. DUMP_FILE is the dump file for
2974 this pass. */
2975
2976void
2977schedule_insns (dump_file)
2978 FILE *dump_file;
2979{
2980 sbitmap large_region_blocks, blocks;
2981 int rgn;
2982 int any_large_regions;
e0082a72 2983 basic_block bb;
2984
2985 /* Taking care of this degenerate case makes the rest of
2986 this code simpler. */
0b17ab2f 2987 if (n_basic_blocks == 0)
2988 return;
2989
2990 nr_inter = 0;
2991 nr_spec = 0;
2992
2993 sched_init (dump_file);
2994
2995 init_regions ();
2996
2997 current_sched_info = &region_sched_info;
786de7eb 2998
2999 /* Schedule every region in the subroutine. */
3000 for (rgn = 0; rgn < nr_regions; rgn++)
3001 schedule_region (rgn);
3002
3003 /* Update life analysis for the subroutine. Do single block regions
3004 first so that we can verify that live_at_start didn't change. Then
6d2f8887 3005 do all other blocks. */
3006 /* ??? There is an outside possibility that update_life_info, or more
3007 to the point propagate_block, could get called with non-zero flags
3008 more than once for one basic block. This would be kinda bad if it
3009 were to happen, since REG_INFO would be accumulated twice for the
3010 block, and we'd have twice the REG_DEAD notes.
3011
3012 I'm fairly certain that this _shouldn't_ happen, since I don't think
3013 that live_at_start should change at region heads. Not sure what the
3014 best way to test for this kind of thing... */
3015
3016 allocate_reg_life_data ();
852c6ec7 3017 compute_bb_for_insn ();
3018
3019 any_large_regions = 0;
d55bc081 3020 large_region_blocks = sbitmap_alloc (last_basic_block);
3021 sbitmap_zero (large_region_blocks);
3022 FOR_EACH_BB (bb)
3023 SET_BIT (large_region_blocks, bb->index);
b4ead7d4 3024
d55bc081 3025 blocks = sbitmap_alloc (last_basic_block);
73991d6a 3026 sbitmap_zero (blocks);
b4ead7d4 3027
3028 /* Update life information. For regions consisting of multiple blocks
3029 we've possibly done interblock scheduling that affects global liveness.
3030 For regions consisting of single blocks we need to do only local
3031 liveness. */
3032 for (rgn = 0; rgn < nr_regions; rgn++)
3033 if (RGN_NR_BLOCKS (rgn) > 1)
3034 any_large_regions = 1;
3035 else
3036 {
3037 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3038 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3039 }
3040
3041 /* Don't update reg info after reload, since that affects
3042 regs_ever_live, which should not change after reload. */
3043 update_life_info (blocks, UPDATE_LIFE_LOCAL,
3044 (reload_completed ? PROP_DEATH_NOTES
3045 : PROP_DEATH_NOTES | PROP_REG_INFO));
3046 if (any_large_regions)
3047 {
3048 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
3049 PROP_DEATH_NOTES | PROP_REG_INFO);
3050 }
3051
3052 if (CHECK_DEAD_NOTES)
3053 {
 3054 /* Verify the counts of death notes in the single basic block
3055 regions. */
3056 for (rgn = 0; rgn < nr_regions; rgn++)
3057 if (RGN_NR_BLOCKS (rgn) == 1)
3058 {
3059 sbitmap_zero (blocks);
3060 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3061
3062 if (deaths_in_region[rgn]
3063 != count_or_remove_death_notes (blocks, 0))
3064 abort ();
3065 }
3066 free (deaths_in_region);
3067 }
3068
3069 /* Reposition the prologue and epilogue notes in case we moved the
3070 prologue/epilogue insns. */
3071 if (reload_completed)
3072 reposition_prologue_and_epilogue_notes (get_insns ());
3073
3074 /* Delete redundant line notes. */
3075 if (write_symbols != NO_DEBUG)
3076 rm_redundant_line_notes ();
3077
3078 if (sched_verbose)
3079 {
3080 if (reload_completed == 0 && flag_schedule_interblock)
3081 {
3082 fprintf (sched_dump,
3083 "\n;; Procedure interblock/speculative motions == %d/%d \n",
3084 nr_inter, nr_spec);
3085 }
3086 else
3087 {
3088 if (nr_inter > 0)
3089 abort ();
3090 }
3091 fprintf (sched_dump, "\n\n");
3092 }
3093
3094 /* Clean up. */
3095 free (rgn_table);
3096 free (rgn_bb_table);
3097 free (block_to_bb);
3098 free (containing_rgn);
3099
3100 sched_finish ();
3101
3102 if (edge_table)
3103 {
3104 free (edge_table);
3105 edge_table = NULL;
3106 }
3107
3108 if (in_edges)
3109 {
3110 free (in_edges);
3111 in_edges = NULL;
3112 }
3113 if (out_edges)
3114 {
3115 free (out_edges);
3116 out_edges = NULL;
3117 }
3118
3119 sbitmap_free (blocks);
3120 sbitmap_free (large_region_blocks);
b4ead7d4 3121}
f56887a7 3122#endif