]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/sched-rgn.c
alias.c: Remove uses of "register" specifier in declarations of arguments and local...
[thirdparty/gcc.git] / gcc / sched-rgn.c
CommitLineData
b4ead7d4
BS
1/* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
f759eb8b 3 1999, 2000, 2001 Free Software Foundation, Inc.
b4ead7d4
BS
4 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
6
1322177d 7This file is part of GCC.
b4ead7d4 8
1322177d
LB
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 2, or (at your option) any later
12version.
b4ead7d4 13
1322177d
LB
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
b4ead7d4
BS
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
47a1bd82
NC
20along with GCC; see the file COPYING. If not, write to the Free
21Software Foundation, 59 Temple Place - Suite 330, Boston, MA
b4ead7d4
BS
2202111-1307, USA. */
23
24/* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
27
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
31
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions requires
36 construction and analysis of the function's control flow graph.
37
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
47\f
48#include "config.h"
49#include "system.h"
50#include "toplev.h"
51#include "rtl.h"
52#include "tm_p.h"
53#include "hard-reg-set.h"
54#include "basic-block.h"
55#include "regs.h"
56#include "function.h"
57#include "flags.h"
58#include "insn-config.h"
59#include "insn-attr.h"
60#include "except.h"
61#include "toplev.h"
62#include "recog.h"
63#include "sched-int.h"
64
f56887a7 65#ifdef INSN_SCHEDULING
b4ead7d4
BS
66/* Some accessor macros for h_i_d members only used within this file. */
67#define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
68#define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
69#define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
70
71#define MAX_RGN_BLOCKS 10
72#define MAX_RGN_INSNS 100
73
74/* nr_inter/spec counts interblock/speculative motion for the function. */
75static int nr_inter, nr_spec;
76
77/* Control flow graph edges are kept in circular lists. */
78typedef struct
79{
80 int from_block;
81 int to_block;
82 int next_in;
83 int next_out;
84}
85haifa_edge;
86static haifa_edge *edge_table;
87
88#define NEXT_IN(edge) (edge_table[edge].next_in)
89#define NEXT_OUT(edge) (edge_table[edge].next_out)
90#define FROM_BLOCK(edge) (edge_table[edge].from_block)
91#define TO_BLOCK(edge) (edge_table[edge].to_block)
92
93/* Number of edges in the control flow graph. (In fact, larger than
94 that by 1, since edge 0 is unused.) */
95static int nr_edges;
96
97/* Circular list of incoming/outgoing edges of a block. */
98static int *in_edges;
99static int *out_edges;
100
101#define IN_EDGES(block) (in_edges[block])
102#define OUT_EDGES(block) (out_edges[block])
103
104static int is_cfg_nonregular PARAMS ((void));
105static int build_control_flow PARAMS ((struct edge_list *));
106static void new_edge PARAMS ((int, int));
107
108/* A region is the main entity for interblock scheduling: insns
109 are allowed to move between blocks in the same region, along
110 control flow graph edges, in the 'up' direction. */
111typedef struct
112{
113 int rgn_nr_blocks; /* Number of blocks in region. */
114 int rgn_blocks; /* cblocks in the region (actually index in rgn_bb_table). */
115}
116region;
117
118/* Number of regions in the procedure. */
119static int nr_regions;
120
121/* Table of region descriptions. */
122static region *rgn_table;
123
124/* Array of lists of regions' blocks. */
125static int *rgn_bb_table;
126
127/* Topological order of blocks in the region (if b2 is reachable from
128 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
129 always referred to by either block or b, while its topological
130 order name (in the region) is refered to by bb. */
131static int *block_to_bb;
132
133/* The number of the region containing a block. */
134static int *containing_rgn;
135
136#define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
137#define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
138#define BLOCK_TO_BB(block) (block_to_bb[block])
139#define CONTAINING_RGN(block) (containing_rgn[block])
140
141void debug_regions PARAMS ((void));
142static void find_single_block_region PARAMS ((void));
143static void find_rgns PARAMS ((struct edge_list *, sbitmap *));
144static int too_large PARAMS ((int, int *, int *));
145
146extern void debug_live PARAMS ((int, int));
147
148/* Blocks of the current region being scheduled. */
149static int current_nr_blocks;
150static int current_blocks;
151
152/* The mapping from bb to block. */
153#define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
154
155/* Bit vectors and bitset operations are needed for computations on
156 the control flow graph. */
157
158typedef unsigned HOST_WIDE_INT *bitset;
159typedef struct
160{
161 int *first_member; /* Pointer to the list start in bitlst_table. */
162 int nr_members; /* The number of members of the bit list. */
163}
164bitlst;
165
166static int bitlst_table_last;
167static int bitlst_table_size;
168static int *bitlst_table;
169
170static char bitset_member PARAMS ((bitset, int, int));
171static void extract_bitlst PARAMS ((bitset, int, int, bitlst *));
172
173/* Target info declarations.
174
175 The block currently being scheduled is referred to as the "target" block,
176 while other blocks in the region from which insns can be moved to the
177 target are called "source" blocks. The candidate structure holds info
178 about such sources: are they valid? Speculative? Etc. */
179typedef bitlst bblst;
180typedef struct
181{
182 char is_valid;
183 char is_speculative;
184 int src_prob;
185 bblst split_bbs;
186 bblst update_bbs;
187}
188candidate;
189
190static candidate *candidate_table;
191
192/* A speculative motion requires checking live information on the path
193 from 'source' to 'target'. The split blocks are those to be checked.
194 After a speculative motion, live information should be modified in
195 the 'update' blocks.
196
197 Lists of split and update blocks for each candidate of the current
198 target are in array bblst_table. */
199static int *bblst_table, bblst_size, bblst_last;
200
201#define IS_VALID(src) ( candidate_table[src].is_valid )
202#define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
203#define SRC_PROB(src) ( candidate_table[src].src_prob )
204
205/* The bb being currently scheduled. */
206static int target_bb;
207
208/* List of edges. */
209typedef bitlst edgelst;
210
211/* Target info functions. */
212static void split_edges PARAMS ((int, int, edgelst *));
213static void compute_trg_info PARAMS ((int));
214void debug_candidate PARAMS ((int));
215void debug_candidates PARAMS ((int));
216
217/* Bit-set of bbs, where bit 'i' stands for bb 'i'. */
218typedef bitset bbset;
219
220/* Number of words of the bbset. */
221static int bbset_size;
222
223/* Dominators array: dom[i] contains the bbset of dominators of
224 bb i in the region. */
225static bbset *dom;
226
227/* bb 0 is the only region entry. */
228#define IS_RGN_ENTRY(bb) (!bb)
229
230/* Is bb_src dominated by bb_trg. */
231#define IS_DOMINATED(bb_src, bb_trg) \
232( bitset_member (dom[bb_src], bb_trg, bbset_size) )
233
234/* Probability: Prob[i] is a float in [0, 1] which is the probability
235 of bb i relative to the region entry. */
236static float *prob;
237
238/* The probability of bb_src, relative to bb_trg. Note, that while the
239 'prob[bb]' is a float in [0, 1], this macro returns an integer
240 in [0, 100]. */
241#define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
242 prob[bb_trg])))
243
244/* Bit-set of edges, where bit i stands for edge i. */
245typedef bitset edgeset;
246
247/* Number of edges in the region. */
248static int rgn_nr_edges;
249
250/* Array of size rgn_nr_edges. */
251static int *rgn_edges;
252
253/* Number of words in an edgeset. */
254static int edgeset_size;
255
256/* Number of bits in an edgeset. */
257static int edgeset_bitsize;
258
259/* Mapping from each edge in the graph to its number in the rgn. */
260static int *edge_to_bit;
261#define EDGE_TO_BIT(edge) (edge_to_bit[edge])
262
263/* The split edges of a source bb is different for each target
264 bb. In order to compute this efficiently, the 'potential-split edges'
265 are computed for each bb prior to scheduling a region. This is actually
266 the split edges of each bb relative to the region entry.
267
268 pot_split[bb] is the set of potential split edges of bb. */
269static edgeset *pot_split;
270
271/* For every bb, a set of its ancestor edges. */
272static edgeset *ancestor_edges;
273
274static void compute_dom_prob_ps PARAMS ((int));
275
276#define ABS_VALUE(x) (((x)<0)?(-(x)):(x))
277#define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
278#define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
279#define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
280
281/* Parameters affecting the decision of rank_for_schedule().
282 ??? Nope. But MIN_PROBABILITY is used in copmute_trg_info. */
283#define MIN_DIFF_PRIORITY 2
284#define MIN_PROBABILITY 40
285#define MIN_PROB_DIFF 10
286
287/* Speculative scheduling functions. */
288static int check_live_1 PARAMS ((int, rtx));
289static void update_live_1 PARAMS ((int, rtx));
290static int check_live PARAMS ((rtx, int));
291static void update_live PARAMS ((rtx, int));
292static void set_spec_fed PARAMS ((rtx));
293static int is_pfree PARAMS ((rtx, int, int));
294static int find_conditional_protection PARAMS ((rtx, int));
295static int is_conditionally_protected PARAMS ((rtx, int, int));
296static int may_trap_exp PARAMS ((rtx, int));
297static int haifa_classify_insn PARAMS ((rtx));
298static int is_prisky PARAMS ((rtx, int, int));
299static int is_exception_free PARAMS ((rtx, int, int));
300
301static void add_branch_dependences PARAMS ((rtx, rtx));
302static void compute_block_backward_dependences PARAMS ((int));
303void debug_dependencies PARAMS ((void));
304
305static void init_regions PARAMS ((void));
306static void schedule_region PARAMS ((int));
4ba478b8 307static void propagate_deps PARAMS ((int, struct deps *));
b4ead7d4
BS
308static void free_pending_lists PARAMS ((void));
309
310/* Functions for construction of the control flow graph. */
311
312/* Return 1 if control flow graph should not be constructed, 0 otherwise.
313
314 We decide not to build the control flow graph if there is possibly more
315 than one entry to the function, if computed branches exist, of if we
316 have nonlocal gotos. */
317
318static int
319is_cfg_nonregular ()
320{
321 int b;
322 rtx insn;
323 RTX_CODE code;
324
325 /* If we have a label that could be the target of a nonlocal goto, then
326 the cfg is not well structured. */
327 if (nonlocal_goto_handler_labels)
328 return 1;
329
330 /* If we have any forced labels, then the cfg is not well structured. */
331 if (forced_labels)
332 return 1;
333
334 /* If this function has a computed jump, then we consider the cfg
335 not well structured. */
336 if (current_function_has_computed_jump)
337 return 1;
338
339 /* If we have exception handlers, then we consider the cfg not well
340 structured. ?!? We should be able to handle this now that flow.c
341 computes an accurate cfg for EH. */
342 if (exception_handler_labels)
343 return 1;
344
345 /* If we have non-jumping insns which refer to labels, then we consider
346 the cfg not well structured. */
347 /* Check for labels referred to other thn by jumps. */
348 for (b = 0; b < n_basic_blocks; b++)
349 for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
350 {
351 code = GET_CODE (insn);
f759eb8b 352 if (GET_RTX_CLASS (code) == 'i' && code != JUMP_INSN)
b4ead7d4 353 {
cabf3891 354 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
f759eb8b
AO
355
356 if (note
357 && ! (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
cabf3891 358 && find_reg_note (NEXT_INSN (insn), REG_LABEL,
f759eb8b
AO
359 XEXP (note, 0))))
360 return 1;
b4ead7d4
BS
361 }
362
363 if (insn == BLOCK_END (b))
364 break;
365 }
366
367 /* All the tests passed. Consider the cfg well structured. */
368 return 0;
369}
370
371/* Build the control flow graph and set nr_edges.
372
373 Instead of trying to build a cfg ourselves, we rely on flow to
374 do it for us. Stamp out useless code (and bug) duplication.
375
376 Return nonzero if an irregularity in the cfg is found which would
377 prevent cross block scheduling. */
378
379static int
380build_control_flow (edge_list)
381 struct edge_list *edge_list;
382{
383 int i, unreachable, num_edges;
384
385 /* This already accounts for entry/exit edges. */
386 num_edges = NUM_EDGES (edge_list);
387
388 /* Unreachable loops with more than one basic block are detected
389 during the DFS traversal in find_rgns.
390
391 Unreachable loops with a single block are detected here. This
392 test is redundant with the one in find_rgns, but it's much
393 cheaper to go ahead and catch the trivial case here. */
394 unreachable = 0;
395 for (i = 0; i < n_basic_blocks; i++)
396 {
397 basic_block b = BASIC_BLOCK (i);
398
399 if (b->pred == NULL
400 || (b->pred->src == b
401 && b->pred->pred_next == NULL))
402 unreachable = 1;
403 }
404
405 /* ??? We can kill these soon. */
406 in_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
407 out_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
408 edge_table = (haifa_edge *) xcalloc (num_edges, sizeof (haifa_edge));
409
410 nr_edges = 0;
411 for (i = 0; i < num_edges; i++)
412 {
413 edge e = INDEX_EDGE (edge_list, i);
414
415 if (e->dest != EXIT_BLOCK_PTR
416 && e->src != ENTRY_BLOCK_PTR)
417 new_edge (e->src->index, e->dest->index);
418 }
419
420 /* Increment by 1, since edge 0 is unused. */
421 nr_edges++;
422
423 return unreachable;
424}
425
426/* Record an edge in the control flow graph from SOURCE to TARGET.
427
428 In theory, this is redundant with the s_succs computed above, but
429 we have not converted all of haifa to use information from the
430 integer lists. */
431
432static void
433new_edge (source, target)
434 int source, target;
435{
436 int e, next_edge;
437 int curr_edge, fst_edge;
438
439 /* Check for duplicates. */
440 fst_edge = curr_edge = OUT_EDGES (source);
441 while (curr_edge)
442 {
443 if (FROM_BLOCK (curr_edge) == source
444 && TO_BLOCK (curr_edge) == target)
445 {
446 return;
447 }
448
449 curr_edge = NEXT_OUT (curr_edge);
450
451 if (fst_edge == curr_edge)
452 break;
453 }
454
455 e = ++nr_edges;
456
457 FROM_BLOCK (e) = source;
458 TO_BLOCK (e) = target;
459
460 if (OUT_EDGES (source))
461 {
462 next_edge = NEXT_OUT (OUT_EDGES (source));
463 NEXT_OUT (OUT_EDGES (source)) = e;
464 NEXT_OUT (e) = next_edge;
465 }
466 else
467 {
468 OUT_EDGES (source) = e;
469 NEXT_OUT (e) = e;
470 }
471
472 if (IN_EDGES (target))
473 {
474 next_edge = NEXT_IN (IN_EDGES (target));
475 NEXT_IN (IN_EDGES (target)) = e;
476 NEXT_IN (e) = next_edge;
477 }
478 else
479 {
480 IN_EDGES (target) = e;
481 NEXT_IN (e) = e;
482 }
483}
484
485/* BITSET macros for operations on the control flow graph. */
486
487/* Compute bitwise union of two bitsets. */
488#define BITSET_UNION(set1, set2, len) \
b3694847
SS
489do { bitset tp = set1, sp = set2; \
490 int i; \
b4ead7d4
BS
491 for (i = 0; i < len; i++) \
492 *(tp++) |= *(sp++); } while (0)
493
494/* Compute bitwise intersection of two bitsets. */
495#define BITSET_INTER(set1, set2, len) \
b3694847
SS
496do { bitset tp = set1, sp = set2; \
497 int i; \
b4ead7d4
BS
498 for (i = 0; i < len; i++) \
499 *(tp++) &= *(sp++); } while (0)
500
501/* Compute bitwise difference of two bitsets. */
502#define BITSET_DIFFER(set1, set2, len) \
b3694847
SS
503do { bitset tp = set1, sp = set2; \
504 int i; \
b4ead7d4
BS
505 for (i = 0; i < len; i++) \
506 *(tp++) &= ~*(sp++); } while (0)
507
508/* Inverts every bit of bitset 'set'. */
509#define BITSET_INVERT(set, len) \
b3694847
SS
510do { bitset tmpset = set; \
511 int i; \
b4ead7d4
BS
512 for (i = 0; i < len; i++, tmpset++) \
513 *tmpset = ~*tmpset; } while (0)
514
515/* Turn on the index'th bit in bitset set. */
516#define BITSET_ADD(set, index, len) \
517{ \
518 if (index >= HOST_BITS_PER_WIDE_INT * len) \
519 abort (); \
520 else \
521 set[index/HOST_BITS_PER_WIDE_INT] |= \
9c8fad33 522 ((unsigned HOST_WIDE_INT) 1) << (index % HOST_BITS_PER_WIDE_INT); \
b4ead7d4
BS
523}
524
525/* Turn off the index'th bit in set. */
526#define BITSET_REMOVE(set, index, len) \
527{ \
528 if (index >= HOST_BITS_PER_WIDE_INT * len) \
529 abort (); \
530 else \
531 set[index/HOST_BITS_PER_WIDE_INT] &= \
9c8fad33 532 ~(((unsigned HOST_WIDE_INT) 1) << (index % HOST_BITS_PER_WIDE_INT)); \
b4ead7d4
BS
533}
534
535/* Check if the index'th bit in bitset set is on. */
536
537static char
538bitset_member (set, index, len)
539 bitset set;
540 int index, len;
541{
542 if (index >= HOST_BITS_PER_WIDE_INT * len)
543 abort ();
9c8fad33
JW
544 return ((set[index / HOST_BITS_PER_WIDE_INT] &
545 ((unsigned HOST_WIDE_INT) 1) << (index % HOST_BITS_PER_WIDE_INT))
546 ? 1 : 0);
b4ead7d4
BS
547}
548
549/* Translate a bit-set SET to a list BL of the bit-set members. */
550
551static void
552extract_bitlst (set, len, bitlen, bl)
553 bitset set;
554 int len;
555 int bitlen;
556 bitlst *bl;
557{
558 int i, j, offset;
559 unsigned HOST_WIDE_INT word;
560
561 /* bblst table space is reused in each call to extract_bitlst. */
562 bitlst_table_last = 0;
563
564 bl->first_member = &bitlst_table[bitlst_table_last];
565 bl->nr_members = 0;
566
567 /* Iterate over each word in the bitset. */
568 for (i = 0; i < len; i++)
569 {
570 word = set[i];
571 offset = i * HOST_BITS_PER_WIDE_INT;
572
573 /* Iterate over each bit in the word, but do not
574 go beyond the end of the defined bits. */
575 for (j = 0; offset < bitlen && word; j++)
576 {
577 if (word & 1)
578 {
579 bitlst_table[bitlst_table_last++] = offset;
580 (bl->nr_members)++;
581 }
582 word >>= 1;
583 ++offset;
584 }
585 }
586
587}
588
589/* Functions for the construction of regions. */
590
591/* Print the regions, for debugging purposes. Callable from debugger. */
592
593void
594debug_regions ()
595{
596 int rgn, bb;
597
598 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
599 for (rgn = 0; rgn < nr_regions; rgn++)
600 {
601 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
602 rgn_table[rgn].rgn_nr_blocks);
603 fprintf (sched_dump, ";;\tbb/block: ");
604
605 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
606 {
607 current_blocks = RGN_BLOCKS (rgn);
608
609 if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
610 abort ();
611
612 fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
613 }
614
615 fprintf (sched_dump, "\n\n");
616 }
617}
618
619/* Build a single block region for each basic block in the function.
620 This allows for using the same code for interblock and basic block
621 scheduling. */
622
623static void
624find_single_block_region ()
625{
626 int i;
627
628 for (i = 0; i < n_basic_blocks; i++)
629 {
630 rgn_bb_table[i] = i;
631 RGN_NR_BLOCKS (i) = 1;
632 RGN_BLOCKS (i) = i;
633 CONTAINING_RGN (i) = i;
634 BLOCK_TO_BB (i) = 0;
635 }
636 nr_regions = n_basic_blocks;
637}
638
639/* Update number of blocks and the estimate for number of insns
640 in the region. Return 1 if the region is "too large" for interblock
641 scheduling (compile time considerations), otherwise return 0. */
642
643static int
644too_large (block, num_bbs, num_insns)
645 int block, *num_bbs, *num_insns;
646{
647 (*num_bbs)++;
648 (*num_insns) += (INSN_LUID (BLOCK_END (block)) -
649 INSN_LUID (BLOCK_HEAD (block)));
650 if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
651 return 1;
652 else
653 return 0;
654}
655
656/* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
657 is still an inner loop. Put in max_hdr[blk] the header of the most inner
658 loop containing blk. */
659#define UPDATE_LOOP_RELATIONS(blk, hdr) \
660{ \
661 if (max_hdr[blk] == -1) \
662 max_hdr[blk] = hdr; \
663 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
664 RESET_BIT (inner, hdr); \
665 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
666 { \
667 RESET_BIT (inner,max_hdr[blk]); \
668 max_hdr[blk] = hdr; \
669 } \
670}
671
672/* Find regions for interblock scheduling.
673
674 A region for scheduling can be:
675
676 * A loop-free procedure, or
677
678 * A reducible inner loop, or
679
680 * A basic block not contained in any other region.
681
682 ?!? In theory we could build other regions based on extended basic
683 blocks or reverse extended basic blocks. Is it worth the trouble?
684
685 Loop blocks that form a region are put into the region's block list
686 in topological order.
687
688 This procedure stores its results into the following global (ick) variables
689
690 * rgn_nr
691 * rgn_table
692 * rgn_bb_table
693 * block_to_bb
694 * containing region
695
696 We use dominator relationships to avoid making regions out of non-reducible
697 loops.
698
699 This procedure needs to be converted to work on pred/succ lists instead
700 of edge tables. That would simplify it somewhat. */
701
702static void
703find_rgns (edge_list, dom)
704 struct edge_list *edge_list;
705 sbitmap *dom;
706{
707 int *max_hdr, *dfs_nr, *stack, *degree;
708 char no_loops = 1;
709 int node, child, loop_head, i, head, tail;
710 int count = 0, sp, idx = 0, current_edge = out_edges[0];
711 int num_bbs, num_insns, unreachable;
712 int too_large_failure;
713
714 /* Note if an edge has been passed. */
715 sbitmap passed;
716
717 /* Note if a block is a natural loop header. */
718 sbitmap header;
719
720 /* Note if a block is an natural inner loop header. */
721 sbitmap inner;
722
723 /* Note if a block is in the block queue. */
724 sbitmap in_queue;
725
726 /* Note if a block is in the block queue. */
727 sbitmap in_stack;
728
729 int num_edges = NUM_EDGES (edge_list);
730
731 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
732 and a mapping from block to its loop header (if the block is contained
733 in a loop, else -1).
734
735 Store results in HEADER, INNER, and MAX_HDR respectively, these will
736 be used as inputs to the second traversal.
737
738 STACK, SP and DFS_NR are only used during the first traversal. */
739
740 /* Allocate and initialize variables for the first traversal. */
741 max_hdr = (int *) xmalloc (n_basic_blocks * sizeof (int));
742 dfs_nr = (int *) xcalloc (n_basic_blocks, sizeof (int));
743 stack = (int *) xmalloc (nr_edges * sizeof (int));
744
745 inner = sbitmap_alloc (n_basic_blocks);
746 sbitmap_ones (inner);
747
748 header = sbitmap_alloc (n_basic_blocks);
749 sbitmap_zero (header);
750
751 passed = sbitmap_alloc (nr_edges);
752 sbitmap_zero (passed);
753
754 in_queue = sbitmap_alloc (n_basic_blocks);
755 sbitmap_zero (in_queue);
756
757 in_stack = sbitmap_alloc (n_basic_blocks);
758 sbitmap_zero (in_stack);
759
760 for (i = 0; i < n_basic_blocks; i++)
761 max_hdr[i] = -1;
762
763 /* DFS traversal to find inner loops in the cfg. */
764
765 sp = -1;
766 while (1)
767 {
768 if (current_edge == 0 || TEST_BIT (passed, current_edge))
769 {
770 /* We have reached a leaf node or a node that was already
771 processed. Pop edges off the stack until we find
772 an edge that has not yet been processed. */
773 while (sp >= 0
774 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
775 {
776 /* Pop entry off the stack. */
777 current_edge = stack[sp--];
778 node = FROM_BLOCK (current_edge);
779 child = TO_BLOCK (current_edge);
780 RESET_BIT (in_stack, child);
781 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
782 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
783 current_edge = NEXT_OUT (current_edge);
784 }
785
786 /* See if have finished the DFS tree traversal. */
787 if (sp < 0 && TEST_BIT (passed, current_edge))
788 break;
789
790 /* Nope, continue the traversal with the popped node. */
791 continue;
792 }
793
794 /* Process a node. */
795 node = FROM_BLOCK (current_edge);
796 child = TO_BLOCK (current_edge);
797 SET_BIT (in_stack, node);
798 dfs_nr[node] = ++count;
799
800 /* If the successor is in the stack, then we've found a loop.
801 Mark the loop, if it is not a natural loop, then it will
802 be rejected during the second traversal. */
803 if (TEST_BIT (in_stack, child))
804 {
805 no_loops = 0;
806 SET_BIT (header, child);
807 UPDATE_LOOP_RELATIONS (node, child);
808 SET_BIT (passed, current_edge);
809 current_edge = NEXT_OUT (current_edge);
810 continue;
811 }
812
813 /* If the child was already visited, then there is no need to visit
814 it again. Just update the loop relationships and restart
815 with a new edge. */
816 if (dfs_nr[child])
817 {
818 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
819 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
820 SET_BIT (passed, current_edge);
821 current_edge = NEXT_OUT (current_edge);
822 continue;
823 }
824
825 /* Push an entry on the stack and continue DFS traversal. */
826 stack[++sp] = current_edge;
827 SET_BIT (passed, current_edge);
828 current_edge = OUT_EDGES (child);
829
830 /* This is temporary until haifa is converted to use rth's new
831 cfg routines which have true entry/exit blocks and the
832 appropriate edges from/to those blocks.
833
834 Generally we update dfs_nr for a node when we process its
835 out edge. However, if the node has no out edge then we will
836 not set dfs_nr for that node. This can confuse the scheduler
837 into thinking that we have unreachable blocks, which in turn
838 disables cross block scheduling.
839
840 So, if we have a node with no out edges, go ahead and mark it
841 as reachable now. */
842 if (current_edge == 0)
843 dfs_nr[child] = ++count;
844 }
845
846 /* Another check for unreachable blocks. The earlier test in
847 is_cfg_nonregular only finds unreachable blocks that do not
848 form a loop.
849
850 The DFS traversal will mark every block that is reachable from
851 the entry node by placing a nonzero value in dfs_nr. Thus if
852 dfs_nr is zero for any block, then it must be unreachable. */
853 unreachable = 0;
854 for (i = 0; i < n_basic_blocks; i++)
855 if (dfs_nr[i] == 0)
856 {
857 unreachable = 1;
858 break;
859 }
860
861 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
862 to hold degree counts. */
863 degree = dfs_nr;
864
865 for (i = 0; i < n_basic_blocks; i++)
866 degree[i] = 0;
867 for (i = 0; i < num_edges; i++)
868 {
869 edge e = INDEX_EDGE (edge_list, i);
870
871 if (e->dest != EXIT_BLOCK_PTR)
872 degree[e->dest->index]++;
873 }
874
875 /* Do not perform region scheduling if there are any unreachable
876 blocks. */
877 if (!unreachable)
878 {
879 int *queue;
880
881 if (no_loops)
882 SET_BIT (header, 0);
883
884 /* Second travsersal:find reducible inner loops and topologically sort
885 block of each region. */
886
887 queue = (int *) xmalloc (n_basic_blocks * sizeof (int));
888
889 /* Find blocks which are inner loop headers. We still have non-reducible
890 loops to consider at this point. */
891 for (i = 0; i < n_basic_blocks; i++)
892 {
893 if (TEST_BIT (header, i) && TEST_BIT (inner, i))
894 {
895 edge e;
896 int j;
897
898 /* Now check that the loop is reducible. We do this separate
899 from finding inner loops so that we do not find a reducible
900 loop which contains an inner non-reducible loop.
901
902 A simple way to find reducible/natural loops is to verify
903 that each block in the loop is dominated by the loop
904 header.
905
906 If there exists a block that is not dominated by the loop
907 header, then the block is reachable from outside the loop
908 and thus the loop is not a natural loop. */
909 for (j = 0; j < n_basic_blocks; j++)
910 {
911 /* First identify blocks in the loop, except for the loop
912 entry block. */
913 if (i == max_hdr[j] && i != j)
914 {
915 /* Now verify that the block is dominated by the loop
916 header. */
917 if (!TEST_BIT (dom[j], i))
918 break;
919 }
920 }
921
922 /* If we exited the loop early, then I is the header of
923 a non-reducible loop and we should quit processing it
924 now. */
925 if (j != n_basic_blocks)
926 continue;
927
928 /* I is a header of an inner loop, or block 0 in a subroutine
929 with no loops at all. */
930 head = tail = -1;
931 too_large_failure = 0;
932 loop_head = max_hdr[i];
933
934 /* Decrease degree of all I's successors for topological
935 ordering. */
936 for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
937 if (e->dest != EXIT_BLOCK_PTR)
938 --degree[e->dest->index];
939
940 /* Estimate # insns, and count # blocks in the region. */
941 num_bbs = 1;
942 num_insns = (INSN_LUID (BLOCK_END (i))
943 - INSN_LUID (BLOCK_HEAD (i)));
944
945 /* Find all loop latches (blocks with back edges to the loop
946 header) or all the leaf blocks in the cfg has no loops.
947
948 Place those blocks into the queue. */
949 if (no_loops)
950 {
951 for (j = 0; j < n_basic_blocks; j++)
952 /* Leaf nodes have only a single successor which must
953 be EXIT_BLOCK. */
954 if (BASIC_BLOCK (j)->succ
955 && BASIC_BLOCK (j)->succ->dest == EXIT_BLOCK_PTR
956 && BASIC_BLOCK (j)->succ->succ_next == NULL)
957 {
958 queue[++tail] = j;
959 SET_BIT (in_queue, j);
960
961 if (too_large (j, &num_bbs, &num_insns))
962 {
963 too_large_failure = 1;
964 break;
965 }
966 }
967 }
968 else
969 {
970 edge e;
971
972 for (e = BASIC_BLOCK (i)->pred; e; e = e->pred_next)
973 {
974 if (e->src == ENTRY_BLOCK_PTR)
975 continue;
976
977 node = e->src->index;
978
979 if (max_hdr[node] == loop_head && node != i)
980 {
981 /* This is a loop latch. */
982 queue[++tail] = node;
983 SET_BIT (in_queue, node);
984
985 if (too_large (node, &num_bbs, &num_insns))
986 {
987 too_large_failure = 1;
988 break;
989 }
990 }
991 }
992 }
993
994 /* Now add all the blocks in the loop to the queue.
995
996 We know the loop is a natural loop; however the algorithm
997 above will not always mark certain blocks as being in the
998 loop. Consider:
999 node children
1000 a b,c
1001 b c
1002 c a,d
1003 d b
1004
1005 The algorithm in the DFS traversal may not mark B & D as part
1006 of the loop (ie they will not have max_hdr set to A).
1007
1008 We know they can not be loop latches (else they would have
1009 had max_hdr set since they'd have a backedge to a dominator
1010 block). So we don't need them on the initial queue.
1011
1012 We know they are part of the loop because they are dominated
1013 by the loop header and can be reached by a backwards walk of
1014 the edges starting with nodes on the initial queue.
1015
1016 It is safe and desirable to include those nodes in the
1017 loop/scheduling region. To do so we would need to decrease
1018 the degree of a node if it is the target of a backedge
1019 within the loop itself as the node is placed in the queue.
1020
1021 We do not do this because I'm not sure that the actual
1022 scheduling code will properly handle this case. ?!? */
1023
1024 while (head < tail && !too_large_failure)
1025 {
1026 edge e;
1027 child = queue[++head];
1028
1029 for (e = BASIC_BLOCK (child)->pred; e; e = e->pred_next)
1030 {
1031 node = e->src->index;
1032
1033 /* See discussion above about nodes not marked as in
1034 this loop during the initial DFS traversal. */
1035 if (e->src == ENTRY_BLOCK_PTR
1036 || max_hdr[node] != loop_head)
1037 {
1038 tail = -1;
1039 break;
1040 }
1041 else if (!TEST_BIT (in_queue, node) && node != i)
1042 {
1043 queue[++tail] = node;
1044 SET_BIT (in_queue, node);
1045
1046 if (too_large (node, &num_bbs, &num_insns))
1047 {
1048 too_large_failure = 1;
1049 break;
1050 }
1051 }
1052 }
1053 }
1054
1055 if (tail >= 0 && !too_large_failure)
1056 {
1057 /* Place the loop header into list of region blocks. */
1058 degree[i] = -1;
1059 rgn_bb_table[idx] = i;
1060 RGN_NR_BLOCKS (nr_regions) = num_bbs;
1061 RGN_BLOCKS (nr_regions) = idx++;
1062 CONTAINING_RGN (i) = nr_regions;
1063 BLOCK_TO_BB (i) = count = 0;
1064
1065 /* Remove blocks from queue[] when their in degree
1066 becomes zero. Repeat until no blocks are left on the
1067 list. This produces a topological list of blocks in
1068 the region. */
1069 while (tail >= 0)
1070 {
1071 if (head < 0)
1072 head = tail;
1073 child = queue[head];
1074 if (degree[child] == 0)
1075 {
1076 edge e;
1077
1078 degree[child] = -1;
1079 rgn_bb_table[idx++] = child;
1080 BLOCK_TO_BB (child) = ++count;
1081 CONTAINING_RGN (child) = nr_regions;
1082 queue[head] = queue[tail--];
1083
1084 for (e = BASIC_BLOCK (child)->succ;
1085 e;
1086 e = e->succ_next)
1087 if (e->dest != EXIT_BLOCK_PTR)
1088 --degree[e->dest->index];
1089 }
1090 else
1091 --head;
1092 }
1093 ++nr_regions;
1094 }
1095 }
1096 }
1097 free (queue);
1098 }
1099
1100 /* Any block that did not end up in a region is placed into a region
1101 by itself. */
1102 for (i = 0; i < n_basic_blocks; i++)
1103 if (degree[i] >= 0)
1104 {
1105 rgn_bb_table[idx] = i;
1106 RGN_NR_BLOCKS (nr_regions) = 1;
1107 RGN_BLOCKS (nr_regions) = idx++;
1108 CONTAINING_RGN (i) = nr_regions++;
1109 BLOCK_TO_BB (i) = 0;
1110 }
1111
1112 free (max_hdr);
1113 free (dfs_nr);
1114 free (stack);
1115 free (passed);
1116 free (header);
1117 free (inner);
1118 free (in_queue);
1119 free (in_stack);
1120}
1121
1122/* Functions for regions scheduling information. */
1123
1124/* Compute dominators, probability, and potential-split-edges of bb.
1125 Assume that these values were already computed for bb's predecessors. */
1126
1127static void
1128compute_dom_prob_ps (bb)
1129 int bb;
1130{
1131 int nxt_in_edge, fst_in_edge, pred;
1132 int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;
1133
1134 prob[bb] = 0.0;
1135 if (IS_RGN_ENTRY (bb))
1136 {
1137 BITSET_ADD (dom[bb], 0, bbset_size);
1138 prob[bb] = 1.0;
1139 return;
1140 }
1141
1142 fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));
1143
1144 /* Intialize dom[bb] to '111..1'. */
1145 BITSET_INVERT (dom[bb], bbset_size);
1146
1147 do
1148 {
1149 pred = FROM_BLOCK (nxt_in_edge);
1150 BITSET_INTER (dom[bb], dom[BLOCK_TO_BB (pred)], bbset_size);
1151
1152 BITSET_UNION (ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)],
1153 edgeset_size);
1154
1155 BITSET_ADD (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge), edgeset_size);
1156
1157 nr_out_edges = 1;
1158 nr_rgn_out_edges = 0;
1159 fst_out_edge = OUT_EDGES (pred);
1160 nxt_out_edge = NEXT_OUT (fst_out_edge);
1161 BITSET_UNION (pot_split[bb], pot_split[BLOCK_TO_BB (pred)],
1162 edgeset_size);
1163
1164 BITSET_ADD (pot_split[bb], EDGE_TO_BIT (fst_out_edge), edgeset_size);
1165
1166 /* The successor doesn't belong in the region? */
1167 if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
1168 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1169 ++nr_rgn_out_edges;
1170
1171 while (fst_out_edge != nxt_out_edge)
1172 {
1173 ++nr_out_edges;
1174 /* The successor doesn't belong in the region? */
1175 if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
1176 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1177 ++nr_rgn_out_edges;
1178 BITSET_ADD (pot_split[bb], EDGE_TO_BIT (nxt_out_edge), edgeset_size);
1179 nxt_out_edge = NEXT_OUT (nxt_out_edge);
1180
1181 }
1182
1183 /* Now nr_rgn_out_edges is the number of region-exit edges from
1184 pred, and nr_out_edges will be the number of pred out edges
1185 not leaving the region. */
1186 nr_out_edges -= nr_rgn_out_edges;
1187 if (nr_rgn_out_edges > 0)
1188 prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1189 else
1190 prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1191 nxt_in_edge = NEXT_IN (nxt_in_edge);
1192 }
1193 while (fst_in_edge != nxt_in_edge);
1194
1195 BITSET_ADD (dom[bb], bb, bbset_size);
1196 BITSET_DIFFER (pot_split[bb], ancestor_edges[bb], edgeset_size);
1197
1198 if (sched_verbose >= 2)
1199 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1200 (int) (100.0 * prob[bb]));
1201}
1202
1203/* Functions for target info. */
1204
1205/* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
1206 Note that bb_trg dominates bb_src. */
1207
1208static void
1209split_edges (bb_src, bb_trg, bl)
1210 int bb_src;
1211 int bb_trg;
1212 edgelst *bl;
1213{
1214 int es = edgeset_size;
1215 edgeset src = (edgeset) xcalloc (es, sizeof (HOST_WIDE_INT));
1216
1217 while (es--)
1218 src[es] = (pot_split[bb_src])[es];
1219 BITSET_DIFFER (src, pot_split[bb_trg], edgeset_size);
1220 extract_bitlst (src, edgeset_size, edgeset_bitsize, bl);
1221 free (src);
1222}
1223
1224/* Find the valid candidate-source-blocks for the target block TRG, compute
1225 their probability, and check if they are speculative or not.
1226 For speculative sources, compute their update-blocks and split-blocks. */
1227
1228static void
1229compute_trg_info (trg)
1230 int trg;
1231{
b3694847 1232 candidate *sp;
b4ead7d4
BS
1233 edgelst el;
1234 int check_block, update_idx;
1235 int i, j, k, fst_edge, nxt_edge;
1236
1237 /* Define some of the fields for the target bb as well. */
1238 sp = candidate_table + trg;
1239 sp->is_valid = 1;
1240 sp->is_speculative = 0;
1241 sp->src_prob = 100;
1242
1243 for (i = trg + 1; i < current_nr_blocks; i++)
1244 {
1245 sp = candidate_table + i;
1246
1247 sp->is_valid = IS_DOMINATED (i, trg);
1248 if (sp->is_valid)
1249 {
1250 sp->src_prob = GET_SRC_PROB (i, trg);
1251 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1252 }
1253
1254 if (sp->is_valid)
1255 {
1256 split_edges (i, trg, &el);
1257 sp->is_speculative = (el.nr_members) ? 1 : 0;
1258 if (sp->is_speculative && !flag_schedule_speculative)
1259 sp->is_valid = 0;
1260 }
1261
1262 if (sp->is_valid)
1263 {
1264 char *update_blocks;
1265
1266 /* Compute split blocks and store them in bblst_table.
1267 The TO block of every split edge is a split block. */
1268 sp->split_bbs.first_member = &bblst_table[bblst_last];
1269 sp->split_bbs.nr_members = el.nr_members;
1270 for (j = 0; j < el.nr_members; bblst_last++, j++)
1271 bblst_table[bblst_last] =
1272 TO_BLOCK (rgn_edges[el.first_member[j]]);
1273 sp->update_bbs.first_member = &bblst_table[bblst_last];
1274
1275 /* Compute update blocks and store them in bblst_table.
1276 For every split edge, look at the FROM block, and check
1277 all out edges. For each out edge that is not a split edge,
1278 add the TO block to the update block list. This list can end
1279 up with a lot of duplicates. We need to weed them out to avoid
1280 overrunning the end of the bblst_table. */
1281 update_blocks = (char *) alloca (n_basic_blocks);
1282 memset (update_blocks, 0, n_basic_blocks);
1283
1284 update_idx = 0;
1285 for (j = 0; j < el.nr_members; j++)
1286 {
1287 check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
1288 fst_edge = nxt_edge = OUT_EDGES (check_block);
1289 do
1290 {
1291 if (! update_blocks[TO_BLOCK (nxt_edge)])
1292 {
1293 for (k = 0; k < el.nr_members; k++)
1294 if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
1295 break;
1296
1297 if (k >= el.nr_members)
1298 {
1299 bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
1300 update_blocks[TO_BLOCK (nxt_edge)] = 1;
1301 update_idx++;
1302 }
1303 }
1304
1305 nxt_edge = NEXT_OUT (nxt_edge);
1306 }
1307 while (fst_edge != nxt_edge);
1308 }
1309 sp->update_bbs.nr_members = update_idx;
1310
1311 /* Make sure we didn't overrun the end of bblst_table. */
1312 if (bblst_last > bblst_size)
1313 abort ();
1314 }
1315 else
1316 {
1317 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1318
1319 sp->is_speculative = 0;
1320 sp->src_prob = 0;
1321 }
1322 }
1323}
1324
1325/* Print candidates info, for debugging purposes. Callable from debugger. */
1326
1327void
1328debug_candidate (i)
1329 int i;
1330{
1331 if (!candidate_table[i].is_valid)
1332 return;
1333
1334 if (candidate_table[i].is_speculative)
1335 {
1336 int j;
1337 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1338
1339 fprintf (sched_dump, "split path: ");
1340 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1341 {
1342 int b = candidate_table[i].split_bbs.first_member[j];
1343
1344 fprintf (sched_dump, " %d ", b);
1345 }
1346 fprintf (sched_dump, "\n");
1347
1348 fprintf (sched_dump, "update path: ");
1349 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1350 {
1351 int b = candidate_table[i].update_bbs.first_member[j];
1352
1353 fprintf (sched_dump, " %d ", b);
1354 }
1355 fprintf (sched_dump, "\n");
1356 }
1357 else
1358 {
1359 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1360 }
1361}
1362
1363/* Print candidates info, for debugging purposes. Callable from debugger. */
1364
1365void
1366debug_candidates (trg)
1367 int trg;
1368{
1369 int i;
1370
1371 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1372 BB_TO_BLOCK (trg), trg);
1373 for (i = trg + 1; i < current_nr_blocks; i++)
1374 debug_candidate (i);
1375}
1376
1377/* Functions for speculative scheduing. */
1378
1379/* Return 0 if x is a set of a register alive in the beginning of one
1380 of the split-blocks of src, otherwise return 1. */
1381
1382static int
1383check_live_1 (src, x)
1384 int src;
1385 rtx x;
1386{
b3694847
SS
1387 int i;
1388 int regno;
1389 rtx reg = SET_DEST (x);
b4ead7d4
BS
1390
1391 if (reg == 0)
1392 return 1;
1393
1394 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1395 || GET_CODE (reg) == SIGN_EXTRACT
1396 || GET_CODE (reg) == STRICT_LOW_PART)
1397 reg = XEXP (reg, 0);
1398
7193d1dc 1399 if (GET_CODE (reg) == PARALLEL)
b4ead7d4 1400 {
b3694847 1401 int i;
90d036a0 1402
b4ead7d4 1403 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
7193d1dc
RK
1404 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1405 if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
90d036a0 1406 return 1;
90d036a0 1407
b4ead7d4
BS
1408 return 0;
1409 }
1410
1411 if (GET_CODE (reg) != REG)
1412 return 1;
1413
1414 regno = REGNO (reg);
1415
1416 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1417 {
1418 /* Global registers are assumed live. */
1419 return 0;
1420 }
1421 else
1422 {
1423 if (regno < FIRST_PSEUDO_REGISTER)
1424 {
1425 /* Check for hard registers. */
1426 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1427 while (--j >= 0)
1428 {
1429 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1430 {
1431 int b = candidate_table[src].split_bbs.first_member[i];
1432
1433 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
1434 regno + j))
1435 {
1436 return 0;
1437 }
1438 }
1439 }
1440 }
1441 else
1442 {
1443 /* Check for psuedo registers. */
1444 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1445 {
1446 int b = candidate_table[src].split_bbs.first_member[i];
1447
1448 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
1449 {
1450 return 0;
1451 }
1452 }
1453 }
1454 }
1455
1456 return 1;
1457}
1458
1459/* If x is a set of a register R, mark that R is alive in the beginning
1460 of every update-block of src. */
1461
1462static void
1463update_live_1 (src, x)
1464 int src;
1465 rtx x;
1466{
b3694847
SS
1467 int i;
1468 int regno;
1469 rtx reg = SET_DEST (x);
b4ead7d4
BS
1470
1471 if (reg == 0)
1472 return;
1473
1474 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1475 || GET_CODE (reg) == SIGN_EXTRACT
1476 || GET_CODE (reg) == STRICT_LOW_PART)
1477 reg = XEXP (reg, 0);
1478
7193d1dc 1479 if (GET_CODE (reg) == PARALLEL)
b4ead7d4 1480 {
b3694847 1481 int i;
90d036a0 1482
b4ead7d4 1483 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
7193d1dc
RK
1484 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1485 update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
90d036a0 1486
b4ead7d4
BS
1487 return;
1488 }
1489
1490 if (GET_CODE (reg) != REG)
1491 return;
1492
1493 /* Global registers are always live, so the code below does not apply
1494 to them. */
1495
1496 regno = REGNO (reg);
1497
1498 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1499 {
1500 if (regno < FIRST_PSEUDO_REGISTER)
1501 {
1502 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1503 while (--j >= 0)
1504 {
1505 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1506 {
1507 int b = candidate_table[src].update_bbs.first_member[i];
1508
1509 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
1510 regno + j);
1511 }
1512 }
1513 }
1514 else
1515 {
1516 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1517 {
1518 int b = candidate_table[src].update_bbs.first_member[i];
1519
1520 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
1521 }
1522 }
1523 }
1524}
1525
1526/* Return 1 if insn can be speculatively moved from block src to trg,
1527 otherwise return 0. Called before first insertion of insn to
1528 ready-list or before the scheduling. */
1529
1530static int
1531check_live (insn, src)
1532 rtx insn;
1533 int src;
1534{
1535 /* Find the registers set by instruction. */
1536 if (GET_CODE (PATTERN (insn)) == SET
1537 || GET_CODE (PATTERN (insn)) == CLOBBER)
1538 return check_live_1 (src, PATTERN (insn));
1539 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1540 {
1541 int j;
1542 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1543 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1544 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1545 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1546 return 0;
1547
1548 return 1;
1549 }
1550
1551 return 1;
1552}
1553
1554/* Update the live registers info after insn was moved speculatively from
1555 block src to trg. */
1556
1557static void
1558update_live (insn, src)
1559 rtx insn;
1560 int src;
1561{
1562 /* Find the registers set by instruction. */
1563 if (GET_CODE (PATTERN (insn)) == SET
1564 || GET_CODE (PATTERN (insn)) == CLOBBER)
1565 update_live_1 (src, PATTERN (insn));
1566 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1567 {
1568 int j;
1569 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1570 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1571 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1572 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1573 }
1574}
1575
1576/* Exception Free Loads:
1577
1578 We define five classes of speculative loads: IFREE, IRISKY,
1579 PFREE, PRISKY, and MFREE.
1580
1581 IFREE loads are loads that are proved to be exception-free, just
1582 by examining the load insn. Examples for such loads are loads
1583 from TOC and loads of global data.
1584
1585 IRISKY loads are loads that are proved to be exception-risky,
1586 just by examining the load insn. Examples for such loads are
1587 volatile loads and loads from shared memory.
1588
1589 PFREE loads are loads for which we can prove, by examining other
1590 insns, that they are exception-free. Currently, this class consists
1591 of loads for which we are able to find a "similar load", either in
1592 the target block, or, if only one split-block exists, in that split
1593 block. Load2 is similar to load1 if both have same single base
1594 register. We identify only part of the similar loads, by finding
1595 an insn upon which both load1 and load2 have a DEF-USE dependence.
1596
1597 PRISKY loads are loads for which we can prove, by examining other
1598 insns, that they are exception-risky. Currently we have two proofs for
1599 such loads. The first proof detects loads that are probably guarded by a
1600 test on the memory address. This proof is based on the
1601 backward and forward data dependence information for the region.
1602 Let load-insn be the examined load.
1603 Load-insn is PRISKY iff ALL the following hold:
1604
1605 - insn1 is not in the same block as load-insn
1606 - there is a DEF-USE dependence chain (insn1, ..., load-insn)
1607 - test-insn is either a compare or a branch, not in the same block
1608 as load-insn
1609 - load-insn is reachable from test-insn
1610 - there is a DEF-USE dependence chain (insn1, ..., test-insn)
1611
1612 This proof might fail when the compare and the load are fed
1613 by an insn not in the region. To solve this, we will add to this
1614 group all loads that have no input DEF-USE dependence.
1615
1616 The second proof detects loads that are directly or indirectly
1617 fed by a speculative load. This proof is affected by the
1618 scheduling process. We will use the flag fed_by_spec_load.
1619 Initially, all insns have this flag reset. After a speculative
1620 motion of an insn, if insn is either a load, or marked as
1621 fed_by_spec_load, we will also mark as fed_by_spec_load every
1622 insn1 for which a DEF-USE dependence (insn, insn1) exists. A
1623 load which is fed_by_spec_load is also PRISKY.
1624
1625 MFREE (maybe-free) loads are all the remaining loads. They may be
1626 exception-free, but we cannot prove it.
1627
1628 Now, all loads in IFREE and PFREE classes are considered
1629 exception-free, while all loads in IRISKY and PRISKY classes are
1630 considered exception-risky. As for loads in the MFREE class,
1631 these are considered either exception-free or exception-risky,
1632 depending on whether we are pessimistic or optimistic. We have
1633 to take the pessimistic approach to assure the safety of
1634 speculative scheduling, but we can take the optimistic approach
1635 by invoking the -fsched_spec_load_dangerous option. */
1636
1637enum INSN_TRAP_CLASS
1638{
1639 TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2,
1640 PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5
1641};
1642
1643#define WORST_CLASS(class1, class2) \
1644((class1 > class2) ? class1 : class2)
1645
1646/* Non-zero if block bb_to is equal to, or reachable from block bb_from. */
1647#define IS_REACHABLE(bb_from, bb_to) \
1648(bb_from == bb_to \
1649 || IS_RGN_ENTRY (bb_from) \
1650 || (bitset_member (ancestor_edges[bb_to], \
1651 EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))), \
1652 edgeset_size)))
1653
1654/* Non-zero iff the address is comprised from at most 1 register. */
1655#define CONST_BASED_ADDRESS_P(x) \
1656 (GET_CODE (x) == REG \
1657 || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS \
1658 || (GET_CODE (x) == LO_SUM)) \
1659 && (GET_CODE (XEXP (x, 0)) == CONST_INT \
1660 || GET_CODE (XEXP (x, 1)) == CONST_INT)))
1661
1662/* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1663
1664static void
1665set_spec_fed (load_insn)
1666 rtx load_insn;
1667{
1668 rtx link;
1669
1670 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
1671 if (GET_MODE (link) == VOIDmode)
1672 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
1673} /* set_spec_fed */
1674
1675/* On the path from the insn to load_insn_bb, find a conditional
1676branch depending on insn, that guards the speculative load. */
1677
1678static int
1679find_conditional_protection (insn, load_insn_bb)
1680 rtx insn;
1681 int load_insn_bb;
1682{
1683 rtx link;
1684
1685 /* Iterate through DEF-USE forward dependences. */
1686 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
1687 {
1688 rtx next = XEXP (link, 0);
1689 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1690 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1691 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1692 && load_insn_bb != INSN_BB (next)
1693 && GET_MODE (link) == VOIDmode
1694 && (GET_CODE (next) == JUMP_INSN
1695 || find_conditional_protection (next, load_insn_bb)))
1696 return 1;
1697 }
1698 return 0;
1699} /* find_conditional_protection */
1700
1701/* Returns 1 if the same insn1 that participates in the computation
1702 of load_insn's address is feeding a conditional branch that is
1703 guarding on load_insn. This is true if we find a the two DEF-USE
1704 chains:
1705 insn1 -> ... -> conditional-branch
1706 insn1 -> ... -> load_insn,
1707 and if a flow path exist:
1708 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1709 and if insn1 is on the path
1710 region-entry -> ... -> bb_trg -> ... load_insn.
1711
1712 Locate insn1 by climbing on LOG_LINKS from load_insn.
1713 Locate the branch by following INSN_DEPEND from insn1. */
1714
1715static int
1716is_conditionally_protected (load_insn, bb_src, bb_trg)
1717 rtx load_insn;
1718 int bb_src, bb_trg;
1719{
1720 rtx link;
1721
1722 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
1723 {
1724 rtx insn1 = XEXP (link, 0);
1725
1726 /* Must be a DEF-USE dependence upon non-branch. */
1727 if (GET_MODE (link) != VOIDmode
1728 || GET_CODE (insn1) == JUMP_INSN)
1729 continue;
1730
1731 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1732 if (INSN_BB (insn1) == bb_src
1733 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1734 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1735 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1736 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1737 continue;
1738
1739 /* Now search for the conditional-branch. */
1740 if (find_conditional_protection (insn1, bb_src))
1741 return 1;
1742
1743 /* Recursive step: search another insn1, "above" current insn1. */
1744 return is_conditionally_protected (insn1, bb_src, bb_trg);
1745 }
1746
1747 /* The chain does not exist. */
1748 return 0;
1749} /* is_conditionally_protected */
1750
1751/* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1752 load_insn can move speculatively from bb_src to bb_trg. All the
1753 following must hold:
1754
1755 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1756 (2) load_insn and load1 have a def-use dependence upon
1757 the same insn 'insn1'.
1758 (3) either load2 is in bb_trg, or:
1759 - there's only one split-block, and
1760 - load1 is on the escape path, and
1761
1762 From all these we can conclude that the two loads access memory
1763 addresses that differ at most by a constant, and hence if moving
1764 load_insn would cause an exception, it would have been caused by
1765 load2 anyhow. */
1766
1767static int
1768is_pfree (load_insn, bb_src, bb_trg)
1769 rtx load_insn;
1770 int bb_src, bb_trg;
1771{
1772 rtx back_link;
b3694847 1773 candidate *candp = candidate_table + bb_src;
b4ead7d4
BS
1774
1775 if (candp->split_bbs.nr_members != 1)
1776 /* Must have exactly one escape block. */
1777 return 0;
1778
1779 for (back_link = LOG_LINKS (load_insn);
1780 back_link; back_link = XEXP (back_link, 1))
1781 {
1782 rtx insn1 = XEXP (back_link, 0);
1783
1784 if (GET_MODE (back_link) == VOIDmode)
1785 {
1786 /* Found a DEF-USE dependence (insn1, load_insn). */
1787 rtx fore_link;
1788
1789 for (fore_link = INSN_DEPEND (insn1);
1790 fore_link; fore_link = XEXP (fore_link, 1))
1791 {
1792 rtx insn2 = XEXP (fore_link, 0);
1793 if (GET_MODE (fore_link) == VOIDmode)
1794 {
1795 /* Found a DEF-USE dependence (insn1, insn2). */
1796 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1797 /* insn2 not guaranteed to be a 1 base reg load. */
1798 continue;
1799
1800 if (INSN_BB (insn2) == bb_trg)
1801 /* insn2 is the similar load, in the target block. */
1802 return 1;
1803
1804 if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
1805 /* insn2 is a similar load, in a split-block. */
1806 return 1;
1807 }
1808 }
1809 }
1810 }
1811
1812 /* Couldn't find a similar load. */
1813 return 0;
1814} /* is_pfree */
1815
1816/* Returns a class that insn with GET_DEST(insn)=x may belong to,
1817 as found by analyzing insn's expression. */
1818
1819static int
1820may_trap_exp (x, is_store)
1821 rtx x;
1822 int is_store;
1823{
1824 enum rtx_code code;
1825
1826 if (x == 0)
1827 return TRAP_FREE;
1828 code = GET_CODE (x);
1829 if (is_store)
1830 {
1831 if (code == MEM)
1832 return TRAP_RISKY;
1833 else
1834 return TRAP_FREE;
1835 }
1836 if (code == MEM)
1837 {
1838 /* The insn uses memory: a volatile load. */
1839 if (MEM_VOLATILE_P (x))
1840 return IRISKY;
1841 /* An exception-free load. */
1842 if (!may_trap_p (x))
1843 return IFREE;
1844 /* A load with 1 base register, to be further checked. */
1845 if (CONST_BASED_ADDRESS_P (XEXP (x, 0)))
1846 return PFREE_CANDIDATE;
1847 /* No info on the load, to be further checked. */
1848 return PRISKY_CANDIDATE;
1849 }
1850 else
1851 {
1852 const char *fmt;
1853 int i, insn_class = TRAP_FREE;
1854
1855 /* Neither store nor load, check if it may cause a trap. */
1856 if (may_trap_p (x))
1857 return TRAP_RISKY;
1858 /* Recursive step: walk the insn... */
1859 fmt = GET_RTX_FORMAT (code);
1860 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1861 {
1862 if (fmt[i] == 'e')
1863 {
1864 int tmp_class = may_trap_exp (XEXP (x, i), is_store);
1865 insn_class = WORST_CLASS (insn_class, tmp_class);
1866 }
1867 else if (fmt[i] == 'E')
1868 {
1869 int j;
1870 for (j = 0; j < XVECLEN (x, i); j++)
1871 {
1872 int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store);
1873 insn_class = WORST_CLASS (insn_class, tmp_class);
1874 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
1875 break;
1876 }
1877 }
1878 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
1879 break;
1880 }
1881 return insn_class;
1882 }
1883}
1884
1885/* Classifies insn for the purpose of verifying that it can be
1886 moved speculatively, by examining its pattern, returning:
1887 TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
1888 TRAP_FREE: non-load insn.
1889 IFREE: load from a globally safe location.
1890 IRISKY: volatile load.
1891 PFREE_CANDIDATE, PRISKY_CANDIDATE: loads that need to be checked for
1892 being either PFREE or PRISKY. */
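/* A few hedged examples of how patterns tend to classify (editorial,
   not exhaustive):
     store through a pointer              -> TRAP_RISKY
     register-to-register arithmetic      -> TRAP_FREE
     division by a register               -> TRAP_RISKY (may_trap_p)
     volatile load                        -> IRISKY
     load from "base register + constant" -> PFREE_CANDIDATE, checked further
   For PARALLELs the per-member classes are combined with WORST_CLASS,
   which keeps the riskier of its two arguments.  */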
1893
1894static int
1895haifa_classify_insn (insn)
1896 rtx insn;
1897{
1898 rtx pat = PATTERN (insn);
1899 int tmp_class = TRAP_FREE;
1900 int insn_class = TRAP_FREE;
1901 enum rtx_code code;
1902
1903 if (GET_CODE (pat) == PARALLEL)
1904 {
1905 int i, len = XVECLEN (pat, 0);
1906
1907 for (i = len - 1; i >= 0; i--)
1908 {
1909 code = GET_CODE (XVECEXP (pat, 0, i));
1910 switch (code)
1911 {
1912 case CLOBBER:
1913 /* Test if it is a 'store'. */
1914 tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1);
1915 break;
1916 case SET:
1917 /* Test if it is a store. */
1918 tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1);
1919 if (tmp_class == TRAP_RISKY)
1920 break;
1921 /* Test if it is a load. */
1922 tmp_class
1923 = WORST_CLASS (tmp_class,
1924 may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)),
1925 0));
1926 break;
1927 case COND_EXEC:
1928 case TRAP_IF:
1929 tmp_class = TRAP_RISKY;
1930 break;
1931 default:
1932 ;
1933 }
1934 insn_class = WORST_CLASS (insn_class, tmp_class);
1935 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
1936 break;
1937 }
1938 }
1939 else
1940 {
1941 code = GET_CODE (pat);
1942 switch (code)
1943 {
1944 case CLOBBER:
1945 /* Test if it is a 'store'. */
1946 tmp_class = may_trap_exp (XEXP (pat, 0), 1);
1947 break;
1948 case SET:
1949 /* Test if it is a store. */
1950 tmp_class = may_trap_exp (SET_DEST (pat), 1);
1951 if (tmp_class == TRAP_RISKY)
1952 break;
1953 /* Test if it is a load. */
1954 tmp_class =
1955 WORST_CLASS (tmp_class,
1956 may_trap_exp (SET_SRC (pat), 0));
1957 break;
1958 case COND_EXEC:
1959 case TRAP_IF:
1960 tmp_class = TRAP_RISKY;
1961 break;
1962 default:;
1963 }
1964 insn_class = tmp_class;
1965 }
1966
1967 return insn_class;
1968}
1969
1970/* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1971 a load moved speculatively, or if load_insn is protected by
1972 a compare on load_insn's address). */
1973
1974static int
1975is_prisky (load_insn, bb_src, bb_trg)
1976 rtx load_insn;
1977 int bb_src, bb_trg;
1978{
1979 if (FED_BY_SPEC_LOAD (load_insn))
1980 return 1;
1981
1982 if (LOG_LINKS (load_insn) == NULL)
1983 /* Dependence may 'hide' out of the region. */
1984 return 1;
1985
1986 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
1987 return 1;
1988
1989 return 0;
1990}
1991
1992/* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1993 Return 1 if insn is exception-free (and the motion is valid)
1994 and 0 otherwise. */
1995
1996static int
1997is_exception_free (insn, bb_src, bb_trg)
1998 rtx insn;
1999 int bb_src, bb_trg;
2000{
2001 int insn_class = haifa_classify_insn (insn);
2002
2003 /* Handle non-load insns. */
2004 switch (insn_class)
2005 {
2006 case TRAP_FREE:
2007 return 1;
2008 case TRAP_RISKY:
2009 return 0;
2010 default:;
2011 }
2012
2013 /* Handle loads. */
2014 if (!flag_schedule_speculative_load)
2015 return 0;
2016 IS_LOAD_INSN (insn) = 1;
2017 switch (insn_class)
2018 {
2019 case IFREE:
2020 return 1;
2021 case IRISKY:
2022 return 0;
2023 case PFREE_CANDIDATE:
2024 if (is_pfree (insn, bb_src, bb_trg))
2025 return 1;
2026 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
2027 case PRISKY_CANDIDATE:
2028 if (!flag_schedule_speculative_load_dangerous
2029 || is_prisky (insn, bb_src, bb_trg))
2030 return 0;
2031 break;
2032 default:;
2033 }
2034
2035 return flag_schedule_speculative_load_dangerous;
2036}
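/* Summary of the decision above (editorial): TRAP_FREE insns are always
   movable and TRAP_RISKY insns never are.  For loads, IFREE is movable and
   IRISKY is not; a PFREE_CANDIDATE moves when is_pfree holds; the remaining
   candidates move only under -fsched-spec-load-dangerous and then only if
   is_prisky does not hold.  Without -fsched-spec-load no load is moved
   speculatively at all.  */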
2037\f
2038/* The number of insns from the current block scheduled so far. */
2039static int sched_target_n_insns;
2040/* The number of insns from the current block to be scheduled in total. */
2041static int target_n_insns;
2042/* The number of insns from the entire region scheduled so far. */
2043static int sched_n_insns;
2044/* Nonzero if the last scheduled insn was a jump. */
2045static int last_was_jump;
2046
2047/* Implementations of the sched_info functions for region scheduling. */
2048static void init_ready_list PARAMS ((struct ready_list *));
2049static int can_schedule_ready_p PARAMS ((rtx));
2050static int new_ready PARAMS ((rtx));
2051static int schedule_more_p PARAMS ((void));
2052static const char *rgn_print_insn PARAMS ((rtx, int));
2053static int rgn_rank PARAMS ((rtx, rtx));
2054static int contributes_to_priority PARAMS ((rtx, rtx));
2055static void compute_jump_reg_dependencies PARAMS ((rtx, regset));
2056
2057/* Return nonzero if there are more insns that should be scheduled. */
2058
2059static int
2060schedule_more_p ()
2061{
79c2ffde 2062 return ! last_was_jump && sched_target_n_insns < target_n_insns;
2063}
2064
2065/* Add all insns that are initially ready to the ready list READY. Called
2066 once before scheduling a set of insns. */
2067
2068static void
2069init_ready_list (ready)
2070 struct ready_list *ready;
2071{
2072 rtx prev_head = current_sched_info->prev_head;
2073 rtx next_tail = current_sched_info->next_tail;
2074 int bb_src;
2075 rtx insn;
2076
2077 target_n_insns = 0;
2078 sched_target_n_insns = 0;
2079 sched_n_insns = 0;
79c2ffde 2080 last_was_jump = 0;
2081
2082 /* Print debugging information. */
2083 if (sched_verbose >= 5)
2084 debug_dependencies ();
2085
2086 /* Prepare current target block info. */
2087 if (current_nr_blocks > 1)
2088 {
2089 candidate_table = (candidate *) xmalloc (current_nr_blocks
2090 * sizeof (candidate));
2091
2092 bblst_last = 0;
2093 /* bblst_table holds split blocks and update blocks for each block after
2094 the current one in the region. Split blocks and update blocks are
2095 the TO blocks of region edges, so there can be at most rgn_nr_edges
2096 of them. */
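/* For example (numbers invented for illustration): with 6 blocks in the
   region, target_bb == 2 and rgn_nr_edges == 8, the bound below is
   (6 - 2) * 8 = 32 table entries.  */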
2097 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
2098 bblst_table = (int *) xmalloc (bblst_size * sizeof (int));
2099
2100 bitlst_table_last = 0;
2101 bitlst_table_size = rgn_nr_edges;
2102 bitlst_table = (int *) xmalloc (rgn_nr_edges * sizeof (int));
2103
2104 compute_trg_info (target_bb);
2105 }
2106
2107 /* Initialize ready list with all 'ready' insns in target block.
2108 Count number of insns in the target block being scheduled. */
2109 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
2110 {
2111 rtx next;
2112
2113 if (! INSN_P (insn))
2114 continue;
2115 next = NEXT_INSN (insn);
2116
2117 if (INSN_DEP_COUNT (insn) == 0
2118 && (SCHED_GROUP_P (next) == 0 || ! INSN_P (next)))
2119 ready_add (ready, insn);
2120 if (!(SCHED_GROUP_P (insn)))
2121 target_n_insns++;
2122 }
2123
2124 /* Add to ready list all 'ready' insns in valid source blocks.
2125 For speculative insns, check-live, exception-free, and
2126 issue-delay. */
2127 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
2128 if (IS_VALID (bb_src))
2129 {
2130 rtx src_head;
2131 rtx src_next_tail;
2132 rtx tail, head;
2133
2134 get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
2135 src_next_tail = NEXT_INSN (tail);
2136 src_head = head;
2137
2138 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
2139 {
2140 if (! INSN_P (insn))
2141 continue;
2142
2143 if (!CANT_MOVE (insn)
2144 && (!IS_SPECULATIVE_INSN (insn)
b8ec5764 2145 || (insn_issue_delay (insn) <= 3
2146 && check_live (insn, bb_src)
2147 && is_exception_free (insn, bb_src, target_bb))))
2148 {
2149 rtx next;
2150
e3aafbad 2151 /* Note that we haven't squirreled away the notes for
2152 blocks other than the current. So if this is a
2153 speculative insn, NEXT might otherwise be a note. */
2154 next = next_nonnote_insn (insn);
2155 if (INSN_DEP_COUNT (insn) == 0
2156 && (! next
2157 || SCHED_GROUP_P (next) == 0
2158 || ! INSN_P (next)))
2159 ready_add (ready, insn);
2160 }
2161 }
2162 }
2163}
2164
2165/* Called after taking INSN from the ready list. Returns nonzero if this
2166 insn can be scheduled, or zero if we should silently discard it. */
2167
2168static int
2169can_schedule_ready_p (insn)
2170 rtx insn;
2171{
2172 if (GET_CODE (insn) == JUMP_INSN)
2173 last_was_jump = 1;
2174
2175 /* An interblock motion? */
2176 if (INSN_BB (insn) != target_bb)
2177 {
2178 rtx temp;
2179 basic_block b1;
2180
2181 if (IS_SPECULATIVE_INSN (insn))
2182 {
2183 if (!check_live (insn, INSN_BB (insn)))
2184 return 0;
2185 update_live (insn, INSN_BB (insn));
2186
2187 /* For speculative load, mark insns fed by it. */
2188 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
2189 set_spec_fed (insn);
2190
2191 nr_spec++;
2192 }
2193 nr_inter++;
2194
2195 /* Find the beginning of the scheduling group. */
2196 /* ??? Ought to update basic block here, but later bits of
2197 schedule_block assume the original insn block is
2198 still intact. */
2199
2200 temp = insn;
2201 while (SCHED_GROUP_P (temp))
2202 temp = PREV_INSN (temp);
2203
6d2f8887 2204 /* Update source block boundaries. */
2205 b1 = BLOCK_FOR_INSN (temp);
2206 if (temp == b1->head && insn == b1->end)
2207 {
2208 /* We moved all the insns in the basic block.
2209 Emit a note after the last insn and update the
2210 begin/end boundaries to point to the note. */
2211 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
2212 b1->head = note;
2213 b1->end = note;
2214 }
2215 else if (insn == b1->end)
2216 {
2217 /* We took insns from the end of the basic block,
2218 so update the end of block boundary so that it
2219 points to the first insn we did not move. */
2220 b1->end = PREV_INSN (temp);
2221 }
2222 else if (temp == b1->head)
2223 {
2224 /* We took insns from the start of the basic block,
2225 so update the start of block boundary so that
2226 it points to the first insn we did not move. */
2227 b1->head = NEXT_INSN (insn);
2228 }
2229 }
2230 else
2231 {
2232 /* In block motion. */
2233 sched_target_n_insns++;
2234 }
2235 sched_n_insns++;
2236
2237 return 1;
2238}
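/* Editorial sketch of the boundary update above: if source block b1 is
   "i1; i2; i3" and the scheduling group i2..i3 is moved, then temp == i2
   and insn == i3, so b1->end becomes PREV_INSN (i2), i.e. i1.  Had the
   group covered the whole block, a NOTE_INSN_DELETED would have been
   emitted to stand in for both boundaries instead.  */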
2239
2240/* Called after INSN has all its dependencies resolved. Return nonzero
2241 if it should be moved to the ready list or the queue, or zero if we
2242 should silently discard it. */
2243static int
2244new_ready (next)
2245 rtx next;
2246{
2247 /* For speculative insns, before inserting to ready/queue,
2248 check live, exception-free, and issue-delay. */
2249 if (INSN_BB (next) != target_bb
2250 && (!IS_VALID (INSN_BB (next))
2251 || CANT_MOVE (next)
2252 || (IS_SPECULATIVE_INSN (next)
b8ec5764 2253 && (insn_issue_delay (next) > 3
2254 || !check_live (next, INSN_BB (next))
2255 || !is_exception_free (next, INSN_BB (next), target_bb)))))
2256 return 0;
2257 return 1;
2258}
2259
2260/* Return a string that contains the insn uid and optionally anything else
2261 necessary to identify this insn in an output. It's valid to use a
2262 static buffer for this. The ALIGNED parameter should cause the string
2263 to be formatted so that multiple output lines will line up nicely. */
2264
2265static const char *
2266rgn_print_insn (insn, aligned)
2267 rtx insn;
2268 int aligned;
2269{
2270 static char tmp[80];
2271
2272 if (aligned)
2273 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
2274 else
2275 {
b4ead7d4 2276 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
2277 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
2278 else
2279 sprintf (tmp, "%d", INSN_UID (insn));
2280 }
2281 return tmp;
2282}
2283
2284/* Compare priority of two insns. Return a positive number if the second
2285 insn is to be preferred for scheduling, and a negative one if the first
2286 is to be preferred. Zero if they are equally good. */
2287
2288static int
2289rgn_rank (insn1, insn2)
2290 rtx insn1, insn2;
2291{
2292 /* Some comparisons make sense only in interblock scheduling. */
2293 if (INSN_BB (insn1) != INSN_BB (insn2))
2294 {
2295 int spec_val, prob_val;
2296
2297 /* Prefer an inblock motion over an interblock motion. */
2298 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
2299 return 1;
2300 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
2301 return -1;
2302
2303 /* Prefer a useful motion over a speculative one. */
2304 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
2305 if (spec_val)
2306 return spec_val;
2307
2308 /* Prefer a more probable (speculative) insn. */
2309 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
2310 if (prob_val)
2311 return prob_val;
2312 }
2313 return 0;
2314}
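/* Usage note (editorial): rgn_rank (i1, i2) < 0 means i1 is preferred.
   For instance, if i1 is in the target block while scheduling i2 would be
   an interblock motion, the first test returns -1 and i1 is taken first;
   between two speculative interblock candidates the more probable one
   wins.  */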
2315
2316/* NEXT is an instruction that depends on INSN (a backward dependence);
2317 return nonzero if we should include this dependence in priority
2318 calculations. */
2319
2320static int
2321contributes_to_priority (next, insn)
2322 rtx next, insn;
2323{
2324 return BLOCK_NUM (next) == BLOCK_NUM (insn);
2325}
2326
2327/* INSN is a JUMP_INSN. Store the set of registers that must be considered
2328 to be set by this jump in SET. */
2329
2330static void
2331compute_jump_reg_dependencies (insn, set)
2332 rtx insn ATTRIBUTE_UNUSED;
2333 regset set ATTRIBUTE_UNUSED;
2334{
2335 /* Nothing to do here, since we postprocess jumps in
2336 add_branch_dependences. */
2337}
2338
2339/* Used in schedule_insns to initialize current_sched_info for scheduling
2340 regions (or single basic blocks). */
2341
2342static struct sched_info region_sched_info =
2343{
2344 init_ready_list,
2345 can_schedule_ready_p,
2346 schedule_more_p,
2347 new_ready,
2348 rgn_rank,
2349 rgn_print_insn,
2350 contributes_to_priority,
2351 compute_jump_reg_dependencies,
2352
2353 NULL, NULL,
2354 NULL, NULL,
4b6c5340 2355 0, 0
2356};
2357
2358/* Add dependences so that branches are scheduled to run last in their
2359 block. */
2360
2361static void
2362add_branch_dependences (head, tail)
2363 rtx head, tail;
2364{
2365 rtx insn, last;
2366
2367 /* For all branches, calls, uses, clobbers, and cc0 setters, force them
2368 to remain in order at the end of the block by adding dependencies and
2369 giving the last a high priority. There may be notes present, and
2370 prev_head may also be a note.
2371
2372 Branches must obviously remain at the end. Calls should remain at the
2373 end since moving them results in worse register allocation. Uses remain
2374 at the end to ensure proper register allocation. cc0 setters remain
2375 at the end because they can't be moved away from their cc0 user. */
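/* For instance (editorial sketch): if the block ends with

       ... ; (use r1) ; (use r2) ; jump

   the first loop below walks backwards from the jump and makes the jump
   depend on (use r2) and (use r2) depend on (use r1), so the tail keeps
   its order; the second loop then makes the earliest of these tail insns
   depend on each remaining insn of the block that nothing else depends on,
   so nothing drifts past them.  */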
2376 insn = tail;
2377 last = 0;
2378 while (GET_CODE (insn) == CALL_INSN
2379 || GET_CODE (insn) == JUMP_INSN
2380 || (GET_CODE (insn) == INSN
2381 && (GET_CODE (PATTERN (insn)) == USE
2382 || GET_CODE (PATTERN (insn)) == CLOBBER
2383#ifdef HAVE_cc0
2384 || sets_cc0_p (PATTERN (insn))
2385#endif
2386 ))
2387 || GET_CODE (insn) == NOTE)
2388 {
2389 if (GET_CODE (insn) != NOTE)
2390 {
2391 if (last != 0
2392 && !find_insn_list (insn, LOG_LINKS (last)))
2393 {
2394 add_dependence (last, insn, REG_DEP_ANTI);
2395 INSN_REF_COUNT (insn)++;
2396 }
2397
2398 CANT_MOVE (insn) = 1;
2399
2400 last = insn;
2401 /* Skip over insns that are part of a group.
2402 Make each insn explicitly depend on the previous insn.
2403 This ensures that only the group header will ever enter
2404 the ready queue (and, when scheduled, will automatically
2405 schedule the SCHED_GROUP_P block). */
2406 while (SCHED_GROUP_P (insn))
2407 {
2408 rtx temp = prev_nonnote_insn (insn);
2409 add_dependence (insn, temp, REG_DEP_ANTI);
2410 insn = temp;
2411 }
2412 }
2413
2414 /* Don't overrun the bounds of the basic block. */
2415 if (insn == head)
2416 break;
2417
2418 insn = PREV_INSN (insn);
2419 }
2420
2421 /* Make sure these insns are scheduled last in their block. */
2422 insn = last;
2423 if (insn != 0)
2424 while (insn != head)
2425 {
2426 insn = prev_nonnote_insn (insn);
2427
2428 if (INSN_REF_COUNT (insn) != 0)
2429 continue;
2430
2431 add_dependence (last, insn, REG_DEP_ANTI);
2432 INSN_REF_COUNT (insn) = 1;
2433
2434 /* Skip over insns that are part of a group. */
2435 while (SCHED_GROUP_P (insn))
2436 insn = prev_nonnote_insn (insn);
2437 }
2438}
2439
2440/* Data structures for the computation of data dependences in a region. We
2441 keep one `deps' structure for every basic block. Before analyzing the
2442 data dependences for a bb, its variables are initialized as a function of
2443 the variables of its predecessors. When the analysis for a bb completes,
2444 we save the contents to the corresponding bb_deps[bb] variable. */
2445
2446static struct deps *bb_deps;
2447
2448/* After computing the dependencies for block BB, propagate the dependencies
4ba478b8 2449 found in TMP_DEPS to the successors of the block. */
b4ead7d4 2450static void
4ba478b8 2451propagate_deps (bb, tmp_deps)
2452 int bb;
2453 struct deps *tmp_deps;
2454{
2455 int b = BB_TO_BLOCK (bb);
2456 int e, first_edge;
2457 int reg;
2458 rtx link_insn, link_mem;
2459 rtx u;
2460
2461 /* These lists should point to the right place, for correct
2462 freeing later. */
2463 bb_deps[bb].pending_read_insns = tmp_deps->pending_read_insns;
2464 bb_deps[bb].pending_read_mems = tmp_deps->pending_read_mems;
2465 bb_deps[bb].pending_write_insns = tmp_deps->pending_write_insns;
2466 bb_deps[bb].pending_write_mems = tmp_deps->pending_write_mems;
2467
2468 /* bb's structures are inherited by its successors. */
2469 first_edge = e = OUT_EDGES (b);
2470 if (e <= 0)
2471 return;
2472
2473 do
2474 {
2475 rtx x;
2476 int b_succ = TO_BLOCK (e);
2477 int bb_succ = BLOCK_TO_BB (b_succ);
2478 struct deps *succ_deps = bb_deps + bb_succ;
2479
2480 /* Only bbs "below" bb, in the same region, are interesting. */
2481 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
2482 || bb_succ <= bb)
2483 {
2484 e = NEXT_OUT (e);
2485 continue;
2486 }
2487
2488 /* The reg_last lists are inherited by bb_succ. */
2489 EXECUTE_IF_SET_IN_REG_SET (&tmp_deps->reg_last_in_use, 0, reg,
b4ead7d4 2490 {
2491 struct deps_reg *tmp_deps_reg = &tmp_deps->reg_last[reg];
2492 struct deps_reg *succ_deps_reg = &succ_deps->reg_last[reg];
2493
2494 for (u = tmp_deps_reg->uses; u; u = XEXP (u, 1))
2495 if (! find_insn_list (XEXP (u, 0), succ_deps_reg->uses))
2496 succ_deps_reg->uses
2497 = alloc_INSN_LIST (XEXP (u, 0), succ_deps_reg->uses);
2498
2499 for (u = tmp_deps_reg->sets; u; u = XEXP (u, 1))
2500 if (! find_insn_list (XEXP (u, 0), succ_deps_reg->sets))
2501 succ_deps_reg->sets
2502 = alloc_INSN_LIST (XEXP (u, 0), succ_deps_reg->sets);
2503
2504 for (u = tmp_deps_reg->clobbers; u; u = XEXP (u, 1))
2505 if (! find_insn_list (XEXP (u, 0), succ_deps_reg->clobbers))
2506 succ_deps_reg->clobbers
2507 = alloc_INSN_LIST (XEXP (u, 0), succ_deps_reg->clobbers);
2508 });
2509 IOR_REG_SET (&succ_deps->reg_last_in_use, &tmp_deps->reg_last_in_use);
2510
2511 /* Mem read/write lists are inherited by bb_succ. */
2512 link_insn = tmp_deps->pending_read_insns;
2513 link_mem = tmp_deps->pending_read_mems;
2514 while (link_insn)
2515 {
2516 if (!(find_insn_mem_list (XEXP (link_insn, 0),
2517 XEXP (link_mem, 0),
2518 succ_deps->pending_read_insns,
2519 succ_deps->pending_read_mems)))
2520 add_insn_mem_dependence (succ_deps, &succ_deps->pending_read_insns,
2521 &succ_deps->pending_read_mems,
2522 XEXP (link_insn, 0), XEXP (link_mem, 0));
2523 link_insn = XEXP (link_insn, 1);
2524 link_mem = XEXP (link_mem, 1);
2525 }
2526
2527 link_insn = tmp_deps->pending_write_insns;
2528 link_mem = tmp_deps->pending_write_mems;
2529 while (link_insn)
2530 {
2531 if (!(find_insn_mem_list (XEXP (link_insn, 0),
2532 XEXP (link_mem, 0),
2533 succ_deps->pending_write_insns,
2534 succ_deps->pending_write_mems)))
2535 add_insn_mem_dependence (succ_deps,
2536 &succ_deps->pending_write_insns,
2537 &succ_deps->pending_write_mems,
2538 XEXP (link_insn, 0), XEXP (link_mem, 0));
2539
2540 link_insn = XEXP (link_insn, 1);
2541 link_mem = XEXP (link_mem, 1);
2542 }
2543
2544 /* last_function_call is inherited by bb_succ. */
2545 for (u = tmp_deps->last_function_call; u; u = XEXP (u, 1))
4ba478b8 2546 if (! find_insn_list (XEXP (u, 0), succ_deps->last_function_call))
b4ead7d4 2547 succ_deps->last_function_call
4ba478b8 2548 = alloc_INSN_LIST (XEXP (u, 0), succ_deps->last_function_call);
2549
2550 /* last_pending_memory_flush is inherited by bb_succ. */
2551 for (u = tmp_deps->last_pending_memory_flush; u; u = XEXP (u, 1))
4ba478b8 2552 if (! find_insn_list (XEXP (u, 0),
b4ead7d4 2553 succ_deps->last_pending_memory_flush))
2554 succ_deps->last_pending_memory_flush
2555 = alloc_INSN_LIST (XEXP (u, 0),
2556 succ_deps->last_pending_memory_flush);
2557
2558 /* sched_before_next_call is inherited by bb_succ. */
2559 x = LOG_LINKS (tmp_deps->sched_before_next_call);
2560 for (; x; x = XEXP (x, 1))
2561 add_dependence (succ_deps->sched_before_next_call,
2562 XEXP (x, 0), REG_DEP_ANTI);
2563
2564 e = NEXT_OUT (e);
2565 }
2566 while (e != first_edge);
2567}
2568
2569/* Compute backward dependences inside bb. In a multiple-block region:
2570 (1) a bb is analyzed after its predecessors, and (2) the lists in
2571 effect at the end of bb (after analyzing for bb) are inherited by
2572 bb's successors.
2573
2574 Specifically for reg-reg data dependences, the block insns are
2575 scanned by sched_analyze () top-to-bottom. Two lists are
2576 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2577 and reg_last[].uses for register USEs.
2578
2579 When analysis is completed for bb, we update for its successors:
2580 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2581 ; - USES[succ] = Union (USES [succ], USES [bb])
2582
2583 The mechanism for computing mem-mem data dependence is very
2584 similar, and the result is interblock dependences in the region. */
2585
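/* Concrete example (editorial, hedged): if bb1 contains "r5 <- ..." and a
   later bb2 of the same region reads r5, then after bb1 is analyzed its
   reg_last entries for r5 are copied into bb2's deps by propagate_deps,
   so when sched_analyze later scans bb2 it records the interblock flow
   dependence from the def in bb1 to the use in bb2.  */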
2586static void
2587compute_block_backward_dependences (bb)
2588 int bb;
2589{
2590 rtx head, tail;
2591 struct deps tmp_deps;
2592
2593 tmp_deps = bb_deps[bb];
2594
2595 /* Do the analysis for this block. */
2596 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2597 sched_analyze (&tmp_deps, head, tail);
2598 add_branch_dependences (head, tail);
2599
2600 if (current_nr_blocks > 1)
4ba478b8 2601 propagate_deps (bb, &tmp_deps);
2602
2603 /* Free up the INSN_LISTs. */
2604 free_deps (&tmp_deps);
b4ead7d4 2605}
4ba478b8 2606
2607/* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2608 them to the unused_*_list variables, so that they can be reused. */
2609
2610static void
2611free_pending_lists ()
2612{
2613 int bb;
2614
2615 for (bb = 0; bb < current_nr_blocks; bb++)
2616 {
2617 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2618 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2619 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2620 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2621 }
2622}
2623\f
2624/* Print dependences for debugging, callable from debugger. */
2625
2626void
2627debug_dependencies ()
2628{
2629 int bb;
2630
2631 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2632 for (bb = 0; bb < current_nr_blocks; bb++)
2633 {
2634 if (1)
2635 {
2636 rtx head, tail;
2637 rtx next_tail;
2638 rtx insn;
2639
2640 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2641 next_tail = NEXT_INSN (tail);
2642 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2643 BB_TO_BLOCK (bb), bb);
2644
2645 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2646 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
2647 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2648 "----", "----", "--", "---", "----", "----", "--------", "-----");
2649 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2650 {
2651 rtx link;
b8ec5764 2652 int unit, range;
2653
2654 if (! INSN_P (insn))
2655 {
2656 int n;
2657 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2658 if (GET_CODE (insn) == NOTE)
2659 {
2660 n = NOTE_LINE_NUMBER (insn);
2661 if (n < 0)
2662 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2663 else
2664 fprintf (sched_dump, "line %d, file %s\n", n,
2665 NOTE_SOURCE_FILE (insn));
2666 }
2667 else
2668 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2669 continue;
2670 }
2671
2672 unit = insn_unit (insn);
2673 range = (unit < 0
2674 || function_units[unit].blockage_range_function == 0) ? 0 :
2675 function_units[unit].blockage_range_function (insn);
2676 fprintf (sched_dump,
2677 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
2678 (SCHED_GROUP_P (insn) ? "+" : " "),
2679 INSN_UID (insn),
2680 INSN_CODE (insn),
2681 INSN_BB (insn),
2682 INSN_DEP_COUNT (insn),
2683 INSN_PRIORITY (insn),
2684 insn_cost (insn, 0, 0),
2685 (int) MIN_BLOCKAGE_COST (range),
2686 (int) MAX_BLOCKAGE_COST (range));
2687 insn_print_units (insn);
2688 fprintf (sched_dump, "\t: ");
2689 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2690 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2691 fprintf (sched_dump, "\n");
2692 }
2693 }
2694 }
2695 fprintf (sched_dump, "\n");
2696}
2697\f
2698/* Schedule a region. A region is either an inner loop, a loop-free
2699 subroutine, or a single basic block. Each bb in the region is
2700 scheduled after its flow predecessors. */
2701
2702static void
2703schedule_region (rgn)
2704 int rgn;
2705{
2706 int bb;
2707 int rgn_n_insns = 0;
2708 int sched_rgn_n_insns = 0;
2709
2710 /* Set variables for the current region. */
2711 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2712 current_blocks = RGN_BLOCKS (rgn);
2713
2714 init_deps_global ();
2715
2716 /* Initializations for region data dependence analysis. */
2717 bb_deps = (struct deps *) xmalloc (sizeof (struct deps) * current_nr_blocks);
2718 for (bb = 0; bb < current_nr_blocks; bb++)
2719 init_deps (bb_deps + bb);
2720
2721 /* Compute LOG_LINKS. */
2722 for (bb = 0; bb < current_nr_blocks; bb++)
2723 compute_block_backward_dependences (bb);
2724
2725 /* Compute INSN_DEPEND. */
2726 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2727 {
2728 rtx head, tail;
2729 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2730
2731 compute_forward_dependences (head, tail);
2732 }
2733
2734 /* Set priorities. */
2735 for (bb = 0; bb < current_nr_blocks; bb++)
2736 {
2737 rtx head, tail;
2738 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2739
2740 rgn_n_insns += set_priorities (head, tail);
2741 }
2742
2743 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2744 if (current_nr_blocks > 1)
2745 {
2746 int i;
2747
2748 prob = (float *) xmalloc ((current_nr_blocks) * sizeof (float));
2749
2750 bbset_size = current_nr_blocks / HOST_BITS_PER_WIDE_INT + 1;
2751 dom = (bbset *) xmalloc (current_nr_blocks * sizeof (bbset));
2752 for (i = 0; i < current_nr_blocks; i++)
2753 dom[i] = (bbset) xcalloc (bbset_size, sizeof (HOST_WIDE_INT));
2754
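/* Editorial note on the sizing above (numbers are illustrative): each
   dominator set is a bit vector with one bit per block of the region, so
   with, say, 70 blocks and a 64-bit HOST_WIDE_INT, bbset_size is
   70 / 64 + 1 == 2 words per set; edgeset_size below is computed the same
   way from rgn_nr_edges.  */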
2755 /* Edge to bit. */
2756 rgn_nr_edges = 0;
2757 edge_to_bit = (int *) xmalloc (nr_edges * sizeof (int));
2758 for (i = 1; i < nr_edges; i++)
2759 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
2760 EDGE_TO_BIT (i) = rgn_nr_edges++;
2761 rgn_edges = (int *) xmalloc (rgn_nr_edges * sizeof (int));
2762
2763 rgn_nr_edges = 0;
2764 for (i = 1; i < nr_edges; i++)
2765 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
2766 rgn_edges[rgn_nr_edges++] = i;
2767
2768 /* Split edges. */
2769 edgeset_size = rgn_nr_edges / HOST_BITS_PER_WIDE_INT + 1;
2770 edgeset_bitsize = rgn_nr_edges;
2771 pot_split = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
2772 ancestor_edges
2773 = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
2774 for (i = 0; i < current_nr_blocks; i++)
2775 {
2776 pot_split[i] =
2777 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
2778 ancestor_edges[i] =
2779 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
2780 }
2781
2782 /* Compute probabilities, dominators, split_edges. */
2783 for (bb = 0; bb < current_nr_blocks; bb++)
2784 compute_dom_prob_ps (bb);
2785 }
2786
2787 /* Now we can schedule all blocks. */
2788 for (bb = 0; bb < current_nr_blocks; bb++)
2789 {
2790 rtx head, tail;
2791 int b = BB_TO_BLOCK (bb);
2792
2793 get_block_head_tail (b, &head, &tail);
2794
2795 if (no_real_insns_p (head, tail))
2796 continue;
2797
2798 current_sched_info->prev_head = PREV_INSN (head);
2799 current_sched_info->next_tail = NEXT_INSN (tail);
2800
2801 if (write_symbols != NO_DEBUG)
2802 {
2803 save_line_notes (b, head, tail);
2804 rm_line_notes (head, tail);
2805 }
2806
2807 /* rm_other_notes only removes notes which are _inside_ the
2808 block---that is, it won't remove notes before the first real insn
2809 or after the last real insn of the block. So if the first insn
2810 has a REG_SAVE_NOTE which would otherwise be emitted before the
2811 insn, it is redundant with the note before the start of the
570a98eb 2812 block, and so we have to take it out. */
2813 if (INSN_P (head))
2814 {
2815 rtx note;
2816
2817 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2818 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2819 {
2820 remove_note (head, note);
2821 note = XEXP (note, 1);
2822 remove_note (head, note);
2823 }
2824 }
2825
2826 /* Remove remaining note insns from the block, save them in
2827 note_list. These notes are restored at the end of
2828 schedule_block (). */
2829 rm_other_notes (head, tail);
2830
2831 target_bb = bb;
2832
2833 current_sched_info->queue_must_finish_empty
2834 = current_nr_blocks > 1 && !flag_schedule_interblock;
2835
2836 schedule_block (b, rgn_n_insns);
2837 sched_rgn_n_insns += sched_n_insns;
2838
2839 /* Update target block boundaries. */
2840 if (head == BLOCK_HEAD (b))
2841 BLOCK_HEAD (b) = current_sched_info->head;
2842 if (tail == BLOCK_END (b))
2843 BLOCK_END (b) = current_sched_info->tail;
2844
2845 /* Clean up. */
2846 if (current_nr_blocks > 1)
2847 {
2848 free (candidate_table);
2849 free (bblst_table);
2850 free (bitlst_table);
2851 }
2852 }
2853
2854 /* Sanity check: verify that all region insns were scheduled. */
2855 if (sched_rgn_n_insns != rgn_n_insns)
2856 abort ();
2857
2858 /* Restore line notes. */
2859 if (write_symbols != NO_DEBUG)
2860 {
2861 for (bb = 0; bb < current_nr_blocks; bb++)
2862 {
2863 rtx head, tail;
2864 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
14052b68 2865 restore_line_notes (head, tail);
79c2ffde 2866 }
2867 }
2868
2869 /* Done with this region. */
2870 free_pending_lists ();
2871
2872 finish_deps_global ();
2873
2874 free (bb_deps);
2875
2876 if (current_nr_blocks > 1)
2877 {
2878 int i;
2879
2880 free (prob);
2881 for (i = 0; i < current_nr_blocks; ++i)
2882 {
2883 free (dom[i]);
2884 free (pot_split[i]);
2885 free (ancestor_edges[i]);
2886 }
2887 free (dom);
2888 free (edge_to_bit);
2889 free (rgn_edges);
2890 free (pot_split);
2891 free (ancestor_edges);
2892 }
2893}
2894
2895/* Indexed by region, holds the number of death notes found in that region.
2896 Used for consistency checks. */
2897static int *deaths_in_region;
2898
2899/* Initialize data structures for region scheduling. */
2900
2901static void
2902init_regions ()
2903{
2904 sbitmap blocks;
2905 int rgn;
2906
2907 nr_regions = 0;
2908 rgn_table = (region *) xmalloc ((n_basic_blocks) * sizeof (region));
2909 rgn_bb_table = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2910 block_to_bb = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2911 containing_rgn = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2912
2913 blocks = sbitmap_alloc (n_basic_blocks);
2914
2915 /* Compute regions for scheduling. */
2916 if (reload_completed
2917 || n_basic_blocks == 1
2918 || !flag_schedule_interblock)
2919 {
2920 find_single_block_region ();
2921 }
2922 else
2923 {
2924 /* Verify that a 'good' control flow graph can be built. */
2925 if (is_cfg_nonregular ())
2926 {
2927 find_single_block_region ();
2928 }
2929 else
2930 {
2931 sbitmap *dom;
2932 struct edge_list *edge_list;
2933
2934 dom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
2935
2936 /* The scheduler runs after flow; therefore, we can't blindly call
2937 back into find_basic_blocks since doing so could invalidate the
2938 info in global_live_at_start.
2939
2940 Consider a block consisting entirely of dead stores; after life
2941 analysis it would be a block of NOTE_INSN_DELETED notes. If
2942 we call find_basic_blocks again, then the block would be removed
2943 entirely and invalidate our register live information.
2944
2945 We could (should?) recompute register live information. Doing
2946 so may even be beneficial. */
2947 edge_list = create_edge_list ();
2948
2949 /* Compute the dominators and post dominators. */
2950 calculate_dominance_info (NULL, dom, CDI_DOMINATORS);
2951
2952 /* build_control_flow will return nonzero if it detects unreachable
2953 blocks or any other irregularity with the cfg which prevents
2954 cross block scheduling. */
2955 if (build_control_flow (edge_list) != 0)
2956 find_single_block_region ();
2957 else
2958 find_rgns (edge_list, dom);
2959
2960 if (sched_verbose >= 3)
2961 debug_regions ();
2962
2963 /* We are done with flow's edge list. */
2964 free_edge_list (edge_list);
2965
2966 /* For now. This will move as more and more of haifa is converted
2967 to using the cfg code in flow.c. */
2968 free (dom);
2969 }
2970 }
2971
2972 deaths_in_region = (int *) xmalloc (sizeof (int) * nr_regions);
2973
2974 /* Remove all death notes from the subroutine. */
2975 for (rgn = 0; rgn < nr_regions; rgn++)
2976 {
2977 int b;
2978
2979 sbitmap_zero (blocks);
2980 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
2981 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
2982
2983 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
2984 }
2985
2986 sbitmap_free (blocks);
2987}
2988
2989/* The one entry point in this file. DUMP_FILE is the dump file for
2990 this pass. */
2991
2992void
2993schedule_insns (dump_file)
2994 FILE *dump_file;
2995{
2996 sbitmap large_region_blocks, blocks;
2997 int rgn;
2998 int any_large_regions;
2999
3000 /* Taking care of this degenerate case makes the rest of
3001 this code simpler. */
3002 if (n_basic_blocks == 0)
3003 return;
3004
3005 nr_inter = 0;
3006 nr_spec = 0;
3007
3008 sched_init (dump_file);
3009
3010 init_regions ();
3011
3012 current_sched_info = &region_sched_info;
3013
3014 /* Schedule every region in the subroutine. */
3015 for (rgn = 0; rgn < nr_regions; rgn++)
3016 schedule_region (rgn);
3017
3018 /* Update life analysis for the subroutine. Do single block regions
3019 first so that we can verify that live_at_start didn't change. Then
6d2f8887 3020 do all other blocks. */
3021 /* ??? There is an outside possibility that update_life_info, or more
3022 to the point propagate_block, could get called with non-zero flags
3023 more than once for one basic block. This would be kinda bad if it
3024 were to happen, since REG_INFO would be accumulated twice for the
3025 block, and we'd have twice the REG_DEAD notes.
3026
3027 I'm fairly certain that this _shouldn't_ happen, since I don't think
3028 that live_at_start should change at region heads. Not sure what the
3029 best way to test for this kind of thing... */
3030
3031 allocate_reg_life_data ();
3032 compute_bb_for_insn (get_max_uid ());
3033
3034 any_large_regions = 0;
3035 large_region_blocks = sbitmap_alloc (n_basic_blocks);
3036 sbitmap_ones (large_region_blocks);
3037
3038 blocks = sbitmap_alloc (n_basic_blocks);
3039
3040 for (rgn = 0; rgn < nr_regions; rgn++)
3041 if (RGN_NR_BLOCKS (rgn) > 1)
3042 any_large_regions = 1;
3043 else
3044 {
3045 sbitmap_zero (blocks);
3046 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3047 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3048
3049 /* Don't update reg info after reload, since that affects
3050 regs_ever_live, which should not change after reload. */
3051 update_life_info (blocks, UPDATE_LIFE_LOCAL,
3052 (reload_completed ? PROP_DEATH_NOTES
3053 : PROP_DEATH_NOTES | PROP_REG_INFO));
3054
3055#ifndef HAVE_conditional_execution
3056 /* ??? REG_DEAD notes only exist for unconditional deaths. We need
3057 a count of the conditional plus unconditional deaths for this to
3058 work out. */
3059 /* In the single block case, the count of registers that died should
3060 not have changed during the schedule. */
3061 if (count_or_remove_death_notes (blocks, 0) != deaths_in_region[rgn])
3062 abort ();
3063#endif
3064 }
3065
3066 if (any_large_regions)
3067 {
3068 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
3069 PROP_DEATH_NOTES | PROP_REG_INFO);
3070 }
3071
3072 /* Reposition the prologue and epilogue notes in case we moved the
3073 prologue/epilogue insns. */
3074 if (reload_completed)
3075 reposition_prologue_and_epilogue_notes (get_insns ());
3076
3077 /* Delete redundant line notes. */
3078 if (write_symbols != NO_DEBUG)
3079 rm_redundant_line_notes ();
3080
3081 if (sched_verbose)
3082 {
3083 if (reload_completed == 0 && flag_schedule_interblock)
3084 {
3085 fprintf (sched_dump,
3086 "\n;; Procedure interblock/speculative motions == %d/%d \n",
3087 nr_inter, nr_spec);
3088 }
3089 else
3090 {
3091 if (nr_inter > 0)
3092 abort ();
3093 }
3094 fprintf (sched_dump, "\n\n");
3095 }
3096
3097 /* Clean up. */
3098 free (rgn_table);
3099 free (rgn_bb_table);
3100 free (block_to_bb);
3101 free (containing_rgn);
3102
3103 sched_finish ();
3104
3105 if (edge_table)
3106 {
3107 free (edge_table);
3108 edge_table = NULL;
3109 }
3110
3111 if (in_edges)
3112 {
3113 free (in_edges);
3114 in_edges = NULL;
3115 }
3116 if (out_edges)
3117 {
3118 free (out_edges);
3119 out_edges = NULL;
3120 }
3121
3122 sbitmap_free (blocks);
3123 sbitmap_free (large_region_blocks);
3124
3125 free (deaths_in_region);
3126}
f56887a7 3127#endif