1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004-2014 Free Software Foundation, Inc.
3 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "tm_p.h"
29 #include "hard-reg-set.h"
30 #include "regs.h"
31 #include "hashtab.h"
32 #include "hash-set.h"
33 #include "vec.h"
34 #include "machmode.h"
35 #include "input.h"
36 #include "function.h"
37 #include "flags.h"
38 #include "insn-config.h"
39 #include "insn-attr.h"
40 #include "except.h"
41 #include "recog.h"
42 #include "sched-int.h"
43 #include "target.h"
44 #include "cfgloop.h"
45 #include "expr.h"
46 #include "params.h"
47 #include "gcov-io.h"
48 #include "ddg.h"
49 #include "tree-pass.h"
50 #include "dbgcnt.h"
51 #include "df.h"
52
53 #ifdef INSN_SCHEDULING
54
55 /* This file contains the implementation of the Swing Modulo Scheduler,
56 described in the following references:
57 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero, and J. Eckhardt.
58 Lifetime-sensitive modulo scheduling in a production environment.
59 IEEE Trans. on Comps., 50(3), March 2001
60 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
61 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
62 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
63
64 The basic structure is:
65 1. Build a data-dependence graph (DDG) for each loop.
66 2. Use the DDG to order the insns of a loop (not in topological order
67 necessarily, but rather) trying to place each insn after all its
68 predecessors _or_ after all its successors.
69 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
70 4. Use the ordering to perform list-scheduling of the loop:
71 1. Set II = MII. We will try to schedule the loop within II cycles.
72 2. Try to schedule the insns one by one according to the ordering.
73 For each insn compute an interval of cycles by considering already-
74 scheduled preds and succs (and associated latencies); try to place
75 the insn in the cycles of this window checking for potential
76 resource conflicts (using the DFA interface).
77 Note: this is different from the cycle-scheduling of schedule_insns;
78 here the insns are not scheduled monotonically top-down (nor bottom-
79 up).
80 3. If we fail to schedule all the insns, bump II and try again, unless
81 II reaches an upper bound MaxII, in which case report failure.
82 5. If we succeeded in scheduling the loop within II cycles, we now
83 generate prolog and epilog, decrease the counter of the loop, and
84 perform modulo variable expansion for live ranges that span more than
85 II cycles (i.e. use register copies to prevent a def from overwriting
86 itself before reaching the use).
87
88 SMS works with countable loops (1) whose control part can be easily
89 decoupled from the rest of the loop and (2) whose loop count can
90 be easily adjusted. This is because we peel a constant number of
91 iterations into a prologue and epilogue for which we want to avoid
92 emitting the control part, and a kernel which is to iterate that
93 constant number of iterations less than the original loop. So the
94 control part should be a set of insns clearly identified and having
95 its own iv, not otherwise used in the loop (at least for now), which
96 initializes a register before the loop to the number of iterations.
97 Currently SMS relies on the do-loop pattern to recognize such loops,
98 where (1) the control part consists of all insns defining and/or
99 using a certain 'count' register and (2) the loop count can be
100 adjusted by modifying this register prior to the loop.
101 TODO: Rely on cfgloop analysis instead. */
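/* As an illustration of step 5 above: with II = 2, a register defined in
   cycle 0 and used only in cycle 5 has a live range spanning three kernel
   stages, so two register copies (reg_moves) are inserted so that each
   in-flight value of the register lives in a register of its own.  */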
102 \f
103 /* This page defines partial-schedule structures and functions for
104 modulo scheduling. */
105
106 typedef struct partial_schedule *partial_schedule_ptr;
107 typedef struct ps_insn *ps_insn_ptr;
108
109 /* The minimum (absolute) cycle that a node of ps was scheduled in. */
110 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
111
112 /* The maximum (absolute) cycle that a node of ps was scheduled in. */
113 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
114
115 /* Perform signed modulo, always returning a non-negative value. */
116 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
117
118 /* The number of different iterations the nodes in ps span, assuming
119 the stage boundaries are placed efficiently. */
120 #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
121 + 1 + ii - 1) / ii)
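/* For example, SMODULO (-1, 4) is 3 (whereas -1 % 4 is -1 in C), and
   CALC_STAGE_COUNT (5, -3, 4) is (5 - (-3) + 1 + 4 - 1) / 4 = 3, i.e. the
   nine cycles -3..5 span three stages of four cycles each.  */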
122 /* The stage count of ps. */
123 #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
124
125 /* A single instruction in the partial schedule. */
126 struct ps_insn
127 {
128 /* Identifies the instruction to be scheduled. Values smaller than
129 the ddg's num_nodes refer directly to ddg nodes. A value of
130 X + num_nodes refers to register move X. */
131 int id;
132
133 /* The (absolute) cycle in which the PS instruction is scheduled.
134 Same as SCHED_TIME (node). */
135 int cycle;
136
137 /* The next/prev PS_INSN in the same row. */
138 ps_insn_ptr next_in_row,
139 prev_in_row;
140
141 };
142
143 /* Information about a register move that has been added to a partial
144 schedule. */
145 struct ps_reg_move_info
146 {
147 /* The source of the move is defined by the ps_insn with id DEF.
148 The destination is used by the ps_insns with the ids in USES. */
149 int def;
150 sbitmap uses;
151
152 /* The original form of USES' instructions used OLD_REG, but they
153 should now use NEW_REG. */
154 rtx old_reg;
155 rtx new_reg;
156
157 /* The number of consecutive stages that the move occupies. */
158 int num_consecutive_stages;
159
160 /* An instruction that sets NEW_REG to the correct value. The first
161 move associated with DEF will have an rhs of OLD_REG; later moves
162 use the result of the previous move. */
163 rtx_insn *insn;
164 };
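/* For instance, if a def needs two moves, the first move's INSN is
   (set (reg new1) (reg old)) and the second move's INSN is
   (set (reg new2) (reg new1)); see schedule_reg_moves.  */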
165
166 typedef struct ps_reg_move_info ps_reg_move_info;
167
168 /* Holds the partial schedule as an array of II rows. Each entry of the
169 array points to a linked list of PS_INSNs, which represents the
170 instructions that are scheduled for that row. */
171 struct partial_schedule
172 {
173 int ii; /* Number of rows in the partial schedule. */
174 int history; /* Threshold for conflict checking using DFA. */
175
176 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
177 ps_insn_ptr *rows;
178
179 /* All the moves added for this partial schedule. Index X has
180 a ps_insn id of X + g->num_nodes. */
181 vec<ps_reg_move_info> reg_moves;
182
183 /* rows_length[i] holds the number of instructions in the row.
184 It is used only (as an optimization) to back off quickly from
185 trying to schedule a node in a full row; that is, to avoid running
186 through futile DFA state transitions. */
187 int *rows_length;
188
189 /* The earliest absolute cycle of an insn in the partial schedule. */
190 int min_cycle;
191
192 /* The latest absolute cycle of an insn in the partial schedule. */
193 int max_cycle;
194
195 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
196
197 int stage_count; /* The stage count of the partial schedule. */
198 };
199
200
201 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
202 static void free_partial_schedule (partial_schedule_ptr);
203 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
204 void print_partial_schedule (partial_schedule_ptr, FILE *);
205 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
206 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
207 int, int, sbitmap, sbitmap);
208 static void rotate_partial_schedule (partial_schedule_ptr, int);
209 void set_row_column_for_ps (partial_schedule_ptr);
210 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
211 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
212
213 \f
214 /* This page defines constants and structures for the modulo scheduling
215 driver. */
216
217 static int sms_order_nodes (ddg_ptr, int, int *, int *);
218 static void set_node_sched_params (ddg_ptr);
219 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
220 static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
221 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
222 rtx, rtx);
223 static int calculate_stage_count (partial_schedule_ptr, int);
224 static void calculate_must_precede_follow (ddg_node_ptr, int, int,
225 int, int, sbitmap, sbitmap, sbitmap);
226 static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
227 sbitmap, int, int *, int *, int *);
228 static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
229 sbitmap, int *, sbitmap, sbitmap);
230 static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
231
232 #define NODE_ASAP(node) ((node)->aux.count)
233
234 #define SCHED_PARAMS(x) (&node_sched_param_vec[x])
235 #define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
236 #define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
237 #define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
238 #define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
239
240 /* The scheduling parameters held for each node. */
241 typedef struct node_sched_params
242 {
243 int time; /* The absolute scheduling cycle. */
244
245 int row; /* Holds time % ii. */
246 int stage; /* Holds time / ii. */
247
248 /* The column of a node inside the ps. If nodes u, v are on the same row,
249 u will precede v if column (u) < column (v). */
250 int column;
251 } *node_sched_params_ptr;
252
253 typedef struct node_sched_params node_sched_params;
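/* For example, with ii = 4 and a schedule normalized to start at cycle 0,
   a node scheduled at time 9 has row 1 and stage 2; see
   update_node_sched_params for the exact computation when the minimum
   cycle is negative.  */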
254 \f
255 /* The following two functions are copied from the current scheduler
256 code in order to use sched_analyze() for computing the dependencies.
257 They are used when initializing the sched_info structure. */
258 static const char *
259 sms_print_insn (const rtx_insn *insn, int aligned ATTRIBUTE_UNUSED)
260 {
261 static char tmp[80];
262
263 sprintf (tmp, "i%4d", INSN_UID (insn));
264 return tmp;
265 }
266
267 static void
268 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
269 regset used ATTRIBUTE_UNUSED)
270 {
271 }
272
273 static struct common_sched_info_def sms_common_sched_info;
274
275 static struct sched_deps_info_def sms_sched_deps_info =
276 {
277 compute_jump_reg_dependencies,
278 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
279 NULL,
280 0, 0, 0
281 };
282
283 static struct haifa_sched_info sms_sched_info =
284 {
285 NULL,
286 NULL,
287 NULL,
288 NULL,
289 NULL,
290 sms_print_insn,
291 NULL,
292 NULL, /* insn_finishes_block_p */
293 NULL, NULL,
294 NULL, NULL,
295 0, 0,
296
297 NULL, NULL, NULL, NULL,
298 NULL, NULL,
299 0
300 };
301
302 /* Partial schedule instruction ID in PS is a register move. Return
303 information about it. */
304 static struct ps_reg_move_info *
305 ps_reg_move (partial_schedule_ptr ps, int id)
306 {
307 gcc_checking_assert (id >= ps->g->num_nodes);
308 return &ps->reg_moves[id - ps->g->num_nodes];
309 }
310
311 /* Return the rtl instruction that is being scheduled by partial schedule
312 instruction ID, which belongs to schedule PS. */
313 static rtx_insn *
314 ps_rtl_insn (partial_schedule_ptr ps, int id)
315 {
316 if (id < ps->g->num_nodes)
317 return ps->g->nodes[id].insn;
318 else
319 return ps_reg_move (ps, id)->insn;
320 }
321
322 /* Partial schedule instruction ID, which belongs to PS, occurred in
323 the original (unscheduled) loop. Return the first instruction
324 in the loop that was associated with ps_rtl_insn (PS, ID).
325 If the instruction had some notes before it, this is the first
326 of those notes. */
327 static rtx_insn *
328 ps_first_note (partial_schedule_ptr ps, int id)
329 {
330 gcc_assert (id < ps->g->num_nodes);
331 return ps->g->nodes[id].first_note;
332 }
333
334 /* Return the number of consecutive stages that are occupied by
335 partial schedule instruction ID in PS. */
336 static int
337 ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
338 {
339 if (id < ps->g->num_nodes)
340 return 1;
341 else
342 return ps_reg_move (ps, id)->num_consecutive_stages;
343 }
344
345 /* Given HEAD and TAIL, which are the first and last insns in a loop,
346 return the register which controls the loop. Return zero if it has
347 more than one occurrence in the loop besides the control part, or if the
348 do-loop pattern is not of the form we expect. */
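/* For reference (simplified; the exact shape is target-dependent and is
   checked by doloop_condition_get), a typical doloop_end pattern is:

     (parallel [(set (pc) (if_then_else (ne (reg) (const_int 1))
                                        (label_ref ...)
                                        (pc)))
                (set (reg) (plus (reg) (const_int -1)))])  */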
349 static rtx
350 doloop_register_get (rtx_insn *head ATTRIBUTE_UNUSED, rtx_insn *tail ATTRIBUTE_UNUSED)
351 {
352 #ifdef HAVE_doloop_end
353 rtx reg, condition;
354 rtx_insn *insn, *first_insn_not_to_check;
355
356 if (!JUMP_P (tail))
357 return NULL_RTX;
358
359 /* TODO: Free SMS's dependence on doloop_condition_get. */
360 condition = doloop_condition_get (tail);
361 if (! condition)
362 return NULL_RTX;
363
364 if (REG_P (XEXP (condition, 0)))
365 reg = XEXP (condition, 0);
366 else if (GET_CODE (XEXP (condition, 0)) == PLUS
367 && REG_P (XEXP (XEXP (condition, 0), 0)))
368 reg = XEXP (XEXP (condition, 0), 0);
369 else
370 gcc_unreachable ();
371
372 /* Check that the COUNT_REG has no other occurrences in the loop
373 until the decrement. We assume the control part consists of
374 either a single (parallel) branch-on-count or a (non-parallel)
375 branch immediately preceded by a single (decrement) insn. */
376 first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
377 : prev_nondebug_insn (tail));
378
379 for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
380 if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
381 {
382 if (dump_file)
383 {
384 fprintf (dump_file, "SMS count_reg found ");
385 print_rtl_single (dump_file, reg);
386 fprintf (dump_file, " outside control in insn:\n");
387 print_rtl_single (dump_file, insn);
388 }
389
390 return NULL_RTX;
391 }
392
393 return reg;
394 #else
395 return NULL_RTX;
396 #endif
397 }
398
399 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
400 that the number of iterations is a compile-time constant. If so,
401 return the rtx_insn that sets COUNT_REG to a constant, and set COUNT to
402 this constant. Otherwise return 0. */
403 static rtx_insn *
404 const_iteration_count (rtx count_reg, basic_block pre_header,
405 int64_t * count)
406 {
407 rtx_insn *insn;
408 rtx_insn *head, *tail;
409
410 if (! pre_header)
411 return NULL;
412
413 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
414
415 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
416 if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
417 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
418 {
419 rtx pat = single_set (insn);
420
421 if (CONST_INT_P (SET_SRC (pat)))
422 {
423 *count = INTVAL (SET_SRC (pat));
424 return insn;
425 }
426
427 return NULL;
428 }
429
430 return NULL;
431 }
432
433 /* A very simple resource-based lower bound on the initiation interval.
434 ??? Improve the accuracy of this bound by considering the
435 utilization of various units. */
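/* For example, in the absence of a target hook, a DDG with 12 non-debug
   nodes on a 4-issue target gives a resource MII of 12 / 4 = 3 cycles.  */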
436 static int
437 res_MII (ddg_ptr g)
438 {
439 if (targetm.sched.sms_res_mii)
440 return targetm.sched.sms_res_mii (g);
441
442 return ((g->num_nodes - g->num_debug) / issue_rate);
443 }
444
445
446 /* A vector that contains the sched data for each ps_insn. */
447 static vec<node_sched_params> node_sched_param_vec;
448
449 /* Allocate sched_params for each node and initialize it. */
450 static void
451 set_node_sched_params (ddg_ptr g)
452 {
453 node_sched_param_vec.truncate (0);
454 node_sched_param_vec.safe_grow_cleared (g->num_nodes);
455 }
456
457 /* Make sure that node_sched_param_vec has an entry for every move in PS. */
458 static void
459 extend_node_sched_params (partial_schedule_ptr ps)
460 {
461 node_sched_param_vec.safe_grow_cleared (ps->g->num_nodes
462 + ps->reg_moves.length ());
463 }
464
465 /* Update the sched_params (time, row and stage) for node U using the II,
466 the CYCLE of U and MIN_CYCLE.
467 We're not simply taking the following
468 SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
469 because the stages may not be aligned on cycle 0. */
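/* A worked example: with ii = 4 and min_cycle = -6, sc_until_cycle_zero is
   CALC_STAGE_COUNT (-1, -6, 4) = 2; a node at cycle -5 then gets stage
   2 - CALC_STAGE_COUNT (-1, -5, 4) = 0, while a node at cycle 3 gets stage
   2 + CALC_STAGE_COUNT (3, 0, 4) - 1 = 2.  */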
470 static void
471 update_node_sched_params (int u, int ii, int cycle, int min_cycle)
472 {
473 int sc_until_cycle_zero;
474 int stage;
475
476 SCHED_TIME (u) = cycle;
477 SCHED_ROW (u) = SMODULO (cycle, ii);
478
479 /* The calculation of the stage count is done by adding the number
480 of stages before cycle zero and after cycle zero. */
481 sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
482
483 if (SCHED_TIME (u) < 0)
484 {
485 stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
486 SCHED_STAGE (u) = sc_until_cycle_zero - stage;
487 }
488 else
489 {
490 stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
491 SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
492 }
493 }
494
495 static void
496 print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
497 {
498 int i;
499
500 if (! file)
501 return;
502 for (i = 0; i < num_nodes; i++)
503 {
504 node_sched_params_ptr nsp = SCHED_PARAMS (i);
505
506 fprintf (file, "Node = %d; INSN = %d\n", i,
507 INSN_UID (ps_rtl_insn (ps, i)));
508 fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
509 fprintf (file, " time = %d:\n", nsp->time);
510 fprintf (file, " stage = %d:\n", nsp->stage);
511 }
512 }
513
514 /* Set SCHED_COLUMN for each instruction in row ROW of PS. */
515 static void
516 set_columns_for_row (partial_schedule_ptr ps, int row)
517 {
518 ps_insn_ptr cur_insn;
519 int column;
520
521 column = 0;
522 for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
523 SCHED_COLUMN (cur_insn->id) = column++;
524 }
525
526 /* Set SCHED_COLUMN for each instruction in PS. */
527 static void
528 set_columns_for_ps (partial_schedule_ptr ps)
529 {
530 int row;
531
532 for (row = 0; row < ps->ii; row++)
533 set_columns_for_row (ps, row);
534 }
535
536 /* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
537 Its single predecessor has already been scheduled, as have its
538 ddg node successors. (The move may also have another move as its
539 successor, in which case that successor will be scheduled later.)
540
541 The move is part of a chain that satisfies register dependencies
542 between a producing ddg node and various consuming ddg nodes.
543 If some of these dependencies have a distance of 1 (meaning that
544 the use is upward-exposed) then DISTANCE1_USES is nonnull and
545 contains the set of uses with distance-1 dependencies.
546 DISTANCE1_USES is null otherwise.
547
548 MUST_FOLLOW is a scratch bitmap that is big enough to hold
549 all current ps_insn ids.
550
551 Return true on success. */
552 static bool
553 schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
554 sbitmap distance1_uses, sbitmap must_follow)
555 {
556 unsigned int u;
557 int this_time, this_distance, this_start, this_end, this_latency;
558 int start, end, c, ii;
559 sbitmap_iterator sbi;
560 ps_reg_move_info *move;
561 rtx_insn *this_insn;
562 ps_insn_ptr psi;
563
564 move = ps_reg_move (ps, i_reg_move);
565 ii = ps->ii;
566 if (dump_file)
567 {
568 fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
569 ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
570 PS_MIN_CYCLE (ps));
571 print_rtl_single (dump_file, move->insn);
572 fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
573 fprintf (dump_file, "=========== =========== =====\n");
574 }
575
576 start = INT_MIN;
577 end = INT_MAX;
578
579 /* For dependencies of distance 1 between a producer ddg node A
580 and consumer ddg node B, we have a chain of dependencies:
581
582 A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
583
584 where Mi is the ith move. For dependencies of distance 0 between
585 a producer ddg node A and consumer ddg node C, we have a chain of
586 dependencies:
587
588 A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
589
590 where Mi' occupies the same position as Mi but occurs a stage later.
591 We can only schedule each move once, so if we have both types of
592 chain, we model the second as:
593
594 A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
595
596 First handle the dependencies between the previously-scheduled
597 predecessor and the move. */
598 this_insn = ps_rtl_insn (ps, move->def);
599 this_latency = insn_latency (this_insn, move->insn);
600 this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
601 this_time = SCHED_TIME (move->def) - this_distance * ii;
602 this_start = this_time + this_latency;
603 this_end = this_time + ii;
604 if (dump_file)
605 fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
606 this_start, this_end, SCHED_TIME (move->def),
607 INSN_UID (this_insn), this_latency, this_distance,
608 INSN_UID (move->insn));
609
610 if (start < this_start)
611 start = this_start;
612 if (end > this_end)
613 end = this_end;
614
615 /* Handle the dependencies between the move and previously-scheduled
616 successors. */
617 EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, u, sbi)
618 {
619 this_insn = ps_rtl_insn (ps, u);
620 this_latency = insn_latency (move->insn, this_insn);
621 if (distance1_uses && !bitmap_bit_p (distance1_uses, u))
622 this_distance = -1;
623 else
624 this_distance = 0;
625 this_time = SCHED_TIME (u) + this_distance * ii;
626 this_start = this_time - ii;
627 this_end = this_time - this_latency;
628 if (dump_file)
629 fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
630 this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
631 this_latency, this_distance, INSN_UID (this_insn));
632
633 if (start < this_start)
634 start = this_start;
635 if (end > this_end)
636 end = this_end;
637 }
638
639 if (dump_file)
640 {
641 fprintf (dump_file, "----------- ----------- -----\n");
642 fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
643 }
644
645 bitmap_clear (must_follow);
646 bitmap_set_bit (must_follow, move->def);
647
648 start = MAX (start, end - (ii - 1));
649 for (c = end; c >= start; c--)
650 {
651 psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
652 move->uses, must_follow);
653 if (psi)
654 {
655 update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
656 if (dump_file)
657 fprintf (dump_file, "\nScheduled register move INSN %d at"
658 " time %d, row %d\n\n", INSN_UID (move->insn), c,
659 SCHED_ROW (i_reg_move));
660 return true;
661 }
662 }
663
664 if (dump_file)
665 fprintf (dump_file, "\nNo available slot\n\n");
666
667 return false;
668 }
669
670 /*
671 Breaking intra-loop register anti-dependences:
672 Each intra-loop register anti-dependence implies a cross-iteration true
673 dependence of distance 1. Therefore, we can remove such false dependencies
674 and figure out if the partial schedule broke them by checking if (for a
675 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
676 if so generate a register move. The number of such moves is equal to:
677 nreg_moves = (SCHED_TIME (use) - SCHED_TIME (def)) / ii + 1 - b,
678 where b is 0 if the partial schedule broke the dependence
679 and 1 if not.
680 */
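/* For example (the distance-0 case, as computed in schedule_reg_moves
   below): with ii = 4, a def scheduled at cycle 1 and a true-dependent use
   scheduled at cycle 6 give (6 - 1) / 4 = 1, i.e. one register copy is
   needed because the use reads the value one stage after the def.  */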
681 static bool
682 schedule_reg_moves (partial_schedule_ptr ps)
683 {
684 ddg_ptr g = ps->g;
685 int ii = ps->ii;
686 int i;
687
688 for (i = 0; i < g->num_nodes; i++)
689 {
690 ddg_node_ptr u = &g->nodes[i];
691 ddg_edge_ptr e;
692 int nreg_moves = 0, i_reg_move;
693 rtx prev_reg, old_reg;
694 int first_move;
695 int distances[2];
696 sbitmap must_follow;
697 sbitmap distance1_uses;
698 rtx set = single_set (u->insn);
699
700 /* Skip instructions that do not set a register. */
701 if ((set && !REG_P (SET_DEST (set))))
702 continue;
703
704 /* Compute the number of reg_moves needed for u, by looking at the live
705 ranges that start at u (excluding self-loops). */
706 distances[0] = distances[1] = false;
707 for (e = u->out; e; e = e->next_out)
708 if (e->type == TRUE_DEP && e->dest != e->src)
709 {
710 int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
711 - SCHED_TIME (e->src->cuid)) / ii;
712
713 if (e->distance == 1)
714 nreg_moves4e = (SCHED_TIME (e->dest->cuid)
715 - SCHED_TIME (e->src->cuid) + ii) / ii;
716
717 /* If dest precedes src in the schedule of the kernel, then dest
718 will read before src writes and we can save one reg_copy. */
719 if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
720 && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
721 nreg_moves4e--;
722
723 if (nreg_moves4e >= 1)
724 {
725 /* !single_set instructions are not supported yet and
726 thus we do not expect to encounter them in the loop
727 except in the doloop part. For the latter case
728 we assume no regmoves are generated as the doloop
729 instructions are tied to the branch with an edge. */
730 gcc_assert (set);
731 /* If the instruction contains an auto-inc register then
732 validate that the regmove is being generated for the
733 target register rather than the inc'ed register. */
734 gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
735 }
736
737 if (nreg_moves4e)
738 {
739 gcc_assert (e->distance < 2);
740 distances[e->distance] = true;
741 }
742 nreg_moves = MAX (nreg_moves, nreg_moves4e);
743 }
744
745 if (nreg_moves == 0)
746 continue;
747
748 /* Create NREG_MOVES register moves. */
749 first_move = ps->reg_moves.length ();
750 ps->reg_moves.safe_grow_cleared (first_move + nreg_moves);
751 extend_node_sched_params (ps);
752
753 /* Record the moves associated with this node. */
754 first_move += ps->g->num_nodes;
755
756 /* Generate each move. */
757 old_reg = prev_reg = SET_DEST (single_set (u->insn));
758 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
759 {
760 ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
761
762 move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
763 move->uses = sbitmap_alloc (first_move + nreg_moves);
764 move->old_reg = old_reg;
765 move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
766 move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
767 move->insn = as_a <rtx_insn *> (gen_move_insn (move->new_reg,
768 copy_rtx (prev_reg)));
769 bitmap_clear (move->uses);
770
771 prev_reg = move->new_reg;
772 }
773
774 distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
775
776 if (distance1_uses)
777 bitmap_clear (distance1_uses);
778
779 /* Every use of the register defined by node may require a different
780 copy of this register, depending on the time the use is scheduled.
781 Record which uses require which move results. */
782 for (e = u->out; e; e = e->next_out)
783 if (e->type == TRUE_DEP && e->dest != e->src)
784 {
785 int dest_copy = (SCHED_TIME (e->dest->cuid)
786 - SCHED_TIME (e->src->cuid)) / ii;
787
788 if (e->distance == 1)
789 dest_copy = (SCHED_TIME (e->dest->cuid)
790 - SCHED_TIME (e->src->cuid) + ii) / ii;
791
792 if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
793 && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
794 dest_copy--;
795
796 if (dest_copy)
797 {
798 ps_reg_move_info *move;
799
800 move = ps_reg_move (ps, first_move + dest_copy - 1);
801 bitmap_set_bit (move->uses, e->dest->cuid);
802 if (e->distance == 1)
803 bitmap_set_bit (distance1_uses, e->dest->cuid);
804 }
805 }
806
807 must_follow = sbitmap_alloc (first_move + nreg_moves);
808 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
809 if (!schedule_reg_move (ps, first_move + i_reg_move,
810 distance1_uses, must_follow))
811 break;
812 sbitmap_free (must_follow);
813 if (distance1_uses)
814 sbitmap_free (distance1_uses);
815 if (i_reg_move < nreg_moves)
816 return false;
817 }
818 return true;
819 }
820
821 /* Emit the moves associated with PS. Apply the substitutions
822 associated with them. */
823 static void
824 apply_reg_moves (partial_schedule_ptr ps)
825 {
826 ps_reg_move_info *move;
827 int i;
828
829 FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
830 {
831 unsigned int i_use;
832 sbitmap_iterator sbi;
833
834 EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, i_use, sbi)
835 {
836 replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
837 df_insn_rescan (ps->g->nodes[i_use].insn);
838 }
839 }
840 }
841
842 /* Shift the SCHED_TIMEs of all nodes down by AMOUNT. Set the values of
843 SCHED_ROW and SCHED_STAGE. Instructions scheduled on cycle AMOUNT
844 will move to cycle zero. */
845 static void
846 reset_sched_times (partial_schedule_ptr ps, int amount)
847 {
848 int row;
849 int ii = ps->ii;
850 ps_insn_ptr crr_insn;
851
852 for (row = 0; row < ii; row++)
853 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
854 {
855 int u = crr_insn->id;
856 int normalized_time = SCHED_TIME (u) - amount;
857 int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
858
859 if (dump_file)
860 {
861 /* Print the scheduling times after the rotation. */
862 rtx_insn *insn = ps_rtl_insn (ps, u);
863
864 fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
865 "crr_insn->cycle=%d, min_cycle=%d", u,
866 INSN_UID (insn), normalized_time, new_min_cycle);
867 if (JUMP_P (insn))
868 fprintf (dump_file, " (branch)");
869 fprintf (dump_file, "\n");
870 }
871
872 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
873 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
874
875 crr_insn->cycle = normalized_time;
876 update_node_sched_params (u, ii, normalized_time, new_min_cycle);
877 }
878 }
879
880 /* Permute the insns according to their order in PS, from row 0 to
881 row ii-1, and position them right before LAST. This schedules
882 the insns of the loop kernel. */
883 static void
884 permute_partial_schedule (partial_schedule_ptr ps, rtx_insn *last)
885 {
886 int ii = ps->ii;
887 int row;
888 ps_insn_ptr ps_ij;
889
890 for (row = 0; row < ii ; row++)
891 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
892 {
893 rtx_insn *insn = ps_rtl_insn (ps, ps_ij->id);
894
895 if (PREV_INSN (last) != insn)
896 {
897 if (ps_ij->id < ps->g->num_nodes)
898 reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
899 PREV_INSN (last));
900 else
901 add_insn_before (insn, last, NULL);
902 }
903 }
904 }
905
906 /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
907 respectively, but only if cycle C falls on a border of the scheduling
908 window marked by the START and END cycles. STEP is the
909 direction of the window. */
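/* For instance, with step == 1, start == 10 and end == 14, only
   MUST_PRECEDE applies at c == 10, only MUST_FOLLOW applies at c == 13
   (the last cycle tried, end - step), and neither applies in between.  */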
910 static inline void
911 set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
912 sbitmap *tmp_precede, sbitmap must_precede, int c,
913 int start, int end, int step)
914 {
915 *tmp_precede = NULL;
916 *tmp_follow = NULL;
917
918 if (c == start)
919 {
920 if (step == 1)
921 *tmp_precede = must_precede;
922 else /* step == -1. */
923 *tmp_follow = must_follow;
924 }
925 if (c == end - step)
926 {
927 if (step == 1)
928 *tmp_follow = must_follow;
929 else /* step == -1. */
930 *tmp_precede = must_precede;
931 }
932
933 }
934
935 /* Return True if the branch can be moved to row ii-1 while
936 normalizing the partial schedule PS to start from cycle zero and thus
937 optimize the SC. Otherwise return False. */
938 static bool
939 optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
940 {
941 int amount = PS_MIN_CYCLE (ps);
942 sbitmap sched_nodes = sbitmap_alloc (g->num_nodes);
943 int start, end, step;
944 int ii = ps->ii;
945 bool ok = false;
946 int stage_count, stage_count_curr;
947
948 /* Compare the SC after normalization and SC after bringing the branch
949 to row ii-1. If they are equal just bail out. */
950 stage_count = calculate_stage_count (ps, amount);
951 stage_count_curr =
952 calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
953
954 if (stage_count == stage_count_curr)
955 {
956 if (dump_file)
957 fprintf (dump_file, "SMS SC already optimized.\n");
958
959 ok = false;
960 goto clear;
961 }
962
963 if (dump_file)
964 {
965 fprintf (dump_file, "SMS Trying to optimize branch location\n");
966 fprintf (dump_file, "SMS partial schedule before trial:\n");
967 print_partial_schedule (ps, dump_file);
968 }
969
970 /* First, normalize the partial scheduling. */
971 reset_sched_times (ps, amount);
972 rotate_partial_schedule (ps, amount);
973 if (dump_file)
974 {
975 fprintf (dump_file,
976 "SMS partial schedule after normalization (ii, %d, SC %d):\n",
977 ii, stage_count);
978 print_partial_schedule (ps, dump_file);
979 }
980
981 if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
982 {
983 ok = true;
984 goto clear;
985 }
986
987 bitmap_ones (sched_nodes);
988
989 /* Calculate the new placement of the branch. It should be in row
990 ii-1 and fall into its scheduling window. */
991 if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
992 &step, &end) == 0)
993 {
994 bool success;
995 ps_insn_ptr next_ps_i;
996 int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
997 int row = SMODULO (branch_cycle, ps->ii);
998 int num_splits = 0;
999 sbitmap must_precede, must_follow, tmp_precede, tmp_follow;
1000 int c;
1001
1002 if (dump_file)
1003 fprintf (dump_file, "\nTrying to schedule node %d "
1004 "INSN = %d in (%d .. %d) step %d\n",
1005 g->closing_branch->cuid,
1006 (INSN_UID (g->closing_branch->insn)), start, end, step);
1007
1008 gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
1009 if (step == 1)
1010 {
1011 c = start + ii - SMODULO (start, ii) - 1;
1012 gcc_assert (c >= start);
1013 if (c >= end)
1014 {
1015 ok = false;
1016 if (dump_file)
1017 fprintf (dump_file,
1018 "SMS failed to schedule branch at cycle: %d\n", c);
1019 goto clear;
1020 }
1021 }
1022 else
1023 {
1024 c = start - SMODULO (start, ii) - 1;
1025 gcc_assert (c <= start);
1026
1027 if (c <= end)
1028 {
1029 if (dump_file)
1030 fprintf (dump_file,
1031 "SMS failed to schedule branch at cycle: %d\n", c);
1032 ok = false;
1033 goto clear;
1034 }
1035 }
1036
1037 must_precede = sbitmap_alloc (g->num_nodes);
1038 must_follow = sbitmap_alloc (g->num_nodes);
1039
1040 /* Try to schedule the branch in its new cycle. */
1041 calculate_must_precede_follow (g->closing_branch, start, end,
1042 step, ii, sched_nodes,
1043 must_precede, must_follow);
1044
1045 set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1046 must_precede, c, start, end, step);
1047
1048 /* Find the element in the partial schedule related to the closing
1049 branch so we can remove it from its current cycle. */
1050 for (next_ps_i = ps->rows[row];
1051 next_ps_i; next_ps_i = next_ps_i->next_in_row)
1052 if (next_ps_i->id == g->closing_branch->cuid)
1053 break;
1054
1055 remove_node_from_ps (ps, next_ps_i);
1056 success =
1057 try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
1058 sched_nodes, &num_splits,
1059 tmp_precede, tmp_follow);
1060 gcc_assert (num_splits == 0);
1061 if (!success)
1062 {
1063 if (dump_file)
1064 fprintf (dump_file,
1065 "SMS failed to schedule branch at cycle: %d, "
1066 "bringing it back to cycle %d\n", c, branch_cycle);
1067
1068 /* The branch could not be placed in row ii - 1.
1069 Put it back in its original place in the partial
1070 schedule. */
1071 set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1072 must_precede, branch_cycle, start, end,
1073 step);
1074 success =
1075 try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
1076 branch_cycle, sched_nodes,
1077 &num_splits, tmp_precede,
1078 tmp_follow);
1079 gcc_assert (success && (num_splits == 0));
1080 ok = false;
1081 }
1082 else
1083 {
1084 /* The branch is placed in row ii - 1. */
1085 if (dump_file)
1086 fprintf (dump_file,
1087 "SMS success in moving branch to cycle %d\n", c);
1088
1089 update_node_sched_params (g->closing_branch->cuid, ii, c,
1090 PS_MIN_CYCLE (ps));
1091 ok = true;
1092 }
1093
1094 free (must_precede);
1095 free (must_follow);
1096 }
1097
1098 clear:
1099 free (sched_nodes);
1100 return ok;
1101 }
1102
1103 static void
1104 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
1105 int to_stage, rtx count_reg)
1106 {
1107 int row;
1108 ps_insn_ptr ps_ij;
1109
1110 for (row = 0; row < ps->ii; row++)
1111 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
1112 {
1113 int u = ps_ij->id;
1114 int first_u, last_u;
1115 rtx_insn *u_insn;
1116
1117 /* Do not duplicate any insn which refers to count_reg as it
1118 belongs to the control part.
1119 The closing branch is scheduled as well and thus should
1120 be ignored.
1121 TODO: This should be done by analyzing the control part of
1122 the loop. */
1123 u_insn = ps_rtl_insn (ps, u);
1124 if (reg_mentioned_p (count_reg, u_insn)
1125 || JUMP_P (u_insn))
1126 continue;
1127
1128 first_u = SCHED_STAGE (u);
1129 last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
1130 if (from_stage <= last_u && to_stage >= first_u)
1131 {
1132 if (u < ps->g->num_nodes)
1133 duplicate_insn_chain (ps_first_note (ps, u), u_insn);
1134 else
1135 emit_insn (copy_rtx (PATTERN (u_insn)));
1136 }
1137 }
1138 }
1139
1140
1141 /* Generate the instructions (including reg_moves) for prolog & epilog. */
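/* For example, with a stage count of 3 (last_stage == 2), the prolog
   executes stages {0} and {0,1} of the first two iterations, the epilog
   executes stages {1,2} and {2} of the last two, and the kernel's
   iteration count is reduced by last_stage.  */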
1142 static void
1143 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
1144 rtx count_reg, rtx count_init)
1145 {
1146 int i;
1147 int last_stage = PS_STAGE_COUNT (ps) - 1;
1148 edge e;
1149
1150 /* Generate the prolog, inserting its insns on the loop-entry edge. */
1151 start_sequence ();
1152
1153 if (!count_init)
1154 {
1155 /* Generate instructions at the beginning of the prolog to
1156 adjust the loop count by STAGE_COUNT. If loop count is constant
1157 (count_init), this constant is adjusted by STAGE_COUNT in
1158 generate_prolog_epilog function. */
1159 rtx sub_reg = NULL_RTX;
1160
1161 sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
1162 gen_int_mode (last_stage,
1163 GET_MODE (count_reg)),
1164 count_reg, 1, OPTAB_DIRECT);
1165 gcc_assert (REG_P (sub_reg));
1166 if (REGNO (sub_reg) != REGNO (count_reg))
1167 emit_move_insn (count_reg, sub_reg);
1168 }
1169
1170 for (i = 0; i < last_stage; i++)
1171 duplicate_insns_of_cycles (ps, 0, i, count_reg);
1172
1173 /* Put the prolog on the entry edge. */
1174 e = loop_preheader_edge (loop);
1175 split_edge_and_insert (e, get_insns ());
1176 if (!flag_resched_modulo_sched)
1177 e->dest->flags |= BB_DISABLE_SCHEDULE;
1178
1179 end_sequence ();
1180
1181 /* Generate the epilog, inserting its insns on the loop-exit edge. */
1182 start_sequence ();
1183
1184 for (i = 0; i < last_stage; i++)
1185 duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
1186
1187 /* Put the epilogue on the exit edge. */
1188 gcc_assert (single_exit (loop));
1189 e = single_exit (loop);
1190 split_edge_and_insert (e, get_insns ());
1191 if (!flag_resched_modulo_sched)
1192 e->dest->flags |= BB_DISABLE_SCHEDULE;
1193
1194 end_sequence ();
1195 }
1196
1197 /* Mark LOOP as software pipelined so the later
1198 scheduling passes don't touch it. */
1199 static void
1200 mark_loop_unsched (struct loop *loop)
1201 {
1202 unsigned i;
1203 basic_block *bbs = get_loop_body (loop);
1204
1205 for (i = 0; i < loop->num_nodes; i++)
1206 bbs[i]->flags |= BB_DISABLE_SCHEDULE;
1207
1208 free (bbs);
1209 }
1210
1211 /* Return true if all the BBs of the loop are empty except the
1212 loop header. */
1213 static bool
1214 loop_single_full_bb_p (struct loop *loop)
1215 {
1216 unsigned i;
1217 basic_block *bbs = get_loop_body (loop);
1218
1219 for (i = 0; i < loop->num_nodes ; i++)
1220 {
1221 rtx_insn *head, *tail;
1222 bool empty_bb = true;
1223
1224 if (bbs[i] == loop->header)
1225 continue;
1226
1227 /* Make sure that basic blocks other than the header
1228 have only notes, labels or jumps. */
1229 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
1230 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
1231 {
1232 if (NOTE_P (head) || LABEL_P (head)
1233 || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
1234 continue;
1235 empty_bb = false;
1236 break;
1237 }
1238
1239 if (! empty_bb)
1240 {
1241 free (bbs);
1242 return false;
1243 }
1244 }
1245 free (bbs);
1246 return true;
1247 }
1248
1249 /* Dump file:line from INSN's location info to dump_file. */
1250
1251 static void
1252 dump_insn_location (rtx_insn *insn)
1253 {
1254 if (dump_file && INSN_HAS_LOCATION (insn))
1255 {
1256 expanded_location xloc = insn_location (insn);
1257 fprintf (dump_file, " %s:%i", xloc.file, xloc.line);
1258 }
1259 }
1260
1261 /* A simple loop from SMS point of view; it is a loop that is composed of
1262 either a single basic block or two BBs - a header and a latch. */
1263 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
1264 && (EDGE_COUNT (loop->latch->preds) == 1) \
1265 && (EDGE_COUNT (loop->latch->succs) == 1))
1266
1267 /* Return true if the loop is in its canonical form and false if not.
1268 i.e. it is SIMPLE_SMS_LOOP_P, has one preheader block, and a single exit. */
1269 static bool
1270 loop_canon_p (struct loop *loop)
1271 {
1272
1273 if (loop->inner || !loop_outer (loop))
1274 {
1275 if (dump_file)
1276 fprintf (dump_file, "SMS loop inner or !loop_outer\n");
1277 return false;
1278 }
1279
1280 if (!single_exit (loop))
1281 {
1282 if (dump_file)
1283 {
1284 rtx_insn *insn = BB_END (loop->header);
1285
1286 fprintf (dump_file, "SMS loop many exits");
1287 dump_insn_location (insn);
1288 fprintf (dump_file, "\n");
1289 }
1290 return false;
1291 }
1292
1293 if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
1294 {
1295 if (dump_file)
1296 {
1297 rtx_insn *insn = BB_END (loop->header);
1298
1299 fprintf (dump_file, "SMS loop many BBs.");
1300 dump_insn_location (insn);
1301 fprintf (dump_file, "\n");
1302 }
1303 return false;
1304 }
1305
1306 return true;
1307 }
1308
1309 /* If there is more than one entry to the loop,
1310 make it one by splitting the first entry edge and
1311 redirecting the others to the new BB. */
1312 static void
1313 canon_loop (struct loop *loop)
1314 {
1315 edge e;
1316 edge_iterator i;
1317
1318 /* Avoid annoying special cases of edges going to exit
1319 block. */
1320 FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1321 if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
1322 split_edge (e);
1323
1324 if (loop->latch == loop->header
1325 || EDGE_COUNT (loop->latch->succs) > 1)
1326 {
1327 FOR_EACH_EDGE (e, i, loop->header->preds)
1328 if (e->src == loop->latch)
1329 break;
1330 split_edge (e);
1331 }
1332 }
1333
1334 /* Set up the scheduler hooks and info structures used by SMS. */
1335 static void
1336 setup_sched_infos (void)
1337 {
1338 memcpy (&sms_common_sched_info, &haifa_common_sched_info,
1339 sizeof (sms_common_sched_info));
1340 sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
1341 common_sched_info = &sms_common_sched_info;
1342
1343 sched_deps_info = &sms_sched_deps_info;
1344 current_sched_info = &sms_sched_info;
1345 }
1346
1347 /* Probability in % that the sms-ed loop rolls enough so that the optimized
1348 version may be entered. Just a guess. */
1349 #define PROB_SMS_ENOUGH_ITERATIONS 80
1350
1351 /* Used to calculate the upper bound of ii. */
1352 #define MAXII_FACTOR 2
1353
1354 /* Main entry point, perform SMS scheduling on the loops of the function
1355 that consist of single basic blocks. */
1356 static void
1357 sms_schedule (void)
1358 {
1359 rtx_insn *insn;
1360 ddg_ptr *g_arr, g;
1361 int * node_order;
1362 int maxii, max_asap;
1363 partial_schedule_ptr ps;
1364 basic_block bb = NULL;
1365 struct loop *loop;
1366 basic_block condition_bb = NULL;
1367 edge latch_edge;
1368 gcov_type trip_count = 0;
1369
1370 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
1371 | LOOPS_HAVE_RECORDED_EXITS);
1372 if (number_of_loops (cfun) <= 1)
1373 {
1374 loop_optimizer_finalize ();
1375 return; /* There are no loops to schedule. */
1376 }
1377
1378 /* Initialize issue_rate. */
1379 if (targetm.sched.issue_rate)
1380 {
1381 int temp = reload_completed;
1382
1383 reload_completed = 1;
1384 issue_rate = targetm.sched.issue_rate ();
1385 reload_completed = temp;
1386 }
1387 else
1388 issue_rate = 1;
1389
1390 /* Initialize the scheduler. */
1391 setup_sched_infos ();
1392 haifa_sched_init ();
1393
1394 /* Allocate memory to hold the DDG array one entry for each loop.
1395 We use loop->num as index into this array. */
1396 g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun));
1397
1398 if (dump_file)
1399 {
1400 fprintf (dump_file, "\n\nSMS analysis phase\n");
1401 fprintf (dump_file, "===================\n\n");
1402 }
1403
1404 /* Build DDGs for all the relevant loops and hold them in G_ARR
1405 indexed by the loop index. */
1406 FOR_EACH_LOOP (loop, 0)
1407 {
1408 rtx_insn *head, *tail;
1409 rtx count_reg;
1410
1411 /* For debugging. */
1412 if (dbg_cnt (sms_sched_loop) == false)
1413 {
1414 if (dump_file)
1415 fprintf (dump_file, "SMS reached max limit... \n");
1416
1417 break;
1418 }
1419
1420 if (dump_file)
1421 {
1422 rtx_insn *insn = BB_END (loop->header);
1423
1424 fprintf (dump_file, "SMS loop num: %d", loop->num);
1425 dump_insn_location (insn);
1426 fprintf (dump_file, "\n");
1427 }
1428
1429 if (! loop_canon_p (loop))
1430 continue;
1431
1432 if (! loop_single_full_bb_p (loop))
1433 {
1434 if (dump_file)
1435 fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
1436 continue;
1437 }
1438
1439 bb = loop->header;
1440
1441 get_ebb_head_tail (bb, bb, &head, &tail);
1442 latch_edge = loop_latch_edge (loop);
1443 gcc_assert (single_exit (loop));
1444 if (single_exit (loop)->count)
1445 trip_count = latch_edge->count / single_exit (loop)->count;
1446
1447 /* Perform SMS only on loops whose average count is above the threshold. */
1448
1449 if ( latch_edge->count
1450 && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
1451 {
1452 if (dump_file)
1453 {
1454 dump_insn_location (tail);
1455 fprintf (dump_file, "\nSMS single-bb-loop\n");
1456 if (profile_info && flag_branch_probabilities)
1457 {
1458 fprintf (dump_file, "SMS loop-count ");
1459 fprintf (dump_file, "%"PRId64,
1460 (int64_t) bb->count);
1461 fprintf (dump_file, "\n");
1462 fprintf (dump_file, "SMS trip-count ");
1463 fprintf (dump_file, "%"PRId64,
1464 (int64_t) trip_count);
1465 fprintf (dump_file, "\n");
1466 fprintf (dump_file, "SMS profile-sum-max ");
1467 fprintf (dump_file, "%"PRId64,
1468 (int64_t) profile_info->sum_max);
1469 fprintf (dump_file, "\n");
1470 }
1471 }
1472 continue;
1473 }
1474
1475 /* Make sure this is a doloop. */
1476 if ( !(count_reg = doloop_register_get (head, tail)))
1477 {
1478 if (dump_file)
1479 fprintf (dump_file, "SMS doloop_register_get failed\n");
1480 continue;
1481 }
1482
1483 /* Don't handle BBs with calls or barriers
1484 or !single_set instructions, with the exception of instructions that
1485 include count_reg---these instructions are part of the control part
1486 that the do-loop pattern recognizes.
1487 ??? Should handle insns defining subregs. */
1488 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1489 {
1490 rtx set;
1491
1492 if (CALL_P (insn)
1493 || BARRIER_P (insn)
1494 || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1495 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
1496 && !reg_mentioned_p (count_reg, insn))
1497 || (INSN_P (insn) && (set = single_set (insn))
1498 && GET_CODE (SET_DEST (set)) == SUBREG))
1499 break;
1500 }
1501
1502 if (insn != NEXT_INSN (tail))
1503 {
1504 if (dump_file)
1505 {
1506 if (CALL_P (insn))
1507 fprintf (dump_file, "SMS loop-with-call\n");
1508 else if (BARRIER_P (insn))
1509 fprintf (dump_file, "SMS loop-with-barrier\n");
1510 else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1511 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1512 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1513 else
1514 fprintf (dump_file, "SMS loop with subreg in lhs\n");
1515 print_rtl_single (dump_file, insn);
1516 }
1517
1518 continue;
1519 }
1520
1521 /* Always schedule the closing branch with the rest of the
1522 instructions. The branch is rotated to be in row ii-1 at the
1523 end of the scheduling procedure to make sure it's the last
1524 instruction in the iteration. */
1525 if (! (g = create_ddg (bb, 1)))
1526 {
1527 if (dump_file)
1528 fprintf (dump_file, "SMS create_ddg failed\n");
1529 continue;
1530 }
1531
1532 g_arr[loop->num] = g;
1533 if (dump_file)
1534 fprintf (dump_file, "...OK\n");
1535
1536 }
1537 if (dump_file)
1538 {
1539 fprintf (dump_file, "\nSMS transformation phase\n");
1540 fprintf (dump_file, "=========================\n\n");
1541 }
1542
1543 /* We don't want to perform SMS on new loops - created by versioning. */
1544 FOR_EACH_LOOP (loop, 0)
1545 {
1546 rtx_insn *head, *tail;
1547 rtx count_reg;
1548 rtx_insn *count_init;
1549 int mii, rec_mii, stage_count, min_cycle;
1550 int64_t loop_count = 0;
1551 bool opt_sc_p;
1552
1553 if (! (g = g_arr[loop->num]))
1554 continue;
1555
1556 if (dump_file)
1557 {
1558 rtx_insn *insn = BB_END (loop->header);
1559
1560 fprintf (dump_file, "SMS loop num: %d", loop->num);
1561 dump_insn_location (insn);
1562 fprintf (dump_file, "\n");
1563
1564 print_ddg (dump_file, g);
1565 }
1566
1567 get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1568
1569 latch_edge = loop_latch_edge (loop);
1570 gcc_assert (single_exit (loop));
1571 if (single_exit (loop)->count)
1572 trip_count = latch_edge->count / single_exit (loop)->count;
1573
1574 if (dump_file)
1575 {
1576 dump_insn_location (tail);
1577 fprintf (dump_file, "\nSMS single-bb-loop\n");
1578 if (profile_info && flag_branch_probabilities)
1579 {
1580 fprintf (dump_file, "SMS loop-count ");
1581 fprintf (dump_file, "%"PRId64,
1582 (int64_t) bb->count);
1583 fprintf (dump_file, "\n");
1584 fprintf (dump_file, "SMS profile-sum-max ");
1585 fprintf (dump_file, "%"PRId64,
1586 (int64_t) profile_info->sum_max);
1587 fprintf (dump_file, "\n");
1588 }
1589 fprintf (dump_file, "SMS doloop\n");
1590 fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1591 fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1592 fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1593 }
1594
1595
1596 /* In case the loop has a doloop register, it gets special
1597 handling. */
1598 count_init = NULL;
1599 if ((count_reg = doloop_register_get (head, tail)))
1600 {
1601 basic_block pre_header;
1602
1603 pre_header = loop_preheader_edge (loop)->src;
1604 count_init = const_iteration_count (count_reg, pre_header,
1605 &loop_count);
1606 }
1607 gcc_assert (count_reg);
1608
1609 if (dump_file && count_init)
1610 {
1611 fprintf (dump_file, "SMS const-doloop ");
1612 fprintf (dump_file, "%"PRId64,
1613 loop_count);
1614 fprintf (dump_file, "\n");
1615 }
1616
1617 node_order = XNEWVEC (int, g->num_nodes);
1618
1619 mii = 1; /* Need to pass some estimate of mii. */
1620 rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1621 mii = MAX (res_MII (g), rec_mii);
1622 maxii = MAX (max_asap, MAXII_FACTOR * mii);
1623
1624 if (dump_file)
1625 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1626 rec_mii, mii, maxii);
1627
1628 for (;;)
1629 {
1630 set_node_sched_params (g);
1631
1632 stage_count = 0;
1633 opt_sc_p = false;
1634 ps = sms_schedule_by_order (g, mii, maxii, node_order);
1635
1636 if (ps)
1637 {
1638 /* Try to achieve optimized SC by normalizing the partial
1639 schedule (having the cycles start from cycle zero).
1640 The branch location must be placed in row ii-1 in the
1641 final scheduling. If failed, shift all instructions to
1642 position the branch in row ii-1. */
1643 opt_sc_p = optimize_sc (ps, g);
1644 if (opt_sc_p)
1645 stage_count = calculate_stage_count (ps, 0);
1646 else
1647 {
1648 /* Bring the branch to cycle ii-1. */
1649 int amount = (SCHED_TIME (g->closing_branch->cuid)
1650 - (ps->ii - 1));
1651
1652 if (dump_file)
1653 fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1654
1655 stage_count = calculate_stage_count (ps, amount);
1656 }
1657
1658 gcc_assert (stage_count >= 1);
1659 }
1660
1661 /* The default value of PARAM_SMS_MIN_SC is 2 as a stage count of
1662 1 means that there is no interleaving between iterations, so
1663 we let the ordinary scheduling passes do the job in this case. */
1664 if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC)
1665 || (count_init && (loop_count <= stage_count))
1666 || (flag_branch_probabilities && (trip_count <= stage_count)))
1667 {
1668 if (dump_file)
1669 {
1670 fprintf (dump_file, "SMS failed... \n");
1671 fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
1672 " loop-count=", stage_count);
1673 fprintf (dump_file, "%"PRId64, loop_count);
1674 fprintf (dump_file, ", trip-count=");
1675 fprintf (dump_file, "%"PRId64, trip_count);
1676 fprintf (dump_file, ")\n");
1677 }
1678 break;
1679 }
1680
1681 if (!opt_sc_p)
1682 {
1683 /* Rotate the partial schedule to have the branch in row ii-1. */
1684 int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
1685
1686 reset_sched_times (ps, amount);
1687 rotate_partial_schedule (ps, amount);
1688 }
1689
1690 set_columns_for_ps (ps);
1691
1692 min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1693 if (!schedule_reg_moves (ps))
1694 {
1695 mii = ps->ii + 1;
1696 free_partial_schedule (ps);
1697 continue;
1698 }
1699
1700 /* Moves that handle incoming values might have been added
1701 to a new first stage. Bump the stage count if so.
1702
1703 ??? Perhaps we could consider rotating the schedule here
1704 instead? */
1705 if (PS_MIN_CYCLE (ps) < min_cycle)
1706 {
1707 reset_sched_times (ps, 0);
1708 stage_count++;
1709 }
1710
1711 /* The stage count should now be correct without rotation. */
1712 gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
1713 PS_STAGE_COUNT (ps) = stage_count;
1714
1715 canon_loop (loop);
1716
1717 if (dump_file)
1718 {
1719 dump_insn_location (tail);
1720 fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n",
1721 ps->ii, stage_count);
1722 print_partial_schedule (ps, dump_file);
1723 }
1724
1725 /* In case the BCT count is not known, do loop-versioning. */
1726 if (count_reg && ! count_init)
1727 {
1728 rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
1729 gen_int_mode (stage_count,
1730 GET_MODE (count_reg)));
1731 unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1732 * REG_BR_PROB_BASE) / 100;
1733
1734 loop_version (loop, comp_rtx, &condition_bb,
1735 prob, prob, REG_BR_PROB_BASE - prob,
1736 true);
1737 }
1738
1739 /* Set new iteration count of loop kernel. */
1740 if (count_reg && count_init)
1741 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1742 - stage_count + 1);
1743
1744 /* Now apply the scheduled kernel to the RTL of the loop. */
1745 permute_partial_schedule (ps, g->closing_branch->first_note);
1746
1747 /* Mark this loop as software pipelined so the later
1748 scheduling passes don't touch it. */
1749 if (! flag_resched_modulo_sched)
1750 mark_loop_unsched (loop);
1751
1752 /* The life-info is not valid any more. */
1753 df_set_bb_dirty (g->bb);
1754
1755 apply_reg_moves (ps);
1756 if (dump_file)
1757 print_node_sched_params (dump_file, g->num_nodes, ps);
1758 /* Generate prolog and epilog. */
1759 generate_prolog_epilog (ps, loop, count_reg, count_init);
1760 break;
1761 }
1762
1763 free_partial_schedule (ps);
1764 node_sched_param_vec.release ();
1765 free (node_order);
1766 free_ddg (g);
1767 }
1768
1769 free (g_arr);
1770
1771 /* Release scheduler data, needed until now because of DFA. */
1772 haifa_sched_finish ();
1773 loop_optimizer_finalize ();
1774 }
1775
1776 /* The SMS scheduling algorithm itself
1777 -----------------------------------
1778 Input: 'O' an ordered list of insns of a loop.
1779 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1780
1781 'Q' is the empty Set
1782 'PS' is the partial schedule; it holds the currently scheduled nodes with
1783 their cycle/slot.
1784 'PSP' previously scheduled predecessors.
1785 'PSS' previously scheduled successors.
1786 't(u)' the cycle where u is scheduled.
1787 'l(u)' is the latency of u.
1788 'd(v,u)' is the dependence distance from v to u.
1789 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1790 the node ordering phase.
1791 'check_hardware_resources_conflicts(u, PS, c)'
1792 runs a trace around the cycle/slot through the DFA model
1793 to check resource conflicts involving instruction u
1794 at cycle c given the partial schedule PS.
1795 'add_to_partial_schedule_at_time(u, PS, c)'
1796 Add the node/instruction u to the partial schedule
1797 PS at time c.
1798 'calculate_register_pressure(PS)'
1799 Given a schedule of instructions, calculate the register
1800 pressure it implies. One implementation could be the
1801 maximum number of overlapping live ranges.
1802 'maxRP' The maximum allowed register pressure; it is usually derived from
1803 the number of registers available in the hardware.
1804
1805 1. II = MII.
1806 2. PS = empty list
1807 3. for each node u in O in pre-computed order
1808 4. if (PSP(u) != Q && PSS(u) == Q) then
1809 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1810 6. start = Early_start; end = Early_start + II - 1; step = 1
1811 11. else if (PSP(u) == Q && PSS(u) != Q) then
1812 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1813 13. start = Late_start; end = Late_start - II + 1; step = -1
1814 14. else if (PSP(u) != Q && PSS(u) != Q) then
1815 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1816 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1817 17. start = Early_start;
1818 18. end = min(Early_start + II - 1 , Late_start);
1819 19. step = 1
1820 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1821 21. start = ASAP(u); end = start + II - 1; step = 1
1822 22. endif
1823
1824 23. success = false
1825 24. for (c = start ; c != end ; c += step)
1826 25. if check_hardware_resources_conflicts(u, PS, c) then
1827 26. add_to_partial_schedule_at_time(u, PS, c)
1828 27. success = true
1829 28. break
1830 29. endif
1831 30. endfor
1832 31. if (success == false) then
1833 32. II = II + 1
1834 33. if (II > maxII) then
1835 34. finish - failed to schedule
1836 35. endif
1837 36. goto 2.
1838 37. endif
1839 38. endfor
1840 39. if (calculate_register_pressure(PS) > maxRP) then
1841 40. goto 32.
1842 41. endif
1843 42. compute epilogue & prologue
1844 43. finish - succeeded in scheduling
1845
1846 ??? The algorithm restricts the scheduling window to II cycles.
1847 In rare cases, it may be better to allow windows of II+1 cycles.
1848 The window would then start and end on the same row, but with
1849 different "must precede" and "must follow" requirements. */
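/* As a worked example of steps 4-21 above (illustrative numbers only): assume
   II = 4 and a node u with one previously scheduled predecessor v, where
   t(v) = 2, l(v) = 1, d(v,u) = 0, and one previously scheduled successor w,
   where t(w) = 10, l(w) = 1, d(u,w) = 0.  Then
     Early_start(u) = 2 + 1 - 0*4 = 3
     Late_start(u)  = 10 - 1 + 0*4 = 9
   and the window tried is start = 3, end = min (3 + 4 - 1, 9) = 6, step = 1. */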
1850
1851 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1852 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1853 set to 0 to save compile time. */
1854 #define DFA_HISTORY SMS_DFA_HISTORY
1855
1856 /* A threshold for the number of repeated unsuccessful attempts to insert
1857 an empty row, before we flush the partial schedule and start over. */
1858 #define MAX_SPLIT_NUM 10
1859 /* Given the partial schedule PS, this function calculates and returns the
1860 window of cycles in which we can schedule the node U_NODE.
1861 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1862 noticed that there are several cases in which we fail to SMS the loop
1863 because the sched window of a node is empty due to tight data-deps. In
1864 such cases we want to unschedule some of the predecessors/successors
1865 until we get a non-empty scheduling window. It returns -1 if the
1866 scheduling window is empty and zero otherwise. */
1867
1868 static int
1869 get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
1870 sbitmap sched_nodes, int ii, int *start_p, int *step_p,
1871 int *end_p)
1872 {
1873 int start, step, end;
1874 int early_start, late_start;
1875 ddg_edge_ptr e;
1876 sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1877 sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1878 sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1879 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1880 int psp_not_empty;
1881 int pss_not_empty;
1882 int count_preds;
1883 int count_succs;
1884
1885 /* 1. compute sched window for u (start, end, step). */
1886 bitmap_clear (psp);
1887 bitmap_clear (pss);
1888 psp_not_empty = bitmap_and (psp, u_node_preds, sched_nodes);
1889 pss_not_empty = bitmap_and (pss, u_node_succs, sched_nodes);
1890
1891 /* We first compute a forward range (start <= end), then decide whether
1892 to reverse it. */
1893 early_start = INT_MIN;
1894 late_start = INT_MAX;
1895 start = INT_MIN;
1896 end = INT_MAX;
1897 step = 1;
1898
1899 count_preds = 0;
1900 count_succs = 0;
1901
1902 if (dump_file && (psp_not_empty || pss_not_empty))
1903 {
1904 fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
1905 "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
1906 fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
1907 "start", "early start", "late start", "end", "time");
1908 fprintf (dump_file, "=========== =========== =========== ==========="
1909 " =====\n");
1910 }
1911 /* Calculate early_start and limit end. Both bounds are inclusive. */
1912 if (psp_not_empty)
1913 for (e = u_node->in; e != 0; e = e->next_in)
1914 {
1915 int v = e->src->cuid;
1916
1917 if (bitmap_bit_p (sched_nodes, v))
1918 {
1919 int p_st = SCHED_TIME (v);
1920 int earliest = p_st + e->latency - (e->distance * ii);
1921 int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
1922
1923 if (dump_file)
1924 {
1925 fprintf (dump_file, "%11s %11d %11s %11d %5d",
1926 "", earliest, "", latest, p_st);
1927 print_ddg_edge (dump_file, e);
1928 fprintf (dump_file, "\n");
1929 }
1930
1931 early_start = MAX (early_start, earliest);
1932 end = MIN (end, latest);
1933
1934 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1935 count_preds++;
1936 }
1937 }
1938
1939 /* Calculate late_start and limit start. Both bounds are inclusive. */
1940 if (pss_not_empty)
1941 for (e = u_node->out; e != 0; e = e->next_out)
1942 {
1943 int v = e->dest->cuid;
1944
1945 if (bitmap_bit_p (sched_nodes, v))
1946 {
1947 int s_st = SCHED_TIME (v);
1948 int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
1949 int latest = s_st - e->latency + (e->distance * ii);
1950
1951 if (dump_file)
1952 {
1953 fprintf (dump_file, "%11d %11s %11d %11s %5d",
1954 earliest, "", latest, "", s_st);
1955 print_ddg_edge (dump_file, e);
1956 fprintf (dump_file, "\n");
1957 }
1958
1959 start = MAX (start, earliest);
1960 late_start = MIN (late_start, latest);
1961
1962 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1963 count_succs++;
1964 }
1965 }
1966
1967 if (dump_file && (psp_not_empty || pss_not_empty))
1968 {
1969 fprintf (dump_file, "----------- ----------- ----------- -----------"
1970 " -----\n");
1971 fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
1972 start, early_start, late_start, end, "",
1973 "(max, max, min, min)");
1974 }
1975
1976 /* Get a target scheduling window no bigger than ii. */
1977 if (early_start == INT_MIN && late_start == INT_MAX)
1978 early_start = NODE_ASAP (u_node);
1979 else if (early_start == INT_MIN)
1980 early_start = late_start - (ii - 1);
1981 late_start = MIN (late_start, early_start + (ii - 1));
1982
1983 /* Apply memory dependence limits. */
1984 start = MAX (start, early_start);
1985 end = MIN (end, late_start);
1986
1987 if (dump_file && (psp_not_empty || pss_not_empty))
1988 fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
1989 "", start, end, "", "");
1990
1991 /* If there are at least as many successors as predecessors, schedule the
1992 node close to its successors. */
1993 if (pss_not_empty && count_succs >= count_preds)
1994 {
1995 int tmp = end;
1996 end = start;
1997 start = tmp;
1998 step = -1;
1999 }
2000
2001 /* Now that we've finalized the window, make END an exclusive rather
2002 than an inclusive bound. */
2003 end += step;
2004
2005 *start_p = start;
2006 *step_p = step;
2007 *end_p = end;
2008 sbitmap_free (psp);
2009 sbitmap_free (pss);
2010
2011 if ((start >= end && step == 1) || (start <= end && step == -1))
2012 {
2013 if (dump_file)
2014 fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
2015 start, end, step);
2016 return -1;
2017 }
2018
2019 return 0;
2020 }
2021
2022 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE, which is the
2023 node currently being scheduled. At the end of the calculation
2024 MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
2025 U_NODE which are (1) already scheduled in the first/last row of
2026 U_NODE's scheduling window, (2) whose dependence inequality with U
2027 becomes an equality when U is scheduled in this same row, and (3)
2028 whose dependence latency is zero.
2029
2030 The first and last rows are calculated using the following parameters:
2031 START/END rows - The cycles that begin/end the traversal of the window
2032 while searching for an empty cycle in which to schedule U_NODE.
2033 STEP - The direction in which we traverse the window.
2034 II - The initiation interval. */
2035
2036 static void
2037 calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
2038 int step, int ii, sbitmap sched_nodes,
2039 sbitmap must_precede, sbitmap must_follow)
2040 {
2041 ddg_edge_ptr e;
2042 int first_cycle_in_window, last_cycle_in_window;
2043
2044 gcc_assert (must_precede && must_follow);
2045
2046 /* Consider the following scheduling window:
2047 {first_cycle_in_window, first_cycle_in_window+1, ...,
2048 last_cycle_in_window}. If step is 1 then the following will be
2049 the order we traverse the window: {start=first_cycle_in_window,
2050 first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
2051 or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
2052 end=first_cycle_in_window-1} if step is -1. */
2053 first_cycle_in_window = (step == 1) ? start : end - step;
2054 last_cycle_in_window = (step == 1) ? end - step : start;
2055
2056 bitmap_clear (must_precede);
2057 bitmap_clear (must_follow);
2058
2059 if (dump_file)
2060 fprintf (dump_file, "\nmust_precede: ");
2061
2062 /* Instead of checking if:
2063 (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
2064 && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
2065 first_cycle_in_window)
2066 && e->latency == 0
2067 we use the fact that latency is non-negative:
2068 SCHED_TIME (e->src) - (e->distance * ii) <=
2069 SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
2070 first_cycle_in_window
2071 and check only if
2072 SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
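/* For example (illustrative values): with ii == 4 and
   first_cycle_in_window == 7, a scheduled predecessor edge with
   SCHED_TIME (e->src->cuid) == 7 and e->distance == 0 satisfies
   7 - 0 * 4 == 7, so its source is added to must_precede. */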
2073 for (e = u_node->in; e != 0; e = e->next_in)
2074 if (bitmap_bit_p (sched_nodes, e->src->cuid)
2075 && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
2076 first_cycle_in_window))
2077 {
2078 if (dump_file)
2079 fprintf (dump_file, "%d ", e->src->cuid);
2080
2081 bitmap_set_bit (must_precede, e->src->cuid);
2082 }
2083
2084 if (dump_file)
2085 fprintf (dump_file, "\nmust_follow: ");
2086
2087 /* Instead of checking if:
2088 (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
2089 && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
2090 last_cycle_in_window)
2091 && e->latency == 0
2092 we use the fact that latency is non-negative:
2093 SCHED_TIME (e->dest) + (e->distance * ii) >=
2094 SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
2095 last_cycle_in_window
2096 and check only if
2097 SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
2098 for (e = u_node->out; e != 0; e = e->next_out)
2099 if (bitmap_bit_p (sched_nodes, e->dest->cuid)
2100 && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
2101 last_cycle_in_window))
2102 {
2103 if (dump_file)
2104 fprintf (dump_file, "%d ", e->dest->cuid);
2105
2106 bitmap_set_bit (must_follow, e->dest->cuid);
2107 }
2108
2109 if (dump_file)
2110 fprintf (dump_file, "\n");
2111 }
2112
2113 /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
2114 parameters to decide if that's possible:
2115 PS - The partial schedule.
2116 U - The serial number of U_NODE.
2117 NUM_SPLITS - The number of row splits made so far.
2118 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
2119 the first row of the scheduling window)
2120 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
2121 last row of the scheduling window) */
2122
2123 static bool
2124 try_scheduling_node_in_cycle (partial_schedule_ptr ps,
2125 int u, int cycle, sbitmap sched_nodes,
2126 int *num_splits, sbitmap must_precede,
2127 sbitmap must_follow)
2128 {
2129 ps_insn_ptr psi;
2130 bool success = 0;
2131
2132 verify_partial_schedule (ps, sched_nodes);
2133 psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
2134 if (psi)
2135 {
2136 SCHED_TIME (u) = cycle;
2137 bitmap_set_bit (sched_nodes, u);
2138 success = 1;
2139 *num_splits = 0;
2140 if (dump_file)
2141 fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
2142
2143 }
2144
2145 return success;
2146 }
2147
2148 /* This function implements the SMS scheduling algorithm described
2149 above. */
2150 static partial_schedule_ptr
2151 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
2152 {
2153 int ii = mii;
2154 int i, c, success, num_splits = 0;
2155 int flush_and_start_over = true;
2156 int num_nodes = g->num_nodes;
2157 int start, end, step; /* Place together into one struct? */
2158 sbitmap sched_nodes = sbitmap_alloc (num_nodes);
2159 sbitmap must_precede = sbitmap_alloc (num_nodes);
2160 sbitmap must_follow = sbitmap_alloc (num_nodes);
2161 sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
2162
2163 partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
2164
2165 bitmap_ones (tobe_scheduled);
2166 bitmap_clear (sched_nodes);
2167
2168 while (flush_and_start_over && (ii < maxii))
2169 {
2170
2171 if (dump_file)
2172 fprintf (dump_file, "Starting with ii=%d\n", ii);
2173 flush_and_start_over = false;
2174 bitmap_clear (sched_nodes);
2175
2176 for (i = 0; i < num_nodes; i++)
2177 {
2178 int u = nodes_order[i];
2179 ddg_node_ptr u_node = &ps->g->nodes[u];
2180 rtx insn = u_node->insn;
2181
2182 if (!NONDEBUG_INSN_P (insn))
2183 {
2184 bitmap_clear_bit (tobe_scheduled, u);
2185 continue;
2186 }
2187
2188 if (bitmap_bit_p (sched_nodes, u))
2189 continue;
2190
2191 /* Try to get a non-empty scheduling window. */
2192 success = 0;
2193 if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
2194 &step, &end) == 0)
2195 {
2196 if (dump_file)
2197 fprintf (dump_file, "\nTrying to schedule node %d "
2198 "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
2199 (g->nodes[u].insn)), start, end, step);
2200
2201 gcc_assert ((step > 0 && start < end)
2202 || (step < 0 && start > end));
2203
2204 calculate_must_precede_follow (u_node, start, end, step, ii,
2205 sched_nodes, must_precede,
2206 must_follow);
2207
2208 for (c = start; c != end; c += step)
2209 {
2210 sbitmap tmp_precede, tmp_follow;
2211
2212 set_must_precede_follow (&tmp_follow, must_follow,
2213 &tmp_precede, must_precede,
2214 c, start, end, step);
2215 success =
2216 try_scheduling_node_in_cycle (ps, u, c,
2217 sched_nodes,
2218 &num_splits, tmp_precede,
2219 tmp_follow);
2220 if (success)
2221 break;
2222 }
2223
2224 verify_partial_schedule (ps, sched_nodes);
2225 }
2226 if (!success)
2227 {
2228 int split_row;
2229
2230 if (ii++ == maxii)
2231 break;
2232
2233 if (num_splits >= MAX_SPLIT_NUM)
2234 {
2235 num_splits = 0;
2236 flush_and_start_over = true;
2237 verify_partial_schedule (ps, sched_nodes);
2238 reset_partial_schedule (ps, ii);
2239 verify_partial_schedule (ps, sched_nodes);
2240 break;
2241 }
2242
2243 num_splits++;
2244 /* The scheduling window is exclusive of 'end'
2245 whereas compute_split_row() expects an inclusive,
2246 ordered range. */
2247 if (step == 1)
2248 split_row = compute_split_row (sched_nodes, start, end - 1,
2249 ps->ii, u_node);
2250 else
2251 split_row = compute_split_row (sched_nodes, end + 1, start,
2252 ps->ii, u_node);
2253
2254 ps_insert_empty_row (ps, split_row, sched_nodes);
2255 i--; /* Go back and retry node i. */
2256
2257 if (dump_file)
2258 fprintf (dump_file, "num_splits=%d\n", num_splits);
2259 }
2260
2261 /* ??? If (success), check register pressure estimates. */
2262 } /* Continue with next node. */
2263 } /* While flush_and_start_over. */
2264 if (ii >= maxii)
2265 {
2266 free_partial_schedule (ps);
2267 ps = NULL;
2268 }
2269 else
2270 gcc_assert (bitmap_equal_p (tobe_scheduled, sched_nodes));
2271
2272 sbitmap_free (sched_nodes);
2273 sbitmap_free (must_precede);
2274 sbitmap_free (must_follow);
2275 sbitmap_free (tobe_scheduled);
2276
2277 return ps;
2278 }
2279
2280 /* This function inserts a new empty row into PS at the position
2281 according to SPLITROW, keeping all already scheduled instructions
2282 intact and updating their SCHED_TIME and cycle accordingly. */
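/* A small worked example (assumed, already-normalized schedule): with ii == 3
   and split_row == 1, an insn at cycle 0 keeps cycle 0 (row 0 of the new
   ii == 4 schedule), an insn at cycle 1 moves to cycle 2 (row 2), and an insn
   at cycle 4 moves to cycle 6 (row 2 of the next stage), leaving row 1 empty
   in every stage. */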
2283 static void
2284 ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
2285 sbitmap sched_nodes)
2286 {
2287 ps_insn_ptr crr_insn;
2288 ps_insn_ptr *rows_new;
2289 int ii = ps->ii;
2290 int new_ii = ii + 1;
2291 int row;
2292 int *rows_length_new;
2293
2294 verify_partial_schedule (ps, sched_nodes);
2295
2296 /* We normalize sched_time and rotate ps to have only non-negative sched
2297 times, for simplicity of updating cycles after inserting the new row. */
2298 split_row -= ps->min_cycle;
2299 split_row = SMODULO (split_row, ii);
2300 if (dump_file)
2301 fprintf (dump_file, "split_row=%d\n", split_row);
2302
2303 reset_sched_times (ps, PS_MIN_CYCLE (ps));
2304 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
2305
2306 rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
2307 rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
2308 for (row = 0; row < split_row; row++)
2309 {
2310 rows_new[row] = ps->rows[row];
2311 rows_length_new[row] = ps->rows_length[row];
2312 ps->rows[row] = NULL;
2313 for (crr_insn = rows_new[row];
2314 crr_insn; crr_insn = crr_insn->next_in_row)
2315 {
2316 int u = crr_insn->id;
2317 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
2318
2319 SCHED_TIME (u) = new_time;
2320 crr_insn->cycle = new_time;
2321 SCHED_ROW (u) = new_time % new_ii;
2322 SCHED_STAGE (u) = new_time / new_ii;
2323 }
2324
2325 }
2326
2327 rows_new[split_row] = NULL;
2328
2329 for (row = split_row; row < ii; row++)
2330 {
2331 rows_new[row + 1] = ps->rows[row];
2332 rows_length_new[row + 1] = ps->rows_length[row];
2333 ps->rows[row] = NULL;
2334 for (crr_insn = rows_new[row + 1];
2335 crr_insn; crr_insn = crr_insn->next_in_row)
2336 {
2337 int u = crr_insn->id;
2338 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
2339
2340 SCHED_TIME (u) = new_time;
2341 crr_insn->cycle = new_time;
2342 SCHED_ROW (u) = new_time % new_ii;
2343 SCHED_STAGE (u) = new_time / new_ii;
2344 }
2345 }
2346
2347 /* Updating ps. */
2348 ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
2349 + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
2350 ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
2351 + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
2352 free (ps->rows);
2353 ps->rows = rows_new;
2354 free (ps->rows_length);
2355 ps->rows_length = rows_length_new;
2356 ps->ii = new_ii;
2357 gcc_assert (ps->min_cycle >= 0);
2358
2359 verify_partial_schedule (ps, sched_nodes);
2360
2361 if (dump_file)
2362 fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
2363 ps->max_cycle);
2364 }
2365
2366 /* Given U_NODE, the node that failed to be scheduled, and LOW and UP,
2367 the boundaries of its scheduling window, compute (using SCHED_NODES
2368 and II) a row in the partial schedule that can be split so as to
2369 separate a critical predecessor from a critical successor, thereby
2370 expanding the window, and return it. */
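/* For instance (illustrative values): if the critical predecessor found below
   is scheduled at cycle 10 and ii == 4, crit_cycle is 11 and the row returned
   for splitting is SMODULO (11, 4) == 3. */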
2371 static int
2372 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
2373 ddg_node_ptr u_node)
2374 {
2375 ddg_edge_ptr e;
2376 int lower = INT_MIN, upper = INT_MAX;
2377 int crit_pred = -1;
2378 int crit_succ = -1;
2379 int crit_cycle;
2380
2381 for (e = u_node->in; e != 0; e = e->next_in)
2382 {
2383 int v = e->src->cuid;
2384
2385 if (bitmap_bit_p (sched_nodes, v)
2386 && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
2387 if (SCHED_TIME (v) > lower)
2388 {
2389 crit_pred = v;
2390 lower = SCHED_TIME (v);
2391 }
2392 }
2393
2394 if (crit_pred >= 0)
2395 {
2396 crit_cycle = SCHED_TIME (crit_pred) + 1;
2397 return SMODULO (crit_cycle, ii);
2398 }
2399
2400 for (e = u_node->out; e != 0; e = e->next_out)
2401 {
2402 int v = e->dest->cuid;
2403
2404 if (bitmap_bit_p (sched_nodes, v)
2405 && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
2406 if (SCHED_TIME (v) < upper)
2407 {
2408 crit_succ = v;
2409 upper = SCHED_TIME (v);
2410 }
2411 }
2412
2413 if (crit_succ >= 0)
2414 {
2415 crit_cycle = SCHED_TIME (crit_succ);
2416 return SMODULO (crit_cycle, ii);
2417 }
2418
2419 if (dump_file)
2420 fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2421
2422 return SMODULO ((low + up + 1) / 2, ii);
2423 }
2424
2425 static void
2426 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2427 {
2428 int row;
2429 ps_insn_ptr crr_insn;
2430
2431 for (row = 0; row < ps->ii; row++)
2432 {
2433 int length = 0;
2434
2435 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2436 {
2437 int u = crr_insn->id;
2438
2439 length++;
2440 gcc_assert (bitmap_bit_p (sched_nodes, u));
2441 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2442 popcount (sched_nodes) == number of insns in ps. */
2443 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2444 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2445 }
2446
2447 gcc_assert (ps->rows_length[row] == length);
2448 }
2449 }
2450
2451 \f
2452 /* This page implements the algorithm for ordering the nodes of a DDG
2453 for modulo scheduling, activated through the
2454 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
2455
2456 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2457 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2458 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2459 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2460 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2461 #define DEPTH(x) (ASAP ((x)))
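/* MOB is a node's scheduling slack: a node whose ALAP equals its ASAP has zero
   mobility and lies on a critical path of the (acyclic) dependence graph. */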
2462
2463 typedef struct node_order_params * nopa;
2464
2465 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2466 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2467 static nopa calculate_order_params (ddg_ptr, int, int *);
2468 static int find_max_asap (ddg_ptr, sbitmap);
2469 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2470 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2471
2472 enum sms_direction {BOTTOMUP, TOPDOWN};
2473
2474 struct node_order_params
2475 {
2476 int asap;
2477 int alap;
2478 int height;
2479 };
2480
2481 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2482 static void
2483 check_nodes_order (int *node_order, int num_nodes)
2484 {
2485 int i;
2486 sbitmap tmp = sbitmap_alloc (num_nodes);
2487
2488 bitmap_clear (tmp);
2489
2490 if (dump_file)
2491 fprintf (dump_file, "SMS final nodes order: \n");
2492
2493 for (i = 0; i < num_nodes; i++)
2494 {
2495 int u = node_order[i];
2496
2497 if (dump_file)
2498 fprintf (dump_file, "%d ", u);
2499 gcc_assert (u < num_nodes && u >= 0 && !bitmap_bit_p (tmp, u));
2500
2501 bitmap_set_bit (tmp, u);
2502 }
2503
2504 if (dump_file)
2505 fprintf (dump_file, "\n");
2506
2507 sbitmap_free (tmp);
2508 }
2509
2510 /* Order the nodes of G for scheduling and pass the result in
2511 NODE_ORDER. Also set aux.count of each node to ASAP.
2512 Store the maximal ASAP in PMAX_ASAP. Return the recMII for the given DDG. */
2513 static int
2514 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2515 {
2516 int i;
2517 int rec_mii = 0;
2518 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2519
2520 nopa nops = calculate_order_params (g, mii, pmax_asap);
2521
2522 if (dump_file)
2523 print_sccs (dump_file, sccs, g);
2524
2525 order_nodes_of_sccs (sccs, node_order);
2526
2527 if (sccs->num_sccs > 0)
2528 /* First SCC has the largest recurrence_length. */
2529 rec_mii = sccs->sccs[0]->recurrence_length;
2530
2531 /* Save ASAP before destroying node_order_params. */
2532 for (i = 0; i < g->num_nodes; i++)
2533 {
2534 ddg_node_ptr v = &g->nodes[i];
2535 v->aux.count = ASAP (v);
2536 }
2537
2538 free (nops);
2539 free_ddg_all_sccs (sccs);
2540 check_nodes_order (node_order, g->num_nodes);
2541
2542 return rec_mii;
2543 }
2544
2545 static void
2546 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2547 {
2548 int i, pos = 0;
2549 ddg_ptr g = all_sccs->ddg;
2550 int num_nodes = g->num_nodes;
2551 sbitmap prev_sccs = sbitmap_alloc (num_nodes);
2552 sbitmap on_path = sbitmap_alloc (num_nodes);
2553 sbitmap tmp = sbitmap_alloc (num_nodes);
2554 sbitmap ones = sbitmap_alloc (num_nodes);
2555
2556 bitmap_clear (prev_sccs);
2557 bitmap_ones (ones);
2558
2559 /* Perform the node ordering starting from the SCC with the highest recMII.
2560 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
2561 for (i = 0; i < all_sccs->num_sccs; i++)
2562 {
2563 ddg_scc_ptr scc = all_sccs->sccs[i];
2564
2565 /* Add nodes on paths from previous SCCs to the current SCC. */
2566 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2567 bitmap_ior (tmp, scc->nodes, on_path);
2568
2569 /* Add nodes on paths from the current SCC to previous SCCs. */
2570 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2571 bitmap_ior (tmp, tmp, on_path);
2572
2573 /* Remove nodes of previous SCCs from current extended SCC. */
2574 bitmap_and_compl (tmp, tmp, prev_sccs);
2575
2576 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2577 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
2578 }
2579
2580 /* Handle the remaining nodes that do not belong to any scc. Each call
2581 to order_nodes_in_scc handles a single connected component. */
2582 while (pos < g->num_nodes)
2583 {
2584 bitmap_and_compl (tmp, ones, prev_sccs);
2585 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2586 }
2587 sbitmap_free (prev_sccs);
2588 sbitmap_free (on_path);
2589 sbitmap_free (tmp);
2590 sbitmap_free (ones);
2591 }
2592
2593 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
2594 static struct node_order_params *
2595 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2596 {
2597 int u;
2598 int max_asap;
2599 int num_nodes = g->num_nodes;
2600 ddg_edge_ptr e;
2601 /* Allocate a place to hold ordering params for each node in the DDG. */
2602 nopa node_order_params_arr;
2603
2604 /* Initialize ASAP/ALAP/HEIGHT to zero. */
2605 node_order_params_arr = (nopa) xcalloc (num_nodes,
2606 sizeof (struct node_order_params));
2607
2608 /* Set the aux pointer of each node to point to its order_params structure. */
2609 for (u = 0; u < num_nodes; u++)
2610 g->nodes[u].aux.info = &node_order_params_arr[u];
2611
2612 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2613 calculate ASAP, ALAP, mobility, distance, and height for each node
2614 in the dependence (directed acyclic) graph. */
2615
2616 /* We assume that the nodes in the array are in topological order. */
2617
2618 max_asap = 0;
2619 for (u = 0; u < num_nodes; u++)
2620 {
2621 ddg_node_ptr u_node = &g->nodes[u];
2622
2623 ASAP (u_node) = 0;
2624 for (e = u_node->in; e; e = e->next_in)
2625 if (e->distance == 0)
2626 ASAP (u_node) = MAX (ASAP (u_node),
2627 ASAP (e->src) + e->latency);
2628 max_asap = MAX (max_asap, ASAP (u_node));
2629 }
2630
2631 for (u = num_nodes - 1; u > -1; u--)
2632 {
2633 ddg_node_ptr u_node = &g->nodes[u];
2634
2635 ALAP (u_node) = max_asap;
2636 HEIGHT (u_node) = 0;
2637 for (e = u_node->out; e; e = e->next_out)
2638 if (e->distance == 0)
2639 {
2640 ALAP (u_node) = MIN (ALAP (u_node),
2641 ALAP (e->dest) - e->latency);
2642 HEIGHT (u_node) = MAX (HEIGHT (u_node),
2643 HEIGHT (e->dest) + e->latency);
2644 }
2645 }
2646 if (dump_file)
2647 {
2648 fprintf (dump_file, "\nOrder params\n");
2649 for (u = 0; u < num_nodes; u++)
2650 {
2651 ddg_node_ptr u_node = &g->nodes[u];
2652
2653 fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2654 ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2655 }
2656 }
2657
2658 *pmax_asap = max_asap;
2659 return node_order_params_arr;
2660 }
2661
2662 static int
2663 find_max_asap (ddg_ptr g, sbitmap nodes)
2664 {
2665 unsigned int u = 0;
2666 int max_asap = -1;
2667 int result = -1;
2668 sbitmap_iterator sbi;
2669
2670 EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2671 {
2672 ddg_node_ptr u_node = &g->nodes[u];
2673
2674 if (max_asap < ASAP (u_node))
2675 {
2676 max_asap = ASAP (u_node);
2677 result = u;
2678 }
2679 }
2680 return result;
2681 }
2682
2683 static int
2684 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2685 {
2686 unsigned int u = 0;
2687 int max_hv = -1;
2688 int min_mob = INT_MAX;
2689 int result = -1;
2690 sbitmap_iterator sbi;
2691
2692 EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2693 {
2694 ddg_node_ptr u_node = &g->nodes[u];
2695
2696 if (max_hv < HEIGHT (u_node))
2697 {
2698 max_hv = HEIGHT (u_node);
2699 min_mob = MOB (u_node);
2700 result = u;
2701 }
2702 else if ((max_hv == HEIGHT (u_node))
2703 && (min_mob > MOB (u_node)))
2704 {
2705 min_mob = MOB (u_node);
2706 result = u;
2707 }
2708 }
2709 return result;
2710 }
2711
2712 static int
2713 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2714 {
2715 unsigned int u = 0;
2716 int max_dv = -1;
2717 int min_mob = INT_MAX;
2718 int result = -1;
2719 sbitmap_iterator sbi;
2720
2721 EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2722 {
2723 ddg_node_ptr u_node = &g->nodes[u];
2724
2725 if (max_dv < DEPTH (u_node))
2726 {
2727 max_dv = DEPTH (u_node);
2728 min_mob = MOB (u_node);
2729 result = u;
2730 }
2731 else if ((max_dv == DEPTH (u_node))
2732 && (min_mob > MOB (u_node)))
2733 {
2734 min_mob = MOB (u_node);
2735 result = u;
2736 }
2737 }
2738 return result;
2739 }
2740
2741 /* Places the nodes of SCC into the NODE_ORDER array starting
2742 at position POS, according to the SMS ordering algorithm.
2743 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2744 the NODE_ORDER array, starting from position zero. */
2745 static int
2746 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2747 int * node_order, int pos)
2748 {
2749 enum sms_direction dir;
2750 int num_nodes = g->num_nodes;
2751 sbitmap workset = sbitmap_alloc (num_nodes);
2752 sbitmap tmp = sbitmap_alloc (num_nodes);
2753 sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2754 sbitmap predecessors = sbitmap_alloc (num_nodes);
2755 sbitmap successors = sbitmap_alloc (num_nodes);
2756
2757 bitmap_clear (predecessors);
2758 find_predecessors (predecessors, g, nodes_ordered);
2759
2760 bitmap_clear (successors);
2761 find_successors (successors, g, nodes_ordered);
2762
2763 bitmap_clear (tmp);
2764 if (bitmap_and (tmp, predecessors, scc))
2765 {
2766 bitmap_copy (workset, tmp);
2767 dir = BOTTOMUP;
2768 }
2769 else if (bitmap_and (tmp, successors, scc))
2770 {
2771 bitmap_copy (workset, tmp);
2772 dir = TOPDOWN;
2773 }
2774 else
2775 {
2776 int u;
2777
2778 bitmap_clear (workset);
2779 if ((u = find_max_asap (g, scc)) >= 0)
2780 bitmap_set_bit (workset, u);
2781 dir = BOTTOMUP;
2782 }
2783
2784 bitmap_clear (zero_bitmap);
2785 while (!bitmap_equal_p (workset, zero_bitmap))
2786 {
2787 int v;
2788 ddg_node_ptr v_node;
2789 sbitmap v_node_preds;
2790 sbitmap v_node_succs;
2791
2792 if (dir == TOPDOWN)
2793 {
2794 while (!bitmap_equal_p (workset, zero_bitmap))
2795 {
2796 v = find_max_hv_min_mob (g, workset);
2797 v_node = &g->nodes[v];
2798 node_order[pos++] = v;
2799 v_node_succs = NODE_SUCCESSORS (v_node);
2800 bitmap_and (tmp, v_node_succs, scc);
2801
2802 /* Don't consider the already ordered successors again. */
2803 bitmap_and_compl (tmp, tmp, nodes_ordered);
2804 bitmap_ior (workset, workset, tmp);
2805 bitmap_clear_bit (workset, v);
2806 bitmap_set_bit (nodes_ordered, v);
2807 }
2808 dir = BOTTOMUP;
2809 bitmap_clear (predecessors);
2810 find_predecessors (predecessors, g, nodes_ordered);
2811 bitmap_and (workset, predecessors, scc);
2812 }
2813 else
2814 {
2815 while (!bitmap_equal_p (workset, zero_bitmap))
2816 {
2817 v = find_max_dv_min_mob (g, workset);
2818 v_node = &g->nodes[v];
2819 node_order[pos++] = v;
2820 v_node_preds = NODE_PREDECESSORS (v_node);
2821 bitmap_and (tmp, v_node_preds, scc);
2822
2823 /* Don't consider the already ordered predecessors again. */
2824 bitmap_and_compl (tmp, tmp, nodes_ordered);
2825 bitmap_ior (workset, workset, tmp);
2826 bitmap_clear_bit (workset, v);
2827 bitmap_set_bit (nodes_ordered, v);
2828 }
2829 dir = TOPDOWN;
2830 bitmap_clear (successors);
2831 find_successors (successors, g, nodes_ordered);
2832 bitmap_and (workset, successors, scc);
2833 }
2834 }
2835 sbitmap_free (tmp);
2836 sbitmap_free (workset);
2837 sbitmap_free (zero_bitmap);
2838 sbitmap_free (predecessors);
2839 sbitmap_free (successors);
2840 return pos;
2841 }
2842
2843 \f
2844 /* This page contains functions for manipulating partial-schedules during
2845 modulo scheduling. */
2846
2847 /* Create a partial schedule and allocate memory to hold II rows. */
2848
2849 static partial_schedule_ptr
2850 create_partial_schedule (int ii, ddg_ptr g, int history)
2851 {
2852 partial_schedule_ptr ps = XNEW (struct partial_schedule);
2853 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2854 ps->rows_length = (int *) xcalloc (ii, sizeof (int));
2855 ps->reg_moves.create (0);
2856 ps->ii = ii;
2857 ps->history = history;
2858 ps->min_cycle = INT_MAX;
2859 ps->max_cycle = INT_MIN;
2860 ps->g = g;
2861
2862 return ps;
2863 }
2864
2865 /* Free the PS_INSNs in the rows array of the given partial schedule.
2866 ??? Consider caching the PS_INSNs. */
2867 static void
2868 free_ps_insns (partial_schedule_ptr ps)
2869 {
2870 int i;
2871
2872 for (i = 0; i < ps->ii; i++)
2873 {
2874 while (ps->rows[i])
2875 {
2876 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2877
2878 free (ps->rows[i]);
2879 ps->rows[i] = ps_insn;
2880 }
2881 ps->rows[i] = NULL;
2882 }
2883 }
2884
2885 /* Free all the memory allocated to the partial schedule. */
2886
2887 static void
2888 free_partial_schedule (partial_schedule_ptr ps)
2889 {
2890 ps_reg_move_info *move;
2891 unsigned int i;
2892
2893 if (!ps)
2894 return;
2895
2896 FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
2897 sbitmap_free (move->uses);
2898 ps->reg_moves.release ();
2899
2900 free_ps_insns (ps);
2901 free (ps->rows);
2902 free (ps->rows_length);
2903 free (ps);
2904 }
2905
2906 /* Clear the rows array with its PS_INSNs, and create a new one with
2907 NEW_II rows. */
2908
2909 static void
2910 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2911 {
2912 if (!ps)
2913 return;
2914 free_ps_insns (ps);
2915 if (new_ii == ps->ii)
2916 return;
2917 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2918 * sizeof (ps_insn_ptr));
2919 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2920 ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
2921 memset (ps->rows_length, 0, new_ii * sizeof (int));
2922 ps->ii = new_ii;
2923 ps->min_cycle = INT_MAX;
2924 ps->max_cycle = INT_MIN;
2925 }
2926
2927 /* Prints the partial schedule as an array of II rows; for each row,
2928 print the ids of the insns in it. */
2929 void
2930 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2931 {
2932 int i;
2933
2934 for (i = 0; i < ps->ii; i++)
2935 {
2936 ps_insn_ptr ps_i = ps->rows[i];
2937
2938 fprintf (dump, "\n[ROW %d ]: ", i);
2939 while (ps_i)
2940 {
2941 rtx_insn *insn = ps_rtl_insn (ps, ps_i->id);
2942
2943 if (JUMP_P (insn))
2944 fprintf (dump, "%d (branch), ", INSN_UID (insn));
2945 else
2946 fprintf (dump, "%d, ", INSN_UID (insn));
2947
2948 ps_i = ps_i->next_in_row;
2949 }
2950 }
2951 }
2952
2953 /* Creates an object of PS_INSN and initializes it to the given parameters. */
2954 static ps_insn_ptr
2955 create_ps_insn (int id, int cycle)
2956 {
2957 ps_insn_ptr ps_i = XNEW (struct ps_insn);
2958
2959 ps_i->id = id;
2960 ps_i->next_in_row = NULL;
2961 ps_i->prev_in_row = NULL;
2962 ps_i->cycle = cycle;
2963
2964 return ps_i;
2965 }
2966
2967
2968 /* Removes the given PS_INSN from the partial schedule. */
2969 static void
2970 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2971 {
2972 int row;
2973
2974 gcc_assert (ps && ps_i);
2975
2976 row = SMODULO (ps_i->cycle, ps->ii);
2977 if (! ps_i->prev_in_row)
2978 {
2979 gcc_assert (ps_i == ps->rows[row]);
2980 ps->rows[row] = ps_i->next_in_row;
2981 if (ps->rows[row])
2982 ps->rows[row]->prev_in_row = NULL;
2983 }
2984 else
2985 {
2986 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2987 if (ps_i->next_in_row)
2988 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2989 }
2990
2991 ps->rows_length[row] -= 1;
2992 free (ps_i);
2993 return;
2994 }
2995
2996 /* Unlike what the literature describes for modulo scheduling (which focuses
2997 on VLIW machines), the order of the instructions inside a cycle is
2998 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2999 where the current instruction should go relative to the already
3000 scheduled instructions in the given cycle. Go over these
3001 instructions and find the first possible column to put it in. */
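/* For example (hypothetical row contents): if the row already holds A, X, B,
   with A in MUST_PRECEDE and B in MUST_FOLLOW, the new instruction is placed
   right after A, giving A, new, X, B; if instead B appeared before A, no valid
   column would exist. */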
3002 static bool
3003 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3004 sbitmap must_precede, sbitmap must_follow)
3005 {
3006 ps_insn_ptr next_ps_i;
3007 ps_insn_ptr first_must_follow = NULL;
3008 ps_insn_ptr last_must_precede = NULL;
3009 ps_insn_ptr last_in_row = NULL;
3010 int row;
3011
3012 if (! ps_i)
3013 return false;
3014
3015 row = SMODULO (ps_i->cycle, ps->ii);
3016
3017 /* Find the first must follow and the last must precede
3018 and insert the node immediately after the must precede
3019 but make sure that there is no must follow after it. */
3020 for (next_ps_i = ps->rows[row];
3021 next_ps_i;
3022 next_ps_i = next_ps_i->next_in_row)
3023 {
3024 if (must_follow
3025 && bitmap_bit_p (must_follow, next_ps_i->id)
3026 && ! first_must_follow)
3027 first_must_follow = next_ps_i;
3028 if (must_precede && bitmap_bit_p (must_precede, next_ps_i->id))
3029 {
3030 /* If we have already met a node that must follow, then
3031 there is no possible column. */
3032 if (first_must_follow)
3033 return false;
3034 else
3035 last_must_precede = next_ps_i;
3036 }
3037 /* The closing branch must be the last in the row. */
3038 if (must_precede
3039 && bitmap_bit_p (must_precede, next_ps_i->id)
3040 && JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
3041 return false;
3042
3043 last_in_row = next_ps_i;
3044 }
3045
3046 /* The closing branch is scheduled as well. Make sure there is no
3047 dependent instruction after it as the branch should be the last
3048 instruction in the row. */
3049 if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
3050 {
3051 if (first_must_follow)
3052 return false;
3053 if (last_in_row)
3054 {
3055 /* Make the branch the last in the row. New instructions
3056 will be inserted at the beginning of the row or after the
3057 last must_precede instruction thus the branch is guaranteed
3058 to remain the last instruction in the row. */
3059 last_in_row->next_in_row = ps_i;
3060 ps_i->prev_in_row = last_in_row;
3061 ps_i->next_in_row = NULL;
3062 }
3063 else
3064 ps->rows[row] = ps_i;
3065 return true;
3066 }
3067
3068 /* Now insert the node after the last must_precede node (or at the row start if there is none). */
3069
3070 if (! last_must_precede)
3071 {
3072 ps_i->next_in_row = ps->rows[row];
3073 ps_i->prev_in_row = NULL;
3074 if (ps_i->next_in_row)
3075 ps_i->next_in_row->prev_in_row = ps_i;
3076 ps->rows[row] = ps_i;
3077 }
3078 else
3079 {
3080 ps_i->next_in_row = last_must_precede->next_in_row;
3081 last_must_precede->next_in_row = ps_i;
3082 ps_i->prev_in_row = last_must_precede;
3083 if (ps_i->next_in_row)
3084 ps_i->next_in_row->prev_in_row = ps_i;
3085 }
3086
3087 return true;
3088 }
3089
3090 /* Advances the PS_INSN one column in its current row; returns false
3091 on failure and true on success. Bit N is set in MUST_FOLLOW if
3092 the node with cuid N must come after the node pointed to by
3093 PS_I when scheduled in the same cycle. */
3094 static int
3095 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3096 sbitmap must_follow)
3097 {
3098 ps_insn_ptr prev, next;
3099 int row;
3100
3101 if (!ps || !ps_i)
3102 return false;
3103
3104 row = SMODULO (ps_i->cycle, ps->ii);
3105
3106 if (! ps_i->next_in_row)
3107 return false;
3108
3109 /* Check if next_in_row is dependent on ps_i, both having the same sched
3110 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
3111 if (must_follow && bitmap_bit_p (must_follow, ps_i->next_in_row->id))
3112 return false;
3113
3114 /* Advance PS_I over its next_in_row in the doubly linked list. */
3115 prev = ps_i->prev_in_row;
3116 next = ps_i->next_in_row;
3117
3118 if (ps_i == ps->rows[row])
3119 ps->rows[row] = next;
3120
3121 ps_i->next_in_row = next->next_in_row;
3122
3123 if (next->next_in_row)
3124 next->next_in_row->prev_in_row = ps_i;
3125
3126 next->next_in_row = ps_i;
3127 ps_i->prev_in_row = next;
3128
3129 next->prev_in_row = prev;
3130 if (prev)
3131 prev->next_in_row = next;
3132
3133 return true;
3134 }
3135
3136 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
3137 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
3138 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must come
3139 before/after (respectively) the node pointed to by PS_I when scheduled
3140 in the same cycle. */
3141 static ps_insn_ptr
3142 add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
3143 sbitmap must_precede, sbitmap must_follow)
3144 {
3145 ps_insn_ptr ps_i;
3146 int row = SMODULO (cycle, ps->ii);
3147
3148 if (ps->rows_length[row] >= issue_rate)
3149 return NULL;
3150
3151 ps_i = create_ps_insn (id, cycle);
3152
3153 /* Finds and inserts PS_I according to MUST_FOLLOW and
3154 MUST_PRECEDE. */
3155 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
3156 {
3157 free (ps_i);
3158 return NULL;
3159 }
3160
3161 ps->rows_length[row] += 1;
3162 return ps_i;
3163 }
3164
3165 /* Advance time one cycle. Assumes DFA is being used. */
3166 static void
3167 advance_one_cycle (void)
3168 {
3169 if (targetm.sched.dfa_pre_cycle_insn)
3170 state_transition (curr_state,
3171 targetm.sched.dfa_pre_cycle_insn ());
3172
3173 state_transition (curr_state, NULL);
3174
3175 if (targetm.sched.dfa_post_cycle_insn)
3176 state_transition (curr_state,
3177 targetm.sched.dfa_post_cycle_insn ());
3178 }
3179
3180
3181
3182 /* Checks if PS has resource conflicts according to DFA, starting from
3183 FROM cycle to TO cycle; returns true if there are conflicts and false
3184 if there are no conflicts. Assumes DFA is being used. */
3185 static int
3186 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
3187 {
3188 int cycle;
3189
3190 state_reset (curr_state);
3191
3192 for (cycle = from; cycle <= to; cycle++)
3193 {
3194 ps_insn_ptr crr_insn;
3195 /* Holds the remaining issue slots in the current row. */
3196 int can_issue_more = issue_rate;
3197
3198 /* Walk through the DFA for the current row. */
3199 for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
3200 crr_insn;
3201 crr_insn = crr_insn->next_in_row)
3202 {
3203 rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
3204
3205 if (!NONDEBUG_INSN_P (insn))
3206 continue;
3207
3208 /* Check if there is room for the current insn. */
3209 if (!can_issue_more || state_dead_lock_p (curr_state))
3210 return true;
3211
3212 /* Update the DFA state and return with failure if the DFA found
3213 resource conflicts. */
3214 if (state_transition (curr_state, insn) >= 0)
3215 return true;
3216
3217 if (targetm.sched.variable_issue)
3218 can_issue_more =
3219 targetm.sched.variable_issue (sched_dump, sched_verbose,
3220 insn, can_issue_more);
3221 /* A naked CLOBBER or USE generates no instruction, so don't
3222 let them consume issue slots. */
3223 else if (GET_CODE (PATTERN (insn)) != USE
3224 && GET_CODE (PATTERN (insn)) != CLOBBER)
3225 can_issue_more--;
3226 }
3227
3228 /* Advance the DFA to the next cycle. */
3229 advance_one_cycle ();
3230 }
3231 return false;
3232 }
3233
3234 /* Checks if the given node causes resource conflicts when added to PS at
3235 cycle C. If not the node is added to PS and returned; otherwise zero
3236 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
3237 cuid N must come before/after (respectively) the node pointed to by
3238 PS_I when scheduled in the same cycle. */
3239 ps_insn_ptr
3240 ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
3241 int c, sbitmap must_precede,
3242 sbitmap must_follow)
3243 {
3244 int has_conflicts = 0;
3245 ps_insn_ptr ps_i;
3246
3247 /* First add the node to the PS; if this succeeds, check for
3248 conflicts, trying different issue slots in the same row. */
3249 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
3250 return NULL; /* Failed to insert the node at the given cycle. */
3251
3252 has_conflicts = ps_has_conflicts (ps, c, c)
3253 || (ps->history > 0
3254 && ps_has_conflicts (ps,
3255 c - ps->history,
3256 c + ps->history));
3257
3258 /* Try different issue slots to find one that the given node can be
3259 scheduled in without conflicts. */
3260 while (has_conflicts)
3261 {
3262 if (! ps_insn_advance_column (ps, ps_i, must_follow))
3263 break;
3264 has_conflicts = ps_has_conflicts (ps, c, c)
3265 || (ps->history > 0
3266 && ps_has_conflicts (ps,
3267 c - ps->history,
3268 c + ps->history));
3269 }
3270
3271 if (has_conflicts)
3272 {
3273 remove_node_from_ps (ps, ps_i);
3274 return NULL;
3275 }
3276
3277 ps->min_cycle = MIN (ps->min_cycle, c);
3278 ps->max_cycle = MAX (ps->max_cycle, c);
3279 return ps_i;
3280 }
3281
3282 /* Calculate the stage count of the partial schedule PS. The calculation
3283 takes into account the rotation amount passed in ROTATION_AMOUNT. */
3284 int
3285 calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
3286 {
3287 int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
3288 int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
3289 int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
3290
3291 /* The calculation of stage count is done by adding the number of stages
3292 before cycle zero and after cycle zero. */
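/* E.g. (illustrative): a schedule spanning cycles -3 .. 5 with ii == 4 uses
   one stage before cycle zero (cycles -4 .. -1) and two stages from cycle zero
   on (0 .. 3 and 4 .. 7), giving a stage count of 3. */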
3293 stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
3294
3295 return stage_count;
3296 }
3297
3298 /* Rotate the rows of PS such that insns scheduled at time
3299 START_CYCLE will appear in row 0. Updates max/min_cycles. */
3300 void
3301 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
3302 {
3303 int i, row, backward_rotates;
3304 int last_row = ps->ii - 1;
3305
3306 if (start_cycle == 0)
3307 return;
3308
3309 backward_rotates = SMODULO (start_cycle, ps->ii);
3310
3311 /* Revisit later and optimize this into a single loop. */
3312 for (i = 0; i < backward_rotates; i++)
3313 {
3314 ps_insn_ptr first_row = ps->rows[0];
3315 int first_row_length = ps->rows_length[0];
3316
3317 for (row = 0; row < last_row; row++)
3318 {
3319 ps->rows[row] = ps->rows[row + 1];
3320 ps->rows_length[row] = ps->rows_length[row + 1];
3321 }
3322
3323 ps->rows[last_row] = first_row;
3324 ps->rows_length[last_row] = first_row_length;
3325 }
3326
3327 ps->max_cycle -= start_cycle;
3328 ps->min_cycle -= start_cycle;
3329 }
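/* For illustration (assumed values): with ii == 4 and start_cycle == 6,
   backward_rotates is SMODULO (6, 4) == 2, so rows 0 .. 3 become the former
   rows 2, 3, 0, 1 and both max_cycle and min_cycle are decreased by 6. */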
3330
3331 #endif /* INSN_SCHEDULING */
3332 \f
3333 /* Run instruction scheduler. */
3334 /* Perform SMS modulo scheduling. */
3335
3336 namespace {
3337
3338 const pass_data pass_data_sms =
3339 {
3340 RTL_PASS, /* type */
3341 "sms", /* name */
3342 OPTGROUP_NONE, /* optinfo_flags */
3343 TV_SMS, /* tv_id */
3344 0, /* properties_required */
3345 0, /* properties_provided */
3346 0, /* properties_destroyed */
3347 0, /* todo_flags_start */
3348 TODO_df_finish, /* todo_flags_finish */
3349 };
3350
3351 class pass_sms : public rtl_opt_pass
3352 {
3353 public:
3354 pass_sms (gcc::context *ctxt)
3355 : rtl_opt_pass (pass_data_sms, ctxt)
3356 {}
3357
3358 /* opt_pass methods: */
3359 virtual bool gate (function *)
3360 {
3361 return (optimize > 0 && flag_modulo_sched);
3362 }
3363
3364 virtual unsigned int execute (function *);
3365
3366 }; // class pass_sms
3367
3368 unsigned int
3369 pass_sms::execute (function *fun ATTRIBUTE_UNUSED)
3370 {
3371 #ifdef INSN_SCHEDULING
3372 basic_block bb;
3373
3374 /* Collect loop information to be used in SMS. */
3375 cfg_layout_initialize (0);
3376 sms_schedule ();
3377
3378 /* Update the life information, because we add pseudos. */
3379 max_regno = max_reg_num ();
3380
3381 /* Finalize layout changes. */
3382 FOR_EACH_BB_FN (bb, fun)
3383 if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
3384 bb->aux = bb->next_bb;
3385 free_dominance_info (CDI_DOMINATORS);
3386 cfg_layout_finalize ();
3387 #endif /* INSN_SCHEDULING */
3388 return 0;
3389 }
3390
3391 } // anon namespace
3392
3393 rtl_opt_pass *
3394 make_pass_sms (gcc::context *ctxt)
3395 {
3396 return new pass_sms (ctxt);
3397 }