5b6269ed 1/* Swing Modulo Scheduling implementation.
d353bf18 2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
5b6269ed 3 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
8c4c00c1 9Software Foundation; either version 3, or (at your option) any later
5b6269ed 10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
8c4c00c1 18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
5b6269ed 20
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
9ef16211 25#include "backend.h"
26#include "tree.h"
5b6269ed 27#include "rtl.h"
9ef16211 28#include "df.h"
29#include "diagnostic-core.h"
5b6269ed 30#include "tm_p.h"
5b6269ed 31#include "regs.h"
886c1262 32#include "profile.h"
5b6269ed 33#include "flags.h"
34#include "insn-config.h"
35#include "insn-attr.h"
36#include "except.h"
5b6269ed 37#include "recog.h"
94ea8568 38#include "cfgrtl.h"
5b6269ed 39#include "sched-int.h"
40#include "target.h"
5b6269ed 41#include "cfgloop.h"
b20a8bb4 42#include "alias.h"
34517c64 43#include "insn-codes.h"
44#include "optabs.h"
d53441c8 45#include "expmed.h"
46#include "dojump.h"
47#include "explow.h"
48#include "calls.h"
49#include "emit-rtl.h"
50#include "varasm.h"
51#include "stmt.h"
5b6269ed 52#include "expr.h"
53#include "params.h"
54#include "gcov-io.h"
5b6269ed 55#include "ddg.h"
77fce4cd 56#include "tree-pass.h"
76aaf9f0 57#include "dbgcnt.h"
f7b51129 58#include "loop-unroll.h"
5b6269ed 59
07cba988 60#ifdef INSN_SCHEDULING
5b6269ed 61
62/* This file contains the implementation of the Swing Modulo Scheduler,
63 described in the following references:
64 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero, and J. Eckhardt.
65 Lifetime-sensitive modulo scheduling in a production environment.
66 IEEE Trans. on Comps., 50(3), March 2001
67 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
68 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
365db11e 69 PACT '96, pages 80-87, October 1996 (Boston, Massachusetts, USA).
5b6269ed 70
71 The basic structure is:
72 1. Build a data-dependence graph (DDG) for each loop.
73 2. Use the DDG to order the insns of a loop (not in topological order
74 necessarily, but rather) trying to place each insn after all its
75 predecessors _or_ after all its successors.
76 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
77 4. Use the ordering to perform list-scheduling of the loop:
78 1. Set II = MII. We will try to schedule the loop within II cycles.
79 2. Try to schedule the insns one by one according to the ordering.
80 For each insn compute an interval of cycles by considering already-
81 scheduled preds and succs (and associated latencies); try to place
82 the insn in the cycles of this window checking for potential
83 resource conflicts (using the DFA interface).
84 Note: this is different from the cycle-scheduling of schedule_insns;
85 here the insns are not scheduled monotonically top-down (nor bottom-
86 up).
87 3. If failed in scheduling all insns - bump II++ and try again, unless
0870fd6e 88 II reaches an upper bound MaxII, in which case report failure.
5b6269ed 89 5. If we succeeded in scheduling the loop within II cycles, we now
90 generate prolog and epilog, decrease the counter of the loop, and
91 perform modulo variable expansion for live ranges that span more than
92 II cycles (i.e. use register copies to prevent a def from overwriting
93 itself before reaching the use).
5b6269ed 94
abbfd803 95 SMS works with countable loops (1) whose control part can be easily
96 decoupled from the rest of the loop and (2) whose loop count can
97 be easily adjusted. This is because we peel a constant number of
98 iterations into a prologue and epilogue for which we want to avoid
99 emitting the control part, and a kernel which is to iterate that
100 constant number of iterations less than the original loop. So the
101 control part should be a set of insns clearly identified and having
102 its own iv, not otherwise used in the loop (at least for now), which
7bfc9b7c 103 initializes a register before the loop to the number of iterations.
104 Currently SMS relies on the do-loop pattern to recognize such loops,
105 where (1) the control part comprises all insns defining and/or
106 using a certain 'count' register and (2) the loop count can be
48e1416a 107 adjusted by modifying this register prior to the loop.
7bfc9b7c 108 TODO: Rely on cfgloop analysis instead. */
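
/* Illustrative sketch only (not part of the pass): the II search of step 4
   above can be pictured roughly as the pseudo code below, where
   try_schedule_with_ii is a hypothetical stand-in for the scheduling
   attempt performed by sms_schedule_by_order:

       ii = mii;
       while (ii <= maxii && ! try_schedule_with_ii (g, node_order, ii))
         ii++;
       if (ii > maxii)
         give up on this loop;              // leave it to the normal schedulers
       else
         emit prolog, kernel and epilog;    // step 5 above
*/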
5b6269ed 109\f
110/* This page defines partial-schedule structures and functions for
111 modulo scheduling. */
112
113typedef struct partial_schedule *partial_schedule_ptr;
114typedef struct ps_insn *ps_insn_ptr;
115
116/* The minimum (absolute) cycle that a node of ps was scheduled in. */
117#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
118
119/* The maximum (absolute) cycle that a node of ps was scheduled in. */
120#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
121
122/* Perform signed modulo, always returning a non-negative value. */
123#define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
124
125/* The number of different iterations the nodes in ps span, assuming
126 the stage boundaries are placed efficiently. */
6f7f081f 127#define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
128 + 1 + ii - 1) / ii)
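/* For example (illustrative): with min_cycle = -2, max_cycle = 7 and ii = 4
   the nodes span 10 cycles, so CALC_STAGE_COUNT gives (7 - (-2) + 1 + 3) / 4
   = 3 stages, i.e. the ceiling of 10 / 4. */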
129/* The stage count of ps. */
130#define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
5b6269ed 131
5b6269ed 132/* A single instruction in the partial schedule. */
133struct ps_insn
134{
0b79ae60 135 /* Identifies the instruction to be scheduled. Values smaller than
136 the ddg's num_nodes refer directly to ddg nodes. A value of
137 X - num_nodes refers to register move X. */
7282ef12 138 int id;
5b6269ed 139
140 /* The (absolute) cycle in which the PS instruction is scheduled.
141 Same as SCHED_TIME (node). */
142 int cycle;
143
144 /* The next/prev PS_INSN in the same row. */
145 ps_insn_ptr next_in_row,
146 prev_in_row;
147
5b6269ed 148};
149
0b79ae60 150/* Information about a register move that has been added to a partial
151 schedule. */
152struct ps_reg_move_info
153{
154 /* The source of the move is defined by the ps_insn with id DEF.
155 The destination is used by the ps_insns with the ids in USES. */
156 int def;
157 sbitmap uses;
158
159 /* The original form of USES' instructions used OLD_REG, but they
160 should now use NEW_REG. */
161 rtx old_reg;
162 rtx new_reg;
163
e9df34d5 164 /* The number of consecutive stages that the move occupies. */
165 int num_consecutive_stages;
166
0b79ae60 167 /* An instruction that sets NEW_REG to the correct value. The first
168 move associated with DEF will have an rhs of OLD_REG; later moves
169 use the result of the previous move. */
bc69df91 170 rtx_insn *insn;
0b79ae60 171};
172
173typedef struct ps_reg_move_info ps_reg_move_info;
0b79ae60 174
5b6269ed 175/* Holds the partial schedule as an array of II rows. Each entry of the
176 array points to a linked list of PS_INSNs, which represents the
177 instructions that are scheduled for that row. */
178struct partial_schedule
179{
180 int ii; /* Number of rows in the partial schedule. */
181 int history; /* Threshold for conflict checking using DFA. */
182
183 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
184 ps_insn_ptr *rows;
185
0b79ae60 186 /* All the moves added for this partial schedule. Index X has
187 a ps_insn id of X + g->num_nodes. */
f1f41a6c 188 vec<ps_reg_move_info> reg_moves;
0b79ae60 189
4815ed0c 190 /* rows_length[i] holds the number of instructions in the row.
191 It is used only (as an optimization) to back off quickly from
192 trying to schedule a node in a full row; that is, to avoid running
193 through futile DFA state transitions. */
194 int *rows_length;
195
5b6269ed 196 /* The earliest absolute cycle of an insn in the partial schedule. */
197 int min_cycle;
198
199 /* The latest absolute cycle of an insn in the partial schedule. */
200 int max_cycle;
201
202 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
6f7f081f 203
204 int stage_count; /* The stage count of the partial schedule. */
5b6269ed 205};
206
48e1416a 207
1bed606a 208static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
209static void free_partial_schedule (partial_schedule_ptr);
210static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
5b6269ed 211void print_partial_schedule (partial_schedule_ptr, FILE *);
76387907 212static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
daca8c86 213static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
7282ef12 214 int, int, sbitmap, sbitmap);
daca8c86 215static void rotate_partial_schedule (partial_schedule_ptr, int);
b6a75366 216void set_row_column_for_ps (partial_schedule_ptr);
76387907 217static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
218static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
b6a75366 219
5b6269ed 220\f
365db11e 221/* This page defines constants and structures for the modulo scheduling
5b6269ed 222 driver. */
223
c0fb94bd 224static int sms_order_nodes (ddg_ptr, int, int *, int *);
5b6269ed 225static void set_node_sched_params (ddg_ptr);
3f5be5f4 226static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
4cd001d5 227static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
c0fb94bd 228static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
7bfc9b7c 229 rtx, rtx);
a5ea7ace 230static int calculate_stage_count (partial_schedule_ptr, int);
231static void calculate_must_precede_follow (ddg_node_ptr, int, int,
232 int, int, sbitmap, sbitmap, sbitmap);
233static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
234 sbitmap, int, int *, int *, int *);
7282ef12 235static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
236 sbitmap, int *, sbitmap, sbitmap);
9bf2b489 237static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
a5ea7ace 238
7282ef12 239#define NODE_ASAP(node) ((node)->aux.count)
240
f1f41a6c 241#define SCHED_PARAMS(x) (&node_sched_param_vec[x])
7282ef12 242#define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
7282ef12 243#define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
244#define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
245#define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
5b6269ed 246
247/* The scheduling parameters held for each node. */
248typedef struct node_sched_params
249{
7282ef12 250 int time; /* The absolute scheduling cycle. */
5b6269ed 251
5b6269ed 252 int row; /* Holds time % ii. */
253 int stage; /* Holds time / ii. */
254
255 /* The column of a node inside the ps. If nodes u, v are on the same row,
365db11e 256 u will precede v if column (u) < column (v). */
5b6269ed 257 int column;
258} *node_sched_params_ptr;
259
0b79ae60 260typedef struct node_sched_params node_sched_params;
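
/* A small worked example (illustrative only): with ii = 3 and stages aligned
   on cycle 0, a node scheduled at time = 7 has row = 7 % 3 = 1 and
   stage = 7 / 3 = 2; two nodes sharing row 1 are ordered within the row
   by their columns. */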
5b6269ed 261\f
262/* The following three functions are copied from the current scheduler
2c763ed4 263 code in order to use sched_analyze() for computing the dependencies.
5b6269ed 264 They are used when initializing the sched_info structure. */
265static const char *
b24ef467 266sms_print_insn (const rtx_insn *insn, int aligned ATTRIBUTE_UNUSED)
5b6269ed 267{
268 static char tmp[80];
269
270 sprintf (tmp, "i%4d", INSN_UID (insn));
271 return tmp;
272}
273
5b6269ed 274static void
275compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
6aed13f1 276 regset used ATTRIBUTE_UNUSED)
5b6269ed 277{
278}
279
e1ab7874 280static struct common_sched_info_def sms_common_sched_info;
281
282static struct sched_deps_info_def sms_sched_deps_info =
283 {
284 compute_jump_reg_dependencies,
285 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
286 NULL,
287 0, 0, 0
288 };
289
290static struct haifa_sched_info sms_sched_info =
5b6269ed 291{
292 NULL,
293 NULL,
294 NULL,
295 NULL,
296 NULL,
297 sms_print_insn,
6a1cdb4d 298 NULL,
4db82bc9 299 NULL, /* insn_finishes_block_p */
5b6269ed 300 NULL, NULL,
301 NULL, NULL,
e1ab7874 302 0, 0,
4d64d9a4 303
d2412f57 304 NULL, NULL, NULL, NULL,
e2f4a6ff 305 NULL, NULL,
4d64d9a4 306 0
5b6269ed 307};
308
0b79ae60 309/* Partial schedule instruction ID in PS is a register move. Return
310 information about it. */
311static struct ps_reg_move_info *
312ps_reg_move (partial_schedule_ptr ps, int id)
313{
314 gcc_checking_assert (id >= ps->g->num_nodes);
f1f41a6c 315 return &ps->reg_moves[id - ps->g->num_nodes];
0b79ae60 316}
317
7282ef12 318/* Return the rtl instruction that is being scheduled by partial schedule
319 instruction ID, which belongs to schedule PS. */
bc69df91 320static rtx_insn *
7282ef12 321ps_rtl_insn (partial_schedule_ptr ps, int id)
322{
0b79ae60 323 if (id < ps->g->num_nodes)
324 return ps->g->nodes[id].insn;
325 else
326 return ps_reg_move (ps, id)->insn;
7282ef12 327}
328
9d75589a 329/* Partial schedule instruction ID, which belongs to PS, occurred in
0b79ae60 330 the original (unscheduled) loop. Return the first instruction
331 in the loop that was associated with ps_rtl_insn (PS, ID).
332 If the instruction had some notes before it, this is the first
333 of those notes. */
57c26b3a 334static rtx_insn *
7282ef12 335ps_first_note (partial_schedule_ptr ps, int id)
336{
0b79ae60 337 gcc_assert (id < ps->g->num_nodes);
7282ef12 338 return ps->g->nodes[id].first_note;
339}
340
e9df34d5 341/* Return the number of consecutive stages that are occupied by
342 partial schedule instruction ID in PS. */
343static int
344ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
345{
346 if (id < ps->g->num_nodes)
347 return 1;
348 else
349 return ps_reg_move (ps, id)->num_consecutive_stages;
350}
351
7bfc9b7c 352/* Given HEAD and TAIL, the first and last insns in a loop,
 353 return the register which controls the loop. Return zero if that
 354 register has more than one occurrence in the loop besides the control
 355 part, or if the do-loop pattern is not of the form we expect. */
5b6269ed 356static rtx
4177c695 357doloop_register_get (rtx_insn *head, rtx_insn *tail)
5b6269ed 358{
bc69df91 359 rtx reg, condition;
360 rtx_insn *insn, *first_insn_not_to_check;
5b6269ed 361
7bfc9b7c 362 if (!JUMP_P (tail))
5b6269ed 363 return NULL_RTX;
364
4177c695 365 if (!targetm.code_for_doloop_end)
366 return NULL_RTX;
367
7bfc9b7c 368 /* TODO: Free SMS's dependence on doloop_condition_get. */
369 condition = doloop_condition_get (tail);
fc2abfd3 370 if (! condition)
5b6269ed 371 return NULL_RTX;
372
fc2abfd3 373 if (REG_P (XEXP (condition, 0)))
374 reg = XEXP (condition, 0);
375 else if (GET_CODE (XEXP (condition, 0)) == PLUS
376 && REG_P (XEXP (XEXP (condition, 0), 0)))
377 reg = XEXP (XEXP (condition, 0), 0);
378 else
379 gcc_unreachable ();
5b6269ed 380
7bfc9b7c 381 /* Check that the COUNT_REG has no other occurrences in the loop
382 until the decrement. We assume the control part consists of
383 either a single (parallel) branch-on-count or a (non-parallel)
384 branch immediately preceded by a single (decrement) insn. */
11dd7ac1 385 first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
cf961a94 386 : prev_nondebug_insn (tail));
11dd7ac1 387
388 for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
9973add0 389 if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
7bfc9b7c 390 {
391 if (dump_file)
11dd7ac1 392 {
393 fprintf (dump_file, "SMS count_reg found ");
394 print_rtl_single (dump_file, reg);
395 fprintf (dump_file, " outside control in insn:\n");
396 print_rtl_single (dump_file, insn);
397 }
7bfc9b7c 398
399 return NULL_RTX;
400 }
11dd7ac1 401
fc2abfd3 402 return reg;
5b6269ed 403}
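
/* For illustration only: on many targets the branch-on-count control part
   recognized above looks roughly like

     (parallel [(set (pc) (if_then_else (ne (reg:SI 100) (const_int 1))
                                        (label_ref 23)
                                        (pc)))
                (set (reg:SI 100) (plus:SI (reg:SI 100) (const_int -1)))])

   (register number and label are made up), in which case the register
   returned by doloop_register_get is (reg:SI 100). */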
404
405/* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
406 that the number of iterations is a compile-time constant. If so,
6fe7b8c2 407 return the rtx_insn that sets COUNT_REG to a constant, and set COUNT to
5b6269ed 408 this constant. Otherwise return 0. */
6fe7b8c2 409static rtx_insn *
5b6269ed 410const_iteration_count (rtx count_reg, basic_block pre_header,
3a4303e7 411 int64_t * count)
5b6269ed 412{
6fe7b8c2 413 rtx_insn *insn;
414 rtx_insn *head, *tail;
a337a404 415
416 if (! pre_header)
6fe7b8c2 417 return NULL;
a337a404 418
6a1cdb4d 419 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
5b6269ed 420
421 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
9845d120 422 if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
5b6269ed 423 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
424 {
425 rtx pat = single_set (insn);
426
971ba038 427 if (CONST_INT_P (SET_SRC (pat)))
5b6269ed 428 {
429 *count = INTVAL (SET_SRC (pat));
430 return insn;
431 }
432
6fe7b8c2 433 return NULL;
5b6269ed 434 }
435
6fe7b8c2 436 return NULL;
5b6269ed 437}
438
439/* A very simple resource-based lower bound on the initiation interval.
440 ??? Improve the accuracy of this bound by considering the
441 utilization of various units. */
442static int
443res_MII (ddg_ptr g)
444{
d52fd16a 445 if (targetm.sched.sms_res_mii)
48e1416a 446 return targetm.sched.sms_res_mii (g);
447
9845d120 448 return ((g->num_nodes - g->num_debug) / issue_rate);
5b6269ed 449}
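
/* For example (illustrative): a loop body of 12 non-debug insns on a target
   with an issue rate of 4 and no sms_res_mii hook gives a resource-based
   MII of 3. */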
450
451
0b79ae60 452/* A vector that contains the sched data for each ps_insn. */
f1f41a6c 453static vec<node_sched_params> node_sched_param_vec;
5b6269ed 454
7282ef12 455/* Allocate sched_params for each node and initialize it. */
5b6269ed 456static void
457set_node_sched_params (ddg_ptr g)
458{
f1f41a6c 459 node_sched_param_vec.truncate (0);
460 node_sched_param_vec.safe_grow_cleared (g->num_nodes);
5b6269ed 461}
462
e9df34d5 463/* Make sure that node_sched_param_vec has an entry for every move in PS. */
464static void
465extend_node_sched_params (partial_schedule_ptr ps)
466{
f1f41a6c 467 node_sched_param_vec.safe_grow_cleared (ps->g->num_nodes
468 + ps->reg_moves.length ());
e9df34d5 469}
470
471/* Update the sched_params (time, row and stage) for node U using the II,
472 the CYCLE of U and MIN_CYCLE.
473 We're not simply taking the following
474 SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
475 because the stages may not be aligned on cycle 0. */
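/* A worked example (illustrative only): with II = 4 and MIN_CYCLE = -3,
   sc_until_cycle_zero = CALC_STAGE_COUNT (-1, -3, 4) = 1. A node scheduled
   in cycle 2 then gets row 2 and stage 1 + CALC_STAGE_COUNT (2, 0, 4) - 1 = 1,
   while a node scheduled in cycle -2 gets row 2 and stage
   1 - CALC_STAGE_COUNT (-1, -2, 4) = 0. */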
476static void
477update_node_sched_params (int u, int ii, int cycle, int min_cycle)
478{
479 int sc_until_cycle_zero;
480 int stage;
481
482 SCHED_TIME (u) = cycle;
483 SCHED_ROW (u) = SMODULO (cycle, ii);
484
485 /* The calculation of stage count is done adding the number
486 of stages before cycle zero and after cycle zero. */
487 sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
488
489 if (SCHED_TIME (u) < 0)
490 {
491 stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
492 SCHED_STAGE (u) = sc_until_cycle_zero - stage;
493 }
494 else
495 {
496 stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
497 SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
498 }
499}
500
5b6269ed 501static void
0b79ae60 502print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
5b6269ed 503{
504 int i;
505
3f5be5f4 506 if (! file)
a337a404 507 return;
5b6269ed 508 for (i = 0; i < num_nodes; i++)
509 {
7282ef12 510 node_sched_params_ptr nsp = SCHED_PARAMS (i);
5b6269ed 511
896dbaed 512 fprintf (file, "Node = %d; INSN = %d\n", i,
0b79ae60 513 INSN_UID (ps_rtl_insn (ps, i)));
514 fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
3f5be5f4 515 fprintf (file, " time = %d:\n", nsp->time);
e9df34d5 516 fprintf (file, " stage = %d:\n", nsp->stage);
517 }
518}
519
520/* Set SCHED_COLUMN for each instruction in row ROW of PS. */
521static void
522set_columns_for_row (partial_schedule_ptr ps, int row)
523{
524 ps_insn_ptr cur_insn;
525 int column;
526
527 column = 0;
528 for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
529 SCHED_COLUMN (cur_insn->id) = column++;
530}
531
532/* Set SCHED_COLUMN for each instruction in PS. */
533static void
534set_columns_for_ps (partial_schedule_ptr ps)
535{
536 int row;
537
538 for (row = 0; row < ps->ii; row++)
539 set_columns_for_row (ps, row);
540}
0b79ae60 541
e9df34d5 542/* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
543 Its single predecessor has already been scheduled, as has its
544 ddg node successors. (The move may have also another move as its
545 successor, in which case that successor will be scheduled later.)
546
547 The move is part of a chain that satisfies register dependencies
548 between a producing ddg node and various consuming ddg nodes.
549 If some of these dependencies have a distance of 1 (meaning that
469e33a0 550 the use is upward-exposed) then DISTANCE1_USES is nonnull and
e9df34d5 551 contains the set of uses with distance-1 dependencies.
552 DISTANCE1_USES is null otherwise.
553
554 MUST_FOLLOW is a scratch bitmap that is big enough to hold
555 all current ps_insn ids.
556
557 Return true on success. */
558static bool
559schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
560 sbitmap distance1_uses, sbitmap must_follow)
561{
562 unsigned int u;
563 int this_time, this_distance, this_start, this_end, this_latency;
564 int start, end, c, ii;
565 sbitmap_iterator sbi;
566 ps_reg_move_info *move;
bc69df91 567 rtx_insn *this_insn;
e9df34d5 568 ps_insn_ptr psi;
569
570 move = ps_reg_move (ps, i_reg_move);
571 ii = ps->ii;
572 if (dump_file)
573 {
574 fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
575 ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
576 PS_MIN_CYCLE (ps));
577 print_rtl_single (dump_file, move->insn);
578 fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
579 fprintf (dump_file, "=========== =========== =====\n");
580 }
581
582 start = INT_MIN;
583 end = INT_MAX;
584
585 /* For dependencies of distance 1 between a producer ddg node A
586 and consumer ddg node B, we have a chain of dependencies:
587
588 A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
589
590 where Mi is the ith move. For dependencies of distance 0 between
591 a producer ddg node A and consumer ddg node C, we have a chain of
592 dependencies:
593
594 A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
595
596 where Mi' occupies the same position as Mi but occurs a stage later.
597 We can only schedule each move once, so if we have both types of
598 chain, we model the second as:
599
600 A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
601
602 First handle the dependencies between the previously-scheduled
603 predecessor and the move. */
604 this_insn = ps_rtl_insn (ps, move->def);
605 this_latency = insn_latency (this_insn, move->insn);
606 this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
607 this_time = SCHED_TIME (move->def) - this_distance * ii;
608 this_start = this_time + this_latency;
609 this_end = this_time + ii;
610 if (dump_file)
611 fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
612 this_start, this_end, SCHED_TIME (move->def),
613 INSN_UID (this_insn), this_latency, this_distance,
614 INSN_UID (move->insn));
615
616 if (start < this_start)
617 start = this_start;
618 if (end > this_end)
619 end = this_end;
620
621 /* Handle the dependencies between the move and previously-scheduled
622 successors. */
0d211963 623 EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, u, sbi)
e9df34d5 624 {
625 this_insn = ps_rtl_insn (ps, u);
626 this_latency = insn_latency (move->insn, this_insn);
08b7917c 627 if (distance1_uses && !bitmap_bit_p (distance1_uses, u))
e9df34d5 628 this_distance = -1;
629 else
630 this_distance = 0;
631 this_time = SCHED_TIME (u) + this_distance * ii;
632 this_start = this_time - ii;
633 this_end = this_time - this_latency;
634 if (dump_file)
635 fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
636 this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
637 this_latency, this_distance, INSN_UID (this_insn));
638
639 if (start < this_start)
640 start = this_start;
641 if (end > this_end)
642 end = this_end;
643 }
644
645 if (dump_file)
646 {
647 fprintf (dump_file, "----------- ----------- -----\n");
648 fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
649 }
650
53c5d9d4 651 bitmap_clear (must_follow);
08b7917c 652 bitmap_set_bit (must_follow, move->def);
e9df34d5 653
654 start = MAX (start, end - (ii - 1));
655 for (c = end; c >= start; c--)
656 {
657 psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
658 move->uses, must_follow);
659 if (psi)
660 {
661 update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
662 if (dump_file)
663 fprintf (dump_file, "\nScheduled register move INSN %d at"
664 " time %d, row %d\n\n", INSN_UID (move->insn), c,
665 SCHED_ROW (i_reg_move));
666 return true;
5b6269ed 667 }
668 }
e9df34d5 669
670 if (dump_file)
671 fprintf (dump_file, "\nNo available slot\n\n");
672
673 return false;
5b6269ed 674}
675
a337a404 676/*
677 Breaking intra-loop register anti-dependences:
678 Each intra-loop register anti-dependence implies a cross-iteration true
679 dependence of distance 1. Therefore, we can remove such false dependencies
680 and figure out if the partial schedule broke them by checking if (for a
681 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
682 if so generate a register move. The number of such moves is equal to:
683 nreg_moves = (SCHED_TIME (use) - SCHED_TIME (def)) / ii + 1
06b27565 684 - (0 if the partial schedule broke the dependence,
a337a404 685 1 if not).
686*/
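/* A worked example (illustrative only): with ii = 4, SCHED_TIME (def) = 2 and
   SCHED_TIME (use) = 10 for a broken anti-dependence, the formula above gives
   nreg_moves = (10 - 2) / 4 + 1 - 0 = 3, i.e. three register-copy insns are
   generated so that the value defined in one iteration survives until its
   cross-iteration use. */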
0b79ae60 687static bool
688schedule_reg_moves (partial_schedule_ptr ps)
5b6269ed 689{
690 ddg_ptr g = ps->g;
691 int ii = ps->ii;
692 int i;
693
694 for (i = 0; i < g->num_nodes; i++)
695 {
696 ddg_node_ptr u = &g->nodes[i];
697 ddg_edge_ptr e;
698 int nreg_moves = 0, i_reg_move;
5b6269ed 699 rtx prev_reg, old_reg;
0b79ae60 700 int first_move;
e9df34d5 701 int distances[2];
702 sbitmap must_follow;
703 sbitmap distance1_uses;
d1cae43e 704 rtx set = single_set (u->insn);
705
706 /* Skip instructions that do not set a register. */
707 if ((set && !REG_P (SET_DEST (set))))
708 continue;
709
5b6269ed 710 /* Compute the number of reg_moves needed for u, by looking at life
711 ranges started at u (excluding self-loops). */
e9df34d5 712 distances[0] = distances[1] = false;
5b6269ed 713 for (e = u->out; e; e = e->next_out)
714 if (e->type == TRUE_DEP && e->dest != e->src)
715 {
7282ef12 716 int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
717 - SCHED_TIME (e->src->cuid)) / ii;
5b6269ed 718
a337a404 719 if (e->distance == 1)
7282ef12 720 nreg_moves4e = (SCHED_TIME (e->dest->cuid)
721 - SCHED_TIME (e->src->cuid) + ii) / ii;
a337a404 722
365db11e 723 /* If dest precedes src in the schedule of the kernel, then dest
5b6269ed 724 will read before src writes and we can save one reg_copy. */
7282ef12 725 if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
726 && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
5b6269ed 727 nreg_moves4e--;
728
d1cae43e 729 if (nreg_moves4e >= 1)
730 {
731 /* !single_set instructions are not supported yet and
732 thus we do not except to encounter them in the loop
733 except from the doloop part. For the latter case
734 we assume no regmoves are generated as the doloop
735 instructions are tied to the branch with an edge. */
736 gcc_assert (set);
2917ecd3 737 /* If the instruction contains an auto-inc register then
738 validate that the regmove is being generated for the
739 target register rather than the inc'ed register. */
740 gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
d1cae43e 741 }
742
e9df34d5 743 if (nreg_moves4e)
744 {
745 gcc_assert (e->distance < 2);
746 distances[e->distance] = true;
747 }
5b6269ed 748 nreg_moves = MAX (nreg_moves, nreg_moves4e);
749 }
750
751 if (nreg_moves == 0)
752 continue;
753
0b79ae60 754 /* Create NREG_MOVES register moves. */
f1f41a6c 755 first_move = ps->reg_moves.length ();
756 ps->reg_moves.safe_grow_cleared (first_move + nreg_moves);
e9df34d5 757 extend_node_sched_params (ps);
0b79ae60 758
759 /* Record the moves associated with this node. */
760 first_move += ps->g->num_nodes;
0b79ae60 761
762 /* Generate each move. */
763 old_reg = prev_reg = SET_DEST (single_set (u->insn));
764 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
765 {
766 ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
767
768 move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
e9df34d5 769 move->uses = sbitmap_alloc (first_move + nreg_moves);
0b79ae60 770 move->old_reg = old_reg;
771 move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
e9df34d5 772 move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
f9a00e9e 773 move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg));
53c5d9d4 774 bitmap_clear (move->uses);
0b79ae60 775
776 prev_reg = move->new_reg;
777 }
778
e9df34d5 779 distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
780
5ce4a75a 781 if (distance1_uses)
782 bitmap_clear (distance1_uses);
783
5b6269ed 784 /* Every use of the register defined by node may require a different
785 copy of this register, depending on the time the use is scheduled.
0b79ae60 786 Record which uses require which move results. */
5b6269ed 787 for (e = u->out; e; e = e->next_out)
788 if (e->type == TRUE_DEP && e->dest != e->src)
789 {
7282ef12 790 int dest_copy = (SCHED_TIME (e->dest->cuid)
791 - SCHED_TIME (e->src->cuid)) / ii;
5b6269ed 792
a337a404 793 if (e->distance == 1)
7282ef12 794 dest_copy = (SCHED_TIME (e->dest->cuid)
795 - SCHED_TIME (e->src->cuid) + ii) / ii;
a337a404 796
7282ef12 797 if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
798 && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
5b6269ed 799 dest_copy--;
800
801 if (dest_copy)
0b79ae60 802 {
803 ps_reg_move_info *move;
5b6269ed 804
0b79ae60 805 move = ps_reg_move (ps, first_move + dest_copy - 1);
08b7917c 806 bitmap_set_bit (move->uses, e->dest->cuid);
e9df34d5 807 if (e->distance == 1)
08b7917c 808 bitmap_set_bit (distance1_uses, e->dest->cuid);
0b79ae60 809 }
810 }
e9df34d5 811
812 must_follow = sbitmap_alloc (first_move + nreg_moves);
813 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
814 if (!schedule_reg_move (ps, first_move + i_reg_move,
815 distance1_uses, must_follow))
816 break;
817 sbitmap_free (must_follow);
818 if (distance1_uses)
819 sbitmap_free (distance1_uses);
820 if (i_reg_move < nreg_moves)
821 return false;
0b79ae60 822 }
823 return true;
824}
5b6269ed 825
0b79ae60 826/* Emit the moves associated with PS. Apply the substitutions
827 associated with them. */
828static void
829apply_reg_moves (partial_schedule_ptr ps)
830{
831 ps_reg_move_info *move;
832 int i;
5b6269ed 833
f1f41a6c 834 FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
0b79ae60 835 {
836 unsigned int i_use;
837 sbitmap_iterator sbi;
5b6269ed 838
0d211963 839 EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, i_use, sbi)
0b79ae60 840 {
841 replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
842 df_insn_rescan (ps->g->nodes[i_use].insn);
5b6269ed 843 }
844 }
a5ea7ace 845}
846
6f7f081f 847/* Shift the SCHED_TIMEs of all nodes down by AMOUNT and set the values of
848 SCHED_ROW and SCHED_STAGE. An instruction scheduled in cycle AMOUNT
849 will move to cycle zero. */
5b6269ed 850static void
6f7f081f 851reset_sched_times (partial_schedule_ptr ps, int amount)
5b6269ed 852{
76387907 853 int row;
5b6269ed 854 int ii = ps->ii;
76387907 855 ps_insn_ptr crr_insn;
5b6269ed 856
76387907 857 for (row = 0; row < ii; row++)
858 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
859 {
7282ef12 860 int u = crr_insn->id;
76387907 861 int normalized_time = SCHED_TIME (u) - amount;
6f7f081f 862 int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
76387907 863
6f7f081f 864 if (dump_file)
865 {
866 /* Print the scheduling times after the rotation. */
bc69df91 867 rtx_insn *insn = ps_rtl_insn (ps, u);
7282ef12 868
6f7f081f 869 fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
7282ef12 870 "crr_insn->cycle=%d, min_cycle=%d", u,
871 INSN_UID (insn), normalized_time, new_min_cycle);
872 if (JUMP_P (insn))
6f7f081f 873 fprintf (dump_file, " (branch)");
874 fprintf (dump_file, "\n");
875 }
876
76387907 877 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
878 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
a5ea7ace 879
880 crr_insn->cycle = normalized_time;
881 update_node_sched_params (u, ii, normalized_time, new_min_cycle);
76387907 882 }
5b6269ed 883}
6f7f081f 884
5b6269ed 885/* Permute the insns according to their order in PS, from row 0 to
886 row ii-1, and position them right before LAST. This schedules
887 the insns of the loop kernel. */
888static void
4cd001d5 889permute_partial_schedule (partial_schedule_ptr ps, rtx_insn *last)
5b6269ed 890{
891 int ii = ps->ii;
892 int row;
893 ps_insn_ptr ps_ij;
894
895 for (row = 0; row < ii ; row++)
896 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
7282ef12 897 {
bc69df91 898 rtx_insn *insn = ps_rtl_insn (ps, ps_ij->id);
7282ef12 899
900 if (PREV_INSN (last) != insn)
e9df34d5 901 {
902 if (ps_ij->id < ps->g->num_nodes)
903 reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
904 PREV_INSN (last));
905 else
906 add_insn_before (insn, last, NULL);
907 }
7282ef12 908 }
5b6269ed 909}
910
a5ea7ace 911/* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
912 respectively only if cycle C falls on the border of the scheduling
913 window boundaries marked by START and END cycles. STEP is the
914 direction of the window. */
915static inline void
916set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
917 sbitmap *tmp_precede, sbitmap must_precede, int c,
918 int start, int end, int step)
919{
920 *tmp_precede = NULL;
921 *tmp_follow = NULL;
922
923 if (c == start)
924 {
925 if (step == 1)
926 *tmp_precede = must_precede;
927 else /* step == -1. */
928 *tmp_follow = must_follow;
929 }
930 if (c == end - step)
931 {
932 if (step == 1)
933 *tmp_follow = must_follow;
934 else /* step == -1. */
935 *tmp_precede = must_precede;
936 }
937
938}
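
/* For example (illustrative): with a forward window START = 3, END = 7 and
   STEP = 1, the call for cycle C = 3 sets *TMP_PRECEDE to MUST_PRECEDE and
   the call for C = 6 (END - STEP) sets *TMP_FOLLOW to MUST_FOLLOW; for the
   cycles in between both stay NULL. */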
939
940/* Return True if the branch can be moved to row ii-1 while
941 normalizing the partial schedule PS to start from cycle zero and thus
942 optimize the SC. Otherwise return False. */
943static bool
944optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
945{
946 int amount = PS_MIN_CYCLE (ps);
947 sbitmap sched_nodes = sbitmap_alloc (g->num_nodes);
948 int start, end, step;
949 int ii = ps->ii;
950 bool ok = false;
951 int stage_count, stage_count_curr;
952
953 /* Compare the SC after normalization and SC after bringing the branch
954 to row ii-1. If they are equal just bail out. */
955 stage_count = calculate_stage_count (ps, amount);
956 stage_count_curr =
7282ef12 957 calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
a5ea7ace 958
959 if (stage_count == stage_count_curr)
960 {
961 if (dump_file)
962 fprintf (dump_file, "SMS SC already optimized.\n");
963
964 ok = false;
965 goto clear;
966 }
967
968 if (dump_file)
969 {
970 fprintf (dump_file, "SMS Trying to optimize branch location\n");
971 fprintf (dump_file, "SMS partial schedule before trial:\n");
972 print_partial_schedule (ps, dump_file);
973 }
974
975 /* First, normalize the partial scheduling. */
976 reset_sched_times (ps, amount);
977 rotate_partial_schedule (ps, amount);
978 if (dump_file)
979 {
980 fprintf (dump_file,
981 "SMS partial schedule after normalization (ii, %d, SC %d):\n",
982 ii, stage_count);
983 print_partial_schedule (ps, dump_file);
984 }
985
7282ef12 986 if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
a5ea7ace 987 {
988 ok = true;
989 goto clear;
990 }
991
53c5d9d4 992 bitmap_ones (sched_nodes);
a5ea7ace 993
994 /* Calculate the new placement of the branch. It should be in row
995 ii-1 and fall into its scheduling window. */
996 if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
997 &step, &end) == 0)
998 {
999 bool success;
1000 ps_insn_ptr next_ps_i;
7282ef12 1001 int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
a5ea7ace 1002 int row = SMODULO (branch_cycle, ps->ii);
1003 int num_splits = 0;
1004 sbitmap must_precede, must_follow, tmp_precede, tmp_follow;
1005 int c;
1006
1007 if (dump_file)
1008 fprintf (dump_file, "\nTrying to schedule node %d "
1009 "INSN = %d in (%d .. %d) step %d\n",
1010 g->closing_branch->cuid,
1011 (INSN_UID (g->closing_branch->insn)), start, end, step);
1012
1013 gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
1014 if (step == 1)
1015 {
1016 c = start + ii - SMODULO (start, ii) - 1;
1017 gcc_assert (c >= start);
1018 if (c >= end)
1019 {
1020 ok = false;
1021 if (dump_file)
1022 fprintf (dump_file,
1023 "SMS failed to schedule branch at cycle: %d\n", c);
1024 goto clear;
1025 }
1026 }
1027 else
1028 {
1029 c = start - SMODULO (start, ii) - 1;
1030 gcc_assert (c <= start);
1031
1032 if (c <= end)
1033 {
1034 if (dump_file)
1035 fprintf (dump_file,
1036 "SMS failed to schedule branch at cycle: %d\n", c);
1037 ok = false;
1038 goto clear;
1039 }
1040 }
1041
1042 must_precede = sbitmap_alloc (g->num_nodes);
1043 must_follow = sbitmap_alloc (g->num_nodes);
1044
1045 /* Try to schedule the branch in its new cycle. */
1046 calculate_must_precede_follow (g->closing_branch, start, end,
1047 step, ii, sched_nodes,
1048 must_precede, must_follow);
1049
1050 set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1051 must_precede, c, start, end, step);
1052
1053 /* Find the element in the partial schedule related to the closing
1054 branch so we can remove it from its current cycle. */
1055 for (next_ps_i = ps->rows[row];
1056 next_ps_i; next_ps_i = next_ps_i->next_in_row)
7282ef12 1057 if (next_ps_i->id == g->closing_branch->cuid)
a5ea7ace 1058 break;
1059
9bf2b489 1060 remove_node_from_ps (ps, next_ps_i);
a5ea7ace 1061 success =
7282ef12 1062 try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
a5ea7ace 1063 sched_nodes, &num_splits,
1064 tmp_precede, tmp_follow);
1065 gcc_assert (num_splits == 0);
1066 if (!success)
1067 {
1068 if (dump_file)
1069 fprintf (dump_file,
1070 "SMS failed to schedule branch at cycle: %d, "
1071 "bringing it back to cycle %d\n", c, branch_cycle);
1072
1073 /* We failed to place the branch in row ii - 1.
1074 Put it back in its original place in the partial
1075 schedule. */
1076 set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1077 must_precede, branch_cycle, start, end,
1078 step);
1079 success =
7282ef12 1080 try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
a5ea7ace 1081 branch_cycle, sched_nodes,
1082 &num_splits, tmp_precede,
1083 tmp_follow);
1084 gcc_assert (success && (num_splits == 0));
1085 ok = false;
1086 }
1087 else
1088 {
1089 /* The branch is placed in row ii - 1. */
1090 if (dump_file)
1091 fprintf (dump_file,
1092 "SMS success in moving branch to cycle %d\n", c);
1093
7282ef12 1094 update_node_sched_params (g->closing_branch->cuid, ii, c,
a5ea7ace 1095 PS_MIN_CYCLE (ps));
1096 ok = true;
1097 }
1098
1099 free (must_precede);
1100 free (must_follow);
1101 }
1102
1103clear:
1104 free (sched_nodes);
1105 return ok;
1106}
1107
5b6269ed 1108static void
1109duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
e9df34d5 1110 int to_stage, rtx count_reg)
5b6269ed 1111{
1112 int row;
1113 ps_insn_ptr ps_ij;
1114
1115 for (row = 0; row < ps->ii; row++)
1116 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
1117 {
7282ef12 1118 int u = ps_ij->id;
e9df34d5 1119 int first_u, last_u;
bc69df91 1120 rtx_insn *u_insn;
5b6269ed 1121
7bfc9b7c 1122 /* Do not duplicate any insn which refers to count_reg as it
1123 belongs to the control part.
6f7f081f 1124 The closing branch is scheduled as well and thus should
1125 be ignored.
7bfc9b7c 1126 TODO: This should be done by analyzing the control part of
1127 the loop. */
7282ef12 1128 u_insn = ps_rtl_insn (ps, u);
1129 if (reg_mentioned_p (count_reg, u_insn)
1130 || JUMP_P (u_insn))
7bfc9b7c 1131 continue;
1132
e9df34d5 1133 first_u = SCHED_STAGE (u);
1134 last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
1135 if (from_stage <= last_u && to_stage >= first_u)
5b6269ed 1136 {
e9df34d5 1137 if (u < ps->g->num_nodes)
1138 duplicate_insn_chain (ps_first_note (ps, u), u_insn);
1139 else
1140 emit_insn (copy_rtx (PATTERN (u_insn)));
5b6269ed 1141 }
5b6269ed 1142 }
1143}
1144
1145
1146/* Generate the instructions (including reg_moves) for prolog & epilog. */
1147static void
7bfc9b7c 1148generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
1149 rtx count_reg, rtx count_init)
5b6269ed 1150{
1151 int i;
1152 int last_stage = PS_STAGE_COUNT (ps) - 1;
1153 edge e;
48e1416a 1154
5b6269ed 1155 /* Generate the prolog, inserting its insns on the loop-entry edge. */
1156 start_sequence ();
1157
7bfc9b7c 1158 if (!count_init)
1159 {
1160 /* Generate instructions at the beginning of the prolog to
1161 adjust the loop count by STAGE_COUNT. If loop count is constant
1162 (count_init), this constant is adjusted by STAGE_COUNT in
1163 generate_prolog_epilog function. */
1164 rtx sub_reg = NULL_RTX;
1165
0359f9f5 1166 sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
1167 gen_int_mode (last_stage,
1168 GET_MODE (count_reg)),
7bfc9b7c 1169 count_reg, 1, OPTAB_DIRECT);
1170 gcc_assert (REG_P (sub_reg));
1171 if (REGNO (sub_reg) != REGNO (count_reg))
1172 emit_move_insn (count_reg, sub_reg);
1173 }
5b6269ed 1174
1175 for (i = 0; i < last_stage; i++)
e9df34d5 1176 duplicate_insns_of_cycles (ps, 0, i, count_reg);
48e1416a 1177
88e6f696 1178 /* Put the prolog on the entry edge. */
b6a75366 1179 e = loop_preheader_edge (loop);
d4473c84 1180 split_edge_and_insert (e, get_insns ());
6c3ff102 1181 if (!flag_resched_modulo_sched)
1182 e->dest->flags |= BB_DISABLE_SCHEDULE;
b6a75366 1183
5b6269ed 1184 end_sequence ();
1185
1186 /* Generate the epilog, inserting its insns on the loop-exit edge. */
1187 start_sequence ();
1188
1189 for (i = 0; i < last_stage; i++)
e9df34d5 1190 duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
48e1416a 1191
88e6f696 1192 /* Put the epilogue on the exit edge. */
d9e7e1a2 1193 gcc_assert (single_exit (loop));
1194 e = single_exit (loop);
d4473c84 1195 split_edge_and_insert (e, get_insns ());
6c3ff102 1196 if (!flag_resched_modulo_sched)
1197 e->dest->flags |= BB_DISABLE_SCHEDULE;
1198
b6a75366 1199 end_sequence ();
1200}
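
/* A small illustration (not tied to any particular loop): if the original
   loop ran N times and the final schedule has stage count SC, the code above
   peels SC - 1 iterations: the prolog and epilog together complete those
   SC - 1 iterations stage by stage, the count register is decreased by
   SC - 1, and the kernel then iterates N - (SC - 1) times. */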
1201
6c3ff102 1202/* Mark LOOP as software pipelined so the later
1203 scheduling passes don't touch it. */
1204static void
1205mark_loop_unsched (struct loop *loop)
1206{
1207 unsigned i;
1208 basic_block *bbs = get_loop_body (loop);
1209
1210 for (i = 0; i < loop->num_nodes; i++)
1211 bbs[i]->flags |= BB_DISABLE_SCHEDULE;
eb05e398 1212
1213 free (bbs);
6c3ff102 1214}
1215
b6a75366 1216/* Return true if all the BBs of the loop are empty except the
1217 loop header. */
1218static bool
1219loop_single_full_bb_p (struct loop *loop)
1220{
1221 unsigned i;
1222 basic_block *bbs = get_loop_body (loop);
1223
1224 for (i = 0; i < loop->num_nodes ; i++)
5b6269ed 1225 {
6fe7b8c2 1226 rtx_insn *head, *tail;
b6a75366 1227 bool empty_bb = true;
1228
1229 if (bbs[i] == loop->header)
1230 continue;
1231
1232 /* Make sure that basic blocks other than the header
1233 have only notes, labels or jumps. */
6a1cdb4d 1234 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
b6a75366 1235 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
1236 {
1237 if (NOTE_P (head) || LABEL_P (head)
9845d120 1238 || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
b6a75366 1239 continue;
1240 empty_bb = false;
1241 break;
1242 }
1243
1244 if (! empty_bb)
1245 {
1246 free (bbs);
1247 return false;
1248 }
1249 }
1250 free (bbs);
1251 return true;
1252}
5b6269ed 1253
56134a9f 1254/* Dump file:line from INSN's location info to dump_file. */
1255
1256static void
bc69df91 1257dump_insn_location (rtx_insn *insn)
56134a9f 1258{
0e7ae557 1259 if (dump_file && INSN_HAS_LOCATION (insn))
56134a9f 1260 {
0e7ae557 1261 expanded_location xloc = insn_location (insn);
1262 fprintf (dump_file, " %s:%i", xloc.file, xloc.line);
56134a9f 1263 }
1264}
1265
b6a75366 1266/* A simple loop from SMS point of view; it is a loop that is composed of
1267 either a single basic block or two BBs - a header and a latch. */
1268#define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
1269 && (EDGE_COUNT (loop->latch->preds) == 1) \
1270 && (EDGE_COUNT (loop->latch->succs) == 1))
5b6269ed 1271
b6a75366 1272/* Return true if the loop is in its canonical form and false if not.
1267 i.e. it is SIMPLE_SMS_LOOP_P and has one preheader block and a single exit. */
1274static bool
3f5be5f4 1275loop_canon_p (struct loop *loop)
b6a75366 1276{
5b6269ed 1277
9e3536f4 1278 if (loop->inner || !loop_outer (loop))
7707ba40 1279 {
1280 if (dump_file)
1281 fprintf (dump_file, "SMS loop inner or !loop_outer\n");
b6a75366 1282 return false;
7707ba40 1283 }
5b6269ed 1284
d9e7e1a2 1285 if (!single_exit (loop))
b6a75366 1286 {
1287 if (dump_file)
1288 {
bc69df91 1289 rtx_insn *insn = BB_END (loop->header);
48e1416a 1290
56134a9f 1291 fprintf (dump_file, "SMS loop many exits");
5169661d 1292 dump_insn_location (insn);
56134a9f 1293 fprintf (dump_file, "\n");
b6a75366 1294 }
1295 return false;
1296 }
5b6269ed 1297
b6a75366 1298 if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
1299 {
1300 if (dump_file)
1301 {
bc69df91 1302 rtx_insn *insn = BB_END (loop->header);
48e1416a 1303
56134a9f 1304 fprintf (dump_file, "SMS loop many BBs.");
5169661d 1305 dump_insn_location (insn);
56134a9f 1306 fprintf (dump_file, "\n");
b6a75366 1307 }
1308 return false;
5b6269ed 1309 }
1310
b6a75366 1311 return true;
1312}
5b6269ed 1313
b6a75366 1314/* If there is more than one entry to the loop,
1315 make it one by splitting the first entry edge and
1316 redirecting the others to the new BB. */
1317static void
1318canon_loop (struct loop *loop)
1319{
1320 edge e;
1321 edge_iterator i;
5b6269ed 1322
b6a75366 1323 /* Avoid annoying special cases of edges going to exit
1324 block. */
34154e27 1325 FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
b6a75366 1326 if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
88e6f696 1327 split_edge (e);
5b6269ed 1328
b6a75366 1329 if (loop->latch == loop->header
1330 || EDGE_COUNT (loop->latch->succs) > 1)
5b6269ed 1331 {
b6a75366 1332 FOR_EACH_EDGE (e, i, loop->header->preds)
1333 if (e->src == loop->latch)
1334 break;
88e6f696 1335 split_edge (e);
5b6269ed 1336 }
1337}
1338
e1ab7874 1339/* Setup infos. */
1340static void
1341setup_sched_infos (void)
1342{
1343 memcpy (&sms_common_sched_info, &haifa_common_sched_info,
1344 sizeof (sms_common_sched_info));
1345 sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
1346 common_sched_info = &sms_common_sched_info;
1347
1348 sched_deps_info = &sms_sched_deps_info;
1349 current_sched_info = &sms_sched_info;
1350}
1351
7cef6c97 1352/* Probability in % that the sms-ed loop rolls enough so that the optimized
1353 version may be entered. Just a guess. */
1354#define PROB_SMS_ENOUGH_ITERATIONS 80
1355
896dbaed 1356/* Used to calculate the upper bound of ii. */
1357#define MAXII_FACTOR 2
1358
5b6269ed 1359/* Main entry point, perform SMS scheduling on the loops of the function
1360 that consist of single basic blocks. */
73b62548 1361static void
3f5be5f4 1362sms_schedule (void)
5b6269ed 1363{
bc69df91 1364 rtx_insn *insn;
5b6269ed 1365 ddg_ptr *g_arr, g;
5b6269ed 1366 int * node_order;
c0fb94bd 1367 int maxii, max_asap;
5b6269ed 1368 partial_schedule_ptr ps;
b6a75366 1369 basic_block bb = NULL;
7cef6c97 1370 struct loop *loop;
b6a75366 1371 basic_block condition_bb = NULL;
1372 edge latch_edge;
1373 gcov_type trip_count = 0;
1374
88e6f696 1375 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
dce58e66 1376 | LOOPS_HAVE_RECORDED_EXITS);
41f75a99 1377 if (number_of_loops (cfun) <= 1)
7a3bf727 1378 {
1379 loop_optimizer_finalize ();
1380 return; /* There are no loops to schedule. */
1381 }
b6a75366 1382
5b6269ed 1383 /* Initialize issue_rate. */
1384 if (targetm.sched.issue_rate)
1385 {
1386 int temp = reload_completed;
1387
1388 reload_completed = 1;
ffa47ae0 1389 issue_rate = targetm.sched.issue_rate ();
5b6269ed 1390 reload_completed = temp;
1391 }
1392 else
1393 issue_rate = 1;
1394
2c763ed4 1395 /* Initialize the scheduler. */
e1ab7874 1396 setup_sched_infos ();
1397 haifa_sched_init ();
02a8ad7e 1398
b6a75366 1399 /* Allocate memory to hold the DDG array one entry for each loop.
1400 We use loop->num as index into this array. */
41f75a99 1401 g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun));
5b6269ed 1402
7707ba40 1403 if (dump_file)
1404 {
1405 fprintf (dump_file, "\n\nSMS analysis phase\n");
1406 fprintf (dump_file, "===================\n\n");
1407 }
1408
5b6269ed 1409 /* Build DDGs for all the relevant loops and hold them in G_ARR
b6a75366 1410 indexed by the loop index. */
f21d4d00 1411 FOR_EACH_LOOP (loop, 0)
5b6269ed 1412 {
6fe7b8c2 1413 rtx_insn *head, *tail;
fc2abfd3 1414 rtx count_reg;
5b6269ed 1415
b6a75366 1416 /* For debugging. */
76aaf9f0 1417 if (dbg_cnt (sms_sched_loop) == false)
b6a75366 1418 {
1419 if (dump_file)
76aaf9f0 1420 fprintf (dump_file, "SMS reached max limit... \n");
5b6269ed 1421
f21d4d00 1422 break;
b6a75366 1423 }
5b6269ed 1424
7707ba40 1425 if (dump_file)
56134a9f 1426 {
bc69df91 1427 rtx_insn *insn = BB_END (loop->header);
7707ba40 1428
56134a9f 1429 fprintf (dump_file, "SMS loop num: %d", loop->num);
5169661d 1430 dump_insn_location (insn);
56134a9f 1431 fprintf (dump_file, "\n");
1432 }
7707ba40 1433
3f5be5f4 1434 if (! loop_canon_p (loop))
b6a75366 1435 continue;
5b6269ed 1436
b6a75366 1437 if (! loop_single_full_bb_p (loop))
7707ba40 1438 {
1439 if (dump_file)
1440 fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
5b6269ed 1441 continue;
7707ba40 1442 }
5b6269ed 1443
b6a75366 1444 bb = loop->header;
5b6269ed 1445
6a1cdb4d 1446 get_ebb_head_tail (bb, bb, &head, &tail);
b6a75366 1447 latch_edge = loop_latch_edge (loop);
d9e7e1a2 1448 gcc_assert (single_exit (loop));
1449 if (single_exit (loop)->count)
1450 trip_count = latch_edge->count / single_exit (loop)->count;
5b6269ed 1451
f0b5f617 1452 /* Perform SMS only on loops whose average count is above the threshold. */
b6a75366 1453
1454 if ( latch_edge->count
d9e7e1a2 1455 && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
b6a75366 1456 {
3f5be5f4 1457 if (dump_file)
5b6269ed 1458 {
5169661d 1459 dump_insn_location (tail);
56134a9f 1460 fprintf (dump_file, "\nSMS single-bb-loop\n");
5b6269ed 1461 if (profile_info && flag_branch_probabilities)
1462 {
3f5be5f4 1463 fprintf (dump_file, "SMS loop-count ");
f03df321 1464 fprintf (dump_file, "%" PRId64,
3a4303e7 1465 (int64_t) bb->count);
3f5be5f4 1466 fprintf (dump_file, "\n");
1467 fprintf (dump_file, "SMS trip-count ");
f03df321 1468 fprintf (dump_file, "%" PRId64,
3a4303e7 1469 (int64_t) trip_count);
3f5be5f4 1470 fprintf (dump_file, "\n");
1471 fprintf (dump_file, "SMS profile-sum-max ");
f03df321 1472 fprintf (dump_file, "%" PRId64,
3a4303e7 1473 (int64_t) profile_info->sum_max);
3f5be5f4 1474 fprintf (dump_file, "\n");
5b6269ed 1475 }
1476 }
1477 continue;
1478 }
1479
1480 /* Make sure this is a doloop. */
7bfc9b7c 1481 if ( !(count_reg = doloop_register_get (head, tail)))
7707ba40 1482 {
1483 if (dump_file)
1484 fprintf (dump_file, "SMS doloop_register_get failed\n");
5b6269ed 1485 continue;
7707ba40 1486 }
5b6269ed 1487
c38b28e7 1488 /* Don't handle BBs with calls or barriers
90c2bcf0 1489 or !single_set instructions, with the exception of instructions that
1490 include count_reg---these instructions are part of the control part
1491 that the do-loop pattern recognizes.
7bfc9b7c 1492 ??? Should handle insns defining subregs. */
1493 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1494 {
1495 rtx set;
1496
1497 if (CALL_P (insn)
c38b28e7 1498 || BARRIER_P (insn)
9845d120 1499 || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
90c2bcf0 1500 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
1501 && !reg_mentioned_p (count_reg, insn))
7bfc9b7c 1502 || (INSN_P (insn) && (set = single_set (insn))
1503 && GET_CODE (SET_DEST (set)) == SUBREG))
1504 break;
1505 }
5b6269ed 1506
1507 if (insn != NEXT_INSN (tail))
1508 {
3f5be5f4 1509 if (dump_file)
5b6269ed 1510 {
6d7dc5b9 1511 if (CALL_P (insn))
3f5be5f4 1512 fprintf (dump_file, "SMS loop-with-call\n");
c38b28e7 1513 else if (BARRIER_P (insn))
1514 fprintf (dump_file, "SMS loop-with-barrier\n");
9845d120 1515 else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
7bfc9b7c 1516 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1517 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1518 else
1519 fprintf (dump_file, "SMS loop with subreg in lhs\n");
3f5be5f4 1520 print_rtl_single (dump_file, insn);
5b6269ed 1521 }
1522
1523 continue;
1524 }
1525
6f7f081f 1526 /* Always schedule the closing branch with the rest of the
1527 instructions. The branch is rotated to be in row ii-1 at the
1528 end of the scheduling procedure to make sure it's the last
1529 instruction in the iteration. */
1530 if (! (g = create_ddg (bb, 1)))
5b6269ed 1531 {
3f5be5f4 1532 if (dump_file)
34c7f568 1533 fprintf (dump_file, "SMS create_ddg failed\n");
5b6269ed 1534 continue;
1535 }
1536
17519ba0 1537 g_arr[loop->num] = g;
7707ba40 1538 if (dump_file)
1539 fprintf (dump_file, "...OK\n");
1540
5b6269ed 1541 }
7707ba40 1542 if (dump_file)
1543 {
1544 fprintf (dump_file, "\nSMS transformation phase\n");
1545 fprintf (dump_file, "=========================\n\n");
1546 }
5b6269ed 1547
b6a75366 1548 /* We don't want to perform SMS on new loops - created by versioning. */
f21d4d00 1549 FOR_EACH_LOOP (loop, 0)
5b6269ed 1550 {
6fe7b8c2 1551 rtx_insn *head, *tail;
50fc2d35 1552 rtx count_reg;
1553 rtx_insn *count_init;
e9df34d5 1554 int mii, rec_mii, stage_count, min_cycle;
3a4303e7 1555 int64_t loop_count = 0;
0b79ae60 1556 bool opt_sc_p;
5b6269ed 1557
17519ba0 1558 if (! (g = g_arr[loop->num]))
5b6269ed 1559 continue;
1560
1561 if (dump_file)
56134a9f 1562 {
bc69df91 1563 rtx_insn *insn = BB_END (loop->header);
7707ba40 1564
56134a9f 1565 fprintf (dump_file, "SMS loop num: %d", loop->num);
5169661d 1566 dump_insn_location (insn);
56134a9f 1567 fprintf (dump_file, "\n");
7707ba40 1568
56134a9f 1569 print_ddg (dump_file, g);
1570 }
5b6269ed 1571
6a1cdb4d 1572 get_ebb_head_tail (loop->header, loop->header, &head, &tail);
5b6269ed 1573
b6a75366 1574 latch_edge = loop_latch_edge (loop);
d9e7e1a2 1575 gcc_assert (single_exit (loop));
1576 if (single_exit (loop)->count)
1577 trip_count = latch_edge->count / single_exit (loop)->count;
5b6269ed 1578
3f5be5f4 1579 if (dump_file)
5b6269ed 1580 {
5169661d 1581 dump_insn_location (tail);
56134a9f 1582 fprintf (dump_file, "\nSMS single-bb-loop\n");
5b6269ed 1583 if (profile_info && flag_branch_probabilities)
1584 {
3f5be5f4 1585 fprintf (dump_file, "SMS loop-count ");
f03df321 1586 fprintf (dump_file, "%" PRId64,
3a4303e7 1587 (int64_t) bb->count);
3f5be5f4 1588 fprintf (dump_file, "\n");
1589 fprintf (dump_file, "SMS profile-sum-max ");
f03df321 1590 fprintf (dump_file, "%" PRId64,
3a4303e7 1591 (int64_t) profile_info->sum_max);
3f5be5f4 1592 fprintf (dump_file, "\n");
5b6269ed 1593 }
3f5be5f4 1594 fprintf (dump_file, "SMS doloop\n");
1595 fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1596 fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1597 fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
5b6269ed 1598 }
1599
5b6269ed 1600
b6a75366 1601 /* If the loop has a doloop register, it gets special
1602 handling. */
50fc2d35 1603 count_init = NULL;
7bfc9b7c 1604 if ((count_reg = doloop_register_get (head, tail)))
b6a75366 1605 {
1606 basic_block pre_header;
1607
1608 pre_header = loop_preheader_edge (loop)->src;
1609 count_init = const_iteration_count (count_reg, pre_header,
1610 &loop_count);
1611 }
1612 gcc_assert (count_reg);
5b6269ed 1613
3f5be5f4 1614 if (dump_file && count_init)
5b6269ed 1615 {
3f5be5f4 1616 fprintf (dump_file, "SMS const-doloop ");
f03df321 1617 fprintf (dump_file, "%" PRId64,
b6a75366 1618 loop_count);
3f5be5f4 1619 fprintf (dump_file, "\n");
5b6269ed 1620 }
1621
4c36ffe6 1622 node_order = XNEWVEC (int, g->num_nodes);
5b6269ed 1623
1624 mii = 1; /* Need to pass some estimate of mii. */
c0fb94bd 1625 rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
5b6269ed 1626 mii = MAX (res_MII (g), rec_mii);
c0fb94bd 1627 maxii = MAX (max_asap, MAXII_FACTOR * mii);
5b6269ed 1628
3f5be5f4 1629 if (dump_file)
1630 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
5b6269ed 1631 rec_mii, mii, maxii);
1632
0b79ae60 1633 for (;;)
a5ea7ace 1634 {
0b79ae60 1635 set_node_sched_params (g);
1636
1637 stage_count = 0;
1638 opt_sc_p = false;
1639 ps = sms_schedule_by_order (g, mii, maxii, node_order);
1640
1641 if (ps)
a5ea7ace 1642 {
0b79ae60 1643 /* Try to achieve optimized SC by normalizing the partial
1644 schedule (having the cycles start from cycle zero).
1645 The branch location must be placed in row ii-1 in the
 1646	         final scheduling. If this fails, shift all instructions to
1647 position the branch in row ii-1. */
1648 opt_sc_p = optimize_sc (ps, g);
1649 if (opt_sc_p)
1650 stage_count = calculate_stage_count (ps, 0);
1651 else
1652 {
1653 /* Bring the branch to cycle ii-1. */
1654 int amount = (SCHED_TIME (g->closing_branch->cuid)
1655 - (ps->ii - 1));
1656
1657 if (dump_file)
1658 fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1659
1660 stage_count = calculate_stage_count (ps, amount);
1661 }
1662
1663 gcc_assert (stage_count >= 1);
a5ea7ace 1664 }
0b79ae60 1665
 1666	  /* The default value of PARAM_SMS_MIN_SC is 2 as a stage count of
 1667	     1 means that there is no interleaving between iterations, thus
 1668	     we let the scheduling passes do the job in this case. */
e9df34d5 1669 if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC)
0b79ae60 1670 || (count_init && (loop_count <= stage_count))
1671 || (flag_branch_probabilities && (trip_count <= stage_count)))
b6a75366 1672 {
0b79ae60 1673 if (dump_file)
1674 {
1675 fprintf (dump_file, "SMS failed... \n");
1676 fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
1677 " loop-count=", stage_count);
f03df321 1678 fprintf (dump_file, "%" PRId64, loop_count);
0b79ae60 1679 fprintf (dump_file, ", trip-count=");
f03df321 1680 fprintf (dump_file, "%" PRId64, trip_count);
0b79ae60 1681 fprintf (dump_file, ")\n");
1682 }
1683 break;
b6a75366 1684 }
0b79ae60 1685
a5ea7ace 1686 if (!opt_sc_p)
1687 {
1688 /* Rotate the partial schedule to have the branch in row ii-1. */
7282ef12 1689 int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
a5ea7ace 1690
1691 reset_sched_times (ps, amount);
1692 rotate_partial_schedule (ps, amount);
1693 }
6f7f081f 1694
6f7f081f 1695 set_columns_for_ps (ps);
5b6269ed 1696
e9df34d5 1697 min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
0b79ae60 1698 if (!schedule_reg_moves (ps))
1699 {
1700 mii = ps->ii + 1;
1701 free_partial_schedule (ps);
1702 continue;
1703 }
1704
e9df34d5 1705 /* Moves that handle incoming values might have been added
1706 to a new first stage. Bump the stage count if so.
1707
1708 ??? Perhaps we could consider rotating the schedule here
1709 instead? */
1710 if (PS_MIN_CYCLE (ps) < min_cycle)
1711 {
1712 reset_sched_times (ps, 0);
1713 stage_count++;
1714 }
1715
1716 /* The stage count should now be correct without rotation. */
1717 gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
1718 PS_STAGE_COUNT (ps) = stage_count;
1719
6f7f081f 1720 canon_loop (loop);
1721
1722 if (dump_file)
1723 {
5169661d 1724 dump_insn_location (tail);
56134a9f 1725 fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n",
1726 ps->ii, stage_count);
3f5be5f4 1727 print_partial_schedule (ps, dump_file);
5b6269ed 1728 }
6f7f081f 1729
896dbaed 1730	      /* In case the BCT count is not known, do loop-versioning. */
1731 if (count_reg && ! count_init)
1732 {
c338f2e3 1733 rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
1734 gen_int_mode (stage_count,
1735 GET_MODE (count_reg)));
896dbaed 1736 unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1737 * REG_BR_PROB_BASE) / 100;
1738
1739 loop_version (loop, comp_rtx, &condition_bb,
1740 prob, prob, REG_BR_PROB_BASE - prob,
1741 true);
1742 }
1743
1744 /* Set new iteration count of loop kernel. */
1745 if (count_reg && count_init)
1746 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1747 - stage_count + 1);
1748
1749 /* Now apply the scheduled kernel to the RTL of the loop. */
5b6269ed 1750 permute_partial_schedule (ps, g->closing_branch->first_note);
f045d41d 1751
896dbaed 1752 /* Mark this loop as software pipelined so the later
6c3ff102 1753 scheduling passes don't touch it. */
896dbaed 1754 if (! flag_resched_modulo_sched)
6c3ff102 1755 mark_loop_unsched (loop);
1756
896dbaed 1757 /* The life-info is not valid any more. */
1758 df_set_bb_dirty (g->bb);
f045d41d 1759
0b79ae60 1760 apply_reg_moves (ps);
896dbaed 1761 if (dump_file)
0b79ae60 1762 print_node_sched_params (dump_file, g->num_nodes, ps);
896dbaed 1763 /* Generate prolog and epilog. */
7bfc9b7c 1764 generate_prolog_epilog (ps, loop, count_reg, count_init);
0b79ae60 1765 break;
5b6269ed 1766 }
b6a75366 1767
5b6269ed 1768 free_partial_schedule (ps);
f1f41a6c 1769 node_sched_param_vec.release ();
5b6269ed 1770 free (node_order);
1771 free_ddg (g);
1772 }
1773
3fa19cbb 1774 free (g_arr);
1775
5b6269ed 1776 /* Release scheduler data, needed until now because of DFA. */
e1ab7874 1777 haifa_sched_finish ();
88e6f696 1778 loop_optimizer_finalize ();
5b6269ed 1779}
1780
1781/* The SMS scheduling algorithm itself
1782 -----------------------------------
1783 Input: 'O' an ordered list of insns of a loop.
1784 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1785
1786 'Q' is the empty Set
1787 'PS' is the partial schedule; it holds the currently scheduled nodes with
1788 their cycle/slot.
1789 'PSP' previously scheduled predecessors.
1790 'PSS' previously scheduled successors.
1791 't(u)' the cycle where u is scheduled.
1792 'l(u)' is the latency of u.
1793 'd(v,u)' is the dependence distance from v to u.
1794 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1795 the node ordering phase.
1796 'check_hardware_resources_conflicts(u, PS, c)'
 1797      Run a trace around the cycle/slot through the DFA model
 1798      to check for resource conflicts involving instruction u
 1799      at cycle c given the partial schedule PS.
1800 'add_to_partial_schedule_at_time(u, PS, c)'
1801 Add the node/instruction u to the partial schedule
1802 PS at time c.
1803 'calculate_register_pressure(PS)'
1804 Given a schedule of instructions, calculate the register
1805 pressure it implies. One implementation could be the
1806 maximum number of overlapping live ranges.
 1807 'maxRP' The maximum allowed register pressure, usually derived from the
 1808      number of registers available in the hardware.
1809
1810 1. II = MII.
1811 2. PS = empty list
1812 3. for each node u in O in pre-computed order
1813 4. if (PSP(u) != Q && PSS(u) == Q) then
 1814 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1815 6. start = Early_start; end = Early_start + II - 1; step = 1
1816 11. else if (PSP(u) == Q && PSS(u) != Q) then
 1817 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1818 13. start = Late_start; end = Late_start - II + 1; step = -1
1819 14. else if (PSP(u) != Q && PSS(u) != Q) then
 1820 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
 1821 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1822 17. start = Early_start;
1823 18. end = min(Early_start + II - 1 , Late_start);
1824 19. step = 1
1825 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1826 21. start = ASAP(u); end = start + II - 1; step = 1
1827 22. endif
1828
1829 23. success = false
1830 24. for (c = start ; c != end ; c += step)
1831 25. if check_hardware_resources_conflicts(u, PS, c) then
1832 26. add_to_partial_schedule_at_time(u, PS, c)
1833 27. success = true
1834 28. break
1835 29. endif
1836 30. endfor
1837 31. if (success == false) then
1838 32. II = II + 1
1839 33. if (II > maxII) then
1840 34. finish - failed to schedule
1841 35. endif
1842 36. goto 2.
1843 37. endif
1844 38. endfor
1845 39. if (calculate_register_pressure(PS) > maxRP) then
1846 40. goto 32.
1847 41. endif
1848 42. compute epilogue & prologue
1849 43. finish - succeeded to schedule
469e33a0 1850
1851 ??? The algorithm restricts the scheduling window to II cycles.
1852 In rare cases, it may be better to allow windows of II+1 cycles.
1853 The window would then start and end on the same row, but with
1854 different "must precede" and "must follow" requirements. */
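/* As an illustration of steps 14-19 above (the numbers are hypothetical and
   purely for the sake of example): assume II = 4 and that node u has one
   already-scheduled predecessor v with t(v) = 5, l(v) = 2, d(v,u) = 1, and
   one already-scheduled successor w with t(w) = 9, l(w) = 1, d(u,w) = 0.
   Then Early_start(u) = 5 + 2 - 1*4 = 3 and Late_start(u) = 9 - 1 + 0*4 = 8,
   so start = 3, end = min (3 + 4 - 1, 8) = 6, step = 1, and cycles 3..6 are
   tried in order. */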
5b6269ed 1855
1856/* A limit on the number of cycles that resource conflicts can span. ??? Should
1857 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1858 set to 0 to save compile time. */
1859#define DFA_HISTORY SMS_DFA_HISTORY
1860
76387907 1861/* A threshold for the number of repeated unsuccessful attempts to insert
1862 an empty row, before we flush the partial schedule and start over. */
1863#define MAX_SPLIT_NUM 10
b6a75366 1864/* Given the partial schedule PS, this function calculates and returns the
d90dbd3e 1865   cycles in which we can schedule the node U_NODE.
b6a75366 1866 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1867 noticed that there are several cases in which we fail to SMS the loop
1868 because the sched window of a node is empty due to tight data-deps. In
d90dbd3e 1869 such cases we want to unschedule some of the predecessors/successors
b6a75366 1870   until we get a non-empty scheduling window. It returns -1 if the
1871 scheduling window is empty and zero otherwise. */
1872
1873static int
a5ea7ace 1874get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
0f7b323c 1875 sbitmap sched_nodes, int ii, int *start_p, int *step_p,
1876 int *end_p)
b6a75366 1877{
1878 int start, step, end;
0f7b323c 1879 int early_start, late_start;
b6a75366 1880 ddg_edge_ptr e;
b6a75366 1881 sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1882 sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1883 sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1884 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1885 int psp_not_empty;
1886 int pss_not_empty;
0f7b323c 1887 int count_preds;
1888 int count_succs;
b6a75366 1889
1890 /* 1. compute sched window for u (start, end, step). */
53c5d9d4 1891 bitmap_clear (psp);
1892 bitmap_clear (pss);
1893 psp_not_empty = bitmap_and (psp, u_node_preds, sched_nodes);
1894 pss_not_empty = bitmap_and (pss, u_node_succs, sched_nodes);
b6a75366 1895
0f7b323c 1896 /* We first compute a forward range (start <= end), then decide whether
1897 to reverse it. */
1898 early_start = INT_MIN;
1899 late_start = INT_MAX;
1900 start = INT_MIN;
1901 end = INT_MAX;
1902 step = 1;
b6a75366 1903
0f7b323c 1904 count_preds = 0;
1905 count_succs = 0;
a9ac8caa 1906
2beb48b8 1907 if (dump_file && (psp_not_empty || pss_not_empty))
1908 {
1909 fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
1910 "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
1911 fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
1912 "start", "early start", "late start", "end", "time");
1913 fprintf (dump_file, "=========== =========== =========== ==========="
1914 " =====\n");
1915 }
0f7b323c 1916 /* Calculate early_start and limit end. Both bounds are inclusive. */
1917 if (psp_not_empty)
1918 for (e = u_node->in; e != 0; e = e->next_in)
1919 {
7282ef12 1920 int v = e->src->cuid;
76387907 1921
08b7917c 1922 if (bitmap_bit_p (sched_nodes, v))
0f7b323c 1923 {
7282ef12 1924 int p_st = SCHED_TIME (v);
2beb48b8 1925 int earliest = p_st + e->latency - (e->distance * ii);
1926 int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
76387907 1927
2beb48b8 1928 if (dump_file)
1929 {
1930 fprintf (dump_file, "%11s %11d %11s %11d %5d",
1931 "", earliest, "", latest, p_st);
1932 print_ddg_edge (dump_file, e);
1933 fprintf (dump_file, "\n");
1934 }
76387907 1935
2beb48b8 1936 early_start = MAX (early_start, earliest);
1937 end = MIN (end, latest);
b6a75366 1938
0f7b323c 1939 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1940 count_preds++;
0f7b323c 1941 }
0f7b323c 1942 }
fe861e75 1943
0f7b323c 1944 /* Calculate late_start and limit start. Both bounds are inclusive. */
1945 if (pss_not_empty)
1946 for (e = u_node->out; e != 0; e = e->next_out)
1947 {
7282ef12 1948 int v = e->dest->cuid;
a9ac8caa 1949
08b7917c 1950 if (bitmap_bit_p (sched_nodes, v))
0f7b323c 1951 {
7282ef12 1952 int s_st = SCHED_TIME (v);
2beb48b8 1953 int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
1954 int latest = s_st - e->latency + (e->distance * ii);
b6a75366 1955
2beb48b8 1956 if (dump_file)
1957 {
1958 fprintf (dump_file, "%11d %11s %11d %11s %5d",
1959 earliest, "", latest, "", s_st);
1960 print_ddg_edge (dump_file, e);
1961 fprintf (dump_file, "\n");
1962 }
76387907 1963
2beb48b8 1964 start = MAX (start, earliest);
1965 late_start = MIN (late_start, latest);
a9ac8caa 1966
0f7b323c 1967 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1968 count_succs++;
0f7b323c 1969 }
0f7b323c 1970 }
a9ac8caa 1971
2beb48b8 1972 if (dump_file && (psp_not_empty || pss_not_empty))
1973 {
1974 fprintf (dump_file, "----------- ----------- ----------- -----------"
1975 " -----\n");
1976 fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
1977 start, early_start, late_start, end, "",
1978 "(max, max, min, min)");
1979 }
1980
0f7b323c 1981 /* Get a target scheduling window no bigger than ii. */
1982 if (early_start == INT_MIN && late_start == INT_MAX)
7282ef12 1983 early_start = NODE_ASAP (u_node);
0f7b323c 1984 else if (early_start == INT_MIN)
1985 early_start = late_start - (ii - 1);
1986 late_start = MIN (late_start, early_start + (ii - 1));
a9ac8caa 1987
0f7b323c 1988 /* Apply memory dependence limits. */
1989 start = MAX (start, early_start);
1990 end = MIN (end, late_start);
fe861e75 1991
2beb48b8 1992 if (dump_file && (psp_not_empty || pss_not_empty))
1993 fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
1994 "", start, end, "", "");
1995
0f7b323c 1996 /* If there are at least as many successors as predecessors, schedule the
1997 node close to its successors. */
1998 if (pss_not_empty && count_succs >= count_preds)
b6a75366 1999 {
a4f59596 2000 std::swap (start, end);
0f7b323c 2001 step = -1;
b6a75366 2002 }
2003
0f7b323c 2004 /* Now that we've finalized the window, make END an exclusive rather
2005 than an inclusive bound. */
2006 end += step;
2007
b6a75366 2008 *start_p = start;
2009 *step_p = step;
2010 *end_p = end;
2011 sbitmap_free (psp);
2012 sbitmap_free (pss);
2013
2014 if ((start >= end && step == 1) || (start <= end && step == -1))
76387907 2015 {
2016 if (dump_file)
2017 fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
2018 start, end, step);
0f7b323c 2019 return -1;
76387907 2020 }
2021
0f7b323c 2022 return 0;
b6a75366 2023}
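
/* For instance (hypothetical numbers, purely illustrative): with ii = 4, a
   single scheduled predecessor v with SCHED_TIME 10 connected to u by a
   memory dependence of latency 1 and distance 0 gives earliest = 10 + 1 = 11
   and latest = 10 + 4 - 1 = 13 in get_sched_window above, so the final
   window is cycles 11..13 (END is then made exclusive, i.e. 14). */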
2024
fe861e75 2025/* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE, which is the
 2026   node currently being scheduled. At the end of the calculation
84004523 2027 MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
2028 U_NODE which are (1) already scheduled in the first/last row of
2029 U_NODE's scheduling window, (2) whose dependence inequality with U
2030 becomes an equality when U is scheduled in this same row, and (3)
2031 whose dependence latency is zero.
2032
c15d54d8 2033 The first and last rows are calculated using the following parameters:
fe861e75 2034 START/END rows - The cycles that begins/ends the traversal on the window;
2035 searching for an empty cycle to schedule U_NODE.
2036 STEP - The direction in which we traverse the window.
84004523 2037 II - The initiation interval. */
fe861e75 2038
2039static void
2040calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
2041 int step, int ii, sbitmap sched_nodes,
2042 sbitmap must_precede, sbitmap must_follow)
2043{
2044 ddg_edge_ptr e;
2045 int first_cycle_in_window, last_cycle_in_window;
fe861e75 2046
2047 gcc_assert (must_precede && must_follow);
2048
2049 /* Consider the following scheduling window:
2050 {first_cycle_in_window, first_cycle_in_window+1, ...,
2051 last_cycle_in_window}. If step is 1 then the following will be
2052 the order we traverse the window: {start=first_cycle_in_window,
2053 first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
2054 or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
2055 end=first_cycle_in_window-1} if step is -1. */
2056 first_cycle_in_window = (step == 1) ? start : end - step;
2057 last_cycle_in_window = (step == 1) ? end - step : start;
2058
53c5d9d4 2059 bitmap_clear (must_precede);
2060 bitmap_clear (must_follow);
fe861e75 2061
2062 if (dump_file)
2063 fprintf (dump_file, "\nmust_precede: ");
2064
84004523 2065 /* Instead of checking if:
2066 (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
2067 && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
2068 first_cycle_in_window)
2069 && e->latency == 0
2070 we use the fact that latency is non-negative:
2071 SCHED_TIME (e->src) - (e->distance * ii) <=
2072 SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
2073 first_cycle_in_window
2074 and check only if
2075 SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
fe861e75 2076 for (e = u_node->in; e != 0; e = e->next_in)
08b7917c 2077 if (bitmap_bit_p (sched_nodes, e->src->cuid)
7282ef12 2078 && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
84004523 2079 first_cycle_in_window))
fe861e75 2080 {
2081 if (dump_file)
2082 fprintf (dump_file, "%d ", e->src->cuid);
2083
08b7917c 2084 bitmap_set_bit (must_precede, e->src->cuid);
fe861e75 2085 }
2086
2087 if (dump_file)
2088 fprintf (dump_file, "\nmust_follow: ");
2089
84004523 2090 /* Instead of checking if:
2091 (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
2092 && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
2093 last_cycle_in_window)
2094 && e->latency == 0
2095 we use the fact that latency is non-negative:
2096 SCHED_TIME (e->dest) + (e->distance * ii) >=
48e1416a 2097 SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
84004523 2098 last_cycle_in_window
2099 and check only if
2100 SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
fe861e75 2101 for (e = u_node->out; e != 0; e = e->next_out)
08b7917c 2102 if (bitmap_bit_p (sched_nodes, e->dest->cuid)
7282ef12 2103 && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
84004523 2104 last_cycle_in_window))
fe861e75 2105 {
2106 if (dump_file)
2107 fprintf (dump_file, "%d ", e->dest->cuid);
2108
08b7917c 2109 bitmap_set_bit (must_follow, e->dest->cuid);
fe861e75 2110 }
2111
2112 if (dump_file)
2113 fprintf (dump_file, "\n");
2114}
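
/* Example of the simplified test above (illustrative numbers only): with
   ii = 4 and an incoming edge from v with distance 1 and latency 0, where
   SCHED_TIME (v) = 7, we get 7 - 1*4 = 3; if the first cycle in U_NODE's
   window is 3, then v is added to MUST_PRECEDE. */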
2115
2116/* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
2117 parameters to decide if that's possible:
2118 PS - The partial schedule.
2119 U - The serial number of U_NODE.
f0b5f617 2120 NUM_SPLITS - The number of row splits made so far.
fe861e75 2121 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
2122 the first row of the scheduling window)
2123 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
2124 last row of the scheduling window) */
2125
2126static bool
7282ef12 2127try_scheduling_node_in_cycle (partial_schedule_ptr ps,
34c7f568 2128 int u, int cycle, sbitmap sched_nodes,
fe861e75 2129 int *num_splits, sbitmap must_precede,
2130 sbitmap must_follow)
2131{
2132 ps_insn_ptr psi;
2133 bool success = 0;
2134
2135 verify_partial_schedule (ps, sched_nodes);
7282ef12 2136 psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
fe861e75 2137 if (psi)
2138 {
7282ef12 2139 SCHED_TIME (u) = cycle;
08b7917c 2140 bitmap_set_bit (sched_nodes, u);
fe861e75 2141 success = 1;
2142 *num_splits = 0;
2143 if (dump_file)
34c7f568 2144 fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
fe861e75 2145
2146 }
2147
2148 return success;
2149}
2150
b6a75366 2151/* This function implements the scheduling algorithm for SMS described
 2152   above. */
5b6269ed 2153static partial_schedule_ptr
3f5be5f4 2154sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
5b6269ed 2155{
2156 int ii = mii;
76387907 2157 int i, c, success, num_splits = 0;
2158 int flush_and_start_over = true;
5b6269ed 2159 int num_nodes = g->num_nodes;
5b6269ed 2160 int start, end, step; /* Place together into one struct? */
2161 sbitmap sched_nodes = sbitmap_alloc (num_nodes);
f045d41d 2162 sbitmap must_precede = sbitmap_alloc (num_nodes);
2163 sbitmap must_follow = sbitmap_alloc (num_nodes);
b6a75366 2164 sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
f045d41d 2165
5b6269ed 2166 partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
2167
53c5d9d4 2168 bitmap_ones (tobe_scheduled);
2169 bitmap_clear (sched_nodes);
b6a75366 2170
76387907 2171 while (flush_and_start_over && (ii < maxii))
5b6269ed 2172 {
b6a75366 2173
5b6269ed 2174 if (dump_file)
d4473c84 2175 fprintf (dump_file, "Starting with ii=%d\n", ii);
76387907 2176 flush_and_start_over = false;
53c5d9d4 2177 bitmap_clear (sched_nodes);
5b6269ed 2178
2179 for (i = 0; i < num_nodes; i++)
2180 {
2181 int u = nodes_order[i];
b6a75366 2182 ddg_node_ptr u_node = &ps->g->nodes[u];
bf79ca12 2183 rtx_insn *insn = u_node->insn;
5b6269ed 2184
9845d120 2185 if (!NONDEBUG_INSN_P (insn))
5b6269ed 2186 {
08b7917c 2187 bitmap_clear_bit (tobe_scheduled, u);
b6a75366 2188 continue;
5b6269ed 2189 }
2190
08b7917c 2191 if (bitmap_bit_p (sched_nodes, u))
b6a75366 2192 continue;
5b6269ed 2193
b6a75366 2194 /* Try to get non-empty scheduling window. */
76387907 2195 success = 0;
a5ea7ace 2196 if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
76387907 2197 &step, &end) == 0)
2198 {
2199 if (dump_file)
2beb48b8 2200 fprintf (dump_file, "\nTrying to schedule node %d "
2201 "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
76387907 2202 (g->nodes[u].insn)), start, end, step);
76387907 2203
2204 gcc_assert ((step > 0 && start < end)
2205 || (step < 0 && start > end));
2206
fe861e75 2207 calculate_must_precede_follow (u_node, start, end, step, ii,
2208 sched_nodes, must_precede,
2209 must_follow);
2210
76387907 2211 for (c = start; c != end; c += step)
2212 {
a5ea7ace 2213 sbitmap tmp_precede, tmp_follow;
fe861e75 2214
a5ea7ace 2215 set_must_precede_follow (&tmp_follow, must_follow,
2216 &tmp_precede, must_precede,
2217 c, start, end, step);
fe861e75 2218 success =
7282ef12 2219 try_scheduling_node_in_cycle (ps, u, c,
fe861e75 2220 sched_nodes,
2221 &num_splits, tmp_precede,
2222 tmp_follow);
2223 if (success)
2224 break;
76387907 2225 }
fe861e75 2226
76387907 2227 verify_partial_schedule (ps, sched_nodes);
2228 }
2229 if (!success)
2230 {
2231 int split_row;
2232
2233 if (ii++ == maxii)
2234 break;
2235
2236 if (num_splits >= MAX_SPLIT_NUM)
2237 {
2238 num_splits = 0;
2239 flush_and_start_over = true;
2240 verify_partial_schedule (ps, sched_nodes);
2241 reset_partial_schedule (ps, ii);
2242 verify_partial_schedule (ps, sched_nodes);
2243 break;
2244 }
2245
2246 num_splits++;
7af239b5 2247 /* The scheduling window is exclusive of 'end'
 2248	         whereas compute_split_row() expects an inclusive,
2249 ordered range. */
76387907 2250 if (step == 1)
7af239b5 2251 split_row = compute_split_row (sched_nodes, start, end - 1,
76387907 2252 ps->ii, u_node);
2253 else
7af239b5 2254 split_row = compute_split_row (sched_nodes, end + 1, start,
76387907 2255 ps->ii, u_node);
b6a75366 2256
76387907 2257 ps_insert_empty_row (ps, split_row, sched_nodes);
2258 i--; /* Go back and retry node i. */
5b6269ed 2259
76387907 2260 if (dump_file)
2261 fprintf (dump_file, "num_splits=%d\n", num_splits);
2262 }
5b6269ed 2263
76387907 2264 /* ??? If (success), check register pressure estimates. */
2265 } /* Continue with next node. */
2266 } /* While flush_and_start_over. */
5b6269ed 2267 if (ii >= maxii)
2268 {
2269 free_partial_schedule (ps);
2270 ps = NULL;
2271 }
76387907 2272 else
53c5d9d4 2273 gcc_assert (bitmap_equal_p (tobe_scheduled, sched_nodes));
76387907 2274
2275 sbitmap_free (sched_nodes);
2276 sbitmap_free (must_precede);
2277 sbitmap_free (must_follow);
2278 sbitmap_free (tobe_scheduled);
2279
5b6269ed 2280 return ps;
2281}
2282
76387907 2283/* This function inserts a new empty row into PS at the position
 2284   indicated by SPLIT_ROW, keeping all already scheduled instructions
 2285   intact and updating their SCHED_TIME and cycle accordingly. */
2286static void
2287ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
2288 sbitmap sched_nodes)
2289{
2290 ps_insn_ptr crr_insn;
2291 ps_insn_ptr *rows_new;
2292 int ii = ps->ii;
2293 int new_ii = ii + 1;
2294 int row;
4815ed0c 2295 int *rows_length_new;
76387907 2296
2297 verify_partial_schedule (ps, sched_nodes);
2298
2299 /* We normalize sched_time and rotate ps to have only non-negative sched
 2300      times, for simplicity of updating cycles after inserting the new row. */
2301 split_row -= ps->min_cycle;
2302 split_row = SMODULO (split_row, ii);
2303 if (dump_file)
2304 fprintf (dump_file, "split_row=%d\n", split_row);
2305
6f7f081f 2306 reset_sched_times (ps, PS_MIN_CYCLE (ps));
2307 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
76387907 2308
2309 rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
4815ed0c 2310 rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
76387907 2311 for (row = 0; row < split_row; row++)
2312 {
2313 rows_new[row] = ps->rows[row];
4815ed0c 2314 rows_length_new[row] = ps->rows_length[row];
76387907 2315 ps->rows[row] = NULL;
2316 for (crr_insn = rows_new[row];
2317 crr_insn; crr_insn = crr_insn->next_in_row)
2318 {
7282ef12 2319 int u = crr_insn->id;
76387907 2320 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
2321
2322 SCHED_TIME (u) = new_time;
2323 crr_insn->cycle = new_time;
2324 SCHED_ROW (u) = new_time % new_ii;
2325 SCHED_STAGE (u) = new_time / new_ii;
2326 }
2327
2328 }
2329
2330 rows_new[split_row] = NULL;
2331
2332 for (row = split_row; row < ii; row++)
2333 {
2334 rows_new[row + 1] = ps->rows[row];
4815ed0c 2335 rows_length_new[row + 1] = ps->rows_length[row];
76387907 2336 ps->rows[row] = NULL;
2337 for (crr_insn = rows_new[row + 1];
2338 crr_insn; crr_insn = crr_insn->next_in_row)
2339 {
7282ef12 2340 int u = crr_insn->id;
76387907 2341 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
2342
2343 SCHED_TIME (u) = new_time;
2344 crr_insn->cycle = new_time;
2345 SCHED_ROW (u) = new_time % new_ii;
2346 SCHED_STAGE (u) = new_time / new_ii;
2347 }
2348 }
2349
2350 /* Updating ps. */
2351 ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
2352 + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
2353 ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
2354 + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
2355 free (ps->rows);
2356 ps->rows = rows_new;
4815ed0c 2357 free (ps->rows_length);
2358 ps->rows_length = rows_length_new;
76387907 2359 ps->ii = new_ii;
2360 gcc_assert (ps->min_cycle >= 0);
2361
2362 verify_partial_schedule (ps, sched_nodes);
2363
2364 if (dump_file)
2365 fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
2366 ps->max_cycle);
2367}
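
/* A purely illustrative example of the remapping above (hypothetical
   numbers): with ii = 4 and split_row = 2, an insn at (normalized) cycle 5
   lies in row 1, before the split, and is remapped to 5 + 5/4 = 6, i.e. row
   6 % 5 = 1 of the new ii = 5 schedule; an insn at cycle 6 lies in row 2, at
   or after the split, and is remapped to 6 + 6/4 + 1 = 8, i.e. row 3,
   leaving the new row 2 empty. */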
2368
 2369/* Given U_NODE, the node that failed to be scheduled, and LOW and
 2370   UP, the boundaries of its scheduling window, compute using
c7684b8e 2371   SCHED_NODES and II a row in the partial schedule that can be split
76387907 2372   so as to separate a critical predecessor from a critical successor,
 2373   thereby expanding the window, and return it. */
2374static int
2375compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
2376 ddg_node_ptr u_node)
2377{
2378 ddg_edge_ptr e;
2379 int lower = INT_MIN, upper = INT_MAX;
7282ef12 2380 int crit_pred = -1;
2381 int crit_succ = -1;
76387907 2382 int crit_cycle;
2383
2384 for (e = u_node->in; e != 0; e = e->next_in)
2385 {
7282ef12 2386 int v = e->src->cuid;
76387907 2387
08b7917c 2388 if (bitmap_bit_p (sched_nodes, v)
7282ef12 2389 && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
2390 if (SCHED_TIME (v) > lower)
76387907 2391 {
7282ef12 2392 crit_pred = v;
2393 lower = SCHED_TIME (v);
76387907 2394 }
2395 }
2396
7282ef12 2397 if (crit_pred >= 0)
76387907 2398 {
2399 crit_cycle = SCHED_TIME (crit_pred) + 1;
2400 return SMODULO (crit_cycle, ii);
2401 }
2402
2403 for (e = u_node->out; e != 0; e = e->next_out)
2404 {
7282ef12 2405 int v = e->dest->cuid;
2406
08b7917c 2407 if (bitmap_bit_p (sched_nodes, v)
7282ef12 2408 && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
2409 if (SCHED_TIME (v) < upper)
76387907 2410 {
7282ef12 2411 crit_succ = v;
2412 upper = SCHED_TIME (v);
76387907 2413 }
2414 }
2415
7282ef12 2416 if (crit_succ >= 0)
76387907 2417 {
2418 crit_cycle = SCHED_TIME (crit_succ);
2419 return SMODULO (crit_cycle, ii);
2420 }
2421
2422 if (dump_file)
 2423     fprintf (dump_file, "Both crit_pred and crit_succ are unset\n");
2424
2425 return SMODULO ((low + up + 1) / 2, ii);
2426}
2427
2428static void
2429verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2430{
2431 int row;
2432 ps_insn_ptr crr_insn;
2433
2434 for (row = 0; row < ps->ii; row++)
4815ed0c 2435 {
2436 int length = 0;
2437
2438 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2439 {
7282ef12 2440 int u = crr_insn->id;
4815ed0c 2441
2442 length++;
08b7917c 2443 gcc_assert (bitmap_bit_p (sched_nodes, u));
4815ed0c 2444 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2445 popcount (sched_nodes) == number of insns in ps. */
2446 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2447 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2448 }
2449
2450 gcc_assert (ps->rows_length[row] == length);
2451 }
76387907 2452}
2453
5b6269ed 2454\f
2455/* This page implements the algorithm for ordering the nodes of a DDG
2456 for modulo scheduling, activated through the
2457 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
2458
2459#define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2460#define ASAP(x) (ORDER_PARAMS ((x))->asap)
2461#define ALAP(x) (ORDER_PARAMS ((x))->alap)
2462#define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2463#define MOB(x) (ALAP ((x)) - ASAP ((x)))
2464#define DEPTH(x) (ASAP ((x)))
2465
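/* Illustrative example (a made-up DDG, not taken from real code): for a
   chain a -> b -> c with unit-latency, zero-distance edges and no other
   edges, ASAP = {0, 1, 2}, ALAP = {0, 1, 2} and HEIGHT = {2, 1, 0}, so every
   node has MOB 0.  Adding a fourth node d with only the edge a -> d gives
   ASAP (d) = 1 but ALAP (d) = 2, hence MOB (d) = 1: d can slide by one
   cycle. */
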
2466typedef struct node_order_params * nopa;
2467
2468static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2469static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
c0fb94bd 2470static nopa calculate_order_params (ddg_ptr, int, int *);
5b6269ed 2471static int find_max_asap (ddg_ptr, sbitmap);
2472static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2473static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2474
2475enum sms_direction {BOTTOMUP, TOPDOWN};
2476
2477struct node_order_params
2478{
2479 int asap;
2480 int alap;
2481 int height;
2482};
2483
2484/* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2485static void
2486check_nodes_order (int *node_order, int num_nodes)
2487{
2488 int i;
2489 sbitmap tmp = sbitmap_alloc (num_nodes);
2490
53c5d9d4 2491 bitmap_clear (tmp);
5b6269ed 2492
a9ac8caa 2493 if (dump_file)
2494 fprintf (dump_file, "SMS final nodes order: \n");
2495
5b6269ed 2496 for (i = 0; i < num_nodes; i++)
2497 {
2498 int u = node_order[i];
2499
a9ac8caa 2500 if (dump_file)
2501 fprintf (dump_file, "%d ", u);
08b7917c 2502 gcc_assert (u < num_nodes && u >= 0 && !bitmap_bit_p (tmp, u));
5b6269ed 2503
08b7917c 2504 bitmap_set_bit (tmp, u);
5b6269ed 2505 }
48e1416a 2506
a9ac8caa 2507 if (dump_file)
2508 fprintf (dump_file, "\n");
48e1416a 2509
5b6269ed 2510 sbitmap_free (tmp);
2511}
2512
2513/* Order the nodes of G for scheduling and pass the result in
2514 NODE_ORDER. Also set aux.count of each node to ASAP.
c0fb94bd 2515   Store the maximal ASAP in PMAX_ASAP. Return the recMII for the given DDG. */
5b6269ed 2516static int
c0fb94bd 2517sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
5b6269ed 2518{
2519 int i;
2520 int rec_mii = 0;
2521 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2522
c0fb94bd 2523 nopa nops = calculate_order_params (g, mii, pmax_asap);
5b6269ed 2524
65ef61df 2525 if (dump_file)
2526 print_sccs (dump_file, sccs, g);
2527
5b6269ed 2528 order_nodes_of_sccs (sccs, node_order);
2529
2530 if (sccs->num_sccs > 0)
2531 /* First SCC has the largest recurrence_length. */
2532 rec_mii = sccs->sccs[0]->recurrence_length;
2533
2534 /* Save ASAP before destroying node_order_params. */
2535 for (i = 0; i < g->num_nodes; i++)
2536 {
2537 ddg_node_ptr v = &g->nodes[i];
2538 v->aux.count = ASAP (v);
2539 }
2540
2541 free (nops);
2542 free_ddg_all_sccs (sccs);
2543 check_nodes_order (node_order, g->num_nodes);
2544
2545 return rec_mii;
2546}
2547
2548static void
2549order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2550{
2551 int i, pos = 0;
2552 ddg_ptr g = all_sccs->ddg;
2553 int num_nodes = g->num_nodes;
2554 sbitmap prev_sccs = sbitmap_alloc (num_nodes);
2555 sbitmap on_path = sbitmap_alloc (num_nodes);
2556 sbitmap tmp = sbitmap_alloc (num_nodes);
2557 sbitmap ones = sbitmap_alloc (num_nodes);
2558
53c5d9d4 2559 bitmap_clear (prev_sccs);
2560 bitmap_ones (ones);
5b6269ed 2561
f0b5f617 2562 /* Perform the node ordering starting from the SCC with the highest recMII.
5b6269ed 2563 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
2564 for (i = 0; i < all_sccs->num_sccs; i++)
2565 {
2566 ddg_scc_ptr scc = all_sccs->sccs[i];
2567
2568 /* Add nodes on paths from previous SCCs to the current SCC. */
2569 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
53c5d9d4 2570 bitmap_ior (tmp, scc->nodes, on_path);
5b6269ed 2571
2572 /* Add nodes on paths from the current SCC to previous SCCs. */
2573 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
53c5d9d4 2574 bitmap_ior (tmp, tmp, on_path);
5b6269ed 2575
2576 /* Remove nodes of previous SCCs from current extended SCC. */
53c5d9d4 2577 bitmap_and_compl (tmp, tmp, prev_sccs);
5b6269ed 2578
2579 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2580 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
2581 }
2582
2583 /* Handle the remaining nodes that do not belong to any scc. Each call
2584 to order_nodes_in_scc handles a single connected component. */
2585 while (pos < g->num_nodes)
2586 {
53c5d9d4 2587 bitmap_and_compl (tmp, ones, prev_sccs);
5b6269ed 2588 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2589 }
2590 sbitmap_free (prev_sccs);
2591 sbitmap_free (on_path);
2592 sbitmap_free (tmp);
2593 sbitmap_free (ones);
2594}
2595
2596/* MII is needed if we consider backarcs (that do not close recursive cycles). */
2597static struct node_order_params *
c0fb94bd 2598calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
5b6269ed 2599{
2600 int u;
2601 int max_asap;
2602 int num_nodes = g->num_nodes;
2603 ddg_edge_ptr e;
2604 /* Allocate a place to hold ordering params for each node in the DDG. */
2605 nopa node_order_params_arr;
2606
2607 /* Initialize of ASAP/ALAP/HEIGHT to zero. */
2608 node_order_params_arr = (nopa) xcalloc (num_nodes,
2609 sizeof (struct node_order_params));
2610
2c763ed4 2611 /* Set the aux pointer of each node to point to its order_params structure. */
5b6269ed 2612 for (u = 0; u < num_nodes; u++)
2613 g->nodes[u].aux.info = &node_order_params_arr[u];
2614
2615 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2616 calculate ASAP, ALAP, mobility, distance, and height for each node
 2617      in the dependence (directed acyclic) graph. */
2618
2619 /* We assume that the nodes in the array are in topological order. */
2620
2621 max_asap = 0;
2622 for (u = 0; u < num_nodes; u++)
2623 {
2624 ddg_node_ptr u_node = &g->nodes[u];
2625
2626 ASAP (u_node) = 0;
2627 for (e = u_node->in; e; e = e->next_in)
2628 if (e->distance == 0)
2629 ASAP (u_node) = MAX (ASAP (u_node),
2630 ASAP (e->src) + e->latency);
2631 max_asap = MAX (max_asap, ASAP (u_node));
2632 }
2633
2634 for (u = num_nodes - 1; u > -1; u--)
2635 {
2636 ddg_node_ptr u_node = &g->nodes[u];
2637
2638 ALAP (u_node) = max_asap;
2639 HEIGHT (u_node) = 0;
2640 for (e = u_node->out; e; e = e->next_out)
2641 if (e->distance == 0)
2642 {
2643 ALAP (u_node) = MIN (ALAP (u_node),
2644 ALAP (e->dest) - e->latency);
2645 HEIGHT (u_node) = MAX (HEIGHT (u_node),
2646 HEIGHT (e->dest) + e->latency);
2647 }
2648 }
a9ac8caa 2649 if (dump_file)
2650 {
2651 fprintf (dump_file, "\nOrder params\n");
2652 for (u = 0; u < num_nodes; u++)
2653 {
2654 ddg_node_ptr u_node = &g->nodes[u];
2655
2656 fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2657 ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2658 }
2659 }
5b6269ed 2660
c0fb94bd 2661 *pmax_asap = max_asap;
5b6269ed 2662 return node_order_params_arr;
2663}
2664
2665static int
2666find_max_asap (ddg_ptr g, sbitmap nodes)
2667{
86c1585a 2668 unsigned int u = 0;
5b6269ed 2669 int max_asap = -1;
2670 int result = -1;
3e790786 2671 sbitmap_iterator sbi;
5b6269ed 2672
0d211963 2673 EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
5b6269ed 2674 {
2675 ddg_node_ptr u_node = &g->nodes[u];
2676
2677 if (max_asap < ASAP (u_node))
2678 {
2679 max_asap = ASAP (u_node);
2680 result = u;
2681 }
3e790786 2682 }
5b6269ed 2683 return result;
2684}
2685
2686static int
2687find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2688{
86c1585a 2689 unsigned int u = 0;
5b6269ed 2690 int max_hv = -1;
2691 int min_mob = INT_MAX;
2692 int result = -1;
3e790786 2693 sbitmap_iterator sbi;
5b6269ed 2694
0d211963 2695 EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
5b6269ed 2696 {
2697 ddg_node_ptr u_node = &g->nodes[u];
2698
2699 if (max_hv < HEIGHT (u_node))
2700 {
2701 max_hv = HEIGHT (u_node);
2702 min_mob = MOB (u_node);
2703 result = u;
2704 }
2705 else if ((max_hv == HEIGHT (u_node))
2706 && (min_mob > MOB (u_node)))
2707 {
2708 min_mob = MOB (u_node);
2709 result = u;
2710 }
3e790786 2711 }
5b6269ed 2712 return result;
2713}
2714
2715static int
2716find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2717{
86c1585a 2718 unsigned int u = 0;
5b6269ed 2719 int max_dv = -1;
2720 int min_mob = INT_MAX;
2721 int result = -1;
3e790786 2722 sbitmap_iterator sbi;
5b6269ed 2723
0d211963 2724 EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
5b6269ed 2725 {
2726 ddg_node_ptr u_node = &g->nodes[u];
2727
2728 if (max_dv < DEPTH (u_node))
2729 {
2730 max_dv = DEPTH (u_node);
2731 min_mob = MOB (u_node);
2732 result = u;
2733 }
2734 else if ((max_dv == DEPTH (u_node))
2735 && (min_mob > MOB (u_node)))
2736 {
2737 min_mob = MOB (u_node);
2738 result = u;
2739 }
3e790786 2740 }
5b6269ed 2741 return result;
2742}
2743
2744/* Places the nodes of SCC into the NODE_ORDER array starting
2745 at position POS, according to the SMS ordering algorithm.
2746 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2747 the NODE_ORDER array, starting from position zero. */
2748static int
2749order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2750 int * node_order, int pos)
2751{
2752 enum sms_direction dir;
2753 int num_nodes = g->num_nodes;
2754 sbitmap workset = sbitmap_alloc (num_nodes);
2755 sbitmap tmp = sbitmap_alloc (num_nodes);
2756 sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2757 sbitmap predecessors = sbitmap_alloc (num_nodes);
2758 sbitmap successors = sbitmap_alloc (num_nodes);
2759
53c5d9d4 2760 bitmap_clear (predecessors);
5b6269ed 2761 find_predecessors (predecessors, g, nodes_ordered);
2762
53c5d9d4 2763 bitmap_clear (successors);
5b6269ed 2764 find_successors (successors, g, nodes_ordered);
2765
53c5d9d4 2766 bitmap_clear (tmp);
2767 if (bitmap_and (tmp, predecessors, scc))
5b6269ed 2768 {
53c5d9d4 2769 bitmap_copy (workset, tmp);
5b6269ed 2770 dir = BOTTOMUP;
2771 }
53c5d9d4 2772 else if (bitmap_and (tmp, successors, scc))
5b6269ed 2773 {
53c5d9d4 2774 bitmap_copy (workset, tmp);
5b6269ed 2775 dir = TOPDOWN;
2776 }
2777 else
2778 {
2779 int u;
2780
53c5d9d4 2781 bitmap_clear (workset);
5b6269ed 2782 if ((u = find_max_asap (g, scc)) >= 0)
08b7917c 2783 bitmap_set_bit (workset, u);
5b6269ed 2784 dir = BOTTOMUP;
2785 }
2786
53c5d9d4 2787 bitmap_clear (zero_bitmap);
2788 while (!bitmap_equal_p (workset, zero_bitmap))
5b6269ed 2789 {
2790 int v;
2791 ddg_node_ptr v_node;
2792 sbitmap v_node_preds;
2793 sbitmap v_node_succs;
2794
2795 if (dir == TOPDOWN)
2796 {
53c5d9d4 2797 while (!bitmap_equal_p (workset, zero_bitmap))
5b6269ed 2798 {
2799 v = find_max_hv_min_mob (g, workset);
2800 v_node = &g->nodes[v];
2801 node_order[pos++] = v;
2802 v_node_succs = NODE_SUCCESSORS (v_node);
53c5d9d4 2803 bitmap_and (tmp, v_node_succs, scc);
5b6269ed 2804
2805 /* Don't consider the already ordered successors again. */
53c5d9d4 2806 bitmap_and_compl (tmp, tmp, nodes_ordered);
2807 bitmap_ior (workset, workset, tmp);
08b7917c 2808 bitmap_clear_bit (workset, v);
2809 bitmap_set_bit (nodes_ordered, v);
5b6269ed 2810 }
2811 dir = BOTTOMUP;
53c5d9d4 2812 bitmap_clear (predecessors);
5b6269ed 2813 find_predecessors (predecessors, g, nodes_ordered);
53c5d9d4 2814 bitmap_and (workset, predecessors, scc);
5b6269ed 2815 }
2816 else
2817 {
53c5d9d4 2818 while (!bitmap_equal_p (workset, zero_bitmap))
5b6269ed 2819 {
2820 v = find_max_dv_min_mob (g, workset);
2821 v_node = &g->nodes[v];
2822 node_order[pos++] = v;
2823 v_node_preds = NODE_PREDECESSORS (v_node);
53c5d9d4 2824 bitmap_and (tmp, v_node_preds, scc);
5b6269ed 2825
2826 /* Don't consider the already ordered predecessors again. */
53c5d9d4 2827 bitmap_and_compl (tmp, tmp, nodes_ordered);
2828 bitmap_ior (workset, workset, tmp);
08b7917c 2829 bitmap_clear_bit (workset, v);
2830 bitmap_set_bit (nodes_ordered, v);
5b6269ed 2831 }
2832 dir = TOPDOWN;
53c5d9d4 2833 bitmap_clear (successors);
5b6269ed 2834 find_successors (successors, g, nodes_ordered);
53c5d9d4 2835 bitmap_and (workset, successors, scc);
5b6269ed 2836 }
2837 }
2838 sbitmap_free (tmp);
2839 sbitmap_free (workset);
2840 sbitmap_free (zero_bitmap);
2841 sbitmap_free (predecessors);
2842 sbitmap_free (successors);
2843 return pos;
2844}
2845
2846\f
2847/* This page contains functions for manipulating partial-schedules during
2848 modulo scheduling. */
2849
 2850/* Create a partial schedule and allocate memory to hold II rows. */
1bed606a 2851
2852static partial_schedule_ptr
5b6269ed 2853create_partial_schedule (int ii, ddg_ptr g, int history)
2854{
4c36ffe6 2855 partial_schedule_ptr ps = XNEW (struct partial_schedule);
5b6269ed 2856 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
4815ed0c 2857 ps->rows_length = (int *) xcalloc (ii, sizeof (int));
f1f41a6c 2858 ps->reg_moves.create (0);
5b6269ed 2859 ps->ii = ii;
2860 ps->history = history;
2861 ps->min_cycle = INT_MAX;
2862 ps->max_cycle = INT_MIN;
2863 ps->g = g;
2864
2865 return ps;
2866}
2867
2868/* Free the PS_INSNs in rows array of the given partial schedule.
 2869   ??? Consider caching the PS_INSNs. */
2870static void
2871free_ps_insns (partial_schedule_ptr ps)
2872{
2873 int i;
2874
2875 for (i = 0; i < ps->ii; i++)
2876 {
2877 while (ps->rows[i])
2878 {
2879 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2880
2881 free (ps->rows[i]);
2882 ps->rows[i] = ps_insn;
2883 }
2884 ps->rows[i] = NULL;
2885 }
2886}
2887
2888/* Free all the memory allocated to the partial schedule. */
1bed606a 2889
2890static void
5b6269ed 2891free_partial_schedule (partial_schedule_ptr ps)
2892{
0b79ae60 2893 ps_reg_move_info *move;
2894 unsigned int i;
2895
5b6269ed 2896 if (!ps)
2897 return;
0b79ae60 2898
f1f41a6c 2899 FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
0b79ae60 2900 sbitmap_free (move->uses);
f1f41a6c 2901 ps->reg_moves.release ();
0b79ae60 2902
5b6269ed 2903 free_ps_insns (ps);
2904 free (ps->rows);
4815ed0c 2905 free (ps->rows_length);
5b6269ed 2906 free (ps);
2907}
2908
2909/* Clear the rows array with its PS_INSNs, and create a new one with
2910 NEW_II rows. */
1bed606a 2911
2912static void
5b6269ed 2913reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2914{
2915 if (!ps)
2916 return;
2917 free_ps_insns (ps);
2918 if (new_ii == ps->ii)
2919 return;
2920 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2921 * sizeof (ps_insn_ptr));
2922 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
4815ed0c 2923 ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
2924 memset (ps->rows_length, 0, new_ii * sizeof (int));
5b6269ed 2925 ps->ii = new_ii;
2926 ps->min_cycle = INT_MAX;
2927 ps->max_cycle = INT_MIN;
2928}
2929
 2930/* Prints the partial schedule as an array of ii rows; for each row
 2931   print the ids of the insns in it. */
2932void
2933print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2934{
2935 int i;
2936
2937 for (i = 0; i < ps->ii; i++)
2938 {
2939 ps_insn_ptr ps_i = ps->rows[i];
2940
34c7f568 2941 fprintf (dump, "\n[ROW %d ]: ", i);
5b6269ed 2942 while (ps_i)
2943 {
bc69df91 2944 rtx_insn *insn = ps_rtl_insn (ps, ps_i->id);
7282ef12 2945
2946 if (JUMP_P (insn))
2947 fprintf (dump, "%d (branch), ", INSN_UID (insn));
abbfd803 2948 else
7282ef12 2949 fprintf (dump, "%d, ", INSN_UID (insn));
abbfd803 2950
5b6269ed 2951 ps_i = ps_i->next_in_row;
2952 }
2953 }
2954}
2955
2956/* Creates an object of PS_INSN and initializes it to the given parameters. */
2957static ps_insn_ptr
7282ef12 2958create_ps_insn (int id, int cycle)
5b6269ed 2959{
4c36ffe6 2960 ps_insn_ptr ps_i = XNEW (struct ps_insn);
5b6269ed 2961
7282ef12 2962 ps_i->id = id;
5b6269ed 2963 ps_i->next_in_row = NULL;
2964 ps_i->prev_in_row = NULL;
5b6269ed 2965 ps_i->cycle = cycle;
2966
2967 return ps_i;
2968}
2969
2970
9bf2b489 2971/* Removes the given PS_INSN from the partial schedule. */
2972static void
5b6269ed 2973remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2974{
2975 int row;
2976
9bf2b489 2977 gcc_assert (ps && ps_i);
2978
5b6269ed 2979 row = SMODULO (ps_i->cycle, ps->ii);
2980 if (! ps_i->prev_in_row)
2981 {
9bf2b489 2982 gcc_assert (ps_i == ps->rows[row]);
5b6269ed 2983 ps->rows[row] = ps_i->next_in_row;
2984 if (ps->rows[row])
2985 ps->rows[row]->prev_in_row = NULL;
2986 }
2987 else
2988 {
2989 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2990 if (ps_i->next_in_row)
2991 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2992 }
4815ed0c 2993
2994 ps->rows_length[row] -= 1;
5b6269ed 2995 free (ps_i);
9bf2b489 2996 return;
5b6269ed 2997}
2998
f045d41d 2999/* Unlike what the literature describes for modulo scheduling (which focuses
 3000   on VLIW machines), the order of the instructions inside a cycle is
3001 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
3002 where the current instruction should go relative to the already
3003 scheduled instructions in the given cycle. Go over these
3004 instructions and find the first possible column to put it in. */
3005static bool
3006ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3007 sbitmap must_precede, sbitmap must_follow)
3008{
3009 ps_insn_ptr next_ps_i;
3010 ps_insn_ptr first_must_follow = NULL;
3011 ps_insn_ptr last_must_precede = NULL;
6f7f081f 3012 ps_insn_ptr last_in_row = NULL;
f045d41d 3013 int row;
3014
3015 if (! ps_i)
3016 return false;
3017
3018 row = SMODULO (ps_i->cycle, ps->ii);
3019
3020 /* Find the first must follow and the last must precede
91275768 3021 and insert the node immediately after the must precede
dac49aa5 3022      but make sure that there is no must follow after it. */
f045d41d 3023 for (next_ps_i = ps->rows[row];
3024 next_ps_i;
3025 next_ps_i = next_ps_i->next_in_row)
3026 {
7282ef12 3027 if (must_follow
08b7917c 3028 && bitmap_bit_p (must_follow, next_ps_i->id)
f045d41d 3029 && ! first_must_follow)
3030 first_must_follow = next_ps_i;
08b7917c 3031 if (must_precede && bitmap_bit_p (must_precede, next_ps_i->id))
f045d41d 3032 {
3033 /* If we have already met a node that must follow, then
3034 there is no possible column. */
3035 if (first_must_follow)
3036 return false;
3037 else
3038 last_must_precede = next_ps_i;
3039 }
6f7f081f 3040 /* The closing branch must be the last in the row. */
3041 if (must_precede
08b7917c 3042 && bitmap_bit_p (must_precede, next_ps_i->id)
7282ef12 3043 && JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
6f7f081f 3044 return false;
3045
3046 last_in_row = next_ps_i;
f045d41d 3047 }
3048
6f7f081f 3049 /* The closing branch is scheduled as well. Make sure there is no
3050 dependent instruction after it as the branch should be the last
3051 instruction in the row. */
7282ef12 3052 if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
6f7f081f 3053 {
3054 if (first_must_follow)
3055 return false;
3056 if (last_in_row)
3057 {
3058 /* Make the branch the last in the row. New instructions
3059 will be inserted at the beginning of the row or after the
3060 last must_precede instruction thus the branch is guaranteed
3061 to remain the last instruction in the row. */
3062 last_in_row->next_in_row = ps_i;
3063 ps_i->prev_in_row = last_in_row;
3064 ps_i->next_in_row = NULL;
3065 }
3066 else
3067 ps->rows[row] = ps_i;
3068 return true;
3069 }
3070
f045d41d 3071   /* Now insert the node after LAST_MUST_PRECEDE (or at the row head). */
3072
3073 if (! last_must_precede)
3074 {
3075 ps_i->next_in_row = ps->rows[row];
3076 ps_i->prev_in_row = NULL;
3077 if (ps_i->next_in_row)
3078 ps_i->next_in_row->prev_in_row = ps_i;
3079 ps->rows[row] = ps_i;
3080 }
3081 else
3082 {
3083 ps_i->next_in_row = last_must_precede->next_in_row;
3084 last_must_precede->next_in_row = ps_i;
3085 ps_i->prev_in_row = last_must_precede;
3086 if (ps_i->next_in_row)
3087 ps_i->next_in_row->prev_in_row = ps_i;
3088 }
3089
3090 return true;
3091}
3092
5b6269ed 3093/* Advances the PS_INSN one column in its current row; returns false
48e1416a 3094   on failure and true on success. Bit N is set in MUST_FOLLOW if
 3095   the node with cuid N must come after the node pointed to by
f045d41d 3096   PS_I when scheduled in the same cycle. */
5b6269ed 3097static int
f045d41d 3098ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3099 sbitmap must_follow)
5b6269ed 3100{
3101 ps_insn_ptr prev, next;
3102 int row;
3103
3104 if (!ps || !ps_i)
3105 return false;
3106
3107 row = SMODULO (ps_i->cycle, ps->ii);
3108
3109 if (! ps_i->next_in_row)
3110 return false;
3111
3112 /* Check if next_in_row is dependent on ps_i, both having same sched
3113 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
08b7917c 3114 if (must_follow && bitmap_bit_p (must_follow, ps_i->next_in_row->id))
f045d41d 3115 return false;
5b6269ed 3116
91275768 3117 /* Advance PS_I over its next_in_row in the doubly linked list. */
5b6269ed 3118 prev = ps_i->prev_in_row;
3119 next = ps_i->next_in_row;
3120
3121 if (ps_i == ps->rows[row])
3122 ps->rows[row] = next;
3123
3124 ps_i->next_in_row = next->next_in_row;
3125
3126 if (next->next_in_row)
3127 next->next_in_row->prev_in_row = ps_i;
3128
3129 next->next_in_row = ps_i;
3130 ps_i->prev_in_row = next;
3131
3132 next->prev_in_row = prev;
3133 if (prev)
3134 prev->next_in_row = next;
3135
3136 return true;
3137}
3138
 3139/* Inserts a DDG_NODE into the given partial schedule at the given cycle.
48e1416a 3140   Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
 3141   set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must come
3142 before/after (respectively) the node pointed to by PS_I when scheduled
f045d41d 3143 in the same cycle. */
5b6269ed 3144static ps_insn_ptr
7282ef12 3145add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
f045d41d 3146 sbitmap must_precede, sbitmap must_follow)
5b6269ed 3147{
f045d41d 3148 ps_insn_ptr ps_i;
5b6269ed 3149 int row = SMODULO (cycle, ps->ii);
5b6269ed 3150
4815ed0c 3151 if (ps->rows_length[row] >= issue_rate)
5b6269ed 3152 return NULL;
3153
7282ef12 3154 ps_i = create_ps_insn (id, cycle);
f045d41d 3155
3156 /* Finds and inserts PS_I according to MUST_FOLLOW and
3157 MUST_PRECEDE. */
3158 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
3159 {
3160 free (ps_i);
3161 return NULL;
3162 }
5b6269ed 3163
4815ed0c 3164 ps->rows_length[row] += 1;
5b6269ed 3165 return ps_i;
3166}
3167
3168/* Advance time one cycle. Assumes DFA is being used. */
3169static void
3170advance_one_cycle (void)
3171{
67900a4f 3172 if (targetm.sched.dfa_pre_cycle_insn)
3173 state_transition (curr_state,
ffa47ae0 3174 targetm.sched.dfa_pre_cycle_insn ());
5b6269ed 3175
67900a4f 3176 state_transition (curr_state, NULL);
5b6269ed 3177
67900a4f 3178 if (targetm.sched.dfa_post_cycle_insn)
3179 state_transition (curr_state,
ffa47ae0 3180 targetm.sched.dfa_post_cycle_insn ());
5b6269ed 3181}
3182
b6a75366 3183
b6a75366 3184
5b6269ed 3185/* Checks if PS has resource conflicts according to DFA, starting from
3186 FROM cycle to TO cycle; returns true if there are conflicts and false
3187 if there are no conflicts. Assumes DFA is being used. */
3188static int
3189ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
3190{
3191 int cycle;
3192
5b6269ed 3193 state_reset (curr_state);
3194
3195 for (cycle = from; cycle <= to; cycle++)
3196 {
3197 ps_insn_ptr crr_insn;
3198 /* Holds the remaining issue slots in the current row. */
3199 int can_issue_more = issue_rate;
3200
3201 /* Walk through the DFA for the current row. */
3202 for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
3203 crr_insn;
3204 crr_insn = crr_insn->next_in_row)
3205 {
bc69df91 3206 rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
5b6269ed 3207
9845d120 3208 if (!NONDEBUG_INSN_P (insn))
5b6269ed 3209 continue;
3210
3211 /* Check if there is room for the current insn. */
3212 if (!can_issue_more || state_dead_lock_p (curr_state))
3213 return true;
3214
3215 /* Update the DFA state and return with failure if the DFA found
f0b5f617 3216 resource conflicts. */
5b6269ed 3217 if (state_transition (curr_state, insn) >= 0)
3218 return true;
3219
3220 if (targetm.sched.variable_issue)
3221 can_issue_more =
ffa47ae0 3222 targetm.sched.variable_issue (sched_dump, sched_verbose,
3223 insn, can_issue_more);
5b6269ed 3224 /* A naked CLOBBER or USE generates no instruction, so don't
3225 let them consume issue slots. */
3226 else if (GET_CODE (PATTERN (insn)) != USE
3227 && GET_CODE (PATTERN (insn)) != CLOBBER)
3228 can_issue_more--;
3229 }
3230
3231 /* Advance the DFA to the next cycle. */
3232 advance_one_cycle ();
3233 }
3234 return false;
3235}
3236
3237/* Checks if the given node causes resource conflicts when added to PS at
3238 cycle C. If not the node is added to PS and returned; otherwise zero
48e1416a 3239 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
 3240   cuid N must come before/after (respectively) the node pointed to by
f045d41d 3241 PS_I when scheduled in the same cycle. */
b6a75366 3242ps_insn_ptr
7282ef12 3243ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
f045d41d 3244 int c, sbitmap must_precede,
3245 sbitmap must_follow)
5b6269ed 3246{
3247 int has_conflicts = 0;
3248 ps_insn_ptr ps_i;
3249
f045d41d 3250 /* First add the node to the PS, if this succeeds check for
3251 conflicts, trying different issue slots in the same row. */
3252 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
5b6269ed 3253 return NULL; /* Failed to insert the node at the given cycle. */
3254
3255 has_conflicts = ps_has_conflicts (ps, c, c)
3256 || (ps->history > 0
3257 && ps_has_conflicts (ps,
3258 c - ps->history,
3259 c + ps->history));
3260
3261 /* Try different issue slots to find one that the given node can be
3262 scheduled in without conflicts. */
3263 while (has_conflicts)
3264 {
f045d41d 3265 if (! ps_insn_advance_column (ps, ps_i, must_follow))
5b6269ed 3266 break;
3267 has_conflicts = ps_has_conflicts (ps, c, c)
3268 || (ps->history > 0
3269 && ps_has_conflicts (ps,
3270 c - ps->history,
3271 c + ps->history));
3272 }
3273
3274 if (has_conflicts)
3275 {
3276 remove_node_from_ps (ps, ps_i);
3277 return NULL;
3278 }
3279
3280 ps->min_cycle = MIN (ps->min_cycle, c);
3281 ps->max_cycle = MAX (ps->max_cycle, c);
3282 return ps_i;
3283}
3284
6f7f081f 3285/* Calculate the stage count of the partial schedule PS. The calculation
a5ea7ace 3286 takes into account the rotation amount passed in ROTATION_AMOUNT. */
6f7f081f 3287int
a5ea7ace 3288calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
6f7f081f 3289{
6f7f081f 3290 int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
3291 int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
3292 int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
3293
3294 /* The calculation of stage count is done adding the number of stages
3295 before cycle zero and after cycle zero. */
3296 stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
3297
3298 return stage_count;
3299}
3300
5b6269ed 3301/* Rotate the rows of PS such that insns scheduled at time
3302 START_CYCLE will appear in row 0. Updates max/min_cycles. */
b6a75366 3303void
5b6269ed 3304rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
3305{
3306 int i, row, backward_rotates;
3307 int last_row = ps->ii - 1;
3308
3309 if (start_cycle == 0)
3310 return;
3311
3312 backward_rotates = SMODULO (start_cycle, ps->ii);
3313
3314 /* Revisit later and optimize this into a single loop. */
3315 for (i = 0; i < backward_rotates; i++)
3316 {
3317 ps_insn_ptr first_row = ps->rows[0];
4815ed0c 3318 int first_row_length = ps->rows_length[0];
5b6269ed 3319
3320 for (row = 0; row < last_row; row++)
4815ed0c 3321 {
3322 ps->rows[row] = ps->rows[row + 1];
3323 ps->rows_length[row] = ps->rows_length[row + 1];
3324 }
5b6269ed 3325
3326 ps->rows[last_row] = first_row;
4815ed0c 3327 ps->rows_length[last_row] = first_row_length;
5b6269ed 3328 }
3329
3330 ps->max_cycle -= start_cycle;
3331 ps->min_cycle -= start_cycle;
3332}
07cba988 3333
77fce4cd 3334#endif /* INSN_SCHEDULING */
3335\f
77fce4cd 3336/* Run instruction scheduler. */
3337/* Perform SMS module scheduling. */
77fce4cd 3338
cbe8bda8 3339namespace {
3340
3341const pass_data pass_data_sms =
3342{
3343 RTL_PASS, /* type */
3344 "sms", /* name */
3345 OPTGROUP_NONE, /* optinfo_flags */
cbe8bda8 3346 TV_SMS, /* tv_id */
3347 0, /* properties_required */
3348 0, /* properties_provided */
3349 0, /* properties_destroyed */
3350 0, /* todo_flags_start */
8b88439e 3351 TODO_df_finish, /* todo_flags_finish */
77fce4cd 3352};
cbe8bda8 3353
3354class pass_sms : public rtl_opt_pass
3355{
3356public:
9af5ce0c 3357 pass_sms (gcc::context *ctxt)
3358 : rtl_opt_pass (pass_data_sms, ctxt)
cbe8bda8 3359 {}
3360
3361 /* opt_pass methods: */
31315c24 3362 virtual bool gate (function *)
3363{
3364 return (optimize > 0 && flag_modulo_sched);
3365}
3366
65b0537f 3367 virtual unsigned int execute (function *);
cbe8bda8 3368
3369}; // class pass_sms
3370
65b0537f 3371unsigned int
3372pass_sms::execute (function *fun ATTRIBUTE_UNUSED)
3373{
3374#ifdef INSN_SCHEDULING
3375 basic_block bb;
3376
3377 /* Collect loop information to be used in SMS. */
3378 cfg_layout_initialize (0);
3379 sms_schedule ();
3380
3381 /* Update the life information, because we add pseudos. */
3382 max_regno = max_reg_num ();
3383
3384 /* Finalize layout changes. */
3385 FOR_EACH_BB_FN (bb, fun)
3386 if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
3387 bb->aux = bb->next_bb;
3388 free_dominance_info (CDI_DOMINATORS);
3389 cfg_layout_finalize ();
3390#endif /* INSN_SCHEDULING */
3391 return 0;
3392}
3393
cbe8bda8 3394} // anon namespace
3395
3396rtl_opt_pass *
3397make_pass_sms (gcc::context *ctxt)
3398{
3399 return new pass_sms (ctxt);
3400}