]>
Commit | Line | Data |
---|---|---|
d397e8c6 | 1 | /* Swing Modulo Scheduling implementation. |
99dee823 | 2 | Copyright (C) 2004-2021 Free Software Foundation, Inc. |
d397e8c6 MH |
3 | Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com> |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9dcd6f09 | 9 | Software Foundation; either version 3, or (at your option) any later |
d397e8c6 MH |
10 | version. |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
d397e8c6 MH |
20 | |
21 | ||
22 | #include "config.h" | |
23 | #include "system.h" | |
24 | #include "coretypes.h" | |
c7131fb2 | 25 | #include "backend.h" |
957060b5 | 26 | #include "target.h" |
d397e8c6 | 27 | #include "rtl.h" |
957060b5 AM |
28 | #include "tree.h" |
29 | #include "cfghooks.h" | |
c7131fb2 | 30 | #include "df.h" |
4d0cdd0c | 31 | #include "memmodel.h" |
957060b5 | 32 | #include "optabs.h" |
d397e8c6 | 33 | #include "regs.h" |
957060b5 | 34 | #include "emit-rtl.h" |
957060b5 | 35 | #include "gcov-io.h" |
59f2e9d8 | 36 | #include "profile.h" |
d397e8c6 | 37 | #include "insn-attr.h" |
60393bbc | 38 | #include "cfgrtl.h" |
d397e8c6 | 39 | #include "sched-int.h" |
d397e8c6 | 40 | #include "cfgloop.h" |
d397e8c6 | 41 | #include "expr.h" |
d397e8c6 | 42 | #include "ddg.h" |
ef330312 | 43 | #include "tree-pass.h" |
97511ad7 | 44 | #include "dbgcnt.h" |
dd637013 | 45 | #include "loop-unroll.h" |
d397e8c6 | 46 | |
d7777192 | 47 | #ifdef INSN_SCHEDULING |
d397e8c6 MH |
48 | |
49 | /* This file contains the implementation of the Swing Modulo Scheduler, | |
50 | described in the following references: | |
51 | [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt. | |
52 | Lifetime--sensitive modulo scheduling in a production environment. | |
53 | IEEE Trans. on Comps., 50(3), March 2001 | |
54 | [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero. | |
55 | Swing Modulo Scheduling: A Lifetime Sensitive Approach. | |
1ea7e6ad | 56 | PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA). |
d397e8c6 MH |
57 | |
58 | The basic structure is: | |
59 | 1. Build a data-dependence graph (DDG) for each loop. | |
60 | 2. Use the DDG to order the insns of a loop (not in topological order | |
61 | necessarily, but rather) trying to place each insn after all its | |
62 | predecessors _or_ after all its successors. | |
63 | 3. Compute MII: a lower bound on the number of cycles to schedule the loop. | |
64 | 4. Use the ordering to perform list-scheduling of the loop: | |
65 | 1. Set II = MII. We will try to schedule the loop within II cycles. | |
66 | 2. Try to schedule the insns one by one according to the ordering. | |
67 | For each insn compute an interval of cycles by considering already- | |
68 | scheduled preds and succs (and associated latencies); try to place | |
69 | the insn in the cycles of this window checking for potential | |
70 | resource conflicts (using the DFA interface). | |
71 | Note: this is different from the cycle-scheduling of schedule_insns; | |
72 | here the insns are not scheduled monotonically top-down (nor bottom- | |
73 | up). | |
74 | 3. If failed in scheduling all insns - bump II++ and try again, unless | |
f6fe65dc | 75 | II reaches an upper bound MaxII, in which case report failure. |
d397e8c6 MH |
76 | 5. If we succeeded in scheduling the loop within II cycles, we now |
77 | generate prolog and epilog, decrease the counter of the loop, and | |
78 | perform modulo variable expansion for live ranges that span more than | |
79 | II cycles (i.e. use register copies to prevent a def from overwriting | |
80 | itself before reaching the use). | |
d397e8c6 | 81 | |
413e50a2 RE |
82 | SMS works with countable loops (1) whose control part can be easily |
83 | decoupled from the rest of the loop and (2) whose loop count can | |
84 | be easily adjusted. This is because we peel a constant number of | |
85 | iterations into a prologue and epilogue for which we want to avoid | |
86 | emitting the control part, and a kernel which is to iterate that | |
87 | constant number of iterations less than the original loop. So the | |
88 | control part should be a set of insns clearly identified and having | |
89 | its own iv, not otherwise used in the loop (at-least for now), which | |
46dc0789 MN |
90 | initializes a register before the loop to the number of iterations. |
91 | Currently SMS relies on the do-loop pattern to recognize such loops, | |
92 | where (1) the control part comprises of all insns defining and/or | |
93 | using a certain 'count' register and (2) the loop count can be | |
b8698a0f | 94 | adjusted by modifying this register prior to the loop. |
46dc0789 | 95 | TODO: Rely on cfgloop analysis instead. */ |
d397e8c6 MH |
96 | \f |
97 | /* This page defines partial-schedule structures and functions for | |
98 | modulo scheduling. */ | |
99 | ||
100 | typedef struct partial_schedule *partial_schedule_ptr; | |
101 | typedef struct ps_insn *ps_insn_ptr; | |
102 | ||
103 | /* The minimum (absolute) cycle that a node of ps was scheduled in. */ | |
104 | #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle) | |
105 | ||
106 | /* The maximum (absolute) cycle that a node of ps was scheduled in. */ | |
107 | #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle) | |
108 | ||
109 | /* Perform signed modulo, always returning a non-negative value. */ | |
110 | #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y)) | |
111 | ||
112 | /* The number of different iterations the nodes in ps span, assuming | |
113 | the stage boundaries are placed efficiently. */ | |
fc6970e4 RE |
114 | #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \ |
115 | + 1 + ii - 1) / ii) | |
116 | /* The stage count of ps. */ | |
117 | #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count) | |
d397e8c6 | 118 | |
d397e8c6 MH |
/* A single instruction in the partial schedule.  */
struct ps_insn
{
  /* Identifies the instruction to be scheduled.  Values smaller than
     the ddg's num_nodes refer directly to ddg nodes.  A value of
     X - num_nodes refers to register move X.  */
  int id;

  /* The (absolute) cycle in which the PS instruction is scheduled.
     Same as SCHED_TIME (node).  */
  int cycle;

  /* The next/prev PS_INSN in the same row.  Insns in a row form a
     doubly-linked list; their position in it defines SCHED_COLUMN.  */
  ps_insn_ptr next_in_row,
	      prev_in_row;

};
136 | ||
1287d8ea RS |
/* Information about a register move that has been added to a partial
   schedule.  */
struct ps_reg_move_info
{
  /* The source of the move is defined by the ps_insn with id DEF.
     The destination is used by the ps_insns with the ids in USES.  */
  int def;
  sbitmap uses;

  /* The original form of USES' instructions used OLD_REG, but they
     should now use NEW_REG.  */
  rtx old_reg;
  rtx new_reg;

  /* The number of consecutive stages that the move occupies.  */
  int num_consecutive_stages;

  /* An instruction that sets NEW_REG to the correct value.  The first
     move associated with DEF will have an rhs of OLD_REG; later moves
     use the result of the previous move.  */
  rtx_insn *insn;
};
159 | ||
d397e8c6 MH |
/* Holds the partial schedule as an array of II rows.  Each entry of the
   array points to a linked list of PS_INSNs, which represents the
   instructions that are scheduled for that row.  */
struct partial_schedule
{
  int ii;	/* Number of rows in the partial schedule.  */
  int history;  /* Threshold for conflict checking using DFA.  */

  /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
  ps_insn_ptr *rows;

  /* All the moves added for this partial schedule.  Index X has
     a ps_insn id of X + g->num_nodes.  */
  vec<ps_reg_move_info> reg_moves;

  /* rows_length[i] holds the number of instructions in the row.
     It is used only (as an optimization) to back off quickly from
     trying to schedule a node in a full row; that is, to avoid running
     through futile DFA state transitions.  */
  int *rows_length;

  /* The earliest absolute cycle of an insn in the partial schedule.  */
  int min_cycle;

  /* The latest absolute cycle of an insn in the partial schedule.  */
  int max_cycle;

  ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */

  int stage_count;  /* The stage count of the partial schedule.  */
};
191 | ||
b8698a0f | 192 | |
5f1f4746 KH |
193 | static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history); |
194 | static void free_partial_schedule (partial_schedule_ptr); | |
195 | static void reset_partial_schedule (partial_schedule_ptr, int new_ii); | |
d397e8c6 | 196 | void print_partial_schedule (partial_schedule_ptr, FILE *); |
c8943832 | 197 | static void verify_partial_schedule (partial_schedule_ptr, sbitmap); |
c16162ad | 198 | static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr, |
88e9c867 | 199 | int, int, sbitmap, sbitmap); |
c16162ad | 200 | static void rotate_partial_schedule (partial_schedule_ptr, int); |
f73d5666 | 201 | void set_row_column_for_ps (partial_schedule_ptr); |
c8943832 VY |
202 | static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap); |
203 | static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr); | |
f73d5666 | 204 | |
d397e8c6 | 205 | \f |
1ea7e6ad | 206 | /* This page defines constants and structures for the modulo scheduling |
d397e8c6 MH |
207 | driver. */ |
208 | ||
5cd53742 | 209 | static int sms_order_nodes (ddg_ptr, int, int *, int *); |
d397e8c6 | 210 | static void set_node_sched_params (ddg_ptr); |
10d22567 | 211 | static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *); |
dc01c3d1 | 212 | static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *); |
edc429ff RE |
213 | static int calculate_stage_count (partial_schedule_ptr, int); |
214 | static void calculate_must_precede_follow (ddg_node_ptr, int, int, | |
215 | int, int, sbitmap, sbitmap, sbitmap); | |
216 | static int get_sched_window (partial_schedule_ptr, ddg_node_ptr, | |
217 | sbitmap, int, int *, int *, int *); | |
88e9c867 RS |
218 | static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int, |
219 | sbitmap, int *, sbitmap, sbitmap); | |
a9fb4f13 | 220 | static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); |
edc429ff | 221 | |
88e9c867 RS |
222 | #define NODE_ASAP(node) ((node)->aux.count) |
223 | ||
9771b263 | 224 | #define SCHED_PARAMS(x) (&node_sched_param_vec[x]) |
88e9c867 | 225 | #define SCHED_TIME(x) (SCHED_PARAMS (x)->time) |
88e9c867 RS |
226 | #define SCHED_ROW(x) (SCHED_PARAMS (x)->row) |
227 | #define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage) | |
228 | #define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column) | |
d397e8c6 MH |
229 | |
/* The scheduling parameters held for each node.  */
typedef struct node_sched_params
{
  int time;	/* The absolute scheduling cycle.  */

  int row;    /* Holds time % ii.  */
  int stage;  /* Holds time / ii.  */

  /* The column of a node inside the ps.  If nodes u, v are on the same row,
     u will precede v if column (u) < column (v).  */
  int column;
} *node_sched_params_ptr;
d397e8c6 MH |
242 | \f |
243 | /* The following three functions are copied from the current scheduler | |
61ada8ae | 244 | code in order to use sched_analyze() for computing the dependencies. |
d397e8c6 MH |
245 | They are used when initializing the sched_info structure. */ |
246 | static const char * | |
ce1ce33a | 247 | sms_print_insn (const rtx_insn *insn, int aligned ATTRIBUTE_UNUSED) |
d397e8c6 MH |
248 | { |
249 | static char tmp[80]; | |
250 | ||
251 | sprintf (tmp, "i%4d", INSN_UID (insn)); | |
252 | return tmp; | |
253 | } | |
254 | ||
d397e8c6 MH |
/* sched_deps_info callback for jumps.  Deliberately empty: SMS records
   no additional register dependencies for jump instructions.  */
static void
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
			       regset used ATTRIBUTE_UNUSED)
{
}
260 | ||
e855c69d AB |
/* Common scheduler state.  Zero-initialized here; NOTE(review): it is
   presumably filled in by SMS setup code elsewhere in this file before
   the dependence analysis runs -- confirm against the pass entry point.  */
static struct common_sched_info_def sms_common_sched_info;

/* Dependence-analysis hooks used by sched_analyze for SMS.  Only the
   jump-register-dependency callback is supplied; all other hooks are
   left null/zero.  */
static struct sched_deps_info_def sms_sched_deps_info =
  {
    compute_jump_reg_dependencies,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL,
    0, 0, 0
  };

/* Haifa scheduler hooks for SMS.  Only the insn printer is provided;
   the remaining callbacks are unused by this pass.  */
static struct haifa_sched_info sms_sched_info =
{
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  sms_print_insn,
  NULL,
  NULL, /* insn_finishes_block_p */
  NULL, NULL,
  NULL, NULL,
  0, 0,

  NULL, NULL, NULL, NULL,
  NULL, NULL,
  0
};
289 | ||
1287d8ea RS |
290 | /* Partial schedule instruction ID in PS is a register move. Return |
291 | information about it. */ | |
292 | static struct ps_reg_move_info * | |
293 | ps_reg_move (partial_schedule_ptr ps, int id) | |
294 | { | |
295 | gcc_checking_assert (id >= ps->g->num_nodes); | |
9771b263 | 296 | return &ps->reg_moves[id - ps->g->num_nodes]; |
1287d8ea RS |
297 | } |
298 | ||
88e9c867 RS |
299 | /* Return the rtl instruction that is being scheduled by partial schedule |
300 | instruction ID, which belongs to schedule PS. */ | |
6210ec61 | 301 | static rtx_insn * |
88e9c867 RS |
302 | ps_rtl_insn (partial_schedule_ptr ps, int id) |
303 | { | |
1287d8ea RS |
304 | if (id < ps->g->num_nodes) |
305 | return ps->g->nodes[id].insn; | |
306 | else | |
307 | return ps_reg_move (ps, id)->insn; | |
88e9c867 RS |
308 | } |
309 | ||
073a8998 | 310 | /* Partial schedule instruction ID, which belongs to PS, occurred in |
1287d8ea RS |
311 | the original (unscheduled) loop. Return the first instruction |
312 | in the loop that was associated with ps_rtl_insn (PS, ID). | |
313 | If the instruction had some notes before it, this is the first | |
314 | of those notes. */ | |
fee3e72c | 315 | static rtx_insn * |
88e9c867 RS |
316 | ps_first_note (partial_schedule_ptr ps, int id) |
317 | { | |
1287d8ea | 318 | gcc_assert (id < ps->g->num_nodes); |
88e9c867 RS |
319 | return ps->g->nodes[id].first_note; |
320 | } | |
321 | ||
752cdc4e RS |
322 | /* Return the number of consecutive stages that are occupied by |
323 | partial schedule instruction ID in PS. */ | |
324 | static int | |
325 | ps_num_consecutive_stages (partial_schedule_ptr ps, int id) | |
326 | { | |
327 | if (id < ps->g->num_nodes) | |
328 | return 1; | |
329 | else | |
330 | return ps_reg_move (ps, id)->num_consecutive_stages; | |
331 | } | |
332 | ||
46dc0789 MN |
/* Given HEAD and TAIL which are the first and last insns in a loop;
   return the register which controls the loop.  Return zero if it has
   more than one occurrence in the loop besides the control part or the
   do-loop pattern is not of the form we expect.  */
static rtx
doloop_register_get (rtx_insn *head, rtx_insn *tail)
{
  rtx reg, condition;
  rtx_insn *insn, *first_insn_not_to_check;

  /* The loop must end in a jump for there to be a doloop branch.  */
  if (!JUMP_P (tail))
    return NULL_RTX;

  /* Without a doloop_end pattern the target has no do-loop form to
     recognize.  */
  if (!targetm.code_for_doloop_end)
    return NULL_RTX;

  /* TODO: Free SMS's dependence on doloop_condition_get.  */
  condition = doloop_condition_get (tail);
  if (! condition)
    return NULL_RTX;

  /* The condition tests either the count register directly, or the
     count register plus an adjustment.  */
  if (REG_P (XEXP (condition, 0)))
    reg = XEXP (condition, 0);
  else if (GET_CODE (XEXP (condition, 0)) == PLUS
	   && REG_P (XEXP (XEXP (condition, 0), 0)))
    reg = XEXP (XEXP (condition, 0), 0);
  else
    gcc_unreachable ();

  /* Check that the COUNT_REG has no other occurrences in the loop
     until the decrement.  We assume the control part consists of
     either a single (parallel) branch-on-count or a (non-parallel)
     branch immediately preceded by a single (decrement) insn.  */
  first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
			     : prev_nondebug_insn (tail));

  for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
      {
	if (dump_file)
	  {
	    fprintf (dump_file, "SMS count_reg found ");
	    print_rtl_single (dump_file, reg);
	    fprintf (dump_file, " outside control in insn:\n");
	    print_rtl_single (dump_file, insn);
	  }

	return NULL_RTX;
      }

  return reg;
}
385 | ||
/* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
   that the number of iterations is a compile-time constant.  If so,
   return the rtx_insn that sets COUNT_REG to a constant, and set COUNT to
   this constant.  Otherwise return 0.
   *ADJUST_INPLACE is set to true when the returned insn may be modified
   in place: no later insn in the pre-header reads COUNT_REG after it is
   set (see READ_AFTER below).  */
static rtx_insn *
const_iteration_count (rtx count_reg, basic_block pre_header,
		       int64_t *count, bool* adjust_inplace)
{
  rtx_insn *insn;
  rtx_insn *head, *tail;

  *adjust_inplace = false;
  bool read_after = false;

  if (! pre_header)
    return NULL;

  get_ebb_head_tail (pre_header, pre_header, &head, &tail);

  /* Scan backwards from the end of the pre-header looking for the
     insn that sets COUNT_REG.  */
  for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
    if (single_set (insn) && rtx_equal_p (count_reg,
					  SET_DEST (single_set (insn))))
      {
	rtx pat = single_set (insn);

	if (CONST_INT_P (SET_SRC (pat)))
	  {
	    *count = INTVAL (SET_SRC (pat));
	    /* Safe to patch this insn only if nothing between it and
	       the loop read COUNT_REG.  */
	    *adjust_inplace = !read_after;
	    return insn;
	  }

	return NULL;
      }
    else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn))
      {
	/* COUNT_REG is consumed after the (not yet found) setter.  */
	read_after = true;
	if (reg_set_p (count_reg, insn))
	  break;
      }

  return NULL;
}
429 | ||
430 | /* A very simple resource-based lower bound on the initiation interval. | |
431 | ??? Improve the accuracy of this bound by considering the | |
432 | utilization of various units. */ | |
433 | static int | |
434 | res_MII (ddg_ptr g) | |
435 | { | |
67186a97 | 436 | if (targetm.sched.sms_res_mii) |
b8698a0f L |
437 | return targetm.sched.sms_res_mii (g); |
438 | ||
06d5d63d | 439 | return g->num_nodes / issue_rate; |
d397e8c6 MH |
440 | } |
441 | ||
442 | ||
1287d8ea | 443 | /* A vector that contains the sched data for each ps_insn. */ |
9771b263 | 444 | static vec<node_sched_params> node_sched_param_vec; |
d397e8c6 | 445 | |
88e9c867 | 446 | /* Allocate sched_params for each node and initialize it. */ |
d397e8c6 MH |
447 | static void |
448 | set_node_sched_params (ddg_ptr g) | |
449 | { | |
9771b263 | 450 | node_sched_param_vec.truncate (0); |
cb3874dc | 451 | node_sched_param_vec.safe_grow_cleared (g->num_nodes, true); |
d397e8c6 MH |
452 | } |
453 | ||
752cdc4e RS |
454 | /* Make sure that node_sched_param_vec has an entry for every move in PS. */ |
455 | static void | |
456 | extend_node_sched_params (partial_schedule_ptr ps) | |
457 | { | |
9771b263 | 458 | node_sched_param_vec.safe_grow_cleared (ps->g->num_nodes |
cb3874dc | 459 | + ps->reg_moves.length (), true); |
752cdc4e RS |
460 | } |
461 | ||
462 | /* Update the sched_params (time, row and stage) for node U using the II, | |
463 | the CYCLE of U and MIN_CYCLE. | |
464 | We're not simply taking the following | |
465 | SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii); | |
466 | because the stages may not be aligned on cycle 0. */ | |
467 | static void | |
468 | update_node_sched_params (int u, int ii, int cycle, int min_cycle) | |
469 | { | |
470 | int sc_until_cycle_zero; | |
471 | int stage; | |
472 | ||
473 | SCHED_TIME (u) = cycle; | |
474 | SCHED_ROW (u) = SMODULO (cycle, ii); | |
475 | ||
476 | /* The calculation of stage count is done adding the number | |
477 | of stages before cycle zero and after cycle zero. */ | |
478 | sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii); | |
479 | ||
480 | if (SCHED_TIME (u) < 0) | |
481 | { | |
482 | stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); | |
483 | SCHED_STAGE (u) = sc_until_cycle_zero - stage; | |
484 | } | |
485 | else | |
486 | { | |
487 | stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); | |
488 | SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; | |
489 | } | |
490 | } | |
491 | ||
d397e8c6 | 492 | static void |
1287d8ea | 493 | print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps) |
d397e8c6 MH |
494 | { |
495 | int i; | |
496 | ||
10d22567 | 497 | if (! file) |
d331e204 | 498 | return; |
d397e8c6 MH |
499 | for (i = 0; i < num_nodes; i++) |
500 | { | |
88e9c867 | 501 | node_sched_params_ptr nsp = SCHED_PARAMS (i); |
d397e8c6 | 502 | |
76b4f0f7 | 503 | fprintf (file, "Node = %d; INSN = %d\n", i, |
1287d8ea RS |
504 | INSN_UID (ps_rtl_insn (ps, i))); |
505 | fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i])); | |
10d22567 | 506 | fprintf (file, " time = %d:\n", nsp->time); |
752cdc4e RS |
507 | fprintf (file, " stage = %d:\n", nsp->stage); |
508 | } | |
509 | } | |
510 | ||
511 | /* Set SCHED_COLUMN for each instruction in row ROW of PS. */ | |
512 | static void | |
513 | set_columns_for_row (partial_schedule_ptr ps, int row) | |
514 | { | |
515 | ps_insn_ptr cur_insn; | |
516 | int column; | |
517 | ||
518 | column = 0; | |
519 | for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row) | |
520 | SCHED_COLUMN (cur_insn->id) = column++; | |
521 | } | |
522 | ||
523 | /* Set SCHED_COLUMN for each instruction in PS. */ | |
524 | static void | |
525 | set_columns_for_ps (partial_schedule_ptr ps) | |
526 | { | |
527 | int row; | |
528 | ||
529 | for (row = 0; row < ps->ii; row++) | |
530 | set_columns_for_row (ps, row); | |
531 | } | |
1287d8ea | 532 | |
752cdc4e RS |
/* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
   Its single predecessor has already been scheduled, as has its
   ddg node successors.  (The move may have also another move as its
   successor, in which case that successor will be scheduled later.)

   The move is part of a chain that satisfies register dependencies
   between a producing ddg node and various consuming ddg nodes.
   If some of these dependencies have a distance of 1 (meaning that
   the use is upward-exposed) then DISTANCE1_USES is nonnull and
   contains the set of uses with distance-1 dependencies.
   DISTANCE1_USES is null otherwise.

   MUST_FOLLOW is a scratch bitmap that is big enough to hold
   all current ps_insn ids.

   Return true on success.  */
static bool
schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
		   sbitmap distance1_uses, sbitmap must_follow)
{
  unsigned int u;
  int this_time, this_distance, this_start, this_end, this_latency;
  int start, end, c, ii;
  sbitmap_iterator sbi;
  ps_reg_move_info *move;
  rtx_insn *this_insn;
  ps_insn_ptr psi;

  move = ps_reg_move (ps, i_reg_move);
  ii = ps->ii;
  if (dump_file)
    {
      fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
	       ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
	       PS_MIN_CYCLE (ps));
      print_rtl_single (dump_file, move->insn);
      fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
      fprintf (dump_file, "=========== =========== =====\n");
    }

  /* START/END delimit the window of cycles the move may legally
     occupy; each dependence below narrows it.  */
  start = INT_MIN;
  end = INT_MAX;

  /* For dependencies of distance 1 between a producer ddg node A
     and consumer ddg node B, we have a chain of dependencies:

        A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B

     where Mi is the ith move.  For dependencies of distance 0 between
     a producer ddg node A and consumer ddg node C, we have a chain of
     dependencies:

        A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C

     where Mi' occupies the same position as Mi but occurs a stage later.
     We can only schedule each move once, so if we have both types of
     chain, we model the second as:

        A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C

     First handle the dependencies between the previously-scheduled
     predecessor and the move.  */
  this_insn = ps_rtl_insn (ps, move->def);
  this_latency = insn_latency (this_insn, move->insn);
  this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
  this_time = SCHED_TIME (move->def) - this_distance * ii;
  this_start = this_time + this_latency;
  this_end = this_time + ii;
  if (dump_file)
    fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
	     this_start, this_end, SCHED_TIME (move->def),
	     INSN_UID (this_insn), this_latency, this_distance,
	     INSN_UID (move->insn));

  if (start < this_start)
    start = this_start;
  if (end > this_end)
    end = this_end;

  /* Handle the dependencies between the move and previously-scheduled
     successors.  */
  EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, u, sbi)
    {
      this_insn = ps_rtl_insn (ps, u);
      this_latency = insn_latency (move->insn, this_insn);
      if (distance1_uses && !bitmap_bit_p (distance1_uses, u))
	this_distance = -1;
      else
	this_distance = 0;
      this_time = SCHED_TIME (u) + this_distance * ii;
      this_start = this_time - ii;
      this_end = this_time - this_latency;
      if (dump_file)
	fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
		 this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
		 this_latency, this_distance, INSN_UID (this_insn));

      if (start < this_start)
	start = this_start;
      if (end > this_end)
	end = this_end;
    }

  if (dump_file)
    {
      fprintf (dump_file, "----------- ----------- -----\n");
      fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
    }

  /* The move must be placed after its definition in the row order.  */
  bitmap_clear (must_follow);
  bitmap_set_bit (must_follow, move->def);

  /* Clamp the window to at most II cycles and try the latest cycles
     first.  */
  start = MAX (start, end - (ii - 1));
  for (c = end; c >= start; c--)
    {
      psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
					 move->uses, must_follow);
      if (psi)
	{
	  update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
	  if (dump_file)
	    fprintf (dump_file, "\nScheduled register move INSN %d at"
		     " time %d, row %d\n\n", INSN_UID (move->insn), c,
		     SCHED_ROW (i_reg_move));
	  return true;
	}
    }

  if (dump_file)
    fprintf (dump_file, "\nNo available slot\n\n");

  return false;
}
666 | ||
d331e204 MH |
667 | /* |
668 | Breaking intra-loop register anti-dependences: | |
669 | Each intra-loop register anti-dependence implies a cross-iteration true | |
670 | dependence of distance 1. Therefore, we can remove such false dependencies | |
671 | and figure out if the partial schedule broke them by checking if (for a | |
672 | true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and | |
673 | if so generate a register move. The number of such moves is equal to: | |
674 | SCHED_TIME (use) - SCHED_TIME (def) { 0 broken | |
aabcd309 | 675 | nreg_moves = ----------------------------------- + 1 - { dependence. |
d331e204 MH |
676 | ii { 1 if not. |
677 | */ | |
1287d8ea RS |
678 | static bool |
679 | schedule_reg_moves (partial_schedule_ptr ps) | |
d397e8c6 MH |
680 | { |
681 | ddg_ptr g = ps->g; | |
682 | int ii = ps->ii; | |
683 | int i; | |
684 | ||
685 | for (i = 0; i < g->num_nodes; i++) | |
686 | { | |
687 | ddg_node_ptr u = &g->nodes[i]; | |
688 | ddg_edge_ptr e; | |
689 | int nreg_moves = 0, i_reg_move; | |
d397e8c6 | 690 | rtx prev_reg, old_reg; |
1287d8ea | 691 | int first_move; |
752cdc4e | 692 | int distances[2]; |
752cdc4e | 693 | sbitmap distance1_uses; |
442b891d RE |
694 | rtx set = single_set (u->insn); |
695 | ||
696 | /* Skip instructions that do not set a register. */ | |
3a123ed7 | 697 | if (set && !REG_P (SET_DEST (set))) |
442b891d | 698 | continue; |
3a123ed7 | 699 | |
d397e8c6 MH |
700 | /* Compute the number of reg_moves needed for u, by looking at life |
701 | ranges started at u (excluding self-loops). */ | |
752cdc4e | 702 | distances[0] = distances[1] = false; |
d397e8c6 MH |
703 | for (e = u->out; e; e = e->next_out) |
704 | if (e->type == TRUE_DEP && e->dest != e->src) | |
705 | { | |
88e9c867 RS |
706 | int nreg_moves4e = (SCHED_TIME (e->dest->cuid) |
707 | - SCHED_TIME (e->src->cuid)) / ii; | |
d397e8c6 | 708 | |
d331e204 | 709 | if (e->distance == 1) |
88e9c867 RS |
710 | nreg_moves4e = (SCHED_TIME (e->dest->cuid) |
711 | - SCHED_TIME (e->src->cuid) + ii) / ii; | |
d331e204 | 712 | |
1ea7e6ad | 713 | /* If dest precedes src in the schedule of the kernel, then dest |
d397e8c6 | 714 | will read before src writes and we can save one reg_copy. */ |
88e9c867 RS |
715 | if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid) |
716 | && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid)) | |
d397e8c6 MH |
717 | nreg_moves4e--; |
718 | ||
442b891d RE |
719 | if (nreg_moves4e >= 1) |
720 | { | |
721 | /* !single_set instructions are not supported yet and | |
722 | thus we do not except to encounter them in the loop | |
723 | except from the doloop part. For the latter case | |
724 | we assume no regmoves are generated as the doloop | |
725 | instructions are tied to the branch with an edge. */ | |
726 | gcc_assert (set); | |
d8edf83d RE |
727 | /* If the instruction contains auto-inc register then |
728 | validate that the regmov is being generated for the | |
729 | target regsiter rather then the inc'ed register. */ | |
730 | gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn)); | |
442b891d RE |
731 | } |
732 | ||
752cdc4e RS |
733 | if (nreg_moves4e) |
734 | { | |
735 | gcc_assert (e->distance < 2); | |
736 | distances[e->distance] = true; | |
737 | } | |
d397e8c6 MH |
738 | nreg_moves = MAX (nreg_moves, nreg_moves4e); |
739 | } | |
740 | ||
741 | if (nreg_moves == 0) | |
742 | continue; | |
743 | ||
1287d8ea | 744 | /* Create NREG_MOVES register moves. */ |
9771b263 | 745 | first_move = ps->reg_moves.length (); |
cb3874dc | 746 | ps->reg_moves.safe_grow_cleared (first_move + nreg_moves, true); |
752cdc4e | 747 | extend_node_sched_params (ps); |
1287d8ea RS |
748 | |
749 | /* Record the moves associated with this node. */ | |
750 | first_move += ps->g->num_nodes; | |
1287d8ea RS |
751 | |
752 | /* Generate each move. */ | |
3a123ed7 JJ |
753 | old_reg = prev_reg = SET_DEST (set); |
754 | if (HARD_REGISTER_P (old_reg)) | |
755 | return false; | |
756 | ||
1287d8ea RS |
757 | for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) |
758 | { | |
759 | ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move); | |
760 | ||
761 | move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i; | |
752cdc4e | 762 | move->uses = sbitmap_alloc (first_move + nreg_moves); |
1287d8ea RS |
763 | move->old_reg = old_reg; |
764 | move->new_reg = gen_reg_rtx (GET_MODE (prev_reg)); | |
752cdc4e | 765 | move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1; |
1476d1bd | 766 | move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg)); |
f61e445a | 767 | bitmap_clear (move->uses); |
1287d8ea RS |
768 | |
769 | prev_reg = move->new_reg; | |
770 | } | |
771 | ||
752cdc4e RS |
772 | distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL; |
773 | ||
33444996 FY |
774 | if (distance1_uses) |
775 | bitmap_clear (distance1_uses); | |
776 | ||
d397e8c6 MH |
777 | /* Every use of the register defined by node may require a different |
778 | copy of this register, depending on the time the use is scheduled. | |
1287d8ea | 779 | Record which uses require which move results. */ |
d397e8c6 MH |
780 | for (e = u->out; e; e = e->next_out) |
781 | if (e->type == TRUE_DEP && e->dest != e->src) | |
782 | { | |
88e9c867 RS |
783 | int dest_copy = (SCHED_TIME (e->dest->cuid) |
784 | - SCHED_TIME (e->src->cuid)) / ii; | |
d397e8c6 | 785 | |
d331e204 | 786 | if (e->distance == 1) |
88e9c867 RS |
787 | dest_copy = (SCHED_TIME (e->dest->cuid) |
788 | - SCHED_TIME (e->src->cuid) + ii) / ii; | |
d331e204 | 789 | |
88e9c867 RS |
790 | if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid) |
791 | && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid)) | |
d397e8c6 MH |
792 | dest_copy--; |
793 | ||
794 | if (dest_copy) | |
1287d8ea RS |
795 | { |
796 | ps_reg_move_info *move; | |
d397e8c6 | 797 | |
1287d8ea | 798 | move = ps_reg_move (ps, first_move + dest_copy - 1); |
d7c028c0 | 799 | bitmap_set_bit (move->uses, e->dest->cuid); |
752cdc4e | 800 | if (e->distance == 1) |
d7c028c0 | 801 | bitmap_set_bit (distance1_uses, e->dest->cuid); |
1287d8ea RS |
802 | } |
803 | } | |
752cdc4e | 804 | |
7ba9e72d | 805 | auto_sbitmap must_follow (first_move + nreg_moves); |
752cdc4e RS |
806 | for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) |
807 | if (!schedule_reg_move (ps, first_move + i_reg_move, | |
808 | distance1_uses, must_follow)) | |
809 | break; | |
752cdc4e RS |
810 | if (distance1_uses) |
811 | sbitmap_free (distance1_uses); | |
812 | if (i_reg_move < nreg_moves) | |
813 | return false; | |
1287d8ea RS |
814 | } |
815 | return true; | |
816 | } | |
d397e8c6 | 817 | |
5764ee3c | 818 | /* Emit the moves associated with PS. Apply the substitutions |
1287d8ea RS |
819 | associated with them. */ |
820 | static void | |
821 | apply_reg_moves (partial_schedule_ptr ps) | |
822 | { | |
823 | ps_reg_move_info *move; | |
824 | int i; | |
d397e8c6 | 825 | |
9771b263 | 826 | FOR_EACH_VEC_ELT (ps->reg_moves, i, move) |
1287d8ea RS |
827 | { |
828 | unsigned int i_use; | |
829 | sbitmap_iterator sbi; | |
d397e8c6 | 830 | |
d4ac4ce2 | 831 | EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, i_use, sbi) |
1287d8ea RS |
832 | { |
833 | replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg); | |
834 | df_insn_rescan (ps->g->nodes[i_use].insn); | |
d397e8c6 MH |
835 | } |
836 | } | |
edc429ff RE |
837 | } |
838 | ||
fc6970e4 RE |
839 | /* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of |
840 | SCHED_ROW and SCHED_STAGE. Instruction scheduled on cycle AMOUNT | |
841 | will move to cycle zero. */ | |
d397e8c6 | 842 | static void |
fc6970e4 | 843 | reset_sched_times (partial_schedule_ptr ps, int amount) |
d397e8c6 | 844 | { |
c8943832 | 845 | int row; |
d397e8c6 | 846 | int ii = ps->ii; |
c8943832 | 847 | ps_insn_ptr crr_insn; |
d397e8c6 | 848 | |
c8943832 VY |
849 | for (row = 0; row < ii; row++) |
850 | for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) | |
851 | { | |
88e9c867 | 852 | int u = crr_insn->id; |
c8943832 | 853 | int normalized_time = SCHED_TIME (u) - amount; |
fc6970e4 | 854 | int new_min_cycle = PS_MIN_CYCLE (ps) - amount; |
c8943832 | 855 | |
fc6970e4 RE |
856 | if (dump_file) |
857 | { | |
858 | /* Print the scheduling times after the rotation. */ | |
6210ec61 | 859 | rtx_insn *insn = ps_rtl_insn (ps, u); |
88e9c867 | 860 | |
fc6970e4 | 861 | fprintf (dump_file, "crr_insn->node=%d (insn id %d), " |
88e9c867 RS |
862 | "crr_insn->cycle=%d, min_cycle=%d", u, |
863 | INSN_UID (insn), normalized_time, new_min_cycle); | |
864 | if (JUMP_P (insn)) | |
fc6970e4 RE |
865 | fprintf (dump_file, " (branch)"); |
866 | fprintf (dump_file, "\n"); | |
867 | } | |
868 | ||
c8943832 VY |
869 | gcc_assert (SCHED_TIME (u) >= ps->min_cycle); |
870 | gcc_assert (SCHED_TIME (u) <= ps->max_cycle); | |
edc429ff RE |
871 | |
872 | crr_insn->cycle = normalized_time; | |
873 | update_node_sched_params (u, ii, normalized_time, new_min_cycle); | |
c8943832 | 874 | } |
d397e8c6 | 875 | } |
fc6970e4 | 876 | |
d397e8c6 MH |
877 | /* Permute the insns according to their order in PS, from row 0 to |
878 | row ii-1, and position them right before LAST. This schedules | |
879 | the insns of the loop kernel. */ | |
880 | static void | |
dc01c3d1 | 881 | permute_partial_schedule (partial_schedule_ptr ps, rtx_insn *last) |
d397e8c6 MH |
882 | { |
883 | int ii = ps->ii; | |
884 | int row; | |
885 | ps_insn_ptr ps_ij; | |
886 | ||
887 | for (row = 0; row < ii ; row++) | |
888 | for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) | |
88e9c867 | 889 | { |
6210ec61 | 890 | rtx_insn *insn = ps_rtl_insn (ps, ps_ij->id); |
88e9c867 RS |
891 | |
892 | if (PREV_INSN (last) != insn) | |
752cdc4e RS |
893 | { |
894 | if (ps_ij->id < ps->g->num_nodes) | |
895 | reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn, | |
896 | PREV_INSN (last)); | |
897 | else | |
898 | add_insn_before (insn, last, NULL); | |
899 | } | |
88e9c867 | 900 | } |
d397e8c6 MH |
901 | } |
902 | ||
edc429ff RE |
903 | /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE |
904 | respectively only if cycle C falls on the border of the scheduling | |
905 | window boundaries marked by START and END cycles. STEP is the | |
906 | direction of the window. */ | |
907 | static inline void | |
908 | set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow, | |
909 | sbitmap *tmp_precede, sbitmap must_precede, int c, | |
910 | int start, int end, int step) | |
911 | { | |
912 | *tmp_precede = NULL; | |
913 | *tmp_follow = NULL; | |
914 | ||
915 | if (c == start) | |
916 | { | |
917 | if (step == 1) | |
918 | *tmp_precede = must_precede; | |
919 | else /* step == -1. */ | |
920 | *tmp_follow = must_follow; | |
921 | } | |
922 | if (c == end - step) | |
923 | { | |
924 | if (step == 1) | |
925 | *tmp_follow = must_follow; | |
926 | else /* step == -1. */ | |
927 | *tmp_precede = must_precede; | |
928 | } | |
929 | ||
930 | } | |
931 | ||
932 | /* Return True if the branch can be moved to row ii-1 while | |
933 | normalizing the partial schedule PS to start from cycle zero and thus | |
934 | optimize the SC. Otherwise return False. */ | |
935 | static bool | |
936 | optimize_sc (partial_schedule_ptr ps, ddg_ptr g) | |
937 | { | |
938 | int amount = PS_MIN_CYCLE (ps); | |
edc429ff RE |
939 | int start, end, step; |
940 | int ii = ps->ii; | |
941 | bool ok = false; | |
942 | int stage_count, stage_count_curr; | |
943 | ||
944 | /* Compare the SC after normalization and SC after bringing the branch | |
945 | to row ii-1. If they are equal just bail out. */ | |
946 | stage_count = calculate_stage_count (ps, amount); | |
947 | stage_count_curr = | |
88e9c867 | 948 | calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1)); |
edc429ff RE |
949 | |
950 | if (stage_count == stage_count_curr) | |
951 | { | |
952 | if (dump_file) | |
953 | fprintf (dump_file, "SMS SC already optimized.\n"); | |
954 | ||
7ba9e72d | 955 | return false; |
edc429ff RE |
956 | } |
957 | ||
958 | if (dump_file) | |
959 | { | |
960 | fprintf (dump_file, "SMS Trying to optimize branch location\n"); | |
961 | fprintf (dump_file, "SMS partial schedule before trial:\n"); | |
962 | print_partial_schedule (ps, dump_file); | |
963 | } | |
964 | ||
965 | /* First, normalize the partial scheduling. */ | |
966 | reset_sched_times (ps, amount); | |
967 | rotate_partial_schedule (ps, amount); | |
968 | if (dump_file) | |
969 | { | |
970 | fprintf (dump_file, | |
971 | "SMS partial schedule after normalization (ii, %d, SC %d):\n", | |
972 | ii, stage_count); | |
973 | print_partial_schedule (ps, dump_file); | |
974 | } | |
975 | ||
88e9c867 | 976 | if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1) |
7ba9e72d | 977 | return true; |
edc429ff | 978 | |
7ba9e72d | 979 | auto_sbitmap sched_nodes (g->num_nodes); |
f61e445a | 980 | bitmap_ones (sched_nodes); |
edc429ff RE |
981 | |
982 | /* Calculate the new placement of the branch. It should be in row | |
983 | ii-1 and fall into it's scheduling window. */ | |
984 | if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start, | |
985 | &step, &end) == 0) | |
986 | { | |
987 | bool success; | |
988 | ps_insn_ptr next_ps_i; | |
88e9c867 | 989 | int branch_cycle = SCHED_TIME (g->closing_branch->cuid); |
edc429ff RE |
990 | int row = SMODULO (branch_cycle, ps->ii); |
991 | int num_splits = 0; | |
7ba9e72d | 992 | sbitmap tmp_precede, tmp_follow; |
38a51663 | 993 | int min_cycle, c; |
edc429ff RE |
994 | |
995 | if (dump_file) | |
996 | fprintf (dump_file, "\nTrying to schedule node %d " | |
997 | "INSN = %d in (%d .. %d) step %d\n", | |
998 | g->closing_branch->cuid, | |
999 | (INSN_UID (g->closing_branch->insn)), start, end, step); | |
1000 | ||
1001 | gcc_assert ((step > 0 && start < end) || (step < 0 && start > end)); | |
1002 | if (step == 1) | |
1003 | { | |
1004 | c = start + ii - SMODULO (start, ii) - 1; | |
1005 | gcc_assert (c >= start); | |
1006 | if (c >= end) | |
1007 | { | |
edc429ff RE |
1008 | if (dump_file) |
1009 | fprintf (dump_file, | |
1010 | "SMS failed to schedule branch at cycle: %d\n", c); | |
7ba9e72d | 1011 | return false; |
edc429ff RE |
1012 | } |
1013 | } | |
1014 | else | |
1015 | { | |
1016 | c = start - SMODULO (start, ii) - 1; | |
1017 | gcc_assert (c <= start); | |
1018 | ||
1019 | if (c <= end) | |
1020 | { | |
1021 | if (dump_file) | |
1022 | fprintf (dump_file, | |
1023 | "SMS failed to schedule branch at cycle: %d\n", c); | |
7ba9e72d | 1024 | return false; |
edc429ff RE |
1025 | } |
1026 | } | |
1027 | ||
7ba9e72d TS |
1028 | auto_sbitmap must_precede (g->num_nodes); |
1029 | auto_sbitmap must_follow (g->num_nodes); | |
edc429ff RE |
1030 | |
1031 | /* Try to schedule the branch is it's new cycle. */ | |
1032 | calculate_must_precede_follow (g->closing_branch, start, end, | |
1033 | step, ii, sched_nodes, | |
1034 | must_precede, must_follow); | |
1035 | ||
1036 | set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede, | |
1037 | must_precede, c, start, end, step); | |
1038 | ||
1039 | /* Find the element in the partial schedule related to the closing | |
1040 | branch so we can remove it from it's current cycle. */ | |
1041 | for (next_ps_i = ps->rows[row]; | |
1042 | next_ps_i; next_ps_i = next_ps_i->next_in_row) | |
88e9c867 | 1043 | if (next_ps_i->id == g->closing_branch->cuid) |
edc429ff RE |
1044 | break; |
1045 | ||
38a51663 | 1046 | min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii); |
a9fb4f13 | 1047 | remove_node_from_ps (ps, next_ps_i); |
edc429ff | 1048 | success = |
88e9c867 | 1049 | try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c, |
edc429ff RE |
1050 | sched_nodes, &num_splits, |
1051 | tmp_precede, tmp_follow); | |
1052 | gcc_assert (num_splits == 0); | |
1053 | if (!success) | |
1054 | { | |
1055 | if (dump_file) | |
1056 | fprintf (dump_file, | |
1057 | "SMS failed to schedule branch at cycle: %d, " | |
1058 | "bringing it back to cycle %d\n", c, branch_cycle); | |
1059 | ||
1060 | /* The branch was failed to be placed in row ii - 1. | |
1061 | Put it back in it's original place in the partial | |
1062 | schedualing. */ | |
1063 | set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede, | |
1064 | must_precede, branch_cycle, start, end, | |
1065 | step); | |
1066 | success = | |
88e9c867 | 1067 | try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, |
edc429ff RE |
1068 | branch_cycle, sched_nodes, |
1069 | &num_splits, tmp_precede, | |
1070 | tmp_follow); | |
1071 | gcc_assert (success && (num_splits == 0)); | |
1072 | ok = false; | |
1073 | } | |
1074 | else | |
1075 | { | |
1076 | /* The branch is placed in row ii - 1. */ | |
1077 | if (dump_file) | |
1078 | fprintf (dump_file, | |
1079 | "SMS success in moving branch to cycle %d\n", c); | |
1080 | ||
88e9c867 | 1081 | update_node_sched_params (g->closing_branch->cuid, ii, c, |
edc429ff RE |
1082 | PS_MIN_CYCLE (ps)); |
1083 | ok = true; | |
1084 | } | |
1085 | ||
38a51663 JL |
1086 | /* This might have been added to a new first stage. */ |
1087 | if (PS_MIN_CYCLE (ps) < min_cycle) | |
1088 | reset_sched_times (ps, 0); | |
edc429ff RE |
1089 | } |
1090 | ||
edc429ff RE |
1091 | return ok; |
1092 | } | |
1093 | ||
d397e8c6 MH |
1094 | static void |
1095 | duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, | |
d6a05b49 | 1096 | int to_stage, rtx count_reg, class loop *loop) |
d397e8c6 MH |
1097 | { |
1098 | int row; | |
1099 | ps_insn_ptr ps_ij; | |
d6a05b49 | 1100 | copy_bb_data id; |
d397e8c6 MH |
1101 | |
1102 | for (row = 0; row < ps->ii; row++) | |
1103 | for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) | |
1104 | { | |
88e9c867 | 1105 | int u = ps_ij->id; |
752cdc4e | 1106 | int first_u, last_u; |
6210ec61 | 1107 | rtx_insn *u_insn; |
d397e8c6 | 1108 | |
46dc0789 MN |
1109 | /* Do not duplicate any insn which refers to count_reg as it |
1110 | belongs to the control part. | |
fc6970e4 RE |
1111 | The closing branch is scheduled as well and thus should |
1112 | be ignored. | |
46dc0789 MN |
1113 | TODO: This should be done by analyzing the control part of |
1114 | the loop. */ | |
88e9c867 RS |
1115 | u_insn = ps_rtl_insn (ps, u); |
1116 | if (reg_mentioned_p (count_reg, u_insn) | |
1117 | || JUMP_P (u_insn)) | |
46dc0789 MN |
1118 | continue; |
1119 | ||
752cdc4e RS |
1120 | first_u = SCHED_STAGE (u); |
1121 | last_u = first_u + ps_num_consecutive_stages (ps, u) - 1; | |
1122 | if (from_stage <= last_u && to_stage >= first_u) | |
d397e8c6 | 1123 | { |
752cdc4e | 1124 | if (u < ps->g->num_nodes) |
d6a05b49 RB |
1125 | duplicate_insn_chain (ps_first_note (ps, u), u_insn, |
1126 | loop, &id); | |
752cdc4e RS |
1127 | else |
1128 | emit_insn (copy_rtx (PATTERN (u_insn))); | |
d397e8c6 | 1129 | } |
d397e8c6 MH |
1130 | } |
1131 | } | |
1132 | ||
1133 | ||
1134 | /* Generate the instructions (including reg_moves) for prolog & epilog. */ | |
1135 | static void | |
99b1c316 | 1136 | generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop, |
4eb8f93d | 1137 | rtx count_reg, bool adjust_init) |
d397e8c6 MH |
1138 | { |
1139 | int i; | |
1140 | int last_stage = PS_STAGE_COUNT (ps) - 1; | |
1141 | edge e; | |
b8698a0f | 1142 | |
d397e8c6 MH |
1143 | /* Generate the prolog, inserting its insns on the loop-entry edge. */ |
1144 | start_sequence (); | |
1145 | ||
4eb8f93d | 1146 | if (adjust_init) |
46dc0789 MN |
1147 | { |
1148 | /* Generate instructions at the beginning of the prolog to | |
4eb8f93d RZ |
1149 | adjust the loop count by STAGE_COUNT. If loop count is constant |
1150 | and it not used anywhere in prologue, this constant is adjusted by | |
1151 | STAGE_COUNT outside of generate_prolog_epilog function. */ | |
46dc0789 MN |
1152 | rtx sub_reg = NULL_RTX; |
1153 | ||
2f1cd2eb RS |
1154 | sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg, |
1155 | gen_int_mode (last_stage, | |
1156 | GET_MODE (count_reg)), | |
46dc0789 MN |
1157 | count_reg, 1, OPTAB_DIRECT); |
1158 | gcc_assert (REG_P (sub_reg)); | |
1159 | if (REGNO (sub_reg) != REGNO (count_reg)) | |
1160 | emit_move_insn (count_reg, sub_reg); | |
1161 | } | |
d397e8c6 MH |
1162 | |
1163 | for (i = 0; i < last_stage; i++) | |
d6a05b49 | 1164 | duplicate_insns_of_cycles (ps, 0, i, count_reg, loop); |
b8698a0f | 1165 | |
598ec7bd | 1166 | /* Put the prolog on the entry edge. */ |
f73d5666 | 1167 | e = loop_preheader_edge (loop); |
62e5bf5d | 1168 | split_edge_and_insert (e, get_insns ()); |
cc1efdff RE |
1169 | if (!flag_resched_modulo_sched) |
1170 | e->dest->flags |= BB_DISABLE_SCHEDULE; | |
f73d5666 | 1171 | |
d397e8c6 MH |
1172 | end_sequence (); |
1173 | ||
1174 | /* Generate the epilog, inserting its insns on the loop-exit edge. */ | |
1175 | start_sequence (); | |
1176 | ||
1177 | for (i = 0; i < last_stage; i++) | |
d6a05b49 | 1178 | duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg, loop); |
b8698a0f | 1179 | |
598ec7bd | 1180 | /* Put the epilogue on the exit edge. */ |
ac8f6c69 ZD |
1181 | gcc_assert (single_exit (loop)); |
1182 | e = single_exit (loop); | |
62e5bf5d | 1183 | split_edge_and_insert (e, get_insns ()); |
cc1efdff RE |
1184 | if (!flag_resched_modulo_sched) |
1185 | e->dest->flags |= BB_DISABLE_SCHEDULE; | |
1186 | ||
f73d5666 MH |
1187 | end_sequence (); |
1188 | } | |
1189 | ||
cc1efdff RE |
1190 | /* Mark LOOP as software pipelined so the later |
1191 | scheduling passes don't touch it. */ | |
1192 | static void | |
99b1c316 | 1193 | mark_loop_unsched (class loop *loop) |
cc1efdff RE |
1194 | { |
1195 | unsigned i; | |
1196 | basic_block *bbs = get_loop_body (loop); | |
1197 | ||
1198 | for (i = 0; i < loop->num_nodes; i++) | |
1199 | bbs[i]->flags |= BB_DISABLE_SCHEDULE; | |
20936cee RE |
1200 | |
1201 | free (bbs); | |
cc1efdff RE |
1202 | } |
1203 | ||
f73d5666 MH |
1204 | /* Return true if all the BBs of the loop are empty except the |
1205 | loop header. */ | |
1206 | static bool | |
99b1c316 | 1207 | loop_single_full_bb_p (class loop *loop) |
f73d5666 MH |
1208 | { |
1209 | unsigned i; | |
1210 | basic_block *bbs = get_loop_body (loop); | |
1211 | ||
1212 | for (i = 0; i < loop->num_nodes ; i++) | |
d397e8c6 | 1213 | { |
52d251b5 | 1214 | rtx_insn *head, *tail; |
f73d5666 MH |
1215 | bool empty_bb = true; |
1216 | ||
1217 | if (bbs[i] == loop->header) | |
1218 | continue; | |
1219 | ||
1220 | /* Make sure that basic blocks other than the header | |
1221 | have only notes labels or jumps. */ | |
496d7bb0 | 1222 | get_ebb_head_tail (bbs[i], bbs[i], &head, &tail); |
f73d5666 MH |
1223 | for (; head != NEXT_INSN (tail); head = NEXT_INSN (head)) |
1224 | { | |
1225 | if (NOTE_P (head) || LABEL_P (head) | |
b5b8b0ac | 1226 | || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head)))) |
f73d5666 MH |
1227 | continue; |
1228 | empty_bb = false; | |
1229 | break; | |
1230 | } | |
1231 | ||
1232 | if (! empty_bb) | |
1233 | { | |
1234 | free (bbs); | |
1235 | return false; | |
1236 | } | |
1237 | } | |
1238 | free (bbs); | |
1239 | return true; | |
1240 | } | |
d397e8c6 | 1241 | |
22439481 JJ |
1242 | /* Dump file:line from INSN's location info to dump_file. */ |
1243 | ||
1244 | static void | |
6210ec61 | 1245 | dump_insn_location (rtx_insn *insn) |
22439481 | 1246 | { |
ffa4602f | 1247 | if (dump_file && INSN_HAS_LOCATION (insn)) |
22439481 | 1248 | { |
ffa4602f EB |
1249 | expanded_location xloc = insn_location (insn); |
1250 | fprintf (dump_file, " %s:%i", xloc.file, xloc.line); | |
22439481 JJ |
1251 | } |
1252 | } | |
1253 | ||
f73d5666 MH |
1254 | /* A simple loop from SMS point of view; it is a loop that is composed of |
1255 | either a single basic block or two BBs - a header and a latch. */ | |
1256 | #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \ | |
1257 | && (EDGE_COUNT (loop->latch->preds) == 1) \ | |
1258 | && (EDGE_COUNT (loop->latch->succs) == 1)) | |
d397e8c6 | 1259 | |
f73d5666 MH |
1260 | /* Return true if the loop is in its canonical form and false if not. |
1261 | i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */ | |
1262 | static bool | |
99b1c316 | 1263 | loop_canon_p (class loop *loop) |
f73d5666 | 1264 | { |
d397e8c6 | 1265 | |
9ba025a2 | 1266 | if (loop->inner || !loop_outer (loop)) |
1a46d33d RE |
1267 | { |
1268 | if (dump_file) | |
1269 | fprintf (dump_file, "SMS loop inner or !loop_outer\n"); | |
f73d5666 | 1270 | return false; |
1a46d33d | 1271 | } |
d397e8c6 | 1272 | |
ac8f6c69 | 1273 | if (!single_exit (loop)) |
f73d5666 MH |
1274 | { |
1275 | if (dump_file) | |
1276 | { | |
6210ec61 | 1277 | rtx_insn *insn = BB_END (loop->header); |
b8698a0f | 1278 | |
22439481 | 1279 | fprintf (dump_file, "SMS loop many exits"); |
5368224f | 1280 | dump_insn_location (insn); |
22439481 | 1281 | fprintf (dump_file, "\n"); |
f73d5666 MH |
1282 | } |
1283 | return false; | |
1284 | } | |
d397e8c6 | 1285 | |
f73d5666 MH |
1286 | if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop)) |
1287 | { | |
1288 | if (dump_file) | |
1289 | { | |
6210ec61 | 1290 | rtx_insn *insn = BB_END (loop->header); |
b8698a0f | 1291 | |
22439481 | 1292 | fprintf (dump_file, "SMS loop many BBs."); |
5368224f | 1293 | dump_insn_location (insn); |
22439481 | 1294 | fprintf (dump_file, "\n"); |
f73d5666 MH |
1295 | } |
1296 | return false; | |
d397e8c6 MH |
1297 | } |
1298 | ||
f73d5666 MH |
1299 | return true; |
1300 | } | |
d397e8c6 | 1301 | |
f73d5666 MH |
1302 | /* If there are more than one entry for the loop, |
1303 | make it one by splitting the first entry edge and | |
1304 | redirecting the others to the new BB. */ | |
1305 | static void | |
99b1c316 | 1306 | canon_loop (class loop *loop) |
f73d5666 MH |
1307 | { |
1308 | edge e; | |
1309 | edge_iterator i; | |
d397e8c6 | 1310 | |
f73d5666 MH |
1311 | /* Avoid annoying special cases of edges going to exit |
1312 | block. */ | |
fefa31b5 | 1313 | FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) |
f73d5666 | 1314 | if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1)) |
598ec7bd | 1315 | split_edge (e); |
d397e8c6 | 1316 | |
f73d5666 MH |
1317 | if (loop->latch == loop->header |
1318 | || EDGE_COUNT (loop->latch->succs) > 1) | |
d397e8c6 | 1319 | { |
f73d5666 MH |
1320 | FOR_EACH_EDGE (e, i, loop->header->preds) |
1321 | if (e->src == loop->latch) | |
1322 | break; | |
598ec7bd | 1323 | split_edge (e); |
d397e8c6 MH |
1324 | } |
1325 | } | |
1326 | ||
e855c69d AB |
1327 | /* Setup infos. */ |
1328 | static void | |
1329 | setup_sched_infos (void) | |
1330 | { | |
1331 | memcpy (&sms_common_sched_info, &haifa_common_sched_info, | |
1332 | sizeof (sms_common_sched_info)); | |
1333 | sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS; | |
1334 | common_sched_info = &sms_common_sched_info; | |
1335 | ||
1336 | sched_deps_info = &sms_sched_deps_info; | |
1337 | current_sched_info = &sms_sched_info; | |
1338 | } | |
1339 | ||
03cb2019 ZD |
1340 | /* Probability in % that the sms-ed loop rolls enough so that optimized |
1341 | version may be entered. Just a guess. */ | |
1342 | #define PROB_SMS_ENOUGH_ITERATIONS 80 | |
1343 | ||
d397e8c6 MH |
1344 | /* Main entry point, perform SMS scheduling on the loops of the function |
1345 | that consist of single basic blocks. */ | |
efa2fa34 | 1346 | static void |
10d22567 | 1347 | sms_schedule (void) |
d397e8c6 | 1348 | { |
6210ec61 | 1349 | rtx_insn *insn; |
d397e8c6 | 1350 | ddg_ptr *g_arr, g; |
d397e8c6 | 1351 | int * node_order; |
5cd53742 | 1352 | int maxii, max_asap; |
d397e8c6 | 1353 | partial_schedule_ptr ps; |
f73d5666 | 1354 | basic_block bb = NULL; |
99b1c316 | 1355 | class loop *loop; |
f73d5666 MH |
1356 | basic_block condition_bb = NULL; |
1357 | edge latch_edge; | |
a9007865 | 1358 | HOST_WIDE_INT trip_count, max_trip_count; |
f73d5666 | 1359 | |
598ec7bd | 1360 | loop_optimizer_init (LOOPS_HAVE_PREHEADERS |
6270df4c | 1361 | | LOOPS_HAVE_RECORDED_EXITS); |
0fc822d0 | 1362 | if (number_of_loops (cfun) <= 1) |
d51157de ZD |
1363 | { |
1364 | loop_optimizer_finalize (); | |
1365 | return; /* There are no loops to schedule. */ | |
1366 | } | |
f73d5666 | 1367 | |
d397e8c6 MH |
1368 | /* Initialize issue_rate. */ |
1369 | if (targetm.sched.issue_rate) | |
1370 | { | |
1371 | int temp = reload_completed; | |
1372 | ||
1373 | reload_completed = 1; | |
1c91de89 | 1374 | issue_rate = targetm.sched.issue_rate (); |
d397e8c6 MH |
1375 | reload_completed = temp; |
1376 | } | |
1377 | else | |
1378 | issue_rate = 1; | |
1379 | ||
61ada8ae | 1380 | /* Initialize the scheduler. */ |
e855c69d AB |
1381 | setup_sched_infos (); |
1382 | haifa_sched_init (); | |
1a1a5f4b | 1383 | |
f73d5666 MH |
1384 | /* Allocate memory to hold the DDG array one entry for each loop. |
1385 | We use loop->num as index into this array. */ | |
0fc822d0 | 1386 | g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun)); |
d397e8c6 | 1387 | |
1a46d33d RE |
1388 | if (dump_file) |
1389 | { | |
1390 | fprintf (dump_file, "\n\nSMS analysis phase\n"); | |
1391 | fprintf (dump_file, "===================\n\n"); | |
1392 | } | |
1393 | ||
d397e8c6 | 1394 | /* Build DDGs for all the relevant loops and hold them in G_ARR |
f73d5666 | 1395 | indexed by the loop index. */ |
f0bd40b1 | 1396 | FOR_EACH_LOOP (loop, 0) |
d397e8c6 | 1397 | { |
52d251b5 | 1398 | rtx_insn *head, *tail; |
75c70254 | 1399 | rtx count_reg; |
d397e8c6 | 1400 | |
f73d5666 | 1401 | /* For debugging. */ |
97511ad7 | 1402 | if (dbg_cnt (sms_sched_loop) == false) |
f73d5666 MH |
1403 | { |
1404 | if (dump_file) | |
97511ad7 | 1405 | fprintf (dump_file, "SMS reached max limit... \n"); |
d397e8c6 | 1406 | |
f0bd40b1 | 1407 | break; |
f73d5666 | 1408 | } |
d397e8c6 | 1409 | |
1a46d33d | 1410 | if (dump_file) |
22439481 | 1411 | { |
6210ec61 | 1412 | rtx_insn *insn = BB_END (loop->header); |
1a46d33d | 1413 | |
22439481 | 1414 | fprintf (dump_file, "SMS loop num: %d", loop->num); |
5368224f | 1415 | dump_insn_location (insn); |
22439481 JJ |
1416 | fprintf (dump_file, "\n"); |
1417 | } | |
1a46d33d | 1418 | |
10d22567 | 1419 | if (! loop_canon_p (loop)) |
f73d5666 | 1420 | continue; |
d397e8c6 | 1421 | |
f73d5666 | 1422 | if (! loop_single_full_bb_p (loop)) |
1a46d33d RE |
1423 | { |
1424 | if (dump_file) | |
1425 | fprintf (dump_file, "SMS not loop_single_full_bb_p\n"); | |
d397e8c6 | 1426 | continue; |
1a46d33d | 1427 | } |
d397e8c6 | 1428 | |
f73d5666 | 1429 | bb = loop->header; |
d397e8c6 | 1430 | |
496d7bb0 | 1431 | get_ebb_head_tail (bb, bb, &head, &tail); |
f73d5666 | 1432 | latch_edge = loop_latch_edge (loop); |
ac8f6c69 | 1433 | gcc_assert (single_exit (loop)); |
a9007865 JH |
1434 | trip_count = get_estimated_loop_iterations_int (loop); |
1435 | max_trip_count = get_max_loop_iterations_int (loop); | |
d397e8c6 | 1436 | |
fa10beec | 1437 | /* Perform SMS only on loops that their average count is above threshold. */ |
f73d5666 | 1438 | |
ef30ab83 JH |
1439 | if ( latch_edge->count () > profile_count::zero () |
1440 | && (latch_edge->count() | |
1441 | < single_exit (loop)->count ().apply_scale | |
028d4092 | 1442 | (param_sms_loop_average_count_threshold, 1))) |
f73d5666 | 1443 | { |
10d22567 | 1444 | if (dump_file) |
d397e8c6 | 1445 | { |
5368224f | 1446 | dump_insn_location (tail); |
22439481 | 1447 | fprintf (dump_file, "\nSMS single-bb-loop\n"); |
d397e8c6 MH |
1448 | if (profile_info && flag_branch_probabilities) |
1449 | { | |
10d22567 | 1450 | fprintf (dump_file, "SMS loop-count "); |
16998094 | 1451 | fprintf (dump_file, "%" PRId64, |
3995f3a2 | 1452 | (int64_t) bb->count.to_gcov_type ()); |
10d22567 ZD |
1453 | fprintf (dump_file, "\n"); |
1454 | fprintf (dump_file, "SMS trip-count "); | |
a9007865 JH |
1455 | fprintf (dump_file, "%" PRId64 "max %" PRId64, |
1456 | (int64_t) trip_count, (int64_t) max_trip_count); | |
10d22567 | 1457 | fprintf (dump_file, "\n"); |
d397e8c6 MH |
1458 | } |
1459 | } | |
1460 | continue; | |
1461 | } | |
1462 | ||
1463 | /* Make sure this is a doloop. */ | |
46dc0789 | 1464 | if ( !(count_reg = doloop_register_get (head, tail))) |
1a46d33d RE |
1465 | { |
1466 | if (dump_file) | |
1467 | fprintf (dump_file, "SMS doloop_register_get failed\n"); | |
d397e8c6 | 1468 | continue; |
1a46d33d | 1469 | } |
d397e8c6 | 1470 | |
29f3fd5b | 1471 | /* Don't handle BBs with calls or barriers |
ce7b3761 RE |
1472 | or !single_set with the exception of instructions that include |
1473 | count_reg---these instructions are part of the control part | |
1474 | that do-loop recognizes. | |
46dc0789 MN |
1475 | ??? Should handle insns defining subregs. */ |
1476 | for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) | |
1477 | { | |
1478 | rtx set; | |
1479 | ||
1480 | if (CALL_P (insn) | |
29f3fd5b | 1481 | || BARRIER_P (insn) |
b5b8b0ac | 1482 | || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn) |
ce7b3761 RE |
1483 | && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE |
1484 | && !reg_mentioned_p (count_reg, insn)) | |
46dc0789 MN |
1485 | || (INSN_P (insn) && (set = single_set (insn)) |
1486 | && GET_CODE (SET_DEST (set)) == SUBREG)) | |
1487 | break; | |
1488 | } | |
d397e8c6 MH |
1489 | |
1490 | if (insn != NEXT_INSN (tail)) | |
1491 | { | |
10d22567 | 1492 | if (dump_file) |
d397e8c6 | 1493 | { |
4b4bf941 | 1494 | if (CALL_P (insn)) |
10d22567 | 1495 | fprintf (dump_file, "SMS loop-with-call\n"); |
29f3fd5b SB |
1496 | else if (BARRIER_P (insn)) |
1497 | fprintf (dump_file, "SMS loop-with-barrier\n"); | |
b5b8b0ac | 1498 | else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn) |
46dc0789 MN |
1499 | && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)) |
1500 | fprintf (dump_file, "SMS loop-with-not-single-set\n"); | |
1501 | else | |
1502 | fprintf (dump_file, "SMS loop with subreg in lhs\n"); | |
10d22567 | 1503 | print_rtl_single (dump_file, insn); |
d397e8c6 MH |
1504 | } |
1505 | ||
1506 | continue; | |
1507 | } | |
1508 | ||
fc6970e4 RE |
1509 | /* Always schedule the closing branch with the rest of the |
1510 | instructions. The branch is rotated to be in row ii-1 at the | |
1511 | end of the scheduling procedure to make sure it's the last | |
1512 | instruction in the iteration. */ | |
1513 | if (! (g = create_ddg (bb, 1))) | |
d397e8c6 | 1514 | { |
10d22567 | 1515 | if (dump_file) |
de82c453 | 1516 | fprintf (dump_file, "SMS create_ddg failed\n"); |
d397e8c6 MH |
1517 | continue; |
1518 | } | |
1519 | ||
42fd6772 | 1520 | g_arr[loop->num] = g; |
1a46d33d RE |
1521 | if (dump_file) |
1522 | fprintf (dump_file, "...OK\n"); | |
1523 | ||
d397e8c6 | 1524 | } |
1a46d33d RE |
1525 | if (dump_file) |
1526 | { | |
1527 | fprintf (dump_file, "\nSMS transformation phase\n"); | |
1528 | fprintf (dump_file, "=========================\n\n"); | |
1529 | } | |
d397e8c6 | 1530 | |
f73d5666 | 1531 | /* We don't want to perform SMS on new loops - created by versioning. */ |
f0bd40b1 | 1532 | FOR_EACH_LOOP (loop, 0) |
d397e8c6 | 1533 | { |
52d251b5 | 1534 | rtx_insn *head, *tail; |
e8a54173 DM |
1535 | rtx count_reg; |
1536 | rtx_insn *count_init; | |
752cdc4e | 1537 | int mii, rec_mii, stage_count, min_cycle; |
a9243bfc | 1538 | int64_t loop_count = 0; |
4eb8f93d RZ |
1539 | bool opt_sc_p, adjust_inplace = false; |
1540 | basic_block pre_header; | |
d397e8c6 | 1541 | |
42fd6772 | 1542 | if (! (g = g_arr[loop->num])) |
d397e8c6 MH |
1543 | continue; |
1544 | ||
1545 | if (dump_file) | |
22439481 | 1546 | { |
6210ec61 | 1547 | rtx_insn *insn = BB_END (loop->header); |
1a46d33d | 1548 | |
22439481 | 1549 | fprintf (dump_file, "SMS loop num: %d", loop->num); |
5368224f | 1550 | dump_insn_location (insn); |
22439481 | 1551 | fprintf (dump_file, "\n"); |
1a46d33d | 1552 | |
22439481 JJ |
1553 | print_ddg (dump_file, g); |
1554 | } | |
d397e8c6 | 1555 | |
496d7bb0 | 1556 | get_ebb_head_tail (loop->header, loop->header, &head, &tail); |
d397e8c6 | 1557 | |
f73d5666 | 1558 | latch_edge = loop_latch_edge (loop); |
ac8f6c69 | 1559 | gcc_assert (single_exit (loop)); |
a9007865 JH |
1560 | trip_count = get_estimated_loop_iterations_int (loop); |
1561 | max_trip_count = get_max_loop_iterations_int (loop); | |
d397e8c6 | 1562 | |
10d22567 | 1563 | if (dump_file) |
d397e8c6 | 1564 | { |
5368224f | 1565 | dump_insn_location (tail); |
22439481 | 1566 | fprintf (dump_file, "\nSMS single-bb-loop\n"); |
d397e8c6 MH |
1567 | if (profile_info && flag_branch_probabilities) |
1568 | { | |
10d22567 | 1569 | fprintf (dump_file, "SMS loop-count "); |
16998094 | 1570 | fprintf (dump_file, "%" PRId64, |
3995f3a2 | 1571 | (int64_t) bb->count.to_gcov_type ()); |
10d22567 | 1572 | fprintf (dump_file, "\n"); |
d397e8c6 | 1573 | } |
10d22567 ZD |
1574 | fprintf (dump_file, "SMS doloop\n"); |
1575 | fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes); | |
1576 | fprintf (dump_file, "SMS num-loads %d\n", g->num_loads); | |
1577 | fprintf (dump_file, "SMS num-stores %d\n", g->num_stores); | |
d397e8c6 MH |
1578 | } |
1579 | ||
d397e8c6 | 1580 | |
4eb8f93d | 1581 | count_reg = doloop_register_get (head, tail); |
f73d5666 | 1582 | gcc_assert (count_reg); |
d397e8c6 | 1583 | |
4eb8f93d RZ |
1584 | pre_header = loop_preheader_edge (loop)->src; |
1585 | count_init = const_iteration_count (count_reg, pre_header, &loop_count, | |
1586 | &adjust_inplace); | |
1587 | ||
10d22567 | 1588 | if (dump_file && count_init) |
d397e8c6 | 1589 | { |
10d22567 | 1590 | fprintf (dump_file, "SMS const-doloop "); |
16998094 | 1591 | fprintf (dump_file, "%" PRId64, |
f73d5666 | 1592 | loop_count); |
10d22567 | 1593 | fprintf (dump_file, "\n"); |
d397e8c6 MH |
1594 | } |
1595 | ||
5ed6ace5 | 1596 | node_order = XNEWVEC (int, g->num_nodes); |
d397e8c6 MH |
1597 | |
1598 | mii = 1; /* Need to pass some estimate of mii. */ | |
5cd53742 | 1599 | rec_mii = sms_order_nodes (g, mii, node_order, &max_asap); |
d397e8c6 | 1600 | mii = MAX (res_MII (g), rec_mii); |
9fe3064b | 1601 | mii = MAX (mii, 1); |
c420be8b | 1602 | maxii = MAX (max_asap, param_sms_max_ii_factor * mii); |
d397e8c6 | 1603 | |
10d22567 ZD |
1604 | if (dump_file) |
1605 | fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n", | |
d397e8c6 MH |
1606 | rec_mii, mii, maxii); |
1607 | ||
1287d8ea | 1608 | for (;;) |
edc429ff | 1609 | { |
1287d8ea RS |
1610 | set_node_sched_params (g); |
1611 | ||
1612 | stage_count = 0; | |
1613 | opt_sc_p = false; | |
1614 | ps = sms_schedule_by_order (g, mii, maxii, node_order); | |
1615 | ||
1616 | if (ps) | |
edc429ff | 1617 | { |
1287d8ea RS |
1618 | /* Try to achieve optimized SC by normalizing the partial |
1619 | schedule (having the cycles start from cycle zero). | |
1620 | The branch location must be placed in row ii-1 in the | |
1621 | final scheduling. If failed, shift all instructions to | |
1622 | position the branch in row ii-1. */ | |
1623 | opt_sc_p = optimize_sc (ps, g); | |
1624 | if (opt_sc_p) | |
1625 | stage_count = calculate_stage_count (ps, 0); | |
1626 | else | |
1627 | { | |
1628 | /* Bring the branch to cycle ii-1. */ | |
1629 | int amount = (SCHED_TIME (g->closing_branch->cuid) | |
1630 | - (ps->ii - 1)); | |
1631 | ||
1632 | if (dump_file) | |
1633 | fprintf (dump_file, "SMS schedule branch at cycle ii-1\n"); | |
1634 | ||
1635 | stage_count = calculate_stage_count (ps, amount); | |
1636 | } | |
1637 | ||
1638 | gcc_assert (stage_count >= 1); | |
edc429ff | 1639 | } |
1287d8ea | 1640 | |
c420be8b | 1641 | /* The default value of param_sms_min_sc is 2 as stage count of |
1287d8ea RS |
1642 | 1 means that there is no interleaving between iterations thus |
1643 | we let the scheduling passes do the job in this case. */ | |
028d4092 | 1644 | if (stage_count < param_sms_min_sc |
1287d8ea | 1645 | || (count_init && (loop_count <= stage_count)) |
a9007865 JH |
1646 | || (max_trip_count >= 0 && max_trip_count <= stage_count) |
1647 | || (trip_count >= 0 && trip_count <= stage_count)) | |
f73d5666 | 1648 | { |
1287d8ea RS |
1649 | if (dump_file) |
1650 | { | |
1651 | fprintf (dump_file, "SMS failed... \n"); | |
1652 | fprintf (dump_file, "SMS sched-failed (stage-count=%d," | |
1653 | " loop-count=", stage_count); | |
16998094 | 1654 | fprintf (dump_file, "%" PRId64, loop_count); |
1287d8ea | 1655 | fprintf (dump_file, ", trip-count="); |
a9007865 JH |
1656 | fprintf (dump_file, "%" PRId64 "max %" PRId64, |
1657 | (int64_t) trip_count, (int64_t) max_trip_count); | |
1287d8ea RS |
1658 | fprintf (dump_file, ")\n"); |
1659 | } | |
1660 | break; | |
f73d5666 | 1661 | } |
1287d8ea | 1662 | |
edc429ff RE |
1663 | if (!opt_sc_p) |
1664 | { | |
1665 | /* Rotate the partial schedule to have the branch in row ii-1. */ | |
88e9c867 | 1666 | int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1); |
edc429ff RE |
1667 | |
1668 | reset_sched_times (ps, amount); | |
1669 | rotate_partial_schedule (ps, amount); | |
1670 | } | |
fc6970e4 | 1671 | |
fc6970e4 | 1672 | set_columns_for_ps (ps); |
d397e8c6 | 1673 | |
752cdc4e | 1674 | min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii); |
1287d8ea RS |
1675 | if (!schedule_reg_moves (ps)) |
1676 | { | |
1677 | mii = ps->ii + 1; | |
1678 | free_partial_schedule (ps); | |
1679 | continue; | |
1680 | } | |
1681 | ||
752cdc4e RS |
1682 | /* Moves that handle incoming values might have been added |
1683 | to a new first stage. Bump the stage count if so. | |
1684 | ||
1685 | ??? Perhaps we could consider rotating the schedule here | |
1686 | instead? */ | |
1687 | if (PS_MIN_CYCLE (ps) < min_cycle) | |
1688 | { | |
1689 | reset_sched_times (ps, 0); | |
1690 | stage_count++; | |
1691 | } | |
1692 | ||
1693 | /* The stage count should now be correct without rotation. */ | |
1694 | gcc_checking_assert (stage_count == calculate_stage_count (ps, 0)); | |
1695 | PS_STAGE_COUNT (ps) = stage_count; | |
1696 | ||
fc6970e4 RE |
1697 | canon_loop (loop); |
1698 | ||
1699 | if (dump_file) | |
1700 | { | |
5368224f | 1701 | dump_insn_location (tail); |
22439481 JJ |
1702 | fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n", |
1703 | ps->ii, stage_count); | |
10d22567 | 1704 | print_partial_schedule (ps, dump_file); |
d397e8c6 | 1705 | } |
fc6970e4 | 1706 | |
4eb8f93d RZ |
1707 | if (count_init) |
1708 | { | |
1709 | if (adjust_inplace) | |
1710 | { | |
1711 | /* When possible, set new iteration count of loop kernel in | |
1712 | place. Otherwise, generate_prolog_epilog creates an insn | |
1713 | to adjust. */ | |
1714 | SET_SRC (single_set (count_init)) = GEN_INT (loop_count | |
1715 | - stage_count + 1); | |
1716 | } | |
1717 | } | |
1718 | else | |
76b4f0f7 | 1719 | { |
4eb8f93d | 1720 | /* case the BCT count is not known , Do loop-versioning */ |
4789c0ce RS |
1721 | rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg, |
1722 | gen_int_mode (stage_count, | |
1723 | GET_MODE (count_reg))); | |
357067f2 JH |
1724 | profile_probability prob = profile_probability::guessed_always () |
1725 | .apply_scale (PROB_SMS_ENOUGH_ITERATIONS, 100); | |
76b4f0f7 VY |
1726 | |
1727 | loop_version (loop, comp_rtx, &condition_bb, | |
357067f2 | 1728 | prob, prob.invert (), |
af2bbc51 | 1729 | prob, prob.invert (), true); |
4eb8f93d | 1730 | } |
76b4f0f7 VY |
1731 | |
1732 | /* Now apply the scheduled kernel to the RTL of the loop. */ | |
d397e8c6 | 1733 | permute_partial_schedule (ps, g->closing_branch->first_note); |
d72372e4 | 1734 | |
76b4f0f7 | 1735 | /* Mark this loop as software pipelined so the later |
cc1efdff | 1736 | scheduling passes don't touch it. */ |
76b4f0f7 | 1737 | if (! flag_resched_modulo_sched) |
cc1efdff RE |
1738 | mark_loop_unsched (loop); |
1739 | ||
76b4f0f7 VY |
1740 | /* The life-info is not valid any more. */ |
1741 | df_set_bb_dirty (g->bb); | |
d72372e4 | 1742 | |
1287d8ea | 1743 | apply_reg_moves (ps); |
76b4f0f7 | 1744 | if (dump_file) |
1287d8ea | 1745 | print_node_sched_params (dump_file, g->num_nodes, ps); |
76b4f0f7 | 1746 | /* Generate prolog and epilog. */ |
4eb8f93d | 1747 | generate_prolog_epilog (ps, loop, count_reg, !adjust_inplace); |
1287d8ea | 1748 | break; |
d397e8c6 | 1749 | } |
f73d5666 | 1750 | |
d397e8c6 | 1751 | free_partial_schedule (ps); |
9771b263 | 1752 | node_sched_param_vec.release (); |
d397e8c6 MH |
1753 | free (node_order); |
1754 | free_ddg (g); | |
1755 | } | |
1756 | ||
f4daf7e4 UP |
1757 | free (g_arr); |
1758 | ||
d397e8c6 | 1759 | /* Release scheduler data, needed until now because of DFA. */ |
e855c69d | 1760 | haifa_sched_finish (); |
598ec7bd | 1761 | loop_optimizer_finalize (); |
d397e8c6 MH |
1762 | } |
1763 | ||
1764 | /* The SMS scheduling algorithm itself | |
1765 | ----------------------------------- | |
1766 | Input: 'O' an ordered list of insns of a loop. | |
1767 | Output: A scheduling of the loop - kernel, prolog, and epilogue. | |
1768 | ||
1769 | 'Q' is the empty Set | |
1770 | 'PS' is the partial schedule; it holds the currently scheduled nodes with | |
1771 | their cycle/slot. | |
1772 | 'PSP' previously scheduled predecessors. | |
1773 | 'PSS' previously scheduled successors. | |
1774 | 't(u)' the cycle where u is scheduled. | |
1775 | 'l(u)' is the latency of u. | |
1776 | 'd(v,u)' is the dependence distance from v to u. | |
1777 | 'ASAP(u)' the earliest time at which u could be scheduled as computed in | |
1778 | the node ordering phase. | |
1779 | 'check_hardware_resources_conflicts(u, PS, c)' | |
1780 | run a trace around cycle/slot through DFA model | |
1781 | to check resource conflicts involving instruction u | |
1782 | at cycle c given the partial schedule PS. | |
1783 | 'add_to_partial_schedule_at_time(u, PS, c)' | |
1784 | Add the node/instruction u to the partial schedule | |
1785 | PS at time c. | |
1786 | 'calculate_register_pressure(PS)' | |
1787 | Given a schedule of instructions, calculate the register | |
1788 | pressure it implies. One implementation could be the | |
1789 | maximum number of overlapping live ranges. | |
1790 | 'maxRP' The maximum allowed register pressure, it is usually derived from the number | |
1791 | registers available in the hardware. | |
1792 | ||
1793 | 1. II = MII. | |
1794 | 2. PS = empty list | |
1795 | 3. for each node u in O in pre-computed order | |
1796 | 4. if (PSP(u) != Q && PSS(u) == Q) then | |
 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
 6. start = Early_start; end = Early_start + II - 1; step = 1
11. else if (PSP(u) == Q && PSS(u) != Q) then
12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
13. start = Late_start; end = Late_start - II + 1; step = -1
14. else if (PSP(u) != Q && PSS(u) != Q) then
15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1805 | 17. start = Early_start; | |
1806 | 18. end = min(Early_start + II - 1 , Late_start); | |
1807 | 19. step = 1 | |
1808 | 20. else "if (PSP(u) == Q && PSS(u) == Q)" | |
1809 | 21. start = ASAP(u); end = start + II - 1; step = 1 | |
1810 | 22. endif | |
1811 | ||
1812 | 23. success = false | |
1813 | 24. for (c = start ; c != end ; c += step) | |
1814 | 25. if check_hardware_resources_conflicts(u, PS, c) then | |
1815 | 26. add_to_partial_schedule_at_time(u, PS, c) | |
1816 | 27. success = true | |
1817 | 28. break | |
1818 | 29. endif | |
1819 | 30. endfor | |
1820 | 31. if (success == false) then | |
1821 | 32. II = II + 1 | |
1822 | 33. if (II > maxII) then | |
1823 | 34. finish - failed to schedule | |
1824 | 35. endif | |
1825 | 36. goto 2. | |
1826 | 37. endif | |
1827 | 38. endfor | |
1828 | 39. if (calculate_register_pressure(PS) > maxRP) then | |
1829 | 40. goto 32. | |
1830 | 41. endif | |
1831 | 42. compute epilogue & prologue | |
1832 | 43. finish - succeeded to schedule | |
3e6aef7c RS |
1833 | |
1834 | ??? The algorithm restricts the scheduling window to II cycles. | |
1835 | In rare cases, it may be better to allow windows of II+1 cycles. | |
1836 | The window would then start and end on the same row, but with | |
1837 | different "must precede" and "must follow" requirements. */ | |
d397e8c6 | 1838 | |
c8943832 VY |
1839 | /* A threshold for the number of repeated unsuccessful attempts to insert |
1840 | an empty row, before we flush the partial schedule and start over. */ | |
1841 | #define MAX_SPLIT_NUM 10 | |
f73d5666 | 1842 | /* Given the partial schedule PS, this function calculates and returns the |
315682fb | 1843 | cycles in which we can schedule the node with the given index I. |
f73d5666 MH |
1844 | NOTE: Here we do the backtracking in SMS, in some special cases. We have |
1845 | noticed that there are several cases in which we fail to SMS the loop | |
1846 | because the sched window of a node is empty due to tight data-deps. In | |
315682fb | 1847 | such cases we want to unschedule some of the predecessors/successors |
f73d5666 MH |
1848 | until we get non-empty scheduling window. It returns -1 if the |
1849 | scheduling window is empty and zero otherwise. */ | |
1850 | ||
1851 | static int | |
edc429ff | 1852 | get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node, |
fe43febc RS |
1853 | sbitmap sched_nodes, int ii, int *start_p, int *step_p, |
1854 | int *end_p) | |
f73d5666 MH |
1855 | { |
1856 | int start, step, end; | |
fe43febc | 1857 | int early_start, late_start; |
f73d5666 | 1858 | ddg_edge_ptr e; |
7ba9e72d TS |
1859 | auto_sbitmap psp (ps->g->num_nodes); |
1860 | auto_sbitmap pss (ps->g->num_nodes); | |
f73d5666 MH |
1861 | sbitmap u_node_preds = NODE_PREDECESSORS (u_node); |
1862 | sbitmap u_node_succs = NODE_SUCCESSORS (u_node); | |
1863 | int psp_not_empty; | |
1864 | int pss_not_empty; | |
fe43febc RS |
1865 | int count_preds; |
1866 | int count_succs; | |
f73d5666 MH |
1867 | |
1868 | /* 1. compute sched window for u (start, end, step). */ | |
f61e445a LC |
1869 | bitmap_clear (psp); |
1870 | bitmap_clear (pss); | |
1871 | psp_not_empty = bitmap_and (psp, u_node_preds, sched_nodes); | |
1872 | pss_not_empty = bitmap_and (pss, u_node_succs, sched_nodes); | |
f73d5666 | 1873 | |
fe43febc RS |
1874 | /* We first compute a forward range (start <= end), then decide whether |
1875 | to reverse it. */ | |
1876 | early_start = INT_MIN; | |
1877 | late_start = INT_MAX; | |
1878 | start = INT_MIN; | |
1879 | end = INT_MAX; | |
1880 | step = 1; | |
f73d5666 | 1881 | |
fe43febc RS |
1882 | count_preds = 0; |
1883 | count_succs = 0; | |
05223046 | 1884 | |
3e762578 RS |
1885 | if (dump_file && (psp_not_empty || pss_not_empty)) |
1886 | { | |
1887 | fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)" | |
1888 | "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii); | |
1889 | fprintf (dump_file, "%11s %11s %11s %11s %5s\n", | |
1890 | "start", "early start", "late start", "end", "time"); | |
1891 | fprintf (dump_file, "=========== =========== =========== ===========" | |
1892 | " =====\n"); | |
1893 | } | |
fe43febc RS |
1894 | /* Calculate early_start and limit end. Both bounds are inclusive. */ |
1895 | if (psp_not_empty) | |
1896 | for (e = u_node->in; e != 0; e = e->next_in) | |
1897 | { | |
88e9c867 | 1898 | int v = e->src->cuid; |
c8943832 | 1899 | |
d7c028c0 | 1900 | if (bitmap_bit_p (sched_nodes, v)) |
fe43febc | 1901 | { |
88e9c867 | 1902 | int p_st = SCHED_TIME (v); |
3e762578 RS |
1903 | int earliest = p_st + e->latency - (e->distance * ii); |
1904 | int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX); | |
c8943832 | 1905 | |
3e762578 RS |
1906 | if (dump_file) |
1907 | { | |
1908 | fprintf (dump_file, "%11s %11d %11s %11d %5d", | |
1909 | "", earliest, "", latest, p_st); | |
1910 | print_ddg_edge (dump_file, e); | |
1911 | fprintf (dump_file, "\n"); | |
1912 | } | |
c8943832 | 1913 | |
3e762578 RS |
1914 | early_start = MAX (early_start, earliest); |
1915 | end = MIN (end, latest); | |
f73d5666 | 1916 | |
fe43febc RS |
1917 | if (e->type == TRUE_DEP && e->data_type == REG_DEP) |
1918 | count_preds++; | |
fe43febc | 1919 | } |
fe43febc | 1920 | } |
d48b46e0 | 1921 | |
fe43febc RS |
1922 | /* Calculate late_start and limit start. Both bounds are inclusive. */ |
1923 | if (pss_not_empty) | |
1924 | for (e = u_node->out; e != 0; e = e->next_out) | |
1925 | { | |
88e9c867 | 1926 | int v = e->dest->cuid; |
05223046 | 1927 | |
d7c028c0 | 1928 | if (bitmap_bit_p (sched_nodes, v)) |
fe43febc | 1929 | { |
88e9c867 | 1930 | int s_st = SCHED_TIME (v); |
3e762578 RS |
1931 | int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN); |
1932 | int latest = s_st - e->latency + (e->distance * ii); | |
f73d5666 | 1933 | |
3e762578 RS |
1934 | if (dump_file) |
1935 | { | |
1936 | fprintf (dump_file, "%11d %11s %11d %11s %5d", | |
1937 | earliest, "", latest, "", s_st); | |
1938 | print_ddg_edge (dump_file, e); | |
1939 | fprintf (dump_file, "\n"); | |
1940 | } | |
c8943832 | 1941 | |
3e762578 RS |
1942 | start = MAX (start, earliest); |
1943 | late_start = MIN (late_start, latest); | |
05223046 | 1944 | |
fe43febc RS |
1945 | if (e->type == TRUE_DEP && e->data_type == REG_DEP) |
1946 | count_succs++; | |
fe43febc | 1947 | } |
fe43febc | 1948 | } |
05223046 | 1949 | |
3e762578 RS |
1950 | if (dump_file && (psp_not_empty || pss_not_empty)) |
1951 | { | |
1952 | fprintf (dump_file, "----------- ----------- ----------- -----------" | |
1953 | " -----\n"); | |
1954 | fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n", | |
1955 | start, early_start, late_start, end, "", | |
1956 | "(max, max, min, min)"); | |
1957 | } | |
1958 | ||
fe43febc RS |
1959 | /* Get a target scheduling window no bigger than ii. */ |
1960 | if (early_start == INT_MIN && late_start == INT_MAX) | |
88e9c867 | 1961 | early_start = NODE_ASAP (u_node); |
fe43febc RS |
1962 | else if (early_start == INT_MIN) |
1963 | early_start = late_start - (ii - 1); | |
1964 | late_start = MIN (late_start, early_start + (ii - 1)); | |
05223046 | 1965 | |
fe43febc RS |
1966 | /* Apply memory dependence limits. */ |
1967 | start = MAX (start, early_start); | |
1968 | end = MIN (end, late_start); | |
d48b46e0 | 1969 | |
3e762578 RS |
1970 | if (dump_file && (psp_not_empty || pss_not_empty)) |
1971 | fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n", | |
1972 | "", start, end, "", ""); | |
1973 | ||
fe43febc RS |
1974 | /* If there are at least as many successors as predecessors, schedule the |
1975 | node close to its successors. */ | |
1976 | if (pss_not_empty && count_succs >= count_preds) | |
f73d5666 | 1977 | { |
6b4db501 | 1978 | std::swap (start, end); |
fe43febc | 1979 | step = -1; |
f73d5666 MH |
1980 | } |
1981 | ||
fe43febc RS |
1982 | /* Now that we've finalized the window, make END an exclusive rather |
1983 | than an inclusive bound. */ | |
1984 | end += step; | |
1985 | ||
f73d5666 MH |
1986 | *start_p = start; |
1987 | *step_p = step; | |
1988 | *end_p = end; | |
f73d5666 MH |
1989 | |
1990 | if ((start >= end && step == 1) || (start <= end && step == -1)) | |
c8943832 VY |
1991 | { |
1992 | if (dump_file) | |
1993 | fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n", | |
1994 | start, end, step); | |
fe43febc | 1995 | return -1; |
c8943832 VY |
1996 | } |
1997 | ||
fe43febc | 1998 | return 0; |
f73d5666 MH |
1999 | } |
2000 | ||
d48b46e0 RE |
2001 | /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the |
2002 | node currently been scheduled. At the end of the calculation | |
0de5a32f AZ |
2003 | MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of |
2004 | U_NODE which are (1) already scheduled in the first/last row of | |
2005 | U_NODE's scheduling window, (2) whose dependence inequality with U | |
2006 | becomes an equality when U is scheduled in this same row, and (3) | |
2007 | whose dependence latency is zero. | |
2008 | ||
4117d6f7 | 2009 | The first and last rows are calculated using the following parameters: |
d48b46e0 RE |
2010 | START/END rows - The cycles that begins/ends the traversal on the window; |
2011 | searching for an empty cycle to schedule U_NODE. | |
2012 | STEP - The direction in which we traverse the window. | |
0de5a32f | 2013 | II - The initiation interval. */ |
d48b46e0 RE |
2014 | |
2015 | static void | |
2016 | calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end, | |
2017 | int step, int ii, sbitmap sched_nodes, | |
2018 | sbitmap must_precede, sbitmap must_follow) | |
2019 | { | |
2020 | ddg_edge_ptr e; | |
2021 | int first_cycle_in_window, last_cycle_in_window; | |
d48b46e0 RE |
2022 | |
2023 | gcc_assert (must_precede && must_follow); | |
2024 | ||
2025 | /* Consider the following scheduling window: | |
2026 | {first_cycle_in_window, first_cycle_in_window+1, ..., | |
2027 | last_cycle_in_window}. If step is 1 then the following will be | |
2028 | the order we traverse the window: {start=first_cycle_in_window, | |
2029 | first_cycle_in_window+1, ..., end=last_cycle_in_window+1}, | |
2030 | or {start=last_cycle_in_window, last_cycle_in_window-1, ..., | |
2031 | end=first_cycle_in_window-1} if step is -1. */ | |
2032 | first_cycle_in_window = (step == 1) ? start : end - step; | |
2033 | last_cycle_in_window = (step == 1) ? end - step : start; | |
2034 | ||
f61e445a LC |
2035 | bitmap_clear (must_precede); |
2036 | bitmap_clear (must_follow); | |
d48b46e0 RE |
2037 | |
2038 | if (dump_file) | |
2039 | fprintf (dump_file, "\nmust_precede: "); | |
2040 | ||
0de5a32f AZ |
2041 | /* Instead of checking if: |
2042 | (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window) | |
2043 | && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) == | |
2044 | first_cycle_in_window) | |
2045 | && e->latency == 0 | |
2046 | we use the fact that latency is non-negative: | |
2047 | SCHED_TIME (e->src) - (e->distance * ii) <= | |
2048 | SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <= | |
2049 | first_cycle_in_window | |
2050 | and check only if | |
2051 | SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */ | |
d48b46e0 | 2052 | for (e = u_node->in; e != 0; e = e->next_in) |
d7c028c0 | 2053 | if (bitmap_bit_p (sched_nodes, e->src->cuid) |
88e9c867 | 2054 | && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) == |
0de5a32f | 2055 | first_cycle_in_window)) |
d48b46e0 RE |
2056 | { |
2057 | if (dump_file) | |
2058 | fprintf (dump_file, "%d ", e->src->cuid); | |
2059 | ||
d7c028c0 | 2060 | bitmap_set_bit (must_precede, e->src->cuid); |
d48b46e0 RE |
2061 | } |
2062 | ||
2063 | if (dump_file) | |
2064 | fprintf (dump_file, "\nmust_follow: "); | |
2065 | ||
0de5a32f AZ |
2066 | /* Instead of checking if: |
2067 | (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window) | |
2068 | && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) == | |
2069 | last_cycle_in_window) | |
2070 | && e->latency == 0 | |
2071 | we use the fact that latency is non-negative: | |
2072 | SCHED_TIME (e->dest) + (e->distance * ii) >= | |
b8698a0f | 2073 | SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >= |
0de5a32f AZ |
2074 | last_cycle_in_window |
2075 | and check only if | |
2076 | SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */ | |
d48b46e0 | 2077 | for (e = u_node->out; e != 0; e = e->next_out) |
d7c028c0 | 2078 | if (bitmap_bit_p (sched_nodes, e->dest->cuid) |
88e9c867 | 2079 | && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) == |
0de5a32f | 2080 | last_cycle_in_window)) |
d48b46e0 RE |
2081 | { |
2082 | if (dump_file) | |
2083 | fprintf (dump_file, "%d ", e->dest->cuid); | |
2084 | ||
d7c028c0 | 2085 | bitmap_set_bit (must_follow, e->dest->cuid); |
d48b46e0 RE |
2086 | } |
2087 | ||
2088 | if (dump_file) | |
2089 | fprintf (dump_file, "\n"); | |
2090 | } | |
2091 | ||
2092 | /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following | |
2093 | parameters to decide if that's possible: | |
2094 | PS - The partial schedule. | |
2095 | U - The serial number of U_NODE. | |
fa10beec | 2096 | NUM_SPLITS - The number of row splits made so far. |
d48b46e0 RE |
2097 | MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at |
2098 | the first row of the scheduling window) | |
2099 | MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the | |
2100 | last row of the scheduling window) */ | |
2101 | ||
2102 | static bool | |
88e9c867 | 2103 | try_scheduling_node_in_cycle (partial_schedule_ptr ps, |
de82c453 | 2104 | int u, int cycle, sbitmap sched_nodes, |
d48b46e0 RE |
2105 | int *num_splits, sbitmap must_precede, |
2106 | sbitmap must_follow) | |
2107 | { | |
2108 | ps_insn_ptr psi; | |
2109 | bool success = 0; | |
2110 | ||
2111 | verify_partial_schedule (ps, sched_nodes); | |
88e9c867 | 2112 | psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow); |
d48b46e0 RE |
2113 | if (psi) |
2114 | { | |
88e9c867 | 2115 | SCHED_TIME (u) = cycle; |
d7c028c0 | 2116 | bitmap_set_bit (sched_nodes, u); |
d48b46e0 RE |
2117 | success = 1; |
2118 | *num_splits = 0; | |
2119 | if (dump_file) | |
de82c453 | 2120 | fprintf (dump_file, "Scheduled w/o split in %d\n", cycle); |
d48b46e0 RE |
2121 | |
2122 | } | |
2123 | ||
2124 | return success; | |
2125 | } | |
2126 | ||
f73d5666 MH |
2127 | /* This function implements the scheduling algorithm for SMS according to the |
2128 | above algorithm. */ | |
d397e8c6 | 2129 | static partial_schedule_ptr |
10d22567 | 2130 | sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order) |
d397e8c6 MH |
2131 | { |
2132 | int ii = mii; | |
c8943832 VY |
2133 | int i, c, success, num_splits = 0; |
2134 | int flush_and_start_over = true; | |
d397e8c6 | 2135 | int num_nodes = g->num_nodes; |
d397e8c6 | 2136 | int start, end, step; /* Place together into one struct? */ |
7ba9e72d TS |
2137 | auto_sbitmap sched_nodes (num_nodes); |
2138 | auto_sbitmap must_precede (num_nodes); | |
2139 | auto_sbitmap must_follow (num_nodes); | |
2140 | auto_sbitmap tobe_scheduled (num_nodes); | |
d72372e4 | 2141 | |
c420be8b RZ |
2142 | /* Value of param_sms_dfa_history is a limit on the number of cycles that |
2143 | resource conflicts can span. ??? Should be provided by DFA, and be | |
2144 | dependent on the type of insn scheduled. Set to 0 by default to save | |
2145 | compile time. */ | |
2146 | partial_schedule_ptr ps = create_partial_schedule (ii, g, | |
2147 | param_sms_dfa_history); | |
d397e8c6 | 2148 | |
f61e445a LC |
2149 | bitmap_ones (tobe_scheduled); |
2150 | bitmap_clear (sched_nodes); | |
f73d5666 | 2151 | |
c8943832 | 2152 | while (flush_and_start_over && (ii < maxii)) |
d397e8c6 | 2153 | { |
f73d5666 | 2154 | |
d397e8c6 | 2155 | if (dump_file) |
62e5bf5d | 2156 | fprintf (dump_file, "Starting with ii=%d\n", ii); |
c8943832 | 2157 | flush_and_start_over = false; |
f61e445a | 2158 | bitmap_clear (sched_nodes); |
d397e8c6 MH |
2159 | |
2160 | for (i = 0; i < num_nodes; i++) | |
2161 | { | |
2162 | int u = nodes_order[i]; | |
f73d5666 | 2163 | ddg_node_ptr u_node = &ps->g->nodes[u]; |
21afc57d | 2164 | rtx_insn *insn = u_node->insn; |
d397e8c6 | 2165 | |
06d5d63d | 2166 | gcc_checking_assert (NONDEBUG_INSN_P (insn)); |
d397e8c6 | 2167 | |
d7c028c0 | 2168 | if (bitmap_bit_p (sched_nodes, u)) |
f73d5666 | 2169 | continue; |
d397e8c6 | 2170 | |
f73d5666 | 2171 | /* Try to get non-empty scheduling window. */ |
c8943832 | 2172 | success = 0; |
edc429ff | 2173 | if (get_sched_window (ps, u_node, sched_nodes, ii, &start, |
c8943832 VY |
2174 | &step, &end) == 0) |
2175 | { | |
2176 | if (dump_file) | |
3e762578 RS |
2177 | fprintf (dump_file, "\nTrying to schedule node %d " |
2178 | "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID | |
c8943832 | 2179 | (g->nodes[u].insn)), start, end, step); |
c8943832 VY |
2180 | |
2181 | gcc_assert ((step > 0 && start < end) | |
2182 | || (step < 0 && start > end)); | |
2183 | ||
d48b46e0 RE |
2184 | calculate_must_precede_follow (u_node, start, end, step, ii, |
2185 | sched_nodes, must_precede, | |
2186 | must_follow); | |
2187 | ||
c8943832 VY |
2188 | for (c = start; c != end; c += step) |
2189 | { | |
edc429ff | 2190 | sbitmap tmp_precede, tmp_follow; |
d48b46e0 | 2191 | |
edc429ff RE |
2192 | set_must_precede_follow (&tmp_follow, must_follow, |
2193 | &tmp_precede, must_precede, | |
2194 | c, start, end, step); | |
d48b46e0 | 2195 | success = |
88e9c867 | 2196 | try_scheduling_node_in_cycle (ps, u, c, |
d48b46e0 RE |
2197 | sched_nodes, |
2198 | &num_splits, tmp_precede, | |
2199 | tmp_follow); | |
2200 | if (success) | |
2201 | break; | |
c8943832 | 2202 | } |
d48b46e0 | 2203 | |
c8943832 VY |
2204 | verify_partial_schedule (ps, sched_nodes); |
2205 | } | |
2206 | if (!success) | |
2207 | { | |
2208 | int split_row; | |
2209 | ||
2210 | if (ii++ == maxii) | |
2211 | break; | |
2212 | ||
2213 | if (num_splits >= MAX_SPLIT_NUM) | |
2214 | { | |
2215 | num_splits = 0; | |
2216 | flush_and_start_over = true; | |
2217 | verify_partial_schedule (ps, sched_nodes); | |
2218 | reset_partial_schedule (ps, ii); | |
2219 | verify_partial_schedule (ps, sched_nodes); | |
2220 | break; | |
2221 | } | |
2222 | ||
2223 | num_splits++; | |
8de9b877 AZ |
2224 | /* The scheduling window is exclusive of 'end' |
2225 | whereas compute_split_window() expects an inclusive, | |
2226 | ordered range. */ | |
c8943832 | 2227 | if (step == 1) |
8de9b877 | 2228 | split_row = compute_split_row (sched_nodes, start, end - 1, |
c8943832 VY |
2229 | ps->ii, u_node); |
2230 | else | |
8de9b877 | 2231 | split_row = compute_split_row (sched_nodes, end + 1, start, |
c8943832 | 2232 | ps->ii, u_node); |
f73d5666 | 2233 | |
c8943832 VY |
2234 | ps_insert_empty_row (ps, split_row, sched_nodes); |
2235 | i--; /* Go back and retry node i. */ | |
d397e8c6 | 2236 | |
c8943832 VY |
2237 | if (dump_file) |
2238 | fprintf (dump_file, "num_splits=%d\n", num_splits); | |
2239 | } | |
d397e8c6 | 2240 | |
c8943832 VY |
2241 | /* ??? If (success), check register pressure estimates. */ |
2242 | } /* Continue with next node. */ | |
2243 | } /* While flush_and_start_over. */ | |
d397e8c6 MH |
2244 | if (ii >= maxii) |
2245 | { | |
2246 | free_partial_schedule (ps); | |
2247 | ps = NULL; | |
2248 | } | |
c8943832 | 2249 | else |
f61e445a | 2250 | gcc_assert (bitmap_equal_p (tobe_scheduled, sched_nodes)); |
c8943832 | 2251 | |
d397e8c6 MH |
2252 | return ps; |
2253 | } | |
2254 | ||
c8943832 VY |
2255 | /* This function inserts a new empty row into PS at the position |
2256 | according to SPLITROW, keeping all already scheduled instructions | |
2257 | intact and updating their SCHED_TIME and cycle accordingly. */ | |
2258 | static void | |
2259 | ps_insert_empty_row (partial_schedule_ptr ps, int split_row, | |
2260 | sbitmap sched_nodes) | |
2261 | { | |
2262 | ps_insn_ptr crr_insn; | |
2263 | ps_insn_ptr *rows_new; | |
2264 | int ii = ps->ii; | |
2265 | int new_ii = ii + 1; | |
2266 | int row; | |
81c41166 | 2267 | int *rows_length_new; |
c8943832 VY |
2268 | |
2269 | verify_partial_schedule (ps, sched_nodes); | |
2270 | ||
2271 | /* We normalize sched_time and rotate ps to have only non-negative sched | |
2272 | times, for simplicity of updating cycles after inserting new row. */ | |
2273 | split_row -= ps->min_cycle; | |
2274 | split_row = SMODULO (split_row, ii); | |
2275 | if (dump_file) | |
2276 | fprintf (dump_file, "split_row=%d\n", split_row); | |
2277 | ||
fc6970e4 RE |
2278 | reset_sched_times (ps, PS_MIN_CYCLE (ps)); |
2279 | rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); | |
c8943832 VY |
2280 | |
2281 | rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr)); | |
81c41166 | 2282 | rows_length_new = (int *) xcalloc (new_ii, sizeof (int)); |
c8943832 VY |
2283 | for (row = 0; row < split_row; row++) |
2284 | { | |
2285 | rows_new[row] = ps->rows[row]; | |
81c41166 | 2286 | rows_length_new[row] = ps->rows_length[row]; |
c8943832 VY |
2287 | ps->rows[row] = NULL; |
2288 | for (crr_insn = rows_new[row]; | |
2289 | crr_insn; crr_insn = crr_insn->next_in_row) | |
2290 | { | |
88e9c867 | 2291 | int u = crr_insn->id; |
c8943832 VY |
2292 | int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii); |
2293 | ||
2294 | SCHED_TIME (u) = new_time; | |
2295 | crr_insn->cycle = new_time; | |
2296 | SCHED_ROW (u) = new_time % new_ii; | |
2297 | SCHED_STAGE (u) = new_time / new_ii; | |
2298 | } | |
2299 | ||
2300 | } | |
2301 | ||
2302 | rows_new[split_row] = NULL; | |
2303 | ||
2304 | for (row = split_row; row < ii; row++) | |
2305 | { | |
2306 | rows_new[row + 1] = ps->rows[row]; | |
81c41166 | 2307 | rows_length_new[row + 1] = ps->rows_length[row]; |
c8943832 VY |
2308 | ps->rows[row] = NULL; |
2309 | for (crr_insn = rows_new[row + 1]; | |
2310 | crr_insn; crr_insn = crr_insn->next_in_row) | |
2311 | { | |
88e9c867 | 2312 | int u = crr_insn->id; |
c8943832 VY |
2313 | int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1; |
2314 | ||
2315 | SCHED_TIME (u) = new_time; | |
2316 | crr_insn->cycle = new_time; | |
2317 | SCHED_ROW (u) = new_time % new_ii; | |
2318 | SCHED_STAGE (u) = new_time / new_ii; | |
2319 | } | |
2320 | } | |
2321 | ||
2322 | /* Updating ps. */ | |
2323 | ps->min_cycle = ps->min_cycle + ps->min_cycle / ii | |
2324 | + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0); | |
2325 | ps->max_cycle = ps->max_cycle + ps->max_cycle / ii | |
2326 | + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0); | |
2327 | free (ps->rows); | |
2328 | ps->rows = rows_new; | |
81c41166 RE |
2329 | free (ps->rows_length); |
2330 | ps->rows_length = rows_length_new; | |
c8943832 VY |
2331 | ps->ii = new_ii; |
2332 | gcc_assert (ps->min_cycle >= 0); | |
2333 | ||
2334 | verify_partial_schedule (ps, sched_nodes); | |
2335 | ||
2336 | if (dump_file) | |
2337 | fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle, | |
2338 | ps->max_cycle); | |
2339 | } | |
2340 | ||
2341 | /* Given U_NODE which is the node that failed to be scheduled; LOW and | |
2342 | UP which are the boundaries of it's scheduling window; compute using | |
84fbffb2 | 2343 | SCHED_NODES and II a row in the partial schedule that can be split |
c8943832 VY |
2344 | which will separate a critical predecessor from a critical successor |
2345 | thereby expanding the window, and return it. */ | |
2346 | static int | |
2347 | compute_split_row (sbitmap sched_nodes, int low, int up, int ii, | |
2348 | ddg_node_ptr u_node) | |
2349 | { | |
2350 | ddg_edge_ptr e; | |
2351 | int lower = INT_MIN, upper = INT_MAX; | |
88e9c867 RS |
2352 | int crit_pred = -1; |
2353 | int crit_succ = -1; | |
c8943832 VY |
2354 | int crit_cycle; |
2355 | ||
2356 | for (e = u_node->in; e != 0; e = e->next_in) | |
2357 | { | |
88e9c867 | 2358 | int v = e->src->cuid; |
c8943832 | 2359 | |
d7c028c0 | 2360 | if (bitmap_bit_p (sched_nodes, v) |
88e9c867 RS |
2361 | && (low == SCHED_TIME (v) + e->latency - (e->distance * ii))) |
2362 | if (SCHED_TIME (v) > lower) | |
c8943832 | 2363 | { |
88e9c867 RS |
2364 | crit_pred = v; |
2365 | lower = SCHED_TIME (v); | |
c8943832 VY |
2366 | } |
2367 | } | |
2368 | ||
88e9c867 | 2369 | if (crit_pred >= 0) |
c8943832 VY |
2370 | { |
2371 | crit_cycle = SCHED_TIME (crit_pred) + 1; | |
2372 | return SMODULO (crit_cycle, ii); | |
2373 | } | |
2374 | ||
2375 | for (e = u_node->out; e != 0; e = e->next_out) | |
2376 | { | |
88e9c867 RS |
2377 | int v = e->dest->cuid; |
2378 | ||
d7c028c0 | 2379 | if (bitmap_bit_p (sched_nodes, v) |
88e9c867 RS |
2380 | && (up == SCHED_TIME (v) - e->latency + (e->distance * ii))) |
2381 | if (SCHED_TIME (v) < upper) | |
c8943832 | 2382 | { |
88e9c867 RS |
2383 | crit_succ = v; |
2384 | upper = SCHED_TIME (v); | |
c8943832 VY |
2385 | } |
2386 | } | |
2387 | ||
88e9c867 | 2388 | if (crit_succ >= 0) |
c8943832 VY |
2389 | { |
2390 | crit_cycle = SCHED_TIME (crit_succ); | |
2391 | return SMODULO (crit_cycle, ii); | |
2392 | } | |
2393 | ||
2394 | if (dump_file) | |
2395 | fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n"); | |
2396 | ||
2397 | return SMODULO ((low + up + 1) / 2, ii); | |
2398 | } | |
2399 | ||
2400 | static void | |
2401 | verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes) | |
2402 | { | |
2403 | int row; | |
2404 | ps_insn_ptr crr_insn; | |
2405 | ||
2406 | for (row = 0; row < ps->ii; row++) | |
81c41166 RE |
2407 | { |
2408 | int length = 0; | |
2409 | ||
2410 | for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) | |
2411 | { | |
88e9c867 | 2412 | int u = crr_insn->id; |
81c41166 RE |
2413 | |
2414 | length++; | |
d7c028c0 | 2415 | gcc_assert (bitmap_bit_p (sched_nodes, u)); |
81c41166 RE |
2416 | /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by |
2417 | popcount (sched_nodes) == number of insns in ps. */ | |
2418 | gcc_assert (SCHED_TIME (u) >= ps->min_cycle); | |
2419 | gcc_assert (SCHED_TIME (u) <= ps->max_cycle); | |
2420 | } | |
2421 | ||
2422 | gcc_assert (ps->rows_length[row] == length); | |
2423 | } | |
c8943832 VY |
2424 | } |
2425 | ||
d397e8c6 MH |
2426 | \f |
2427 | /* This page implements the algorithm for ordering the nodes of a DDG | |
2428 | for modulo scheduling, activated through the | |
2429 | "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */ | |
2430 | ||
2431 | #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info) | |
2432 | #define ASAP(x) (ORDER_PARAMS ((x))->asap) | |
2433 | #define ALAP(x) (ORDER_PARAMS ((x))->alap) | |
2434 | #define HEIGHT(x) (ORDER_PARAMS ((x))->height) | |
2435 | #define MOB(x) (ALAP ((x)) - ASAP ((x))) | |
2436 | #define DEPTH(x) (ASAP ((x))) | |
2437 | ||
2438 | typedef struct node_order_params * nopa; | |
2439 | ||
2440 | static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result); | |
2441 | static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int); | |
5cd53742 | 2442 | static nopa calculate_order_params (ddg_ptr, int, int *); |
d397e8c6 MH |
2443 | static int find_max_asap (ddg_ptr, sbitmap); |
2444 | static int find_max_hv_min_mob (ddg_ptr, sbitmap); | |
2445 | static int find_max_dv_min_mob (ddg_ptr, sbitmap); | |
2446 | ||
2447 | enum sms_direction {BOTTOMUP, TOPDOWN}; | |
2448 | ||
2449 | struct node_order_params | |
2450 | { | |
2451 | int asap; | |
2452 | int alap; | |
2453 | int height; | |
2454 | }; | |
2455 | ||
2456 | /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */ | |
2457 | static void | |
2458 | check_nodes_order (int *node_order, int num_nodes) | |
2459 | { | |
2460 | int i; | |
7ba9e72d | 2461 | auto_sbitmap tmp (num_nodes); |
d397e8c6 | 2462 | |
f61e445a | 2463 | bitmap_clear (tmp); |
d397e8c6 | 2464 | |
05223046 RE |
2465 | if (dump_file) |
2466 | fprintf (dump_file, "SMS final nodes order: \n"); | |
2467 | ||
d397e8c6 MH |
2468 | for (i = 0; i < num_nodes; i++) |
2469 | { | |
2470 | int u = node_order[i]; | |
2471 | ||
05223046 RE |
2472 | if (dump_file) |
2473 | fprintf (dump_file, "%d ", u); | |
d7c028c0 | 2474 | gcc_assert (u < num_nodes && u >= 0 && !bitmap_bit_p (tmp, u)); |
d397e8c6 | 2475 | |
d7c028c0 | 2476 | bitmap_set_bit (tmp, u); |
d397e8c6 | 2477 | } |
b8698a0f | 2478 | |
05223046 RE |
2479 | if (dump_file) |
2480 | fprintf (dump_file, "\n"); | |
d397e8c6 MH |
2481 | } |
2482 | ||
2483 | /* Order the nodes of G for scheduling and pass the result in | |
2484 | NODE_ORDER. Also set aux.count of each node to ASAP. | |
5cd53742 | 2485 | Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */ |
d397e8c6 | 2486 | static int |
5cd53742 | 2487 | sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap) |
d397e8c6 MH |
2488 | { |
2489 | int i; | |
2490 | int rec_mii = 0; | |
2491 | ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g); | |
2492 | ||
5cd53742 | 2493 | nopa nops = calculate_order_params (g, mii, pmax_asap); |
d397e8c6 | 2494 | |
8cec1624 RE |
2495 | if (dump_file) |
2496 | print_sccs (dump_file, sccs, g); | |
2497 | ||
d397e8c6 MH |
2498 | order_nodes_of_sccs (sccs, node_order); |
2499 | ||
2500 | if (sccs->num_sccs > 0) | |
2501 | /* First SCC has the largest recurrence_length. */ | |
2502 | rec_mii = sccs->sccs[0]->recurrence_length; | |
2503 | ||
2504 | /* Save ASAP before destroying node_order_params. */ | |
2505 | for (i = 0; i < g->num_nodes; i++) | |
2506 | { | |
2507 | ddg_node_ptr v = &g->nodes[i]; | |
2508 | v->aux.count = ASAP (v); | |
2509 | } | |
2510 | ||
2511 | free (nops); | |
2512 | free_ddg_all_sccs (sccs); | |
2513 | check_nodes_order (node_order, g->num_nodes); | |
2514 | ||
2515 | return rec_mii; | |
2516 | } | |
2517 | ||
2518 | static void | |
2519 | order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order) | |
2520 | { | |
2521 | int i, pos = 0; | |
2522 | ddg_ptr g = all_sccs->ddg; | |
2523 | int num_nodes = g->num_nodes; | |
7ba9e72d TS |
2524 | auto_sbitmap prev_sccs (num_nodes); |
2525 | auto_sbitmap on_path (num_nodes); | |
2526 | auto_sbitmap tmp (num_nodes); | |
2527 | auto_sbitmap ones (num_nodes); | |
d397e8c6 | 2528 | |
f61e445a LC |
2529 | bitmap_clear (prev_sccs); |
2530 | bitmap_ones (ones); | |
d397e8c6 | 2531 | |
fa10beec | 2532 | /* Perform the node ordering starting from the SCC with the highest recMII. |
d397e8c6 MH |
2533 | For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */ |
2534 | for (i = 0; i < all_sccs->num_sccs; i++) | |
2535 | { | |
2536 | ddg_scc_ptr scc = all_sccs->sccs[i]; | |
2537 | ||
2538 | /* Add nodes on paths from previous SCCs to the current SCC. */ | |
2539 | find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes); | |
f61e445a | 2540 | bitmap_ior (tmp, scc->nodes, on_path); |
d397e8c6 MH |
2541 | |
2542 | /* Add nodes on paths from the current SCC to previous SCCs. */ | |
2543 | find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs); | |
f61e445a | 2544 | bitmap_ior (tmp, tmp, on_path); |
d397e8c6 MH |
2545 | |
2546 | /* Remove nodes of previous SCCs from current extended SCC. */ | |
f61e445a | 2547 | bitmap_and_compl (tmp, tmp, prev_sccs); |
d397e8c6 MH |
2548 | |
2549 | pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos); | |
2550 | /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */ | |
2551 | } | |
2552 | ||
2553 | /* Handle the remaining nodes that do not belong to any scc. Each call | |
2554 | to order_nodes_in_scc handles a single connected component. */ | |
2555 | while (pos < g->num_nodes) | |
2556 | { | |
f61e445a | 2557 | bitmap_and_compl (tmp, ones, prev_sccs); |
d397e8c6 MH |
2558 | pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos); |
2559 | } | |
d397e8c6 MH |
2560 | } |
2561 | ||
2562 | /* MII is needed if we consider backarcs (that do not close recursive cycles). */ | |
2563 | static struct node_order_params * | |
5cd53742 | 2564 | calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap) |
d397e8c6 MH |
2565 | { |
2566 | int u; | |
2567 | int max_asap; | |
2568 | int num_nodes = g->num_nodes; | |
2569 | ddg_edge_ptr e; | |
2570 | /* Allocate a place to hold ordering params for each node in the DDG. */ | |
2571 | nopa node_order_params_arr; | |
2572 | ||
2573 | /* Initialize of ASAP/ALAP/HEIGHT to zero. */ | |
2574 | node_order_params_arr = (nopa) xcalloc (num_nodes, | |
2575 | sizeof (struct node_order_params)); | |
2576 | ||
61ada8ae | 2577 | /* Set the aux pointer of each node to point to its order_params structure. */ |
d397e8c6 MH |
2578 | for (u = 0; u < num_nodes; u++) |
2579 | g->nodes[u].aux.info = &node_order_params_arr[u]; | |
2580 | ||
2581 | /* Disregarding a backarc from each recursive cycle to obtain a DAG, | |
2582 | calculate ASAP, ALAP, mobility, distance, and height for each node | |
2583 | in the dependence (direct acyclic) graph. */ | |
2584 | ||
2585 | /* We assume that the nodes in the array are in topological order. */ | |
2586 | ||
2587 | max_asap = 0; | |
2588 | for (u = 0; u < num_nodes; u++) | |
2589 | { | |
2590 | ddg_node_ptr u_node = &g->nodes[u]; | |
2591 | ||
2592 | ASAP (u_node) = 0; | |
2593 | for (e = u_node->in; e; e = e->next_in) | |
2594 | if (e->distance == 0) | |
2595 | ASAP (u_node) = MAX (ASAP (u_node), | |
2596 | ASAP (e->src) + e->latency); | |
2597 | max_asap = MAX (max_asap, ASAP (u_node)); | |
2598 | } | |
2599 | ||
2600 | for (u = num_nodes - 1; u > -1; u--) | |
2601 | { | |
2602 | ddg_node_ptr u_node = &g->nodes[u]; | |
2603 | ||
2604 | ALAP (u_node) = max_asap; | |
2605 | HEIGHT (u_node) = 0; | |
2606 | for (e = u_node->out; e; e = e->next_out) | |
2607 | if (e->distance == 0) | |
2608 | { | |
2609 | ALAP (u_node) = MIN (ALAP (u_node), | |
2610 | ALAP (e->dest) - e->latency); | |
2611 | HEIGHT (u_node) = MAX (HEIGHT (u_node), | |
2612 | HEIGHT (e->dest) + e->latency); | |
2613 | } | |
2614 | } | |
05223046 RE |
2615 | if (dump_file) |
2616 | { | |
2617 | fprintf (dump_file, "\nOrder params\n"); | |
2618 | for (u = 0; u < num_nodes; u++) | |
2619 | { | |
2620 | ddg_node_ptr u_node = &g->nodes[u]; | |
2621 | ||
2622 | fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u, | |
2623 | ASAP (u_node), ALAP (u_node), HEIGHT (u_node)); | |
2624 | } | |
2625 | } | |
d397e8c6 | 2626 | |
5cd53742 | 2627 | *pmax_asap = max_asap; |
d397e8c6 MH |
2628 | return node_order_params_arr; |
2629 | } | |
2630 | ||
2631 | static int | |
2632 | find_max_asap (ddg_ptr g, sbitmap nodes) | |
2633 | { | |
dfea6c85 | 2634 | unsigned int u = 0; |
d397e8c6 MH |
2635 | int max_asap = -1; |
2636 | int result = -1; | |
b6e7e9af | 2637 | sbitmap_iterator sbi; |
d397e8c6 | 2638 | |
d4ac4ce2 | 2639 | EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi) |
d397e8c6 MH |
2640 | { |
2641 | ddg_node_ptr u_node = &g->nodes[u]; | |
2642 | ||
2643 | if (max_asap < ASAP (u_node)) | |
2644 | { | |
2645 | max_asap = ASAP (u_node); | |
2646 | result = u; | |
2647 | } | |
b6e7e9af | 2648 | } |
d397e8c6 MH |
2649 | return result; |
2650 | } | |
2651 | ||
2652 | static int | |
2653 | find_max_hv_min_mob (ddg_ptr g, sbitmap nodes) | |
2654 | { | |
dfea6c85 | 2655 | unsigned int u = 0; |
d397e8c6 MH |
2656 | int max_hv = -1; |
2657 | int min_mob = INT_MAX; | |
2658 | int result = -1; | |
b6e7e9af | 2659 | sbitmap_iterator sbi; |
d397e8c6 | 2660 | |
d4ac4ce2 | 2661 | EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi) |
d397e8c6 MH |
2662 | { |
2663 | ddg_node_ptr u_node = &g->nodes[u]; | |
2664 | ||
2665 | if (max_hv < HEIGHT (u_node)) | |
2666 | { | |
2667 | max_hv = HEIGHT (u_node); | |
2668 | min_mob = MOB (u_node); | |
2669 | result = u; | |
2670 | } | |
2671 | else if ((max_hv == HEIGHT (u_node)) | |
2672 | && (min_mob > MOB (u_node))) | |
2673 | { | |
2674 | min_mob = MOB (u_node); | |
2675 | result = u; | |
2676 | } | |
b6e7e9af | 2677 | } |
d397e8c6 MH |
2678 | return result; |
2679 | } | |
2680 | ||
2681 | static int | |
2682 | find_max_dv_min_mob (ddg_ptr g, sbitmap nodes) | |
2683 | { | |
dfea6c85 | 2684 | unsigned int u = 0; |
d397e8c6 MH |
2685 | int max_dv = -1; |
2686 | int min_mob = INT_MAX; | |
2687 | int result = -1; | |
b6e7e9af | 2688 | sbitmap_iterator sbi; |
d397e8c6 | 2689 | |
d4ac4ce2 | 2690 | EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi) |
d397e8c6 MH |
2691 | { |
2692 | ddg_node_ptr u_node = &g->nodes[u]; | |
2693 | ||
2694 | if (max_dv < DEPTH (u_node)) | |
2695 | { | |
2696 | max_dv = DEPTH (u_node); | |
2697 | min_mob = MOB (u_node); | |
2698 | result = u; | |
2699 | } | |
2700 | else if ((max_dv == DEPTH (u_node)) | |
2701 | && (min_mob > MOB (u_node))) | |
2702 | { | |
2703 | min_mob = MOB (u_node); | |
2704 | result = u; | |
2705 | } | |
b6e7e9af | 2706 | } |
d397e8c6 MH |
2707 | return result; |
2708 | } | |
2709 | ||
2710 | /* Places the nodes of SCC into the NODE_ORDER array starting | |
2711 | at position POS, according to the SMS ordering algorithm. | |
2712 | NODES_ORDERED (in&out parameter) holds the bitset of all nodes in | |
2713 | the NODE_ORDER array, starting from position zero. */ | |
2714 | static int | |
2715 | order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc, | |
2716 | int * node_order, int pos) | |
2717 | { | |
2718 | enum sms_direction dir; | |
2719 | int num_nodes = g->num_nodes; | |
7ba9e72d TS |
2720 | auto_sbitmap workset (num_nodes); |
2721 | auto_sbitmap tmp (num_nodes); | |
d397e8c6 | 2722 | sbitmap zero_bitmap = sbitmap_alloc (num_nodes); |
7ba9e72d TS |
2723 | auto_sbitmap predecessors (num_nodes); |
2724 | auto_sbitmap successors (num_nodes); | |
d397e8c6 | 2725 | |
f61e445a | 2726 | bitmap_clear (predecessors); |
d397e8c6 MH |
2727 | find_predecessors (predecessors, g, nodes_ordered); |
2728 | ||
f61e445a | 2729 | bitmap_clear (successors); |
d397e8c6 MH |
2730 | find_successors (successors, g, nodes_ordered); |
2731 | ||
f61e445a LC |
2732 | bitmap_clear (tmp); |
2733 | if (bitmap_and (tmp, predecessors, scc)) | |
d397e8c6 | 2734 | { |
f61e445a | 2735 | bitmap_copy (workset, tmp); |
d397e8c6 MH |
2736 | dir = BOTTOMUP; |
2737 | } | |
f61e445a | 2738 | else if (bitmap_and (tmp, successors, scc)) |
d397e8c6 | 2739 | { |
f61e445a | 2740 | bitmap_copy (workset, tmp); |
d397e8c6 MH |
2741 | dir = TOPDOWN; |
2742 | } | |
2743 | else | |
2744 | { | |
2745 | int u; | |
2746 | ||
f61e445a | 2747 | bitmap_clear (workset); |
d397e8c6 | 2748 | if ((u = find_max_asap (g, scc)) >= 0) |
d7c028c0 | 2749 | bitmap_set_bit (workset, u); |
d397e8c6 MH |
2750 | dir = BOTTOMUP; |
2751 | } | |
2752 | ||
f61e445a LC |
2753 | bitmap_clear (zero_bitmap); |
2754 | while (!bitmap_equal_p (workset, zero_bitmap)) | |
d397e8c6 MH |
2755 | { |
2756 | int v; | |
2757 | ddg_node_ptr v_node; | |
2758 | sbitmap v_node_preds; | |
2759 | sbitmap v_node_succs; | |
2760 | ||
2761 | if (dir == TOPDOWN) | |
2762 | { | |
f61e445a | 2763 | while (!bitmap_equal_p (workset, zero_bitmap)) |
d397e8c6 MH |
2764 | { |
2765 | v = find_max_hv_min_mob (g, workset); | |
2766 | v_node = &g->nodes[v]; | |
2767 | node_order[pos++] = v; | |
2768 | v_node_succs = NODE_SUCCESSORS (v_node); | |
f61e445a | 2769 | bitmap_and (tmp, v_node_succs, scc); |
d397e8c6 MH |
2770 | |
2771 | /* Don't consider the already ordered successors again. */ | |
f61e445a LC |
2772 | bitmap_and_compl (tmp, tmp, nodes_ordered); |
2773 | bitmap_ior (workset, workset, tmp); | |
d7c028c0 LC |
2774 | bitmap_clear_bit (workset, v); |
2775 | bitmap_set_bit (nodes_ordered, v); | |
d397e8c6 MH |
2776 | } |
2777 | dir = BOTTOMUP; | |
f61e445a | 2778 | bitmap_clear (predecessors); |
d397e8c6 | 2779 | find_predecessors (predecessors, g, nodes_ordered); |
f61e445a | 2780 | bitmap_and (workset, predecessors, scc); |
d397e8c6 MH |
2781 | } |
2782 | else | |
2783 | { | |
f61e445a | 2784 | while (!bitmap_equal_p (workset, zero_bitmap)) |
d397e8c6 MH |
2785 | { |
2786 | v = find_max_dv_min_mob (g, workset); | |
2787 | v_node = &g->nodes[v]; | |
2788 | node_order[pos++] = v; | |
2789 | v_node_preds = NODE_PREDECESSORS (v_node); | |
f61e445a | 2790 | bitmap_and (tmp, v_node_preds, scc); |
d397e8c6 MH |
2791 | |
2792 | /* Don't consider the already ordered predecessors again. */ | |
f61e445a LC |
2793 | bitmap_and_compl (tmp, tmp, nodes_ordered); |
2794 | bitmap_ior (workset, workset, tmp); | |
d7c028c0 LC |
2795 | bitmap_clear_bit (workset, v); |
2796 | bitmap_set_bit (nodes_ordered, v); | |
d397e8c6 MH |
2797 | } |
2798 | dir = TOPDOWN; | |
f61e445a | 2799 | bitmap_clear (successors); |
d397e8c6 | 2800 | find_successors (successors, g, nodes_ordered); |
f61e445a | 2801 | bitmap_and (workset, successors, scc); |
d397e8c6 MH |
2802 | } |
2803 | } | |
d397e8c6 | 2804 | sbitmap_free (zero_bitmap); |
d397e8c6 MH |
2805 | return pos; |
2806 | } | |
2807 | ||
2808 | \f | |
2809 | /* This page contains functions for manipulating partial-schedules during | |
2810 | modulo scheduling. */ | |
2811 | ||
2812 | /* Create a partial schedule and allocate a memory to hold II rows. */ | |
5f1f4746 KH |
2813 | |
2814 | static partial_schedule_ptr | |
d397e8c6 MH |
2815 | create_partial_schedule (int ii, ddg_ptr g, int history) |
2816 | { | |
5ed6ace5 | 2817 | partial_schedule_ptr ps = XNEW (struct partial_schedule); |
d397e8c6 | 2818 | ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr)); |
81c41166 | 2819 | ps->rows_length = (int *) xcalloc (ii, sizeof (int)); |
9771b263 | 2820 | ps->reg_moves.create (0); |
d397e8c6 MH |
2821 | ps->ii = ii; |
2822 | ps->history = history; | |
2823 | ps->min_cycle = INT_MAX; | |
2824 | ps->max_cycle = INT_MIN; | |
2825 | ps->g = g; | |
2826 | ||
2827 | return ps; | |
2828 | } | |
2829 | ||
2830 | /* Free the PS_INSNs in rows array of the given partial schedule. | |
2831 | ??? Consider caching the PS_INSN's. */ | |
2832 | static void | |
2833 | free_ps_insns (partial_schedule_ptr ps) | |
2834 | { | |
2835 | int i; | |
2836 | ||
2837 | for (i = 0; i < ps->ii; i++) | |
2838 | { | |
2839 | while (ps->rows[i]) | |
2840 | { | |
2841 | ps_insn_ptr ps_insn = ps->rows[i]->next_in_row; | |
2842 | ||
2843 | free (ps->rows[i]); | |
2844 | ps->rows[i] = ps_insn; | |
2845 | } | |
2846 | ps->rows[i] = NULL; | |
2847 | } | |
2848 | } | |
2849 | ||
2850 | /* Free all the memory allocated to the partial schedule. */ | |
5f1f4746 KH |
2851 | |
2852 | static void | |
d397e8c6 MH |
2853 | free_partial_schedule (partial_schedule_ptr ps) |
2854 | { | |
1287d8ea RS |
2855 | ps_reg_move_info *move; |
2856 | unsigned int i; | |
2857 | ||
d397e8c6 MH |
2858 | if (!ps) |
2859 | return; | |
1287d8ea | 2860 | |
9771b263 | 2861 | FOR_EACH_VEC_ELT (ps->reg_moves, i, move) |
1287d8ea | 2862 | sbitmap_free (move->uses); |
9771b263 | 2863 | ps->reg_moves.release (); |
1287d8ea | 2864 | |
d397e8c6 MH |
2865 | free_ps_insns (ps); |
2866 | free (ps->rows); | |
81c41166 | 2867 | free (ps->rows_length); |
d397e8c6 MH |
2868 | free (ps); |
2869 | } | |
2870 | ||
2871 | /* Clear the rows array with its PS_INSNs, and create a new one with | |
2872 | NEW_II rows. */ | |
5f1f4746 KH |
2873 | |
2874 | static void | |
d397e8c6 MH |
2875 | reset_partial_schedule (partial_schedule_ptr ps, int new_ii) |
2876 | { | |
2877 | if (!ps) | |
2878 | return; | |
2879 | free_ps_insns (ps); | |
2880 | if (new_ii == ps->ii) | |
2881 | return; | |
2882 | ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii | |
2883 | * sizeof (ps_insn_ptr)); | |
2884 | memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr)); | |
81c41166 RE |
2885 | ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int)); |
2886 | memset (ps->rows_length, 0, new_ii * sizeof (int)); | |
d397e8c6 MH |
2887 | ps->ii = new_ii; |
2888 | ps->min_cycle = INT_MAX; | |
2889 | ps->max_cycle = INT_MIN; | |
2890 | } | |
2891 | ||
2892 | /* Prints the partial schedule as an ii rows array, for each rows | |
2893 | print the ids of the insns in it. */ | |
2894 | void | |
2895 | print_partial_schedule (partial_schedule_ptr ps, FILE *dump) | |
2896 | { | |
2897 | int i; | |
2898 | ||
2899 | for (i = 0; i < ps->ii; i++) | |
2900 | { | |
2901 | ps_insn_ptr ps_i = ps->rows[i]; | |
2902 | ||
de82c453 | 2903 | fprintf (dump, "\n[ROW %d ]: ", i); |
d397e8c6 MH |
2904 | while (ps_i) |
2905 | { | |
6210ec61 | 2906 | rtx_insn *insn = ps_rtl_insn (ps, ps_i->id); |
88e9c867 RS |
2907 | |
2908 | if (JUMP_P (insn)) | |
2909 | fprintf (dump, "%d (branch), ", INSN_UID (insn)); | |
413e50a2 | 2910 | else |
88e9c867 | 2911 | fprintf (dump, "%d, ", INSN_UID (insn)); |
413e50a2 | 2912 | |
d397e8c6 MH |
2913 | ps_i = ps_i->next_in_row; |
2914 | } | |
2915 | } | |
2916 | } | |
2917 | ||
2918 | /* Creates an object of PS_INSN and initializes it to the given parameters. */ | |
2919 | static ps_insn_ptr | |
88e9c867 | 2920 | create_ps_insn (int id, int cycle) |
d397e8c6 | 2921 | { |
5ed6ace5 | 2922 | ps_insn_ptr ps_i = XNEW (struct ps_insn); |
d397e8c6 | 2923 | |
88e9c867 | 2924 | ps_i->id = id; |
d397e8c6 MH |
2925 | ps_i->next_in_row = NULL; |
2926 | ps_i->prev_in_row = NULL; | |
d397e8c6 MH |
2927 | ps_i->cycle = cycle; |
2928 | ||
2929 | return ps_i; | |
2930 | } | |
2931 | ||
2932 | ||
a9fb4f13 RE |
2933 | /* Removes the given PS_INSN from the partial schedule. */ |
2934 | static void | |
d397e8c6 MH |
2935 | remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i) |
2936 | { | |
2937 | int row; | |
2938 | ||
a9fb4f13 RE |
2939 | gcc_assert (ps && ps_i); |
2940 | ||
d397e8c6 MH |
2941 | row = SMODULO (ps_i->cycle, ps->ii); |
2942 | if (! ps_i->prev_in_row) | |
2943 | { | |
a9fb4f13 | 2944 | gcc_assert (ps_i == ps->rows[row]); |
d397e8c6 MH |
2945 | ps->rows[row] = ps_i->next_in_row; |
2946 | if (ps->rows[row]) | |
2947 | ps->rows[row]->prev_in_row = NULL; | |
2948 | } | |
2949 | else | |
2950 | { | |
2951 | ps_i->prev_in_row->next_in_row = ps_i->next_in_row; | |
2952 | if (ps_i->next_in_row) | |
2953 | ps_i->next_in_row->prev_in_row = ps_i->prev_in_row; | |
2954 | } | |
81c41166 RE |
2955 | |
2956 | ps->rows_length[row] -= 1; | |
d397e8c6 | 2957 | free (ps_i); |
a9fb4f13 | 2958 | return; |
d397e8c6 MH |
2959 | } |
2960 | ||
d72372e4 MH |
2961 | /* Unlike what literature describes for modulo scheduling (which focuses |
2962 | on VLIW machines) the order of the instructions inside a cycle is | |
2963 | important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know | |
2964 | where the current instruction should go relative to the already | |
2965 | scheduled instructions in the given cycle. Go over these | |
2966 | instructions and find the first possible column to put it in. */ | |
2967 | static bool | |
2968 | ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i, | |
2969 | sbitmap must_precede, sbitmap must_follow) | |
2970 | { | |
2971 | ps_insn_ptr next_ps_i; | |
2972 | ps_insn_ptr first_must_follow = NULL; | |
2973 | ps_insn_ptr last_must_precede = NULL; | |
fc6970e4 | 2974 | ps_insn_ptr last_in_row = NULL; |
d72372e4 MH |
2975 | int row; |
2976 | ||
2977 | if (! ps_i) | |
2978 | return false; | |
2979 | ||
2980 | row = SMODULO (ps_i->cycle, ps->ii); | |
2981 | ||
2982 | /* Find the first must follow and the last must precede | |
2a7e31df | 2983 | and insert the node immediately after the must precede |
471854f8 | 2984 | but make sure that it there is no must follow after it. */ |
d72372e4 MH |
2985 | for (next_ps_i = ps->rows[row]; |
2986 | next_ps_i; | |
2987 | next_ps_i = next_ps_i->next_in_row) | |
2988 | { | |
88e9c867 | 2989 | if (must_follow |
d7c028c0 | 2990 | && bitmap_bit_p (must_follow, next_ps_i->id) |
d72372e4 MH |
2991 | && ! first_must_follow) |
2992 | first_must_follow = next_ps_i; | |
d7c028c0 | 2993 | if (must_precede && bitmap_bit_p (must_precede, next_ps_i->id)) |
d72372e4 MH |
2994 | { |
2995 | /* If we have already met a node that must follow, then | |
2996 | there is no possible column. */ | |
2997 | if (first_must_follow) | |
2998 | return false; | |
2999 | else | |
3000 | last_must_precede = next_ps_i; | |
3001 | } | |
fc6970e4 | 3002 | /* The closing branch must be the last in the row. */ |
8d64622f | 3003 | if (JUMP_P (ps_rtl_insn (ps, next_ps_i->id))) |
fc6970e4 RE |
3004 | return false; |
3005 | ||
3006 | last_in_row = next_ps_i; | |
d72372e4 MH |
3007 | } |
3008 | ||
fc6970e4 RE |
3009 | /* The closing branch is scheduled as well. Make sure there is no |
3010 | dependent instruction after it as the branch should be the last | |
3011 | instruction in the row. */ | |
88e9c867 | 3012 | if (JUMP_P (ps_rtl_insn (ps, ps_i->id))) |
fc6970e4 RE |
3013 | { |
3014 | if (first_must_follow) | |
3015 | return false; | |
3016 | if (last_in_row) | |
3017 | { | |
3018 | /* Make the branch the last in the row. New instructions | |
3019 | will be inserted at the beginning of the row or after the | |
3020 | last must_precede instruction thus the branch is guaranteed | |
3021 | to remain the last instruction in the row. */ | |
3022 | last_in_row->next_in_row = ps_i; | |
3023 | ps_i->prev_in_row = last_in_row; | |
3024 | ps_i->next_in_row = NULL; | |
3025 | } | |
3026 | else | |
3027 | ps->rows[row] = ps_i; | |
3028 | return true; | |
3029 | } | |
3030 | ||
d72372e4 MH |
3031 | /* Now insert the node after INSERT_AFTER_PSI. */ |
3032 | ||
3033 | if (! last_must_precede) | |
3034 | { | |
3035 | ps_i->next_in_row = ps->rows[row]; | |
3036 | ps_i->prev_in_row = NULL; | |
3037 | if (ps_i->next_in_row) | |
3038 | ps_i->next_in_row->prev_in_row = ps_i; | |
3039 | ps->rows[row] = ps_i; | |
3040 | } | |
3041 | else | |
3042 | { | |
3043 | ps_i->next_in_row = last_must_precede->next_in_row; | |
3044 | last_must_precede->next_in_row = ps_i; | |
3045 | ps_i->prev_in_row = last_must_precede; | |
3046 | if (ps_i->next_in_row) | |
3047 | ps_i->next_in_row->prev_in_row = ps_i; | |
3048 | } | |
3049 | ||
3050 | return true; | |
3051 | } | |
3052 | ||
d397e8c6 | 3053 | /* Advances the PS_INSN one column in its current row; returns false |
b8698a0f L |
3054 | in failure and true in success. Bit N is set in MUST_FOLLOW if |
3055 | the node with cuid N must be come after the node pointed to by | |
d72372e4 | 3056 | PS_I when scheduled in the same cycle. */ |
d397e8c6 | 3057 | static int |
d72372e4 MH |
3058 | ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i, |
3059 | sbitmap must_follow) | |
d397e8c6 MH |
3060 | { |
3061 | ps_insn_ptr prev, next; | |
3062 | int row; | |
3063 | ||
3064 | if (!ps || !ps_i) | |
3065 | return false; | |
3066 | ||
3067 | row = SMODULO (ps_i->cycle, ps->ii); | |
3068 | ||
3069 | if (! ps_i->next_in_row) | |
3070 | return false; | |
3071 | ||
3072 | /* Check if next_in_row is dependent on ps_i, both having same sched | |
3073 | times (typically ANTI_DEP). If so, ps_i cannot skip over it. */ | |
d7c028c0 | 3074 | if (must_follow && bitmap_bit_p (must_follow, ps_i->next_in_row->id)) |
d72372e4 | 3075 | return false; |
d397e8c6 | 3076 | |
2a7e31df | 3077 | /* Advance PS_I over its next_in_row in the doubly linked list. */ |
d397e8c6 MH |
3078 | prev = ps_i->prev_in_row; |
3079 | next = ps_i->next_in_row; | |
3080 | ||
3081 | if (ps_i == ps->rows[row]) | |
3082 | ps->rows[row] = next; | |
3083 | ||
3084 | ps_i->next_in_row = next->next_in_row; | |
3085 | ||
3086 | if (next->next_in_row) | |
3087 | next->next_in_row->prev_in_row = ps_i; | |
3088 | ||
3089 | next->next_in_row = ps_i; | |
3090 | ps_i->prev_in_row = next; | |
3091 | ||
3092 | next->prev_in_row = prev; | |
3093 | if (prev) | |
3094 | prev->next_in_row = next; | |
3095 | ||
3096 | return true; | |
3097 | } | |
3098 | ||
3099 | /* Inserts a DDG_NODE to the given partial schedule at the given cycle. | |
b8698a0f L |
3100 | Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is |
3101 | set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come | |
3102 | before/after (respectively) the node pointed to by PS_I when scheduled | |
d72372e4 | 3103 | in the same cycle. */ |
d397e8c6 | 3104 | static ps_insn_ptr |
88e9c867 | 3105 | add_node_to_ps (partial_schedule_ptr ps, int id, int cycle, |
d72372e4 | 3106 | sbitmap must_precede, sbitmap must_follow) |
d397e8c6 | 3107 | { |
d72372e4 | 3108 | ps_insn_ptr ps_i; |
d397e8c6 | 3109 | int row = SMODULO (cycle, ps->ii); |
d397e8c6 | 3110 | |
81c41166 | 3111 | if (ps->rows_length[row] >= issue_rate) |
d397e8c6 MH |
3112 | return NULL; |
3113 | ||
88e9c867 | 3114 | ps_i = create_ps_insn (id, cycle); |
d72372e4 MH |
3115 | |
3116 | /* Finds and inserts PS_I according to MUST_FOLLOW and | |
3117 | MUST_PRECEDE. */ | |
3118 | if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow)) | |
3119 | { | |
3120 | free (ps_i); | |
3121 | return NULL; | |
3122 | } | |
d397e8c6 | 3123 | |
81c41166 | 3124 | ps->rows_length[row] += 1; |
d397e8c6 MH |
3125 | return ps_i; |
3126 | } | |
3127 | ||
3128 | /* Advance time one cycle. Assumes DFA is being used. */ | |
3129 | static void | |
3130 | advance_one_cycle (void) | |
3131 | { | |
fa0aee89 PB |
3132 | if (targetm.sched.dfa_pre_cycle_insn) |
3133 | state_transition (curr_state, | |
1c91de89 | 3134 | targetm.sched.dfa_pre_cycle_insn ()); |
d397e8c6 | 3135 | |
fa0aee89 | 3136 | state_transition (curr_state, NULL); |
d397e8c6 | 3137 | |
fa0aee89 PB |
3138 | if (targetm.sched.dfa_post_cycle_insn) |
3139 | state_transition (curr_state, | |
1c91de89 | 3140 | targetm.sched.dfa_post_cycle_insn ()); |
d397e8c6 MH |
3141 | } |
3142 | ||
f73d5666 | 3143 | |
f73d5666 | 3144 | |
d397e8c6 MH |
3145 | /* Checks if PS has resource conflicts according to DFA, starting from |
3146 | FROM cycle to TO cycle; returns true if there are conflicts and false | |
3147 | if there are no conflicts. Assumes DFA is being used. */ | |
3148 | static int | |
3149 | ps_has_conflicts (partial_schedule_ptr ps, int from, int to) | |
3150 | { | |
3151 | int cycle; | |
3152 | ||
d397e8c6 MH |
3153 | state_reset (curr_state); |
3154 | ||
3155 | for (cycle = from; cycle <= to; cycle++) | |
3156 | { | |
3157 | ps_insn_ptr crr_insn; | |
3158 | /* Holds the remaining issue slots in the current row. */ | |
3159 | int can_issue_more = issue_rate; | |
3160 | ||
3161 | /* Walk through the DFA for the current row. */ | |
3162 | for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)]; | |
3163 | crr_insn; | |
3164 | crr_insn = crr_insn->next_in_row) | |
3165 | { | |
6210ec61 | 3166 | rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id); |
d397e8c6 | 3167 | |
d397e8c6 MH |
3168 | /* Check if there is room for the current insn. */ |
3169 | if (!can_issue_more || state_dead_lock_p (curr_state)) | |
3170 | return true; | |
3171 | ||
3172 | /* Update the DFA state and return with failure if the DFA found | |
fa10beec | 3173 | resource conflicts. */ |
d397e8c6 MH |
3174 | if (state_transition (curr_state, insn) >= 0) |
3175 | return true; | |
3176 | ||
3177 | if (targetm.sched.variable_issue) | |
3178 | can_issue_more = | |
1c91de89 KH |
3179 | targetm.sched.variable_issue (sched_dump, sched_verbose, |
3180 | insn, can_issue_more); | |
d397e8c6 MH |
3181 | /* A naked CLOBBER or USE generates no instruction, so don't |
3182 | let them consume issue slots. */ | |
3183 | else if (GET_CODE (PATTERN (insn)) != USE | |
3184 | && GET_CODE (PATTERN (insn)) != CLOBBER) | |
3185 | can_issue_more--; | |
3186 | } | |
3187 | ||
3188 | /* Advance the DFA to the next cycle. */ | |
3189 | advance_one_cycle (); | |
3190 | } | |
3191 | return false; | |
3192 | } | |
3193 | ||
/* Checks if the given node causes resource conflicts when added to PS at
   cycle C.  If not the node is added to PS and returned; otherwise zero
   is returned.  Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
   cuid N must be come before/after (respectively) the node pointed to by
   PS_I when scheduled in the same cycle.  */
ps_insn_ptr
ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
   			     int c, sbitmap must_precede,
			     sbitmap must_follow)
{
  int i, first, amount, has_conflicts = 0;
  ps_insn_ptr ps_i;

  /* First add the node to the PS, if this succeeds check for
     conflicts, trying different issue slots in the same row.  */
  if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
    return NULL; /* Failed to insert the node at the given cycle.  */

  while (1)
    {
      /* Start with the single cycle C itself.  */
      has_conflicts = ps_has_conflicts (ps, c, c);
      if (ps->history > 0 && !has_conflicts)
	{
	  /* Check all 2h+1 intervals, starting from c-2h..c up to c..2h,
	     but not more than ii intervals.  */
	  first = c - ps->history;
	  amount = 2 * ps->history + 1;
	  if (amount > ps->ii)
	    amount = ps->ii;
	  for (i = first; i < first + amount; i++)
	    {
	      has_conflicts = ps_has_conflicts (ps,
						i - ps->history,
						i + ps->history);
	      if (has_conflicts)
		break;
	    }
	}
      /* Found a conflict-free placement.  */
      if (!has_conflicts)
	break;
      /* Try different issue slots to find one that the given node can be
	 scheduled in without conflicts.  */
      if (! ps_insn_advance_column (ps, ps_i, must_follow))
	break;
    }

  /* Every issue slot in the row conflicted; undo the insertion.  */
  if (has_conflicts)
    {
      remove_node_from_ps (ps, ps_i);
      return NULL;
    }

  /* Success: widen the schedule's cycle range to cover C.  */
  ps->min_cycle = MIN (ps->min_cycle, c);
  ps->max_cycle = MAX (ps->max_cycle, c);
  return ps_i;
}
3250 | ||
fc6970e4 | 3251 | /* Calculate the stage count of the partial schedule PS. The calculation |
edc429ff | 3252 | takes into account the rotation amount passed in ROTATION_AMOUNT. */ |
fc6970e4 | 3253 | int |
edc429ff | 3254 | calculate_stage_count (partial_schedule_ptr ps, int rotation_amount) |
fc6970e4 | 3255 | { |
fc6970e4 RE |
3256 | int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount; |
3257 | int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount; | |
3258 | int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii); | |
3259 | ||
3260 | /* The calculation of stage count is done adding the number of stages | |
3261 | before cycle zero and after cycle zero. */ | |
3262 | stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii); | |
3263 | ||
3264 | return stage_count; | |
3265 | } | |
3266 | ||
d397e8c6 MH |
3267 | /* Rotate the rows of PS such that insns scheduled at time |
3268 | START_CYCLE will appear in row 0. Updates max/min_cycles. */ | |
f73d5666 | 3269 | void |
d397e8c6 MH |
3270 | rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle) |
3271 | { | |
3272 | int i, row, backward_rotates; | |
3273 | int last_row = ps->ii - 1; | |
3274 | ||
3275 | if (start_cycle == 0) | |
3276 | return; | |
3277 | ||
3278 | backward_rotates = SMODULO (start_cycle, ps->ii); | |
3279 | ||
3280 | /* Revisit later and optimize this into a single loop. */ | |
3281 | for (i = 0; i < backward_rotates; i++) | |
3282 | { | |
3283 | ps_insn_ptr first_row = ps->rows[0]; | |
81c41166 | 3284 | int first_row_length = ps->rows_length[0]; |
d397e8c6 MH |
3285 | |
3286 | for (row = 0; row < last_row; row++) | |
81c41166 RE |
3287 | { |
3288 | ps->rows[row] = ps->rows[row + 1]; | |
3289 | ps->rows_length[row] = ps->rows_length[row + 1]; | |
3290 | } | |
d397e8c6 MH |
3291 | |
3292 | ps->rows[last_row] = first_row; | |
81c41166 | 3293 | ps->rows_length[last_row] = first_row_length; |
d397e8c6 MH |
3294 | } |
3295 | ||
3296 | ps->max_cycle -= start_cycle; | |
3297 | ps->min_cycle -= start_cycle; | |
3298 | } | |
d7777192 | 3299 | |
ef330312 PB |
3300 | #endif /* INSN_SCHEDULING */ |
3301 | \f | |
ef330312 PB |
/* Run the instruction scheduler: perform SMS (Swing Modulo Scheduling).  */
ef330312 | 3304 | |
namespace {

/* Pass descriptor for the SMS pass.  */
const pass_data pass_data_sms =
{
  RTL_PASS, /* type */
  "sms", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_SMS, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
27a4cd48 DM |
3319 | |
/* RTL optimization pass performing Swing Modulo Scheduling.  */
class pass_sms : public rtl_opt_pass
{
public:
  pass_sms (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_sms, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when optimizing and -fmodulo-sched was requested.  */
  virtual bool gate (function *)
    {
      return (optimize > 0 && flag_modulo_sched);
    }

  virtual unsigned int execute (function *);

}; // class pass_sms
3336 | ||
/* Main entry point of the pass: set up cfg-layout mode, run the
   scheduler, then restore the CFG.  Always returns 0 (no extra
   TODO flags).  */
unsigned int
pass_sms::execute (function *fun ATTRIBUTE_UNUSED)
{
#ifdef INSN_SCHEDULING
  basic_block bb;

  /* Collect loop information to be used in SMS.  */
  cfg_layout_initialize (0);
  sms_schedule ();

  /* Update the life information, because we add pseudos.  */
  max_regno = max_reg_num ();

  /* Finalize layout changes: chain each block to its successor via
     the aux field, as cfg_layout_finalize expects.  */
  FOR_EACH_BB_FN (bb, fun)
    if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
      bb->aux = bb->next_bb;
  free_dominance_info (CDI_DOMINATORS);
  cfg_layout_finalize ();
#endif /* INSN_SCHEDULING */
  return 0;
}
3359 | ||
27a4cd48 DM |
3360 | } // anon namespace |
3361 | ||
3362 | rtl_opt_pass * | |
3363 | make_pass_sms (gcc::context *ctxt) | |
3364 | { | |
3365 | return new pass_sms (ctxt); | |
3366 | } |