]>
Commit | Line | Data |
---|---|---|
d397e8c6 | 1 | /* Swing Modulo Scheduling implementation. |
fa10beec | 2 | Copyright (C) 2004, 2005, 2006, 2007, 2008 |
d397e8c6 MH |
3 | Free Software Foundation, Inc. |
4 | Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com> | |
5 | ||
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify it under | |
9 | the terms of the GNU General Public License as published by the Free | |
9dcd6f09 | 10 | Software Foundation; either version 3, or (at your option) any later |
d397e8c6 MH |
11 | version. |
12 | ||
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ | |
d397e8c6 MH |
21 | |
22 | ||
23 | #include "config.h" | |
24 | #include "system.h" | |
25 | #include "coretypes.h" | |
26 | #include "tm.h" | |
27 | #include "toplev.h" | |
28 | #include "rtl.h" | |
29 | #include "tm_p.h" | |
30 | #include "hard-reg-set.h" | |
d397e8c6 MH |
31 | #include "regs.h" |
32 | #include "function.h" | |
33 | #include "flags.h" | |
34 | #include "insn-config.h" | |
35 | #include "insn-attr.h" | |
36 | #include "except.h" | |
37 | #include "toplev.h" | |
38 | #include "recog.h" | |
39 | #include "sched-int.h" | |
40 | #include "target.h" | |
41 | #include "cfglayout.h" | |
42 | #include "cfgloop.h" | |
43 | #include "cfghooks.h" | |
44 | #include "expr.h" | |
45 | #include "params.h" | |
46 | #include "gcov-io.h" | |
d397e8c6 | 47 | #include "ddg.h" |
ef330312 PB |
48 | #include "timevar.h" |
49 | #include "tree-pass.h" | |
97511ad7 | 50 | #include "dbgcnt.h" |
7a8cba34 | 51 | #include "df.h" |
d397e8c6 | 52 | |
d7777192 | 53 | #ifdef INSN_SCHEDULING |
d397e8c6 MH |
54 | |
55 | /* This file contains the implementation of the Swing Modulo Scheduler, | |
56 | described in the following references: | |
57 | [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt. | |
58 | Lifetime--sensitive modulo scheduling in a production environment. | |
59 | IEEE Trans. on Comps., 50(3), March 2001 | |
60 | [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero. | |
61 | Swing Modulo Scheduling: A Lifetime Sensitive Approach. | |
1ea7e6ad | 62 | PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA). |
d397e8c6 MH |
63 | |
64 | The basic structure is: | |
65 | 1. Build a data-dependence graph (DDG) for each loop. | |
66 | 2. Use the DDG to order the insns of a loop (not in topological order | |
67 | necessarily, but rather) trying to place each insn after all its | |
68 | predecessors _or_ after all its successors. | |
69 | 3. Compute MII: a lower bound on the number of cycles to schedule the loop. | |
70 | 4. Use the ordering to perform list-scheduling of the loop: | |
71 | 1. Set II = MII. We will try to schedule the loop within II cycles. | |
72 | 2. Try to schedule the insns one by one according to the ordering. | |
73 | For each insn compute an interval of cycles by considering already- | |
74 | scheduled preds and succs (and associated latencies); try to place | |
75 | the insn in the cycles of this window checking for potential | |
76 | resource conflicts (using the DFA interface). | |
77 | Note: this is different from the cycle-scheduling of schedule_insns; | |
78 | here the insns are not scheduled monotonically top-down (nor bottom- | |
79 | up). | |
80 | 3. If failed in scheduling all insns - bump II++ and try again, unless | |
f6fe65dc | 81 | II reaches an upper bound MaxII, in which case report failure. |
d397e8c6 MH |
82 | 5. If we succeeded in scheduling the loop within II cycles, we now |
83 | generate prolog and epilog, decrease the counter of the loop, and | |
84 | perform modulo variable expansion for live ranges that span more than | |
85 | II cycles (i.e. use register copies to prevent a def from overwriting | |
86 | itself before reaching the use). | |
d397e8c6 | 87 | |
46dc0789 MN |
88 | SMS works with countable loops (1) whose control part can be easily |
89 | decoupled from the rest of the loop and (2) whose loop count can | |
90 | be easily adjusted. This is because we peel a constant number of | |
91 | iterations into a prologue and epilogue for which we want to avoid | |
92 | emitting the control part, and a kernel which is to iterate that | |
93 | constant number of iterations less than the original loop. So the | |
94 | control part should be a set of insns clearly identified and having | |
95 | its own iv, not otherwise used in the loop (at-least for now), which | |
96 | initializes a register before the loop to the number of iterations. | |
97 | Currently SMS relies on the do-loop pattern to recognize such loops, | |
98 | where (1) the control part comprises of all insns defining and/or | |
99 | using a certain 'count' register and (2) the loop count can be | |
b8698a0f | 100 | adjusted by modifying this register prior to the loop. |
46dc0789 | 101 | TODO: Rely on cfgloop analysis instead. */ |
d397e8c6 MH |
/* This page defines partial-schedule structures and functions for
   modulo scheduling.  */

typedef struct partial_schedule *partial_schedule_ptr;
typedef struct ps_insn *ps_insn_ptr;

/* The minimum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)

/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)

/* Perform signed modulo, always returning a non-negative value.
   Note: both operands are evaluated more than once; pass only
   side-effect-free expressions.  */
#define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))

/* The number of different iterations the nodes in ps span, assuming
   the stage boundaries are placed efficiently.  This is
   CEIL (max_cycle - min_cycle + 1, ii), written with the usual
   "+ divisor - 1" rounding trick.  */
#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
			     + 1 + (ps)->ii - 1) / (ps)->ii)
122 | ||
/* A single instruction in the partial schedule.  */
struct ps_insn
{
  /* The corresponding DDG_NODE.  */
  ddg_node_ptr node;

  /* The (absolute) cycle in which the PS instruction is scheduled.
     Same as SCHED_TIME (node).  */
  int cycle;

  /* The next/prev PS_INSN in the same row (i.e. scheduled in the same
     cycle modulo ii).  */
  ps_insn_ptr next_in_row,
	      prev_in_row;

  /* The number of nodes in the same row that come after this node.  */
  int row_rest_count;
};
140 | ||
/* Holds the partial schedule as an array of II rows.  Each entry of the
   array points to a linked list of PS_INSNs, which represents the
   instructions that are scheduled for that row.  */
struct partial_schedule
{
  int ii;	/* Number of rows in the partial schedule.  */
  int history;  /* Threshold for conflict checking using DFA.  */

  /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
  ps_insn_ptr *rows;

  /* The earliest absolute cycle of an insn in the partial schedule.  */
  int min_cycle;

  /* The latest absolute cycle of an insn in the partial schedule.  */
  int max_cycle;

  ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */
};
160 | ||
/* We use this to record all the register replacements we do in
   the kernel so we can undo SMS if it is not profitable.
   Each element records one replacement of ORIG_REG by NEW_REG
   inside INSN; elements are chained through NEXT.  */
struct undo_replace_buff_elem
{
  rtx insn;
  rtx orig_reg;
  rtx new_reg;
  struct undo_replace_buff_elem *next;
};
d397e8c6 | 170 | |
f73d5666 | 171 | |
b8698a0f | 172 | |
/* Forward declarations for the partial-schedule primitives defined
   later in this file.  */
static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
static void free_partial_schedule (partial_schedule_ptr);
static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
void print_partial_schedule (partial_schedule_ptr, FILE *);
static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
						ddg_node_ptr node, int cycle,
						sbitmap must_precede,
						sbitmap must_follow);
static void rotate_partial_schedule (partial_schedule_ptr, int);
void set_row_column_for_ps (partial_schedule_ptr);
static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
f73d5666 | 186 | |
d397e8c6 | 187 | \f |
1ea7e6ad | 188 | /* This page defines constants and structures for the modulo scheduling |
d397e8c6 MH |
189 | driver. */ |
190 | ||
5cd53742 | 191 | static int sms_order_nodes (ddg_ptr, int, int *, int *); |
d397e8c6 | 192 | static void set_node_sched_params (ddg_ptr); |
10d22567 | 193 | static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *); |
5cd53742 DZ |
194 | static void permute_partial_schedule (partial_schedule_ptr, rtx); |
195 | static void generate_prolog_epilog (partial_schedule_ptr, struct loop *, | |
46dc0789 | 196 | rtx, rtx); |
5cd53742 DZ |
197 | static void duplicate_insns_of_cycles (partial_schedule_ptr, |
198 | int, int, int, rtx); | |
d397e8c6 | 199 | |
d397e8c6 MH |
200 | #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) |
201 | #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) | |
202 | #define SCHED_FIRST_REG_MOVE(x) \ | |
203 | (((node_sched_params_ptr)(x)->aux.info)->first_reg_move) | |
204 | #define SCHED_NREG_MOVES(x) \ | |
205 | (((node_sched_params_ptr)(x)->aux.info)->nreg_moves) | |
206 | #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row) | |
207 | #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage) | |
208 | #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column) | |
209 | ||
210 | /* The scheduling parameters held for each node. */ | |
211 | typedef struct node_sched_params | |
212 | { | |
213 | int asap; /* A lower-bound on the absolute scheduling cycle. */ | |
214 | int time; /* The absolute scheduling cycle (time >= asap). */ | |
215 | ||
216 | /* The following field (first_reg_move) is a pointer to the first | |
217 | register-move instruction added to handle the modulo-variable-expansion | |
218 | of the register defined by this node. This register-move copies the | |
219 | original register defined by the node. */ | |
220 | rtx first_reg_move; | |
221 | ||
1ea7e6ad | 222 | /* The number of register-move instructions added, immediately preceding |
d397e8c6 MH |
223 | first_reg_move. */ |
224 | int nreg_moves; | |
225 | ||
226 | int row; /* Holds time % ii. */ | |
227 | int stage; /* Holds time / ii. */ | |
228 | ||
229 | /* The column of a node inside the ps. If nodes u, v are on the same row, | |
1ea7e6ad | 230 | u will precede v if column (u) < column (v). */ |
d397e8c6 MH |
231 | int column; |
232 | } *node_sched_params_ptr; | |
233 | ||
234 | \f | |
235 | /* The following three functions are copied from the current scheduler | |
61ada8ae | 236 | code in order to use sched_analyze() for computing the dependencies. |
d397e8c6 MH |
237 | They are used when initializing the sched_info structure. */ |
238 | static const char * | |
e855c69d | 239 | sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED) |
d397e8c6 MH |
240 | { |
241 | static char tmp[80]; | |
242 | ||
243 | sprintf (tmp, "i%4d", INSN_UID (insn)); | |
244 | return tmp; | |
245 | } | |
246 | ||
/* Hook required by the sched_deps_info interface.  Intentionally empty:
   SMS records no register dependencies for jumps here (all parameters
   are unused).  */
static void
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
			       regset cond_exec ATTRIBUTE_UNUSED,
			       regset used ATTRIBUTE_UNUSED,
			       regset set ATTRIBUTE_UNUSED)
{
}
254 | ||
static struct common_sched_info_def sms_common_sched_info;

/* Dependence-analysis hooks handed to sched_analyze.  Only the
   jump-register hook is supplied (as a no-op above); all other
   positional callbacks are NULL / 0.  The field order is fixed by
   struct sched_deps_info_def — do not reorder.  */
static struct sched_deps_info_def sms_sched_deps_info =
  {
    compute_jump_reg_dependencies,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL,
    0, 0, 0
  };

/* Haifa scheduler hooks used while building dependencies for SMS.
   Only insn printing (sms_print_insn) is needed; the remaining
   positional fields are left NULL / 0.  */
static struct haifa_sched_info sms_sched_info =
{
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  sms_print_insn,
  NULL,
  NULL, /* insn_finishes_block_p */
  NULL, NULL,
  NULL, NULL,
  0, 0,

  NULL, NULL, NULL,
  0
};
282 | ||
/* Given HEAD and TAIL which are the first and last insns in a loop;
   return the register which controls the loop.  Return zero if it has
   more than one occurrence in the loop besides the control part or the
   do-loop pattern is not of the form we expect.  */
static rtx
doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
{
#ifdef HAVE_doloop_end
  rtx reg, condition, insn, first_insn_not_to_check;

  if (!JUMP_P (tail))
    return NULL_RTX;

  /* TODO: Free SMS's dependence on doloop_condition_get.  */
  condition = doloop_condition_get (tail);
  if (! condition)
    return NULL_RTX;

  /* The condition is either (reg) or (plus (reg) (const)); extract the
     count register.  Any other shape should have been rejected by
     doloop_condition_get.  */
  if (REG_P (XEXP (condition, 0)))
    reg = XEXP (condition, 0);
  else if (GET_CODE (XEXP (condition, 0)) == PLUS
	   && REG_P (XEXP (XEXP (condition, 0), 0)))
    reg = XEXP (XEXP (condition, 0), 0);
  else
    gcc_unreachable ();

  /* Check that the COUNT_REG has no other occurrences in the loop
     until the decrement.  We assume the control part consists of
     either a single (parallel) branch-on-count or a (non-parallel)
     branch immediately preceded by a single (decrement) insn.  */
  first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
			     : PREV_INSN (tail));

  for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
    if (reg_mentioned_p (reg, insn))
      {
	if (dump_file)
	  {
	    fprintf (dump_file, "SMS count_reg found ");
	    print_rtl_single (dump_file, reg);
	    fprintf (dump_file, " outside control in insn:\n");
	    print_rtl_single (dump_file, insn);
	  }

	return NULL_RTX;
      }

  return reg;
#else
  /* Without a doloop_end pattern SMS cannot identify the count
     register.  */
  return NULL_RTX;
#endif
}
335 | ||
/* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
   that the number of iterations is a compile-time constant.  If so,
   return the rtx that sets COUNT_REG to a constant, and set COUNT to
   this constant.  Otherwise return 0.  */
static rtx
const_iteration_count (rtx count_reg, basic_block pre_header,
		       HOST_WIDEST_INT * count)
{
  rtx insn;
  rtx head, tail;

  if (! pre_header)
    return NULL_RTX;

  get_ebb_head_tail (pre_header, pre_header, &head, &tail);

  /* Walk the pre-header backwards so we find the LAST set of
     COUNT_REG before the loop.  */
  for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
    if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
	rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
      {
	rtx pat = single_set (insn);

	if (CONST_INT_P (SET_SRC (pat)))
	  {
	    *count = INTVAL (SET_SRC (pat));
	    return insn;
	  }

	/* COUNT_REG is set, but not to a compile-time constant.  */
	return NULL_RTX;
      }

  return NULL_RTX;
}
369 | ||
370 | /* A very simple resource-based lower bound on the initiation interval. | |
371 | ??? Improve the accuracy of this bound by considering the | |
372 | utilization of various units. */ | |
373 | static int | |
374 | res_MII (ddg_ptr g) | |
375 | { | |
67186a97 | 376 | if (targetm.sched.sms_res_mii) |
b8698a0f L |
377 | return targetm.sched.sms_res_mii (g); |
378 | ||
b5b8b0ac | 379 | return ((g->num_nodes - g->num_debug) / issue_rate); |
d397e8c6 MH |
380 | } |
381 | ||
382 | ||
383 | /* Points to the array that contains the sched data for each node. */ | |
384 | static node_sched_params_ptr node_sched_params; | |
385 | ||
386 | /* Allocate sched_params for each node and initialize it. Assumes that | |
387 | the aux field of each node contain the asap bound (computed earlier), | |
388 | and copies it into the sched_params field. */ | |
389 | static void | |
390 | set_node_sched_params (ddg_ptr g) | |
391 | { | |
392 | int i; | |
393 | ||
394 | /* Allocate for each node in the DDG a place to hold the "sched_data". */ | |
395 | /* Initialize ASAP/ALAP/HIGHT to zero. */ | |
396 | node_sched_params = (node_sched_params_ptr) | |
397 | xcalloc (g->num_nodes, | |
398 | sizeof (struct node_sched_params)); | |
399 | ||
400 | /* Set the pointer of the general data of the node to point to the | |
61ada8ae | 401 | appropriate sched_params structure. */ |
d397e8c6 MH |
402 | for (i = 0; i < g->num_nodes; i++) |
403 | { | |
404 | /* Watch out for aliasing problems? */ | |
405 | node_sched_params[i].asap = g->nodes[i].aux.count; | |
406 | g->nodes[i].aux.info = &node_sched_params[i]; | |
407 | } | |
408 | } | |
409 | ||
/* Dump to FILE the scheduling parameters (asap, time, reg-move count
   and the reg-move insns themselves) of the first NUM_NODES nodes of G.
   No-op when FILE is NULL.  */
static void
print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
{
  int i;

  if (! file)
    return;
  for (i = 0; i < num_nodes; i++)
    {
      node_sched_params_ptr nsp = &node_sched_params[i];
      rtx reg_move = nsp->first_reg_move;
      int j;

      fprintf (file, "Node = %d; INSN = %d\n", i,
	       (INSN_UID (g->nodes[i].insn)));
      fprintf (file, " asap = %d:\n", nsp->asap);
      fprintf (file, " time = %d:\n", nsp->time);
      fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
      /* The reg-moves are chained backwards starting at first_reg_move
	 (see generate_reg_moves).  */
      for (j = 0; j < nsp->nreg_moves; j++)
	{
	  fprintf (file, " reg_move = ");
	  print_rtl_single (file, reg_move);
	  reg_move = PREV_INSN (reg_move);
	}
    }
}
436 | ||
/*
   Breaking intra-loop register anti-dependences:
   Each intra-loop register anti-dependence implies a cross-iteration true
   dependence of distance 1.  Therefore, we can remove such false dependencies
   and figure out if the partial schedule broke them by checking if (for a
   true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
   if so generate a register move.  The number of such moves is equal to:
              SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
   nreg_moves = ----------------------------------- + 1 - {   dependence.
                            ii                            { 1 if not.

   Returns a linked list recording every register replacement made, so
   the caller can undo SMS if it turns out not to be profitable.  When
   RESCAN is true each modified insn is rescanned for dataflow.  */
static struct undo_replace_buff_elem *
generate_reg_moves (partial_schedule_ptr ps, bool rescan)
{
  ddg_ptr g = ps->g;
  int ii = ps->ii;
  int i;
  struct undo_replace_buff_elem *reg_move_replaces = NULL;

  for (i = 0; i < g->num_nodes; i++)
    {
      ddg_node_ptr u = &g->nodes[i];
      ddg_edge_ptr e;
      int nreg_moves = 0, i_reg_move;
      sbitmap *uses_of_defs;
      rtx last_reg_move;
      rtx prev_reg, old_reg;

      /* Compute the number of reg_moves needed for u, by looking at life
	 ranges started at u (excluding self-loops).  */
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	  {
	    int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

	    /* A distance-1 dependence reads the value defined one
	       iteration (ii cycles) earlier.  */
	    if (e->distance == 1)
	      nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;

	    /* If dest precedes src in the schedule of the kernel, then dest
	       will read before src writes and we can save one reg_copy.  */
	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
	      nreg_moves4e--;

	    nreg_moves = MAX (nreg_moves, nreg_moves4e);
	  }

      if (nreg_moves == 0)
	continue;

      /* Every use of the register defined by node may require a different
	 copy of this register, depending on the time the use is scheduled.
	 Set a bitmap vector, telling which nodes use each copy of this
	 register.  */
      uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
      sbitmap_vector_zero (uses_of_defs, nreg_moves);
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	  {
	    int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

	    if (e->distance == 1)
	      dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;

	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
	      dest_copy--;

	    if (dest_copy)
	      SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
	  }

      /* Now generate the reg_moves, attaching relevant uses to them.  */
      SCHED_NREG_MOVES (u) = nreg_moves;
      old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
      /* Insert the reg-moves right before the notes which precede
	 the insn they relates to.  */
      last_reg_move = u->first_note;

      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
	{
	  unsigned int i_use = 0;
	  rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
	  rtx reg_move = gen_move_insn (new_reg, prev_reg);
	  sbitmap_iterator sbi;

	  add_insn_before (reg_move, last_reg_move, NULL);
	  last_reg_move = reg_move;

	  if (!SCHED_FIRST_REG_MOVE (u))
	    SCHED_FIRST_REG_MOVE (u) = reg_move;

	  /* Redirect each use assigned to this copy to the fresh
	     register, recording the change in the undo buffer.  */
	  EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
	    {
	      struct undo_replace_buff_elem *rep;

	      rep = (struct undo_replace_buff_elem *)
		    xcalloc (1, sizeof (struct undo_replace_buff_elem));
	      rep->insn = g->nodes[i_use].insn;
	      rep->orig_reg = old_reg;
	      rep->new_reg = new_reg;

	      if (! reg_move_replaces)
		reg_move_replaces = rep;
	      else
		{
		  rep->next = reg_move_replaces;
		  reg_move_replaces = rep;
		}

	      replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
	      if (rescan)
		df_insn_rescan (g->nodes[i_use].insn);
	    }

	  prev_reg = new_reg;
	}
      sbitmap_vector_free (uses_of_defs);
    }
  return reg_move_replaces;
}
558 | ||
f73d5666 MH |
559 | /* Free memory allocated for the undo buffer. */ |
560 | static void | |
561 | free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces) | |
562 | { | |
563 | ||
564 | while (reg_move_replaces) | |
565 | { | |
566 | struct undo_replace_buff_elem *rep = reg_move_replaces; | |
567 | ||
568 | reg_move_replaces = reg_move_replaces->next; | |
569 | free (rep); | |
570 | } | |
d397e8c6 MH |
571 | } |
572 | ||
/* Bump the SCHED_TIMEs of all nodes to start from zero.  Set the values
   of SCHED_ROW and SCHED_STAGE.  */
static void
normalize_sched_times (partial_schedule_ptr ps)
{
  int row;
  int amount = PS_MIN_CYCLE (ps);	/* Offset subtracted from every time.  */
  int ii = ps->ii;
  ps_insn_ptr crr_insn;

  for (row = 0; row < ii; row++)
    for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
      {
	ddg_node_ptr u = crr_insn->node;
	int normalized_time = SCHED_TIME (u) - amount;

	if (dump_file)
	  fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
		   min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME
		   (u), ps->min_cycle);
	/* Every scheduled time must lie within [min_cycle, max_cycle].  */
	gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
	gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
	SCHED_TIME (u) = normalized_time;
	SCHED_ROW (u) = normalized_time % ii;
	SCHED_STAGE (u) = normalized_time / ii;
      }
}
600 | ||
601 | /* Set SCHED_COLUMN of each node according to its position in PS. */ | |
602 | static void | |
603 | set_columns_for_ps (partial_schedule_ptr ps) | |
604 | { | |
605 | int row; | |
606 | ||
607 | for (row = 0; row < ps->ii; row++) | |
608 | { | |
609 | ps_insn_ptr cur_insn = ps->rows[row]; | |
610 | int column = 0; | |
611 | ||
612 | for (; cur_insn; cur_insn = cur_insn->next_in_row) | |
613 | SCHED_COLUMN (cur_insn->node) = column++; | |
614 | } | |
615 | } | |
616 | ||
617 | /* Permute the insns according to their order in PS, from row 0 to | |
618 | row ii-1, and position them right before LAST. This schedules | |
619 | the insns of the loop kernel. */ | |
620 | static void | |
621 | permute_partial_schedule (partial_schedule_ptr ps, rtx last) | |
622 | { | |
623 | int ii = ps->ii; | |
624 | int row; | |
625 | ps_insn_ptr ps_ij; | |
626 | ||
627 | for (row = 0; row < ii ; row++) | |
628 | for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) | |
629 | if (PREV_INSN (last) != ps_ij->node->insn) | |
630 | reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn, | |
631 | PREV_INSN (last)); | |
632 | } | |
633 | ||
/* Emit (into the current sequence) copies of the insns of PS whose
   stage lies in [FROM_STAGE, TO_STAGE], together with the appropriate
   subset of each node's reg-moves, for the prolog (FOR_PROLOG != 0) or
   the epilog.  Insns mentioning COUNT_REG (the loop control part) are
   never duplicated.  */
static void
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
			   int to_stage, int for_prolog, rtx count_reg)
{
  int row;
  ps_insn_ptr ps_ij;

  for (row = 0; row < ps->ii; row++)
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
      {
	ddg_node_ptr u_node = ps_ij->node;
	int j, i_reg_moves;
	rtx reg_move = NULL_RTX;

	/* Do not duplicate any insn which refers to count_reg as it
	   belongs to the control part.
	   TODO: This should be done by analyzing the control part of
	   the loop.  */
	if (reg_mentioned_p (count_reg, u_node->insn))
	  continue;

	if (for_prolog)
	  {
	    /* SCHED_STAGE (u_node) >= from_stage == 0.  Generate increasing
	       number of reg_moves starting with the second occurrence of
	       u_node, which is generated if its SCHED_STAGE <= to_stage.  */
	    i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
	    i_reg_moves = MAX (i_reg_moves, 0);
	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

	    /* The reg_moves start from the *first* reg_move backwards.  */
	    if (i_reg_moves)
	      {
		reg_move = SCHED_FIRST_REG_MOVE (u_node);
		for (j = 1; j < i_reg_moves; j++)
		  reg_move = PREV_INSN (reg_move);
	      }
	  }
	else /* It's for the epilog.  */
	  {
	    /* SCHED_STAGE (u_node) <= to_stage.  Generate all reg_moves,
	       starting to decrease one stage after u_node no longer occurs;
	       that is, generate all reg_moves until
	       SCHED_STAGE (u_node) == from_stage - 1.  */
	    i_reg_moves = SCHED_NREG_MOVES (u_node)
			  - (from_stage - SCHED_STAGE (u_node) - 1);
	    i_reg_moves = MAX (i_reg_moves, 0);
	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

	    /* The reg_moves start from the *last* reg_move forwards.  */
	    if (i_reg_moves)
	      {
		reg_move = SCHED_FIRST_REG_MOVE (u_node);
		for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
		  reg_move = PREV_INSN (reg_move);
	      }
	  }

	/* Emit the selected reg-moves, then the insn itself if its
	   stage falls inside the requested range.  */
	for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
	  emit_insn (copy_rtx (PATTERN (reg_move)));
	if (SCHED_STAGE (u_node) >= from_stage
	    && SCHED_STAGE (u_node) <= to_stage)
	  duplicate_insn_chain (u_node->first_note, u_node->insn);
      }
}
699 | ||
700 | ||
/* Generate the instructions (including reg_moves) for prolog & epilog.
   The prolog is placed on a new block split into the loop-preheader
   edge; the epilog on the single exit edge.  COUNT_INIT, when non-null,
   is the insn that sets COUNT_REG to a compile-time constant (see
   const_iteration_count); in that case the count adjustment is done by
   the caller rather than with runtime instructions here.  */
static void
generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
			rtx count_reg, rtx count_init)
{
  int i;
  int last_stage = PS_STAGE_COUNT (ps) - 1;
  edge e;

  /* Generate the prolog, inserting its insns on the loop-entry edge.  */
  start_sequence ();

  if (!count_init)
    {
      /* Generate instructions at the beginning of the prolog to
	 adjust the loop count by STAGE_COUNT.  If loop count is constant
	 (count_init), this constant is adjusted by STAGE_COUNT in
	 generate_prolog_epilog function.  */
      rtx sub_reg = NULL_RTX;

      sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
				     count_reg, GEN_INT (last_stage),
				     count_reg, 1, OPTAB_DIRECT);
      gcc_assert (REG_P (sub_reg));
      if (REGNO (sub_reg) != REGNO (count_reg))
	emit_move_insn (count_reg, sub_reg);
    }

  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);

  /* Put the prolog on the entry edge.  */
  e = loop_preheader_edge (loop);
  split_edge_and_insert (e, get_insns ());

  end_sequence ();

  /* Generate the epilog, inserting its insns on the loop-exit edge.  */
  start_sequence ();

  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);

  /* Put the epilogue on the exit edge.  */
  gcc_assert (single_exit (loop));
  e = single_exit (loop);
  split_edge_and_insert (e, get_insns ());
  end_sequence ();
}
750 | ||
/* Return true if all the BBs of the loop are empty except the
   loop header.  "Empty" means containing only notes, labels, jumps
   or debug insns.  */
static bool
loop_single_full_bb_p (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body (loop);

  for (i = 0; i < loop->num_nodes ; i++)
    {
      rtx head, tail;
      bool empty_bb = true;

      if (bbs[i] == loop->header)
	continue;

      /* Make sure that basic blocks other than the header
	 have only notes labels or jumps.  */
      get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
      for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
	{
	  if (NOTE_P (head) || LABEL_P (head)
	      || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
	    continue;
	  empty_bb = false;
	  break;
	}

      /* Free the body array on every exit path.  */
      if (! empty_bb)
	{
	  free (bbs);
	  return false;
	}
    }
  free (bbs);
  return true;
}
d397e8c6 | 788 | |
f73d5666 MH |
/* A simple loop from SMS point of view; it is a loop that is composed of
   either a single basic block or two BBs - a header and a latch.
   The macro parameter is parenthesized so the test stays correct even
   when the argument is a non-primary expression (CERT PRE01-C).  */
#define SIMPLE_SMS_LOOP_P(loop) (((loop)->num_nodes < 3)		      \
				 && (EDGE_COUNT ((loop)->latch->preds) == 1)  \
				 && (EDGE_COUNT ((loop)->latch->succs) == 1))
d397e8c6 | 794 | |
f73d5666 MH |
795 | /* Return true if the loop is in its canonical form and false if not. |
796 | i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */ | |
797 | static bool | |
10d22567 | 798 | loop_canon_p (struct loop *loop) |
f73d5666 | 799 | { |
d397e8c6 | 800 | |
9ba025a2 | 801 | if (loop->inner || !loop_outer (loop)) |
1a46d33d RE |
802 | { |
803 | if (dump_file) | |
804 | fprintf (dump_file, "SMS loop inner or !loop_outer\n"); | |
f73d5666 | 805 | return false; |
1a46d33d | 806 | } |
d397e8c6 | 807 | |
ac8f6c69 | 808 | if (!single_exit (loop)) |
f73d5666 MH |
809 | { |
810 | if (dump_file) | |
811 | { | |
07c02828 | 812 | rtx insn = BB_END (loop->header); |
b8698a0f | 813 | |
f73d5666 | 814 | fprintf (dump_file, "SMS loop many exits "); |
07c02828 TM |
815 | fprintf (dump_file, " %s %d (file, line)\n", |
816 | insn_file (insn), insn_line (insn)); | |
f73d5666 MH |
817 | } |
818 | return false; | |
819 | } | |
d397e8c6 | 820 | |
f73d5666 MH |
821 | if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop)) |
822 | { | |
823 | if (dump_file) | |
824 | { | |
07c02828 | 825 | rtx insn = BB_END (loop->header); |
b8698a0f | 826 | |
f73d5666 | 827 | fprintf (dump_file, "SMS loop many BBs. "); |
07c02828 TM |
828 | fprintf (dump_file, " %s %d (file, line)\n", |
829 | insn_file (insn), insn_line (insn)); | |
f73d5666 MH |
830 | } |
831 | return false; | |
d397e8c6 MH |
832 | } |
833 | ||
f73d5666 MH |
834 | return true; |
835 | } | |
d397e8c6 | 836 | |
/* If there are more than one entry for the loop,
   make it one by splitting the first entry edge and
   redirecting the others to the new BB.  */
static void
canon_loop (struct loop *loop)
{
  edge e;
  edge_iterator i;

  /* Avoid annoying special cases of edges going to exit
     block.  Any fallthrough edge into the exit block whose source has
     more than one successor is split, so such sources end up with a
     dedicated single-successor block on that path.  NOTE(review):
     presumably this keeps the later prolog/epilog edge insertion
     simple — confirm against split_edge_and_insert's callers.  */
  FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
    if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
      split_edge (e);

  /* Give the loop a dedicated latch when the latch coincides with the
     header (single-block loop) or has multiple successors.  */
  if (loop->latch == loop->header
      || EDGE_COUNT (loop->latch->succs) > 1)
    {
      /* Locate the back edge latch->header; E holds it after the
	 loop because the iteration is stopped by `break'.  */
      FOR_EACH_EDGE (e, i, loop->header->preds)
	if (e->src == loop->latch)
	  break;
      split_edge (e);
    }
}
861 | ||
e855c69d AB |
862 | /* Setup infos. */ |
863 | static void | |
864 | setup_sched_infos (void) | |
865 | { | |
866 | memcpy (&sms_common_sched_info, &haifa_common_sched_info, | |
867 | sizeof (sms_common_sched_info)); | |
868 | sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS; | |
869 | common_sched_info = &sms_common_sched_info; | |
870 | ||
871 | sched_deps_info = &sms_sched_deps_info; | |
872 | current_sched_info = &sms_sched_info; | |
873 | } | |
874 | ||
03cb2019 ZD |
875 | /* Probability in % that the sms-ed loop rolls enough so that optimized |
876 | version may be entered. Just a guess. */ | |
877 | #define PROB_SMS_ENOUGH_ITERATIONS 80 | |
878 | ||
76b4f0f7 VY |
879 | /* Used to calculate the upper bound of ii. */ |
880 | #define MAXII_FACTOR 2 | |
881 | ||
d397e8c6 MH |
882 | /* Main entry point, perform SMS scheduling on the loops of the function |
883 | that consist of single basic blocks. */ | |
efa2fa34 | 884 | static void |
10d22567 | 885 | sms_schedule (void) |
d397e8c6 | 886 | { |
d397e8c6 MH |
887 | rtx insn; |
888 | ddg_ptr *g_arr, g; | |
d397e8c6 | 889 | int * node_order; |
5cd53742 | 890 | int maxii, max_asap; |
42fd6772 | 891 | loop_iterator li; |
d397e8c6 | 892 | partial_schedule_ptr ps; |
f73d5666 | 893 | basic_block bb = NULL; |
03cb2019 | 894 | struct loop *loop; |
f73d5666 MH |
895 | basic_block condition_bb = NULL; |
896 | edge latch_edge; | |
897 | gcov_type trip_count = 0; | |
898 | ||
598ec7bd | 899 | loop_optimizer_init (LOOPS_HAVE_PREHEADERS |
6270df4c | 900 | | LOOPS_HAVE_RECORDED_EXITS); |
d51157de ZD |
901 | if (number_of_loops () <= 1) |
902 | { | |
903 | loop_optimizer_finalize (); | |
904 | return; /* There are no loops to schedule. */ | |
905 | } | |
f73d5666 | 906 | |
d397e8c6 MH |
907 | /* Initialize issue_rate. */ |
908 | if (targetm.sched.issue_rate) | |
909 | { | |
910 | int temp = reload_completed; | |
911 | ||
912 | reload_completed = 1; | |
1c91de89 | 913 | issue_rate = targetm.sched.issue_rate (); |
d397e8c6 MH |
914 | reload_completed = temp; |
915 | } | |
916 | else | |
917 | issue_rate = 1; | |
918 | ||
61ada8ae | 919 | /* Initialize the scheduler. */ |
e855c69d AB |
920 | setup_sched_infos (); |
921 | haifa_sched_init (); | |
1a1a5f4b | 922 | |
f73d5666 MH |
923 | /* Allocate memory to hold the DDG array one entry for each loop. |
924 | We use loop->num as index into this array. */ | |
42fd6772 | 925 | g_arr = XCNEWVEC (ddg_ptr, number_of_loops ()); |
d397e8c6 | 926 | |
1a46d33d RE |
927 | if (dump_file) |
928 | { | |
929 | fprintf (dump_file, "\n\nSMS analysis phase\n"); | |
930 | fprintf (dump_file, "===================\n\n"); | |
931 | } | |
932 | ||
d397e8c6 | 933 | /* Build DDGs for all the relevant loops and hold them in G_ARR |
f73d5666 | 934 | indexed by the loop index. */ |
42fd6772 | 935 | FOR_EACH_LOOP (li, loop, 0) |
d397e8c6 MH |
936 | { |
937 | rtx head, tail; | |
75c70254 | 938 | rtx count_reg; |
d397e8c6 | 939 | |
f73d5666 | 940 | /* For debugging. */ |
97511ad7 | 941 | if (dbg_cnt (sms_sched_loop) == false) |
f73d5666 MH |
942 | { |
943 | if (dump_file) | |
97511ad7 | 944 | fprintf (dump_file, "SMS reached max limit... \n"); |
d397e8c6 | 945 | |
f73d5666 MH |
946 | break; |
947 | } | |
d397e8c6 | 948 | |
1a46d33d RE |
949 | if (dump_file) |
950 | { | |
951 | rtx insn = BB_END (loop->header); | |
952 | ||
953 | fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n", | |
954 | loop->num, insn_file (insn), insn_line (insn)); | |
955 | ||
956 | } | |
957 | ||
10d22567 | 958 | if (! loop_canon_p (loop)) |
f73d5666 | 959 | continue; |
d397e8c6 | 960 | |
f73d5666 | 961 | if (! loop_single_full_bb_p (loop)) |
1a46d33d RE |
962 | { |
963 | if (dump_file) | |
964 | fprintf (dump_file, "SMS not loop_single_full_bb_p\n"); | |
d397e8c6 | 965 | continue; |
1a46d33d | 966 | } |
d397e8c6 | 967 | |
f73d5666 | 968 | bb = loop->header; |
d397e8c6 | 969 | |
496d7bb0 | 970 | get_ebb_head_tail (bb, bb, &head, &tail); |
f73d5666 | 971 | latch_edge = loop_latch_edge (loop); |
ac8f6c69 ZD |
972 | gcc_assert (single_exit (loop)); |
973 | if (single_exit (loop)->count) | |
974 | trip_count = latch_edge->count / single_exit (loop)->count; | |
d397e8c6 | 975 | |
fa10beec | 976 | /* Perform SMS only on loops that their average count is above threshold. */ |
f73d5666 MH |
977 | |
978 | if ( latch_edge->count | |
ac8f6c69 | 979 | && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD)) |
f73d5666 | 980 | { |
10d22567 | 981 | if (dump_file) |
d397e8c6 | 982 | { |
07c02828 TM |
983 | fprintf (dump_file, " %s %d (file, line)\n", |
984 | insn_file (tail), insn_line (tail)); | |
10d22567 | 985 | fprintf (dump_file, "SMS single-bb-loop\n"); |
d397e8c6 MH |
986 | if (profile_info && flag_branch_probabilities) |
987 | { | |
10d22567 ZD |
988 | fprintf (dump_file, "SMS loop-count "); |
989 | fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, | |
d397e8c6 | 990 | (HOST_WIDEST_INT) bb->count); |
10d22567 ZD |
991 | fprintf (dump_file, "\n"); |
992 | fprintf (dump_file, "SMS trip-count "); | |
993 | fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, | |
f73d5666 | 994 | (HOST_WIDEST_INT) trip_count); |
10d22567 ZD |
995 | fprintf (dump_file, "\n"); |
996 | fprintf (dump_file, "SMS profile-sum-max "); | |
997 | fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, | |
d397e8c6 | 998 | (HOST_WIDEST_INT) profile_info->sum_max); |
10d22567 | 999 | fprintf (dump_file, "\n"); |
d397e8c6 MH |
1000 | } |
1001 | } | |
1002 | continue; | |
1003 | } | |
1004 | ||
1005 | /* Make sure this is a doloop. */ | |
46dc0789 | 1006 | if ( !(count_reg = doloop_register_get (head, tail))) |
1a46d33d RE |
1007 | { |
1008 | if (dump_file) | |
1009 | fprintf (dump_file, "SMS doloop_register_get failed\n"); | |
d397e8c6 | 1010 | continue; |
1a46d33d | 1011 | } |
d397e8c6 | 1012 | |
9b6dd300 VY |
1013 | /* Don't handle BBs with calls or barriers, or !single_set insns, |
1014 | or auto-increment insns (to avoid creating invalid reg-moves | |
b8698a0f | 1015 | for the auto-increment insns). |
46dc0789 MN |
1016 | ??? Should handle auto-increment insns. |
1017 | ??? Should handle insns defining subregs. */ | |
1018 | for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) | |
1019 | { | |
1020 | rtx set; | |
1021 | ||
1022 | if (CALL_P (insn) | |
1023 | || BARRIER_P (insn) | |
b5b8b0ac | 1024 | || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn) |
46dc0789 MN |
1025 | && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE) |
1026 | || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) | |
1027 | || (INSN_P (insn) && (set = single_set (insn)) | |
1028 | && GET_CODE (SET_DEST (set)) == SUBREG)) | |
1029 | break; | |
1030 | } | |
d397e8c6 MH |
1031 | |
1032 | if (insn != NEXT_INSN (tail)) | |
1033 | { | |
10d22567 | 1034 | if (dump_file) |
d397e8c6 | 1035 | { |
4b4bf941 | 1036 | if (CALL_P (insn)) |
10d22567 | 1037 | fprintf (dump_file, "SMS loop-with-call\n"); |
4b4bf941 | 1038 | else if (BARRIER_P (insn)) |
10d22567 | 1039 | fprintf (dump_file, "SMS loop-with-barrier\n"); |
9b6dd300 VY |
1040 | else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) |
1041 | fprintf (dump_file, "SMS reg inc\n"); | |
b5b8b0ac | 1042 | else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn) |
46dc0789 MN |
1043 | && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)) |
1044 | fprintf (dump_file, "SMS loop-with-not-single-set\n"); | |
1045 | else | |
1046 | fprintf (dump_file, "SMS loop with subreg in lhs\n"); | |
10d22567 | 1047 | print_rtl_single (dump_file, insn); |
d397e8c6 MH |
1048 | } |
1049 | ||
1050 | continue; | |
1051 | } | |
1052 | ||
6fb5fa3c | 1053 | if (! (g = create_ddg (bb, 0))) |
d397e8c6 | 1054 | { |
10d22567 | 1055 | if (dump_file) |
de82c453 | 1056 | fprintf (dump_file, "SMS create_ddg failed\n"); |
d397e8c6 MH |
1057 | continue; |
1058 | } | |
1059 | ||
42fd6772 | 1060 | g_arr[loop->num] = g; |
1a46d33d RE |
1061 | if (dump_file) |
1062 | fprintf (dump_file, "...OK\n"); | |
1063 | ||
d397e8c6 | 1064 | } |
1a46d33d RE |
1065 | if (dump_file) |
1066 | { | |
1067 | fprintf (dump_file, "\nSMS transformation phase\n"); | |
1068 | fprintf (dump_file, "=========================\n\n"); | |
1069 | } | |
d397e8c6 | 1070 | |
f73d5666 | 1071 | /* We don't want to perform SMS on new loops - created by versioning. */ |
677cc14d | 1072 | FOR_EACH_LOOP (li, loop, 0) |
d397e8c6 MH |
1073 | { |
1074 | rtx head, tail; | |
75c70254 | 1075 | rtx count_reg, count_init; |
d397e8c6 | 1076 | int mii, rec_mii; |
f73d5666 | 1077 | unsigned stage_count = 0; |
d397e8c6 MH |
1078 | HOST_WIDEST_INT loop_count = 0; |
1079 | ||
42fd6772 | 1080 | if (! (g = g_arr[loop->num])) |
d397e8c6 MH |
1081 | continue; |
1082 | ||
1083 | if (dump_file) | |
1a46d33d RE |
1084 | { |
1085 | rtx insn = BB_END (loop->header); | |
1086 | ||
1087 | fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n", | |
1088 | loop->num, insn_file (insn), insn_line (insn)); | |
1089 | ||
1090 | print_ddg (dump_file, g); | |
1091 | } | |
d397e8c6 | 1092 | |
496d7bb0 | 1093 | get_ebb_head_tail (loop->header, loop->header, &head, &tail); |
d397e8c6 | 1094 | |
f73d5666 | 1095 | latch_edge = loop_latch_edge (loop); |
ac8f6c69 ZD |
1096 | gcc_assert (single_exit (loop)); |
1097 | if (single_exit (loop)->count) | |
1098 | trip_count = latch_edge->count / single_exit (loop)->count; | |
d397e8c6 | 1099 | |
10d22567 | 1100 | if (dump_file) |
d397e8c6 | 1101 | { |
07c02828 TM |
1102 | fprintf (dump_file, " %s %d (file, line)\n", |
1103 | insn_file (tail), insn_line (tail)); | |
10d22567 | 1104 | fprintf (dump_file, "SMS single-bb-loop\n"); |
d397e8c6 MH |
1105 | if (profile_info && flag_branch_probabilities) |
1106 | { | |
10d22567 ZD |
1107 | fprintf (dump_file, "SMS loop-count "); |
1108 | fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, | |
d397e8c6 | 1109 | (HOST_WIDEST_INT) bb->count); |
10d22567 ZD |
1110 | fprintf (dump_file, "\n"); |
1111 | fprintf (dump_file, "SMS profile-sum-max "); | |
1112 | fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, | |
d397e8c6 | 1113 | (HOST_WIDEST_INT) profile_info->sum_max); |
10d22567 | 1114 | fprintf (dump_file, "\n"); |
d397e8c6 | 1115 | } |
10d22567 ZD |
1116 | fprintf (dump_file, "SMS doloop\n"); |
1117 | fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes); | |
1118 | fprintf (dump_file, "SMS num-loads %d\n", g->num_loads); | |
1119 | fprintf (dump_file, "SMS num-stores %d\n", g->num_stores); | |
d397e8c6 MH |
1120 | } |
1121 | ||
d397e8c6 | 1122 | |
f73d5666 MH |
1123 | /* In case of th loop have doloop register it gets special |
1124 | handling. */ | |
1125 | count_init = NULL_RTX; | |
46dc0789 | 1126 | if ((count_reg = doloop_register_get (head, tail))) |
f73d5666 MH |
1127 | { |
1128 | basic_block pre_header; | |
1129 | ||
1130 | pre_header = loop_preheader_edge (loop)->src; | |
1131 | count_init = const_iteration_count (count_reg, pre_header, | |
1132 | &loop_count); | |
1133 | } | |
1134 | gcc_assert (count_reg); | |
d397e8c6 | 1135 | |
10d22567 | 1136 | if (dump_file && count_init) |
d397e8c6 | 1137 | { |
10d22567 ZD |
1138 | fprintf (dump_file, "SMS const-doloop "); |
1139 | fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, | |
f73d5666 | 1140 | loop_count); |
10d22567 | 1141 | fprintf (dump_file, "\n"); |
d397e8c6 MH |
1142 | } |
1143 | ||
5ed6ace5 | 1144 | node_order = XNEWVEC (int, g->num_nodes); |
d397e8c6 MH |
1145 | |
1146 | mii = 1; /* Need to pass some estimate of mii. */ | |
5cd53742 | 1147 | rec_mii = sms_order_nodes (g, mii, node_order, &max_asap); |
d397e8c6 | 1148 | mii = MAX (res_MII (g), rec_mii); |
5cd53742 | 1149 | maxii = MAX (max_asap, MAXII_FACTOR * mii); |
d397e8c6 | 1150 | |
10d22567 ZD |
1151 | if (dump_file) |
1152 | fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n", | |
d397e8c6 MH |
1153 | rec_mii, mii, maxii); |
1154 | ||
1155 | /* After sms_order_nodes and before sms_schedule_by_order, to copy over | |
1156 | ASAP. */ | |
1157 | set_node_sched_params (g); | |
1158 | ||
10d22567 | 1159 | ps = sms_schedule_by_order (g, mii, maxii, node_order); |
d397e8c6 | 1160 | |
db87b56d | 1161 | if (ps){ |
d397e8c6 | 1162 | stage_count = PS_STAGE_COUNT (ps); |
db87b56d | 1163 | gcc_assert(stage_count >= 1); |
b8698a0f | 1164 | } |
d397e8c6 | 1165 | |
f73d5666 MH |
1166 | /* Stage count of 1 means that there is no interleaving between |
1167 | iterations, let the scheduling passes do the job. */ | |
db87b56d | 1168 | if (stage_count <= 1 |
f73d5666 MH |
1169 | || (count_init && (loop_count <= stage_count)) |
1170 | || (flag_branch_probabilities && (trip_count <= stage_count))) | |
d397e8c6 MH |
1171 | { |
1172 | if (dump_file) | |
f73d5666 MH |
1173 | { |
1174 | fprintf (dump_file, "SMS failed... \n"); | |
1175 | fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count); | |
1176 | fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count); | |
1177 | fprintf (dump_file, ", trip-count="); | |
1178 | fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count); | |
1179 | fprintf (dump_file, ")\n"); | |
1180 | } | |
1181 | continue; | |
d397e8c6 MH |
1182 | } |
1183 | else | |
1184 | { | |
f73d5666 | 1185 | struct undo_replace_buff_elem *reg_move_replaces; |
d397e8c6 | 1186 | |
10d22567 | 1187 | if (dump_file) |
d397e8c6 | 1188 | { |
10d22567 | 1189 | fprintf (dump_file, |
d397e8c6 MH |
1190 | "SMS succeeded %d %d (with ii, sc)\n", ps->ii, |
1191 | stage_count); | |
10d22567 ZD |
1192 | print_partial_schedule (ps, dump_file); |
1193 | fprintf (dump_file, | |
d397e8c6 MH |
1194 | "SMS Branch (%d) will later be scheduled at cycle %d.\n", |
1195 | g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1); | |
1196 | } | |
1197 | ||
d397e8c6 MH |
1198 | /* Set the stage boundaries. If the DDG is built with closing_branch_deps, |
1199 | the closing_branch was scheduled and should appear in the last (ii-1) | |
1200 | row. Otherwise, we are free to schedule the branch, and we let nodes | |
1201 | that were scheduled at the first PS_MIN_CYCLE cycle appear in the first | |
b8698a0f | 1202 | row; this should reduce stage_count to minimum. |
46dc0789 MN |
1203 | TODO: Revisit the issue of scheduling the insns of the |
1204 | control part relative to the branch when the control part | |
1205 | has more than one insn. */ | |
d397e8c6 MH |
1206 | normalize_sched_times (ps); |
1207 | rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); | |
1208 | set_columns_for_ps (ps); | |
b8698a0f | 1209 | |
76b4f0f7 VY |
1210 | canon_loop (loop); |
1211 | ||
1212 | /* case the BCT count is not known , Do loop-versioning */ | |
1213 | if (count_reg && ! count_init) | |
1214 | { | |
1215 | rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg, | |
1216 | GEN_INT(stage_count)); | |
1217 | unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS | |
1218 | * REG_BR_PROB_BASE) / 100; | |
1219 | ||
1220 | loop_version (loop, comp_rtx, &condition_bb, | |
1221 | prob, prob, REG_BR_PROB_BASE - prob, | |
1222 | true); | |
1223 | } | |
1224 | ||
1225 | /* Set new iteration count of loop kernel. */ | |
1226 | if (count_reg && count_init) | |
1227 | SET_SRC (single_set (count_init)) = GEN_INT (loop_count | |
1228 | - stage_count + 1); | |
1229 | ||
1230 | /* Now apply the scheduled kernel to the RTL of the loop. */ | |
d397e8c6 | 1231 | permute_partial_schedule (ps, g->closing_branch->first_note); |
d72372e4 | 1232 | |
76b4f0f7 VY |
1233 | /* Mark this loop as software pipelined so the later |
1234 | scheduling passes doesn't touch it. */ | |
1235 | if (! flag_resched_modulo_sched) | |
1236 | g->bb->flags |= BB_DISABLE_SCHEDULE; | |
1237 | /* The life-info is not valid any more. */ | |
1238 | df_set_bb_dirty (g->bb); | |
d72372e4 | 1239 | |
76b4f0f7 VY |
1240 | reg_move_replaces = generate_reg_moves (ps, true); |
1241 | if (dump_file) | |
1242 | print_node_sched_params (dump_file, g->num_nodes, g); | |
1243 | /* Generate prolog and epilog. */ | |
46dc0789 | 1244 | generate_prolog_epilog (ps, loop, count_reg, count_init); |
b8698a0f | 1245 | |
f73d5666 | 1246 | free_undo_replace_buff (reg_move_replaces); |
d397e8c6 | 1247 | } |
f73d5666 | 1248 | |
d397e8c6 MH |
1249 | free_partial_schedule (ps); |
1250 | free (node_sched_params); | |
1251 | free (node_order); | |
1252 | free_ddg (g); | |
1253 | } | |
1254 | ||
f4daf7e4 UP |
1255 | free (g_arr); |
1256 | ||
d397e8c6 | 1257 | /* Release scheduler data, needed until now because of DFA. */ |
e855c69d | 1258 | haifa_sched_finish (); |
598ec7bd | 1259 | loop_optimizer_finalize (); |
d397e8c6 MH |
1260 | } |
1261 | ||
1262 | /* The SMS scheduling algorithm itself | |
1263 | ----------------------------------- | |
1264 | Input: 'O' an ordered list of insns of a loop. | |
1265 | Output: A scheduling of the loop - kernel, prolog, and epilogue. | |
1266 | ||
1267 | 'Q' is the empty Set | |
1268 | 'PS' is the partial schedule; it holds the currently scheduled nodes with | |
1269 | their cycle/slot. | |
1270 | 'PSP' previously scheduled predecessors. | |
1271 | 'PSS' previously scheduled successors. | |
1272 | 't(u)' the cycle where u is scheduled. | |
1273 | 'l(u)' is the latency of u. | |
1274 | 'd(v,u)' is the dependence distance from v to u. | |
1275 | 'ASAP(u)' the earliest time at which u could be scheduled as computed in | |
1276 | the node ordering phase. | |
1277 | 'check_hardware_resources_conflicts(u, PS, c)' | |
1278 | run a trace around cycle/slot through DFA model | |
1279 | to check resource conflicts involving instruction u | |
1280 | at cycle c given the partial schedule PS. | |
1281 | 'add_to_partial_schedule_at_time(u, PS, c)' | |
1282 | Add the node/instruction u to the partial schedule | |
1283 | PS at time c. | |
1284 | 'calculate_register_pressure(PS)' | |
1285 | Given a schedule of instructions, calculate the register | |
1286 | pressure it implies. One implementation could be the | |
1287 | maximum number of overlapping live ranges. | |
1288 | 'maxRP' The maximum allowed register pressure, it is usually derived from the number | |
1289 | registers available in the hardware. | |
1290 | ||
1291 | 1. II = MII. | |
1292 | 2. PS = empty list | |
1293 | 3. for each node u in O in pre-computed order | |
1294 | 4. if (PSP(u) != Q && PSS(u) == Q) then | |
 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
 6. start = Early_start; end = Early_start + II - 1; step = 1
11. else if (PSP(u) == Q && PSS(u) != Q) then
12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
13. start = Late_start; end = Late_start - II + 1; step = -1
14. else if (PSP(u) != Q && PSS(u) != Q) then
15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1303 | 17. start = Early_start; | |
1304 | 18. end = min(Early_start + II - 1 , Late_start); | |
1305 | 19. step = 1 | |
1306 | 20. else "if (PSP(u) == Q && PSS(u) == Q)" | |
1307 | 21. start = ASAP(u); end = start + II - 1; step = 1 | |
1308 | 22. endif | |
1309 | ||
1310 | 23. success = false | |
1311 | 24. for (c = start ; c != end ; c += step) | |
1312 | 25. if check_hardware_resources_conflicts(u, PS, c) then | |
1313 | 26. add_to_partial_schedule_at_time(u, PS, c) | |
1314 | 27. success = true | |
1315 | 28. break | |
1316 | 29. endif | |
1317 | 30. endfor | |
1318 | 31. if (success == false) then | |
1319 | 32. II = II + 1 | |
1320 | 33. if (II > maxII) then | |
1321 | 34. finish - failed to schedule | |
1322 | 35. endif | |
1323 | 36. goto 2. | |
1324 | 37. endif | |
1325 | 38. endfor | |
1326 | 39. if (calculate_register_pressure(PS) > maxRP) then | |
1327 | 40. goto 32. | |
1328 | 41. endif | |
1329 | 42. compute epilogue & prologue | |
1330 | 43. finish - succeeded to schedule | |
1331 | */ | |
1332 | ||
1333 | /* A limit on the number of cycles that resource conflicts can span. ??? Should | |
1334 | be provided by DFA, and be dependent on the type of insn scheduled. Currently | |
1335 | set to 0 to save compile time. */ | |
1336 | #define DFA_HISTORY SMS_DFA_HISTORY | |
1337 | ||
c8943832 VY |
1338 | /* A threshold for the number of repeated unsuccessful attempts to insert |
1339 | an empty row, before we flush the partial schedule and start over. */ | |
1340 | #define MAX_SPLIT_NUM 10 | |
/* Given the partial schedule PS, this function calculates and returns the
   cycles in which we can schedule the node with the given index I.
   NOTE: Here we do the backtracking in SMS, in some special cases.  We have
   noticed that there are several cases in which we fail to SMS the loop
   because the sched window of a node is empty due to tight data-deps.  In
   such cases we want to unschedule some of the predecessors/successors
   until we get non-empty scheduling window.  It returns -1 if the
   scheduling window is empty and zero otherwise.

   On success *START_P, *END_P and *STEP_P describe a half-open range
   of candidate cycles for node NODES_ORDER[I]: iterate c = start;
   c != end; c += step.  STEP is +1 when the node should be placed as
   close as possible after its scheduled predecessors, and -1 when it
   should be placed as close as possible before its scheduled
   successors.  SCHED_NODES marks the already-scheduled nodes; II is
   the current initiation interval.  */

static int
get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
		  sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
{
  int start, step, end;
  ddg_edge_ptr e;
  int u = nodes_order [i];
  ddg_node_ptr u_node = &ps->g->nodes[u];
  sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
  sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
  sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
  sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
  int psp_not_empty;
  int pss_not_empty;

  /* 1. compute sched window for u (start, end, step).  */
  /* PSP/PSS = predecessors/successors of U that are already
     scheduled; the _cg variant also reports whether the result is
     non-empty.  */
  sbitmap_zero (psp);
  sbitmap_zero (pss);
  psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
  pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);

  /* Case 1: only predecessors are scheduled.  Derive the earliest
     legal cycle from each scheduled predecessor and scan forward.  */
  if (psp_not_empty && !pss_not_empty)
    {
      int early_start = INT_MIN;

      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge: ");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_not_empty,"
		       " checking p %d (%d): ", u_node->cuid,
		       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      int p_st = SCHED_TIME (v_node);

	      /* U may start no earlier than latency cycles after V,
		 minus DISTANCE iterations' worth of II each.  */
	      early_start =
		MAX (early_start, p_st + e->latency - (e->distance * ii));

	      if (dump_file)
		fprintf (dump_file,
			 "pred st = %d; early_start = %d; latency: %d",
			 p_st, early_start, e->latency);

	      /* Memory deps additionally cap the window's far end.  */
	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
	  else if (dump_file)
	    fprintf (dump_file, "the node is not scheduled\n");
	}
      start = early_start;
      end = MIN (end, early_start + ii);
      /* Schedule the node close to its predecessors.  */
      step = 1;

      if (dump_file)
	fprintf (dump_file,
		 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
		 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
    }

  /* Case 2: only successors are scheduled.  Derive the latest legal
     cycle from each scheduled successor and scan backward.  */
  else if (!psp_not_empty && pss_not_empty)
    {
      int late_start = INT_MAX;

      end = INT_MIN;
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge:");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in pss_not_empty,"
		       " checking s %d (%d): ", u_node->cuid,
		       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      int s_st = SCHED_TIME (v_node);

	      /* U must finish early enough for each successor V.  */
	      late_start = MIN (late_start,
				s_st - e->latency + (e->distance * ii));

	      if (dump_file)
		fprintf (dump_file,
			 "succ st = %d; late_start = %d; latency = %d",
			 s_st, late_start, e->latency);

	      if (e->data_type == MEM_DEP)
		end = MAX (end, SCHED_TIME (v_node) - ii + 1);
	      if (dump_file)
		fprintf (dump_file, "end = %d\n", end);

	    }
	  else if (dump_file)
	    fprintf (dump_file, "the node is not scheduled\n");

	}
      start = late_start;
      end = MAX (end, late_start - ii);
      /* Schedule the node close to its successors.  */
      step = -1;

      if (dump_file)
	fprintf (dump_file,
		 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
		 u_node->cuid, INSN_UID (u_node->insn), start, end, step);

    }

  /* Case 3: both predecessors and successors are scheduled.  The
     window is bounded on both sides; the scan direction depends on
     whether TRUE REG_DEP successors outnumber such predecessors.  */
  else if (psp_not_empty && pss_not_empty)
    {
      int early_start = INT_MIN;
      int late_start = INT_MAX;
      int count_preds = 0;
      int count_succs = 0;

      start = INT_MIN;
      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge:");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_pss_not_empty,"
		       " checking p %d (%d): ", u_node->cuid, INSN_UID
		       (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      int p_st = SCHED_TIME (v_node);

	      early_start = MAX (early_start,
				 p_st + e->latency
				 - (e->distance * ii));

	      if (dump_file)
		fprintf (dump_file,
			 "pred st = %d; early_start = %d; latency = %d",
			 p_st, early_start, e->latency);

	      /* Only true register dependences vote on the scan
		 direction chosen below.  */
	      if (e->type == TRUE_DEP && e->data_type == REG_DEP)
		count_preds++;

	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
	  else if (dump_file)
	    fprintf (dump_file, "the node is not scheduled\n");

	}
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge:");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_pss_not_empty,"
		       " checking s %d (%d): ", u_node->cuid, INSN_UID
		       (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      int s_st = SCHED_TIME (v_node);

	      late_start = MIN (late_start,
				s_st - e->latency
				+ (e->distance * ii));

	      if (dump_file)
		fprintf (dump_file,
			 "succ st = %d; late_start = %d; latency = %d",
			 s_st, late_start, e->latency);

	      if (e->type == TRUE_DEP && e->data_type == REG_DEP)
		count_succs++;

	      /* NOTE: here a memory successor raises START (not END),
		 unlike the symmetric branch above — the window is
		 squeezed from both sides in this case.  */
	      if (e->data_type == MEM_DEP)
		start = MAX (start, SCHED_TIME (v_node) - ii + 1);
	    }
	  else if (dump_file)
	    fprintf (dump_file, "the node is not scheduled\n");

	}
      start = MAX (start, early_start);
      end = MIN (end, MIN (early_start + ii, late_start + 1));
      step = 1;
      /* If there are more successors than predecessors schedule the
	 node close to its successors.  */
      if (count_succs >= count_preds)
	{
	  int old_start = start;

	  start = end - 1;
	  end = old_start - 1;
	  step = -1;
	}
    }
  else /* psp is empty && pss is empty.  */
    {
      /* Nothing constrains U yet: start from its precomputed ASAP
	 time and try a full II's worth of cycles.  */
      start = SCHED_ASAP (u_node);
      end = start + ii;
      step = 1;
    }

  *start_p = start;
  *step_p = step;
  *end_p = end;
  sbitmap_free (psp);
  sbitmap_free (pss);

  /* An inverted (or zero-length) range means no cycle satisfies all
     dependences at this II: report an empty window.  */
  if ((start >= end && step == 1) || (start <= end && step == -1))
    {
      if (dump_file)
	fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
		 start, end, step);
      return -1;
    }

  return 0;
}
1596 | ||
d48b46e0 RE |
1597 | /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the |
1598 | node currently been scheduled. At the end of the calculation | |
0de5a32f AZ |
1599 | MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of |
1600 | U_NODE which are (1) already scheduled in the first/last row of | |
1601 | U_NODE's scheduling window, (2) whose dependence inequality with U | |
1602 | becomes an equality when U is scheduled in this same row, and (3) | |
1603 | whose dependence latency is zero. | |
1604 | ||
4117d6f7 | 1605 | The first and last rows are calculated using the following parameters: |
d48b46e0 RE |
1606 | START/END rows - The cycles that begins/ends the traversal on the window; |
1607 | searching for an empty cycle to schedule U_NODE. | |
1608 | STEP - The direction in which we traverse the window. | |
0de5a32f | 1609 | II - The initiation interval. */ |
d48b46e0 RE |
1610 | |
1611 | static void | |
1612 | calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end, | |
1613 | int step, int ii, sbitmap sched_nodes, | |
1614 | sbitmap must_precede, sbitmap must_follow) | |
1615 | { | |
1616 | ddg_edge_ptr e; | |
1617 | int first_cycle_in_window, last_cycle_in_window; | |
d48b46e0 RE |
1618 | |
1619 | gcc_assert (must_precede && must_follow); | |
1620 | ||
1621 | /* Consider the following scheduling window: | |
1622 | {first_cycle_in_window, first_cycle_in_window+1, ..., | |
1623 | last_cycle_in_window}. If step is 1 then the following will be | |
1624 | the order we traverse the window: {start=first_cycle_in_window, | |
1625 | first_cycle_in_window+1, ..., end=last_cycle_in_window+1}, | |
1626 | or {start=last_cycle_in_window, last_cycle_in_window-1, ..., | |
1627 | end=first_cycle_in_window-1} if step is -1. */ | |
1628 | first_cycle_in_window = (step == 1) ? start : end - step; | |
1629 | last_cycle_in_window = (step == 1) ? end - step : start; | |
1630 | ||
d48b46e0 RE |
1631 | sbitmap_zero (must_precede); |
1632 | sbitmap_zero (must_follow); | |
1633 | ||
1634 | if (dump_file) | |
1635 | fprintf (dump_file, "\nmust_precede: "); | |
1636 | ||
0de5a32f AZ |
1637 | /* Instead of checking if: |
1638 | (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window) | |
1639 | && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) == | |
1640 | first_cycle_in_window) | |
1641 | && e->latency == 0 | |
1642 | we use the fact that latency is non-negative: | |
1643 | SCHED_TIME (e->src) - (e->distance * ii) <= | |
1644 | SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <= | |
1645 | first_cycle_in_window | |
1646 | and check only if | |
1647 | SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */ | |
d48b46e0 RE |
1648 | for (e = u_node->in; e != 0; e = e->next_in) |
1649 | if (TEST_BIT (sched_nodes, e->src->cuid) | |
0de5a32f AZ |
1650 | && ((SCHED_TIME (e->src) - (e->distance * ii)) == |
1651 | first_cycle_in_window)) | |
d48b46e0 RE |
1652 | { |
1653 | if (dump_file) | |
1654 | fprintf (dump_file, "%d ", e->src->cuid); | |
1655 | ||
1656 | SET_BIT (must_precede, e->src->cuid); | |
1657 | } | |
1658 | ||
1659 | if (dump_file) | |
1660 | fprintf (dump_file, "\nmust_follow: "); | |
1661 | ||
0de5a32f AZ |
1662 | /* Instead of checking if: |
1663 | (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window) | |
1664 | && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) == | |
1665 | last_cycle_in_window) | |
1666 | && e->latency == 0 | |
1667 | we use the fact that latency is non-negative: | |
1668 | SCHED_TIME (e->dest) + (e->distance * ii) >= | |
b8698a0f | 1669 | SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >= |
0de5a32f AZ |
1670 | last_cycle_in_window |
1671 | and check only if | |
1672 | SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */ | |
d48b46e0 RE |
1673 | for (e = u_node->out; e != 0; e = e->next_out) |
1674 | if (TEST_BIT (sched_nodes, e->dest->cuid) | |
0de5a32f AZ |
1675 | && ((SCHED_TIME (e->dest) + (e->distance * ii)) == |
1676 | last_cycle_in_window)) | |
d48b46e0 RE |
1677 | { |
1678 | if (dump_file) | |
1679 | fprintf (dump_file, "%d ", e->dest->cuid); | |
1680 | ||
1681 | SET_BIT (must_follow, e->dest->cuid); | |
1682 | } | |
1683 | ||
1684 | if (dump_file) | |
1685 | fprintf (dump_file, "\n"); | |
1686 | } | |
1687 | ||
1688 | /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following | |
1689 | parameters to decide if that's possible: | |
1690 | PS - The partial schedule. | |
1691 | U - The serial number of U_NODE. | |
fa10beec | 1692 | NUM_SPLITS - The number of row splits made so far. |
d48b46e0 RE |
1693 | MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at |
1694 | the first row of the scheduling window) | |
1695 | MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the | |
1696 | last row of the scheduling window) */ | |
1697 | ||
1698 | static bool | |
1699 | try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node, | |
de82c453 | 1700 | int u, int cycle, sbitmap sched_nodes, |
d48b46e0 RE |
1701 | int *num_splits, sbitmap must_precede, |
1702 | sbitmap must_follow) | |
1703 | { | |
1704 | ps_insn_ptr psi; | |
1705 | bool success = 0; | |
1706 | ||
1707 | verify_partial_schedule (ps, sched_nodes); | |
de82c453 | 1708 | psi = ps_add_node_check_conflicts (ps, u_node, cycle, |
d48b46e0 RE |
1709 | must_precede, must_follow); |
1710 | if (psi) | |
1711 | { | |
de82c453 | 1712 | SCHED_TIME (u_node) = cycle; |
d48b46e0 RE |
1713 | SET_BIT (sched_nodes, u); |
1714 | success = 1; | |
1715 | *num_splits = 0; | |
1716 | if (dump_file) | |
de82c453 | 1717 | fprintf (dump_file, "Scheduled w/o split in %d\n", cycle); |
d48b46e0 RE |
1718 | |
1719 | } | |
1720 | ||
1721 | return success; | |
1722 | } | |
1723 | ||
/* This function implements the scheduling algorithm for SMS according to the
   above algorithm.  Nodes are taken in NODES_ORDER order and placed into a
   partial schedule whose initiation interval II grows from MII towards
   MAXII as needed.  Returns the partial schedule, or NULL if no schedule
   was found with II < MAXII.  */
static partial_schedule_ptr
sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
{
  int ii = mii;
  int i, c, success, num_splits = 0;
  int flush_and_start_over = true;
  int num_nodes = g->num_nodes;
  int start, end, step; /* Place together into one struct?  */
  sbitmap sched_nodes = sbitmap_alloc (num_nodes);
  sbitmap must_precede = sbitmap_alloc (num_nodes);
  sbitmap must_follow = sbitmap_alloc (num_nodes);
  /* All nodes except debug insns and the closing branch; used at the end
     to assert that everything that should be scheduled was scheduled.  */
  sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);

  partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);

  sbitmap_ones (tobe_scheduled);
  sbitmap_zero (sched_nodes);

  /* Each iteration either completes the schedule or abandons it and
     restarts from scratch with a larger II.  */
  while (flush_and_start_over && (ii < maxii))
    {

      if (dump_file)
	fprintf (dump_file, "Starting with ii=%d\n", ii);
      flush_and_start_over = false;
      sbitmap_zero (sched_nodes);

      for (i = 0; i < num_nodes; i++)
	{
	  int u = nodes_order[i];
	  ddg_node_ptr u_node = &ps->g->nodes[u];
	  rtx insn = u_node->insn;

	  /* Debug insns are never placed in the schedule.  */
	  if (!NONDEBUG_INSN_P (insn))
	    {
	      RESET_BIT (tobe_scheduled, u);
	      continue;
	    }

	  if (JUMP_P (insn)) /* Closing branch handled later.  */
	    {
	      RESET_BIT (tobe_scheduled, u);
	      continue;
	    }

	  /* Already scheduled in a previous attempt for this II.  */
	  if (TEST_BIT (sched_nodes, u))
	    continue;

	  /* Try to get non-empty scheduling window.  */
	  success = 0;
	  if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
				&step, &end) == 0)
	    {
	      if (dump_file)
		fprintf (dump_file, "\nTrying to schedule node %d \
INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
			 (g->nodes[u].insn)), start, end, step);

	      gcc_assert ((step > 0 && start < end)
			  || (step < 0 && start > end));

	      calculate_must_precede_follow (u_node, start, end, step, ii,
					     sched_nodes, must_precede,
					     must_follow);

	      /* Walk the window cycle by cycle; must_precede constrains
		 only the chronologically first row and must_follow only
		 the last, whichever ends of the walk those are.  */
	      for (c = start; c != end; c += step)
		{
		  sbitmap tmp_precede = NULL;
		  sbitmap tmp_follow = NULL;

		  if (c == start)
		    {
		      if (step == 1)
			tmp_precede = must_precede;
		      else /* step == -1.  */
			tmp_follow = must_follow;
		    }
		  if (c == end - step)
		    {
		      if (step == 1)
			tmp_follow = must_follow;
		      else /* step == -1.  */
			tmp_precede = must_precede;
		    }

		  success =
		    try_scheduling_node_in_cycle (ps, u_node, u, c,
						  sched_nodes,
						  &num_splits, tmp_precede,
						  tmp_follow);
		  if (success)
		    break;
		}

	      verify_partial_schedule (ps, sched_nodes);
	    }
	  if (!success)
	    {
	      int split_row;

	      /* Give up entirely once II reaches MAXII.  */
	      if (ii++ == maxii)
		break;

	      /* Too many consecutive splits: discard this schedule and
		 restart with the (already incremented) larger II.  */
	      if (num_splits >= MAX_SPLIT_NUM)
		{
		  num_splits = 0;
		  flush_and_start_over = true;
		  verify_partial_schedule (ps, sched_nodes);
		  reset_partial_schedule (ps, ii);
		  verify_partial_schedule (ps, sched_nodes);
		  break;
		}

	      num_splits++;
	      /* The scheduling window is exclusive of 'end'
		 whereas compute_split_row() expects an inclusive,
		 ordered range.  */
	      if (step == 1)
		split_row = compute_split_row (sched_nodes, start, end - 1,
					       ps->ii, u_node);
	      else
		split_row = compute_split_row (sched_nodes, end + 1, start,
					       ps->ii, u_node);

	      /* Widen the window by inserting an empty row, then retry
		 the same node.  */
	      ps_insert_empty_row (ps, split_row, sched_nodes);
	      i--;	     /* Go back and retry node i.  */

	      if (dump_file)
		fprintf (dump_file, "num_splits=%d\n", num_splits);
	    }

	  /* ??? If (success), check register pressure estimates.  */
	}			/* Continue with next node.  */
    }				/* While flush_and_start_over.  */
  if (ii >= maxii)
    {
      free_partial_schedule (ps);
      ps = NULL;
    }
  else
    gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));

  sbitmap_free (sched_nodes);
  sbitmap_free (must_precede);
  sbitmap_free (must_follow);
  sbitmap_free (tobe_scheduled);

  return ps;
}
1874 | ||
c8943832 VY |
1875 | /* This function inserts a new empty row into PS at the position |
1876 | according to SPLITROW, keeping all already scheduled instructions | |
1877 | intact and updating their SCHED_TIME and cycle accordingly. */ | |
1878 | static void | |
1879 | ps_insert_empty_row (partial_schedule_ptr ps, int split_row, | |
1880 | sbitmap sched_nodes) | |
1881 | { | |
1882 | ps_insn_ptr crr_insn; | |
1883 | ps_insn_ptr *rows_new; | |
1884 | int ii = ps->ii; | |
1885 | int new_ii = ii + 1; | |
1886 | int row; | |
1887 | ||
1888 | verify_partial_schedule (ps, sched_nodes); | |
1889 | ||
1890 | /* We normalize sched_time and rotate ps to have only non-negative sched | |
1891 | times, for simplicity of updating cycles after inserting new row. */ | |
1892 | split_row -= ps->min_cycle; | |
1893 | split_row = SMODULO (split_row, ii); | |
1894 | if (dump_file) | |
1895 | fprintf (dump_file, "split_row=%d\n", split_row); | |
1896 | ||
1897 | normalize_sched_times (ps); | |
1898 | rotate_partial_schedule (ps, ps->min_cycle); | |
1899 | ||
1900 | rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr)); | |
1901 | for (row = 0; row < split_row; row++) | |
1902 | { | |
1903 | rows_new[row] = ps->rows[row]; | |
1904 | ps->rows[row] = NULL; | |
1905 | for (crr_insn = rows_new[row]; | |
1906 | crr_insn; crr_insn = crr_insn->next_in_row) | |
1907 | { | |
1908 | ddg_node_ptr u = crr_insn->node; | |
1909 | int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii); | |
1910 | ||
1911 | SCHED_TIME (u) = new_time; | |
1912 | crr_insn->cycle = new_time; | |
1913 | SCHED_ROW (u) = new_time % new_ii; | |
1914 | SCHED_STAGE (u) = new_time / new_ii; | |
1915 | } | |
1916 | ||
1917 | } | |
1918 | ||
1919 | rows_new[split_row] = NULL; | |
1920 | ||
1921 | for (row = split_row; row < ii; row++) | |
1922 | { | |
1923 | rows_new[row + 1] = ps->rows[row]; | |
1924 | ps->rows[row] = NULL; | |
1925 | for (crr_insn = rows_new[row + 1]; | |
1926 | crr_insn; crr_insn = crr_insn->next_in_row) | |
1927 | { | |
1928 | ddg_node_ptr u = crr_insn->node; | |
1929 | int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1; | |
1930 | ||
1931 | SCHED_TIME (u) = new_time; | |
1932 | crr_insn->cycle = new_time; | |
1933 | SCHED_ROW (u) = new_time % new_ii; | |
1934 | SCHED_STAGE (u) = new_time / new_ii; | |
1935 | } | |
1936 | } | |
1937 | ||
1938 | /* Updating ps. */ | |
1939 | ps->min_cycle = ps->min_cycle + ps->min_cycle / ii | |
1940 | + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0); | |
1941 | ps->max_cycle = ps->max_cycle + ps->max_cycle / ii | |
1942 | + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0); | |
1943 | free (ps->rows); | |
1944 | ps->rows = rows_new; | |
1945 | ps->ii = new_ii; | |
1946 | gcc_assert (ps->min_cycle >= 0); | |
1947 | ||
1948 | verify_partial_schedule (ps, sched_nodes); | |
1949 | ||
1950 | if (dump_file) | |
1951 | fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle, | |
1952 | ps->max_cycle); | |
1953 | } | |
1954 | ||
1955 | /* Given U_NODE which is the node that failed to be scheduled; LOW and | |
1956 | UP which are the boundaries of it's scheduling window; compute using | |
84fbffb2 | 1957 | SCHED_NODES and II a row in the partial schedule that can be split |
c8943832 VY |
1958 | which will separate a critical predecessor from a critical successor |
1959 | thereby expanding the window, and return it. */ | |
1960 | static int | |
1961 | compute_split_row (sbitmap sched_nodes, int low, int up, int ii, | |
1962 | ddg_node_ptr u_node) | |
1963 | { | |
1964 | ddg_edge_ptr e; | |
1965 | int lower = INT_MIN, upper = INT_MAX; | |
1966 | ddg_node_ptr crit_pred = NULL; | |
1967 | ddg_node_ptr crit_succ = NULL; | |
1968 | int crit_cycle; | |
1969 | ||
1970 | for (e = u_node->in; e != 0; e = e->next_in) | |
1971 | { | |
1972 | ddg_node_ptr v_node = e->src; | |
1973 | ||
1974 | if (TEST_BIT (sched_nodes, v_node->cuid) | |
1975 | && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii))) | |
1976 | if (SCHED_TIME (v_node) > lower) | |
1977 | { | |
1978 | crit_pred = v_node; | |
1979 | lower = SCHED_TIME (v_node); | |
1980 | } | |
1981 | } | |
1982 | ||
1983 | if (crit_pred != NULL) | |
1984 | { | |
1985 | crit_cycle = SCHED_TIME (crit_pred) + 1; | |
1986 | return SMODULO (crit_cycle, ii); | |
1987 | } | |
1988 | ||
1989 | for (e = u_node->out; e != 0; e = e->next_out) | |
1990 | { | |
1991 | ddg_node_ptr v_node = e->dest; | |
1992 | if (TEST_BIT (sched_nodes, v_node->cuid) | |
1993 | && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii))) | |
1994 | if (SCHED_TIME (v_node) < upper) | |
1995 | { | |
1996 | crit_succ = v_node; | |
1997 | upper = SCHED_TIME (v_node); | |
1998 | } | |
1999 | } | |
2000 | ||
2001 | if (crit_succ != NULL) | |
2002 | { | |
2003 | crit_cycle = SCHED_TIME (crit_succ); | |
2004 | return SMODULO (crit_cycle, ii); | |
2005 | } | |
2006 | ||
2007 | if (dump_file) | |
2008 | fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n"); | |
2009 | ||
2010 | return SMODULO ((low + up + 1) / 2, ii); | |
2011 | } | |
2012 | ||
2013 | static void | |
2014 | verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes) | |
2015 | { | |
2016 | int row; | |
2017 | ps_insn_ptr crr_insn; | |
2018 | ||
2019 | for (row = 0; row < ps->ii; row++) | |
2020 | for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) | |
2021 | { | |
2022 | ddg_node_ptr u = crr_insn->node; | |
2023 | ||
2024 | gcc_assert (TEST_BIT (sched_nodes, u->cuid)); | |
2025 | /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by | |
2026 | popcount (sched_nodes) == number of insns in ps. */ | |
2027 | gcc_assert (SCHED_TIME (u) >= ps->min_cycle); | |
2028 | gcc_assert (SCHED_TIME (u) <= ps->max_cycle); | |
2029 | } | |
2030 | } | |
2031 | ||
d397e8c6 MH |
2032 | \f |
2033 | /* This page implements the algorithm for ordering the nodes of a DDG | |
2034 | for modulo scheduling, activated through the | |
2035 | "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */ | |
2036 | ||
2037 | #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info) | |
2038 | #define ASAP(x) (ORDER_PARAMS ((x))->asap) | |
2039 | #define ALAP(x) (ORDER_PARAMS ((x))->alap) | |
2040 | #define HEIGHT(x) (ORDER_PARAMS ((x))->height) | |
2041 | #define MOB(x) (ALAP ((x)) - ASAP ((x))) | |
2042 | #define DEPTH(x) (ASAP ((x))) | |
2043 | ||
2044 | typedef struct node_order_params * nopa; | |
2045 | ||
2046 | static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result); | |
2047 | static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int); | |
5cd53742 | 2048 | static nopa calculate_order_params (ddg_ptr, int, int *); |
d397e8c6 MH |
2049 | static int find_max_asap (ddg_ptr, sbitmap); |
2050 | static int find_max_hv_min_mob (ddg_ptr, sbitmap); | |
2051 | static int find_max_dv_min_mob (ddg_ptr, sbitmap); | |
2052 | ||
2053 | enum sms_direction {BOTTOMUP, TOPDOWN}; | |
2054 | ||
2055 | struct node_order_params | |
2056 | { | |
2057 | int asap; | |
2058 | int alap; | |
2059 | int height; | |
2060 | }; | |
2061 | ||
2062 | /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */ | |
2063 | static void | |
2064 | check_nodes_order (int *node_order, int num_nodes) | |
2065 | { | |
2066 | int i; | |
2067 | sbitmap tmp = sbitmap_alloc (num_nodes); | |
2068 | ||
2069 | sbitmap_zero (tmp); | |
2070 | ||
05223046 RE |
2071 | if (dump_file) |
2072 | fprintf (dump_file, "SMS final nodes order: \n"); | |
2073 | ||
d397e8c6 MH |
2074 | for (i = 0; i < num_nodes; i++) |
2075 | { | |
2076 | int u = node_order[i]; | |
2077 | ||
05223046 RE |
2078 | if (dump_file) |
2079 | fprintf (dump_file, "%d ", u); | |
41806d92 | 2080 | gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u)); |
d397e8c6 MH |
2081 | |
2082 | SET_BIT (tmp, u); | |
2083 | } | |
b8698a0f | 2084 | |
05223046 RE |
2085 | if (dump_file) |
2086 | fprintf (dump_file, "\n"); | |
b8698a0f | 2087 | |
d397e8c6 MH |
2088 | sbitmap_free (tmp); |
2089 | } | |
2090 | ||
2091 | /* Order the nodes of G for scheduling and pass the result in | |
2092 | NODE_ORDER. Also set aux.count of each node to ASAP. | |
5cd53742 | 2093 | Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */ |
d397e8c6 | 2094 | static int |
5cd53742 | 2095 | sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap) |
d397e8c6 MH |
2096 | { |
2097 | int i; | |
2098 | int rec_mii = 0; | |
2099 | ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g); | |
2100 | ||
5cd53742 | 2101 | nopa nops = calculate_order_params (g, mii, pmax_asap); |
d397e8c6 | 2102 | |
8cec1624 RE |
2103 | if (dump_file) |
2104 | print_sccs (dump_file, sccs, g); | |
2105 | ||
d397e8c6 MH |
2106 | order_nodes_of_sccs (sccs, node_order); |
2107 | ||
2108 | if (sccs->num_sccs > 0) | |
2109 | /* First SCC has the largest recurrence_length. */ | |
2110 | rec_mii = sccs->sccs[0]->recurrence_length; | |
2111 | ||
2112 | /* Save ASAP before destroying node_order_params. */ | |
2113 | for (i = 0; i < g->num_nodes; i++) | |
2114 | { | |
2115 | ddg_node_ptr v = &g->nodes[i]; | |
2116 | v->aux.count = ASAP (v); | |
2117 | } | |
2118 | ||
2119 | free (nops); | |
2120 | free_ddg_all_sccs (sccs); | |
2121 | check_nodes_order (node_order, g->num_nodes); | |
2122 | ||
2123 | return rec_mii; | |
2124 | } | |
2125 | ||
/* Order all nodes of the DDG held by ALL_SCCS into NODE_ORDER,
   processing SCCs from highest to lowest recMII and finally any
   nodes outside every SCC.  */
static void
order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
{
  int i, pos = 0;
  ddg_ptr g = all_sccs->ddg;
  int num_nodes = g->num_nodes;
  sbitmap prev_sccs = sbitmap_alloc (num_nodes);
  sbitmap on_path = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap ones = sbitmap_alloc (num_nodes);

  sbitmap_zero (prev_sccs);
  sbitmap_ones (ones);

  /* Perform the node ordering starting from the SCC with the highest recMII.
     For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
  for (i = 0; i < all_sccs->num_sccs; i++)
    {
      ddg_scc_ptr scc = all_sccs->sccs[i];

      /* Add nodes on paths from previous SCCs to the current SCC.  */
      find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
      sbitmap_a_or_b (tmp, scc->nodes, on_path);

      /* Add nodes on paths from the current SCC to previous SCCs.  */
      find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
      sbitmap_a_or_b (tmp, tmp, on_path);

      /* Remove nodes of previous SCCs from current extended SCC.  */
      sbitmap_difference (tmp, tmp, prev_sccs);

      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
      /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
    }

  /* Handle the remaining nodes that do not belong to any scc.  Each call
     to order_nodes_in_scc handles a single connected component.  */
  while (pos < g->num_nodes)
    {
      sbitmap_difference (tmp, ones, prev_sccs);
      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
    }
  sbitmap_free (prev_sccs);
  sbitmap_free (on_path);
  sbitmap_free (tmp);
  sbitmap_free (ones);
}
2173 | ||
/* MII is needed if we consider backarcs (that do not close recursive cycles).
   Compute ASAP/ALAP/HEIGHT for every node of G and return the freshly
   allocated parameter array; store the maximal ASAP in *PMAX_ASAP.  */
static struct node_order_params *
calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
{
  int u;
  int max_asap;
  int num_nodes = g->num_nodes;
  ddg_edge_ptr e;
  /* Allocate a place to hold ordering params for each node in the DDG.  */
  nopa node_order_params_arr;

  /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
  node_order_params_arr = (nopa) xcalloc (num_nodes,
					  sizeof (struct node_order_params));

  /* Set the aux pointer of each node to point to its order_params structure.  */
  for (u = 0; u < num_nodes; u++)
    g->nodes[u].aux.info = &node_order_params_arr[u];

  /* Disregarding a backarc from each recursive cycle to obtain a DAG,
     calculate ASAP, ALAP, mobility, distance, and height for each node
     in the dependence (direct acyclic) graph.  */

  /* We assume that the nodes in the array are in topological order.  */

  /* Forward pass in topological order: ASAP of a node is the maximum
     over its intra-iteration (distance == 0) predecessors.  */
  max_asap = 0;
  for (u = 0; u < num_nodes; u++)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ASAP (u_node) = 0;
      for (e = u_node->in; e; e = e->next_in)
	if (e->distance == 0)
	  ASAP (u_node) = MAX (ASAP (u_node),
			       ASAP (e->src) + e->latency);
      max_asap = MAX (max_asap, ASAP (u_node));
    }

  /* Backward pass in reverse topological order: ALAP and HEIGHT come
     from the intra-iteration successors.  */
  for (u = num_nodes - 1; u > -1; u--)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ALAP (u_node) = max_asap;
      HEIGHT (u_node) = 0;
      for (e = u_node->out; e; e = e->next_out)
	if (e->distance == 0)
	  {
	    ALAP (u_node) = MIN (ALAP (u_node),
				 ALAP (e->dest) - e->latency);
	    HEIGHT (u_node) = MAX (HEIGHT (u_node),
				   HEIGHT (e->dest) + e->latency);
	  }
    }
  if (dump_file)
    {
      fprintf (dump_file, "\nOrder params\n");
      for (u = 0; u < num_nodes; u++)
	{
	  ddg_node_ptr u_node = &g->nodes[u];

	  fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
		   ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
	}
    }

  *pmax_asap = max_asap;
  return node_order_params_arr;
}
2242 | ||
2243 | static int | |
2244 | find_max_asap (ddg_ptr g, sbitmap nodes) | |
2245 | { | |
dfea6c85 | 2246 | unsigned int u = 0; |
d397e8c6 MH |
2247 | int max_asap = -1; |
2248 | int result = -1; | |
b6e7e9af | 2249 | sbitmap_iterator sbi; |
d397e8c6 | 2250 | |
b6e7e9af | 2251 | EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi) |
d397e8c6 MH |
2252 | { |
2253 | ddg_node_ptr u_node = &g->nodes[u]; | |
2254 | ||
2255 | if (max_asap < ASAP (u_node)) | |
2256 | { | |
2257 | max_asap = ASAP (u_node); | |
2258 | result = u; | |
2259 | } | |
b6e7e9af | 2260 | } |
d397e8c6 MH |
2261 | return result; |
2262 | } | |
2263 | ||
2264 | static int | |
2265 | find_max_hv_min_mob (ddg_ptr g, sbitmap nodes) | |
2266 | { | |
dfea6c85 | 2267 | unsigned int u = 0; |
d397e8c6 MH |
2268 | int max_hv = -1; |
2269 | int min_mob = INT_MAX; | |
2270 | int result = -1; | |
b6e7e9af | 2271 | sbitmap_iterator sbi; |
d397e8c6 | 2272 | |
b6e7e9af | 2273 | EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi) |
d397e8c6 MH |
2274 | { |
2275 | ddg_node_ptr u_node = &g->nodes[u]; | |
2276 | ||
2277 | if (max_hv < HEIGHT (u_node)) | |
2278 | { | |
2279 | max_hv = HEIGHT (u_node); | |
2280 | min_mob = MOB (u_node); | |
2281 | result = u; | |
2282 | } | |
2283 | else if ((max_hv == HEIGHT (u_node)) | |
2284 | && (min_mob > MOB (u_node))) | |
2285 | { | |
2286 | min_mob = MOB (u_node); | |
2287 | result = u; | |
2288 | } | |
b6e7e9af | 2289 | } |
d397e8c6 MH |
2290 | return result; |
2291 | } | |
2292 | ||
2293 | static int | |
2294 | find_max_dv_min_mob (ddg_ptr g, sbitmap nodes) | |
2295 | { | |
dfea6c85 | 2296 | unsigned int u = 0; |
d397e8c6 MH |
2297 | int max_dv = -1; |
2298 | int min_mob = INT_MAX; | |
2299 | int result = -1; | |
b6e7e9af | 2300 | sbitmap_iterator sbi; |
d397e8c6 | 2301 | |
b6e7e9af | 2302 | EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi) |
d397e8c6 MH |
2303 | { |
2304 | ddg_node_ptr u_node = &g->nodes[u]; | |
2305 | ||
2306 | if (max_dv < DEPTH (u_node)) | |
2307 | { | |
2308 | max_dv = DEPTH (u_node); | |
2309 | min_mob = MOB (u_node); | |
2310 | result = u; | |
2311 | } | |
2312 | else if ((max_dv == DEPTH (u_node)) | |
2313 | && (min_mob > MOB (u_node))) | |
2314 | { | |
2315 | min_mob = MOB (u_node); | |
2316 | result = u; | |
2317 | } | |
b6e7e9af | 2318 | } |
d397e8c6 MH |
2319 | return result; |
2320 | } | |
2321 | ||
/* Places the nodes of SCC into the NODE_ORDER array starting
   at position POS, according to the SMS ordering algorithm.
   NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
   the NODE_ORDER array, starting from position zero.  */
static int
order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
		    int * node_order, int pos)
{
  enum sms_direction dir;
  int num_nodes = g->num_nodes;
  sbitmap workset = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
  sbitmap predecessors = sbitmap_alloc (num_nodes);
  sbitmap successors = sbitmap_alloc (num_nodes);

  sbitmap_zero (predecessors);
  find_predecessors (predecessors, g, nodes_ordered);

  sbitmap_zero (successors);
  find_successors (successors, g, nodes_ordered);

  /* Seed the workset: prefer predecessors of already-ordered nodes
     (bottom-up), then successors (top-down); if neither intersects SCC,
     start from the node with the greatest ASAP.  */
  sbitmap_zero (tmp);
  if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = BOTTOMUP;
    }
  else if (sbitmap_a_and_b_cg (tmp, successors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = TOPDOWN;
    }
  else
    {
      int u;

      sbitmap_zero (workset);
      if ((u = find_max_asap (g, scc)) >= 0)
	SET_BIT (workset, u);
      dir = BOTTOMUP;
    }

  /* Drain the workset, alternating direction whenever it empties, until
     no new nodes are reachable.  */
  sbitmap_zero (zero_bitmap);
  while (!sbitmap_equal (workset, zero_bitmap))
    {
      int v;
      ddg_node_ptr v_node;
      sbitmap v_node_preds;
      sbitmap v_node_succs;

      if (dir == TOPDOWN)
	{
	  /* Top-down: repeatedly take the workset node with maximal
	     height (min mobility as tie-break) and enqueue its
	     unordered successors inside SCC.  */
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      v = find_max_hv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_succs = NODE_SUCCESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_succs, scc);

	      /* Don't consider the already ordered successors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  dir = BOTTOMUP;
	  sbitmap_zero (predecessors);
	  find_predecessors (predecessors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, predecessors, scc);
	}
      else
	{
	  /* Bottom-up: symmetric, using depth instead of height and
	     walking predecessors.  */
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      v = find_max_dv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_preds = NODE_PREDECESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_preds, scc);

	      /* Don't consider the already ordered predecessors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  dir = TOPDOWN;
	  sbitmap_zero (successors);
	  find_successors (successors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, successors, scc);
	}
    }
  sbitmap_free (tmp);
  sbitmap_free (workset);
  sbitmap_free (zero_bitmap);
  sbitmap_free (predecessors);
  sbitmap_free (successors);
  return pos;
}
2423 | ||
2424 | \f | |
2425 | /* This page contains functions for manipulating partial-schedules during | |
2426 | modulo scheduling. */ | |
2427 | ||
2428 | /* Create a partial schedule and allocate a memory to hold II rows. */ | |
5f1f4746 KH |
2429 | |
2430 | static partial_schedule_ptr | |
d397e8c6 MH |
2431 | create_partial_schedule (int ii, ddg_ptr g, int history) |
2432 | { | |
5ed6ace5 | 2433 | partial_schedule_ptr ps = XNEW (struct partial_schedule); |
d397e8c6 MH |
2434 | ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr)); |
2435 | ps->ii = ii; | |
2436 | ps->history = history; | |
2437 | ps->min_cycle = INT_MAX; | |
2438 | ps->max_cycle = INT_MIN; | |
2439 | ps->g = g; | |
2440 | ||
2441 | return ps; | |
2442 | } | |
2443 | ||
2444 | /* Free the PS_INSNs in rows array of the given partial schedule. | |
2445 | ??? Consider caching the PS_INSN's. */ | |
2446 | static void | |
2447 | free_ps_insns (partial_schedule_ptr ps) | |
2448 | { | |
2449 | int i; | |
2450 | ||
2451 | for (i = 0; i < ps->ii; i++) | |
2452 | { | |
2453 | while (ps->rows[i]) | |
2454 | { | |
2455 | ps_insn_ptr ps_insn = ps->rows[i]->next_in_row; | |
2456 | ||
2457 | free (ps->rows[i]); | |
2458 | ps->rows[i] = ps_insn; | |
2459 | } | |
2460 | ps->rows[i] = NULL; | |
2461 | } | |
2462 | } | |
2463 | ||
2464 | /* Free all the memory allocated to the partial schedule. */ | |
5f1f4746 KH |
2465 | |
2466 | static void | |
d397e8c6 MH |
2467 | free_partial_schedule (partial_schedule_ptr ps) |
2468 | { | |
2469 | if (!ps) | |
2470 | return; | |
2471 | free_ps_insns (ps); | |
2472 | free (ps->rows); | |
2473 | free (ps); | |
2474 | } | |
2475 | ||
2476 | /* Clear the rows array with its PS_INSNs, and create a new one with | |
2477 | NEW_II rows. */ | |
5f1f4746 KH |
2478 | |
2479 | static void | |
d397e8c6 MH |
2480 | reset_partial_schedule (partial_schedule_ptr ps, int new_ii) |
2481 | { | |
2482 | if (!ps) | |
2483 | return; | |
2484 | free_ps_insns (ps); | |
2485 | if (new_ii == ps->ii) | |
2486 | return; | |
2487 | ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii | |
2488 | * sizeof (ps_insn_ptr)); | |
2489 | memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr)); | |
2490 | ps->ii = new_ii; | |
2491 | ps->min_cycle = INT_MAX; | |
2492 | ps->max_cycle = INT_MIN; | |
2493 | } | |
2494 | ||
2495 | /* Prints the partial schedule as an ii rows array, for each rows | |
2496 | print the ids of the insns in it. */ | |
2497 | void | |
2498 | print_partial_schedule (partial_schedule_ptr ps, FILE *dump) | |
2499 | { | |
2500 | int i; | |
2501 | ||
2502 | for (i = 0; i < ps->ii; i++) | |
2503 | { | |
2504 | ps_insn_ptr ps_i = ps->rows[i]; | |
2505 | ||
de82c453 | 2506 | fprintf (dump, "\n[ROW %d ]: ", i); |
d397e8c6 MH |
2507 | while (ps_i) |
2508 | { | |
2509 | fprintf (dump, "%d, ", | |
2510 | INSN_UID (ps_i->node->insn)); | |
2511 | ps_i = ps_i->next_in_row; | |
2512 | } | |
2513 | } | |
2514 | } | |
2515 | ||
2516 | /* Creates an object of PS_INSN and initializes it to the given parameters. */ | |
2517 | static ps_insn_ptr | |
2518 | create_ps_insn (ddg_node_ptr node, int rest_count, int cycle) | |
2519 | { | |
5ed6ace5 | 2520 | ps_insn_ptr ps_i = XNEW (struct ps_insn); |
d397e8c6 MH |
2521 | |
2522 | ps_i->node = node; | |
2523 | ps_i->next_in_row = NULL; | |
2524 | ps_i->prev_in_row = NULL; | |
2525 | ps_i->row_rest_count = rest_count; | |
2526 | ps_i->cycle = cycle; | |
2527 | ||
2528 | return ps_i; | |
2529 | } | |
2530 | ||
2531 | ||
2532 | /* Removes the given PS_INSN from the partial schedule. Returns false if the | |
2533 | node is not found in the partial schedule, else returns true. */ | |
f73d5666 | 2534 | static bool |
d397e8c6 MH |
2535 | remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i) |
2536 | { | |
2537 | int row; | |
2538 | ||
2539 | if (!ps || !ps_i) | |
2540 | return false; | |
2541 | ||
2542 | row = SMODULO (ps_i->cycle, ps->ii); | |
2543 | if (! ps_i->prev_in_row) | |
2544 | { | |
2545 | if (ps_i != ps->rows[row]) | |
2546 | return false; | |
2547 | ||
2548 | ps->rows[row] = ps_i->next_in_row; | |
2549 | if (ps->rows[row]) | |
2550 | ps->rows[row]->prev_in_row = NULL; | |
2551 | } | |
2552 | else | |
2553 | { | |
2554 | ps_i->prev_in_row->next_in_row = ps_i->next_in_row; | |
2555 | if (ps_i->next_in_row) | |
2556 | ps_i->next_in_row->prev_in_row = ps_i->prev_in_row; | |
2557 | } | |
2558 | free (ps_i); | |
2559 | return true; | |
2560 | } | |
2561 | ||
d72372e4 MH |
/* Unlike what literature describes for modulo scheduling (which focuses
   on VLIW machines) the order of the instructions inside a cycle is
   important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
   where the current instruction should go relative to the already
   scheduled instructions in the given cycle.  Go over these
   instructions and find the first possible column to put it in.  */
static bool
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
		     sbitmap must_precede, sbitmap must_follow)
{
  ps_insn_ptr next_ps_i;
  ps_insn_ptr first_must_follow = NULL;
  ps_insn_ptr last_must_precede = NULL;
  int row;

  if (! ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Find the first must follow and the last must precede
     and insert the node immediately after the must precede
     but make sure that there is no must follow after it.  */
  for (next_ps_i = ps->rows[row];
       next_ps_i;
       next_ps_i = next_ps_i->next_in_row)
    {
      /* Remember only the FIRST insn in the row that PS_I must
	 come before.  */
      if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid)
	  && ! first_must_follow)
	first_must_follow = next_ps_i;
      if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid))
	{
	  /* If we have already met a node that must follow, then
	     there is no possible column.  */
	  if (first_must_follow)
	    return false;
	  else
	    last_must_precede = next_ps_i;
	}
    }

  /* Now insert the node after INSERT_AFTER_PSI.  */

  if (! last_must_precede)
    {
      /* Nothing needs to precede PS_I: link it in at the head of
	 the row.  */
      ps_i->next_in_row = ps->rows[row];
      ps_i->prev_in_row = NULL;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
      ps->rows[row] = ps_i;
    }
  else
    {
      /* Splice PS_I into the doubly linked row immediately after the
	 last insn it must follow.  */
      ps_i->next_in_row = last_must_precede->next_in_row;
      last_must_precede->next_in_row = ps_i;
      ps_i->prev_in_row = last_must_precede;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
    }

  return true;
}
2624 | ||
/* Advances the PS_INSN one column in its current row; returns false
   in failure and true in success.  Bit N is set in MUST_FOLLOW if
   the node with cuid N must be come after the node pointed to by
   PS_I when scheduled in the same cycle.  */
static int
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
			sbitmap must_follow)
{
  ps_insn_ptr prev, next;
  int row;
  ddg_node_ptr next_node;

  if (!ps || !ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* PS_I is already the last insn in its row; there is no column to
     advance into.  */
  if (! ps_i->next_in_row)
    return false;

  next_node = ps_i->next_in_row->node;

  /* Check if next_in_row is dependent on ps_i, both having same sched
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
  if (must_follow && TEST_BIT (must_follow, next_node->cuid))
    return false;

  /* Advance PS_I over its next_in_row in the doubly linked list.
     The swap rewires up to five links: PREV <-> NEXT <-> PS_I.  */
  prev = ps_i->prev_in_row;
  next = ps_i->next_in_row;

  /* If PS_I headed the row, NEXT becomes the new head.  */
  if (ps_i == ps->rows[row])
    ps->rows[row] = next;

  ps_i->next_in_row = next->next_in_row;

  if (next->next_in_row)
    next->next_in_row->prev_in_row = ps_i;

  next->next_in_row = ps_i;
  ps_i->prev_in_row = next;

  next->prev_in_row = prev;
  if (prev)
    prev->next_in_row = next;

  return true;
}
2673 | ||
2674 | /* Inserts a DDG_NODE to the given partial schedule at the given cycle. | |
b8698a0f L |
2675 | Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is |
2676 | set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come | |
2677 | before/after (respectively) the node pointed to by PS_I when scheduled | |
d72372e4 | 2678 | in the same cycle. */ |
d397e8c6 | 2679 | static ps_insn_ptr |
d72372e4 MH |
2680 | add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle, |
2681 | sbitmap must_precede, sbitmap must_follow) | |
d397e8c6 | 2682 | { |
d72372e4 | 2683 | ps_insn_ptr ps_i; |
d397e8c6 MH |
2684 | int rest_count = 1; |
2685 | int row = SMODULO (cycle, ps->ii); | |
d397e8c6 MH |
2686 | |
2687 | if (ps->rows[row] | |
2688 | && ps->rows[row]->row_rest_count >= issue_rate) | |
2689 | return NULL; | |
2690 | ||
2691 | if (ps->rows[row]) | |
2692 | rest_count += ps->rows[row]->row_rest_count; | |
2693 | ||
2694 | ps_i = create_ps_insn (node, rest_count, cycle); | |
d72372e4 MH |
2695 | |
2696 | /* Finds and inserts PS_I according to MUST_FOLLOW and | |
2697 | MUST_PRECEDE. */ | |
2698 | if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow)) | |
2699 | { | |
2700 | free (ps_i); | |
2701 | return NULL; | |
2702 | } | |
d397e8c6 MH |
2703 | |
2704 | return ps_i; | |
2705 | } | |
2706 | ||
2707 | /* Advance time one cycle. Assumes DFA is being used. */ | |
2708 | static void | |
2709 | advance_one_cycle (void) | |
2710 | { | |
fa0aee89 PB |
2711 | if (targetm.sched.dfa_pre_cycle_insn) |
2712 | state_transition (curr_state, | |
1c91de89 | 2713 | targetm.sched.dfa_pre_cycle_insn ()); |
d397e8c6 | 2714 | |
fa0aee89 | 2715 | state_transition (curr_state, NULL); |
d397e8c6 | 2716 | |
fa0aee89 PB |
2717 | if (targetm.sched.dfa_post_cycle_insn) |
2718 | state_transition (curr_state, | |
1c91de89 | 2719 | targetm.sched.dfa_post_cycle_insn ()); |
d397e8c6 MH |
2720 | } |
2721 | ||
f73d5666 | 2722 | |
f73d5666 | 2723 | |
d397e8c6 MH |
/* Checks if PS has resource conflicts according to DFA, starting from
   FROM cycle to TO cycle; returns true if there are conflicts and false
   if there are no conflicts.  Assumes DFA is being used.  */
static int
ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
{
  int cycle;

  /* Replay the whole cycle window through a fresh DFA state.  */
  state_reset (curr_state);

  for (cycle = from; cycle <= to; cycle++)
    {
      ps_insn_ptr crr_insn;
      /* Holds the remaining issue slots in the current row.  */
      int can_issue_more = issue_rate;

      /* Walk through the DFA for the current row.  */
      for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
	   crr_insn;
	   crr_insn = crr_insn->next_in_row)
	{
	  rtx insn = crr_insn->node->insn;

	  /* Debug insns and notes consume no resources.  */
	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  /* Check if there is room for the current insn.  */
	  if (!can_issue_more || state_dead_lock_p (curr_state))
	    return true;

	  /* Update the DFA state and return with failure if the DFA found
	     resource conflicts.  */
	  if (state_transition (curr_state, insn) >= 0)
	    return true;

	  if (targetm.sched.variable_issue)
	    can_issue_more =
	      targetm.sched.variable_issue (sched_dump, sched_verbose,
					    insn, can_issue_more);
	  /* A naked CLOBBER or USE generates no instruction, so don't
	     let them consume issue slots.  */
	  else if (GET_CODE (PATTERN (insn)) != USE
		   && GET_CODE (PATTERN (insn)) != CLOBBER)
	    can_issue_more--;
	}

      /* Advance the DFA to the next cycle.  */
      advance_one_cycle ();
    }
  return false;
}
2775 | ||
2776 | /* Checks if the given node causes resource conflicts when added to PS at | |
2777 | cycle C. If not the node is added to PS and returned; otherwise zero | |
b8698a0f L |
2778 | is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with |
2779 | cuid N must be come before/after (respectively) the node pointed to by | |
d72372e4 | 2780 | PS_I when scheduled in the same cycle. */ |
f73d5666 | 2781 | ps_insn_ptr |
d72372e4 MH |
2782 | ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, |
2783 | int c, sbitmap must_precede, | |
2784 | sbitmap must_follow) | |
d397e8c6 MH |
2785 | { |
2786 | int has_conflicts = 0; | |
2787 | ps_insn_ptr ps_i; | |
2788 | ||
d72372e4 MH |
2789 | /* First add the node to the PS, if this succeeds check for |
2790 | conflicts, trying different issue slots in the same row. */ | |
2791 | if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow))) | |
d397e8c6 MH |
2792 | return NULL; /* Failed to insert the node at the given cycle. */ |
2793 | ||
2794 | has_conflicts = ps_has_conflicts (ps, c, c) | |
2795 | || (ps->history > 0 | |
2796 | && ps_has_conflicts (ps, | |
2797 | c - ps->history, | |
2798 | c + ps->history)); | |
2799 | ||
2800 | /* Try different issue slots to find one that the given node can be | |
2801 | scheduled in without conflicts. */ | |
2802 | while (has_conflicts) | |
2803 | { | |
d72372e4 | 2804 | if (! ps_insn_advance_column (ps, ps_i, must_follow)) |
d397e8c6 MH |
2805 | break; |
2806 | has_conflicts = ps_has_conflicts (ps, c, c) | |
2807 | || (ps->history > 0 | |
2808 | && ps_has_conflicts (ps, | |
2809 | c - ps->history, | |
2810 | c + ps->history)); | |
2811 | } | |
2812 | ||
2813 | if (has_conflicts) | |
2814 | { | |
2815 | remove_node_from_ps (ps, ps_i); | |
2816 | return NULL; | |
2817 | } | |
2818 | ||
2819 | ps->min_cycle = MIN (ps->min_cycle, c); | |
2820 | ps->max_cycle = MAX (ps->max_cycle, c); | |
2821 | return ps_i; | |
2822 | } | |
2823 | ||
2824 | /* Rotate the rows of PS such that insns scheduled at time | |
2825 | START_CYCLE will appear in row 0. Updates max/min_cycles. */ | |
f73d5666 | 2826 | void |
d397e8c6 MH |
2827 | rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle) |
2828 | { | |
2829 | int i, row, backward_rotates; | |
2830 | int last_row = ps->ii - 1; | |
2831 | ||
2832 | if (start_cycle == 0) | |
2833 | return; | |
2834 | ||
2835 | backward_rotates = SMODULO (start_cycle, ps->ii); | |
2836 | ||
2837 | /* Revisit later and optimize this into a single loop. */ | |
2838 | for (i = 0; i < backward_rotates; i++) | |
2839 | { | |
2840 | ps_insn_ptr first_row = ps->rows[0]; | |
2841 | ||
2842 | for (row = 0; row < last_row; row++) | |
2843 | ps->rows[row] = ps->rows[row+1]; | |
2844 | ||
2845 | ps->rows[last_row] = first_row; | |
2846 | } | |
2847 | ||
2848 | ps->max_cycle -= start_cycle; | |
2849 | ps->min_cycle -= start_cycle; | |
2850 | } | |
d7777192 | 2851 | |
ef330312 PB |
2852 | #endif /* INSN_SCHEDULING */ |
2853 | \f | |
2854 | static bool | |
2855 | gate_handle_sms (void) | |
2856 | { | |
2857 | return (optimize > 0 && flag_modulo_sched); | |
2858 | } | |
2859 | ||
2860 | ||
/* Run instruction scheduler.  */
/* Perform SMS module scheduling.  Entire body is compiled away when
   the target provides no scheduling description.  */
static unsigned int
rest_of_handle_sms (void)
{
#ifdef INSN_SCHEDULING
  basic_block bb;

  /* Collect loop information to be used in SMS.  */
  cfg_layout_initialize (0);
  sms_schedule ();

  /* Update the life information, because we add pseudos.  */
  max_regno = max_reg_num ();

  /* Finalize layout changes.  Re-chain the blocks through their aux
     fields so cfg_layout_finalize keeps the current order.  */
  FOR_EACH_BB (bb)
    if (bb->next_bb != EXIT_BLOCK_PTR)
      bb->aux = bb->next_bb;
  free_dominance_info (CDI_DOMINATORS);
  cfg_layout_finalize ();
#endif /* INSN_SCHEDULING */
  /* Pass execute functions return additional TODO flags; none here.  */
  return 0;
}
2885 | ||
/* Pass descriptor for swing modulo scheduling, registered with the
   RTL pass manager.  Gated by gate_handle_sms (-fmodulo-sched).  */
struct rtl_opt_pass pass_sms =
{
 {
  RTL_PASS,
  "sms",                                /* name */
  gate_handle_sms,                      /* gate */
  rest_of_handle_sms,                   /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_SMS,                               /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  TODO_dump_func,                       /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func |
  TODO_ggc_collect                      /* todo_flags_finish */
 }
};
f73d5666 | 2906 |