]>
Commit | Line | Data |
---|---|---|
b2b40051 | 1 | /* HSAIL IL Register allocation and out-of-SSA. |
8d9254fc | 2 | Copyright (C) 2013-2020 Free Software Foundation, Inc. |
b2b40051 MJ |
3 | Contributed by Michael Matz <matz@suse.de> |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
24 | #include "tm.h" | |
25 | #include "is-a.h" | |
26 | #include "vec.h" | |
27 | #include "tree.h" | |
28 | #include "dominance.h" | |
3995f3a2 | 29 | #include "basic-block.h" |
b2b40051 | 30 | #include "function.h" |
e144a2b3 RB |
31 | #include "cfganal.h" |
32 | #include "cfg.h" | |
b2b40051 MJ |
33 | #include "bitmap.h" |
34 | #include "dumpfile.h" | |
35 | #include "cgraph.h" | |
36 | #include "print-tree.h" | |
37 | #include "cfghooks.h" | |
a895e6d7 | 38 | #include "alloc-pool.h" |
b2b40051 | 39 | #include "symbol-summary.h" |
13293add | 40 | #include "hsa-common.h" |
b2b40051 MJ |
41 | |
42 | ||
/* Process a PHI node PHI of basic block BB as a part of naive out-of-ssa. */
44 | ||
45 | static void | |
635c99aa | 46 | naive_process_phi (hsa_insn_phi *phi, const vec<edge> &predecessors) |
b2b40051 MJ |
47 | { |
48 | unsigned count = phi->operand_count (); | |
49 | for (unsigned i = 0; i < count; i++) | |
50 | { | |
51 | gcc_checking_assert (phi->get_op (i)); | |
52 | hsa_op_base *op = phi->get_op (i); | |
53 | hsa_bb *hbb; | |
54 | edge e; | |
55 | ||
56 | if (!op) | |
57 | break; | |
58 | ||
635c99aa | 59 | e = predecessors[i]; |
b2b40051 MJ |
60 | if (single_succ_p (e->src)) |
61 | hbb = hsa_bb_for_bb (e->src); | |
62 | else | |
63 | { | |
64 | basic_block old_dest = e->dest; | |
65 | hbb = hsa_init_new_bb (split_edge (e)); | |
66 | ||
67 | /* If switch insn used this edge, fix jump table. */ | |
68 | hsa_bb *source = hsa_bb_for_bb (e->src); | |
69 | hsa_insn_sbr *sbr; | |
70 | if (source->m_last_insn | |
71 | && (sbr = dyn_cast <hsa_insn_sbr *> (source->m_last_insn))) | |
72 | sbr->replace_all_labels (old_dest, hbb->m_bb); | |
73 | } | |
74 | ||
75 | hsa_build_append_simple_mov (phi->m_dest, op, hbb); | |
76 | } | |
77 | } | |
78 | ||
/* Naive out-of-SSA. */
80 | ||
81 | static void | |
82 | naive_outof_ssa (void) | |
83 | { | |
84 | basic_block bb; | |
85 | ||
86 | hsa_cfun->m_in_ssa = false; | |
87 | ||
88 | FOR_ALL_BB_FN (bb, cfun) | |
89 | { | |
90 | hsa_bb *hbb = hsa_bb_for_bb (bb); | |
91 | hsa_insn_phi *phi; | |
92 | ||
635c99aa MJ |
93 | /* naive_process_phi can call split_edge on an incoming edge which order if |
94 | the incoming edges to the basic block and thus make it inconsistent with | |
95 | the ordering of PHI arguments, so we collect them in advance. */ | |
96 | auto_vec<edge, 8> predecessors; | |
97 | unsigned pred_count = EDGE_COUNT (bb->preds); | |
98 | for (unsigned i = 0; i < pred_count; i++) | |
99 | predecessors.safe_push (EDGE_PRED (bb, i)); | |
100 | ||
b2b40051 MJ |
101 | for (phi = hbb->m_first_phi; |
102 | phi; | |
103 | phi = phi->m_next ? as_a <hsa_insn_phi *> (phi->m_next) : NULL) | |
635c99aa | 104 | naive_process_phi (phi, predecessors); |
b2b40051 MJ |
105 | |
106 | /* Zap PHI nodes, they will be deallocated when everything else will. */ | |
107 | hbb->m_first_phi = NULL; | |
108 | hbb->m_last_phi = NULL; | |
109 | } | |
110 | } | |
111 | ||
112 | /* Return register class number for the given HSA TYPE. 0 means the 'c' one | |
113 | bit register class, 1 means 's' 32 bit class, 2 stands for 'd' 64 bit class | |
114 | and 3 for 'q' 128 bit class. */ | |
115 | ||
116 | static int | |
117 | m_reg_class_for_type (BrigType16_t type) | |
118 | { | |
119 | switch (type) | |
120 | { | |
121 | case BRIG_TYPE_B1: | |
122 | return 0; | |
123 | ||
124 | case BRIG_TYPE_U8: | |
125 | case BRIG_TYPE_U16: | |
126 | case BRIG_TYPE_U32: | |
127 | case BRIG_TYPE_S8: | |
128 | case BRIG_TYPE_S16: | |
129 | case BRIG_TYPE_S32: | |
130 | case BRIG_TYPE_F16: | |
131 | case BRIG_TYPE_F32: | |
132 | case BRIG_TYPE_B8: | |
133 | case BRIG_TYPE_B16: | |
134 | case BRIG_TYPE_B32: | |
135 | case BRIG_TYPE_U8X4: | |
136 | case BRIG_TYPE_S8X4: | |
137 | case BRIG_TYPE_U16X2: | |
138 | case BRIG_TYPE_S16X2: | |
139 | case BRIG_TYPE_F16X2: | |
140 | return 1; | |
141 | ||
142 | case BRIG_TYPE_U64: | |
143 | case BRIG_TYPE_S64: | |
144 | case BRIG_TYPE_F64: | |
145 | case BRIG_TYPE_B64: | |
146 | case BRIG_TYPE_U8X8: | |
147 | case BRIG_TYPE_S8X8: | |
148 | case BRIG_TYPE_U16X4: | |
149 | case BRIG_TYPE_S16X4: | |
150 | case BRIG_TYPE_F16X4: | |
151 | case BRIG_TYPE_U32X2: | |
152 | case BRIG_TYPE_S32X2: | |
153 | case BRIG_TYPE_F32X2: | |
154 | return 2; | |
155 | ||
156 | case BRIG_TYPE_B128: | |
157 | case BRIG_TYPE_U8X16: | |
158 | case BRIG_TYPE_S8X16: | |
159 | case BRIG_TYPE_U16X8: | |
160 | case BRIG_TYPE_S16X8: | |
161 | case BRIG_TYPE_F16X8: | |
162 | case BRIG_TYPE_U32X4: | |
163 | case BRIG_TYPE_U64X2: | |
164 | case BRIG_TYPE_S32X4: | |
165 | case BRIG_TYPE_S64X2: | |
166 | case BRIG_TYPE_F32X4: | |
167 | case BRIG_TYPE_F64X2: | |
168 | return 3; | |
169 | ||
170 | default: | |
171 | gcc_unreachable (); | |
172 | } | |
173 | } | |
174 | ||
175 | /* If the Ith operands of INSN is or contains a register (in an address), | |
176 | return the address of that register operand. If not return NULL. */ | |
177 | ||
178 | static hsa_op_reg ** | |
179 | insn_reg_addr (hsa_insn_basic *insn, int i) | |
180 | { | |
181 | hsa_op_base *op = insn->get_op (i); | |
182 | if (!op) | |
183 | return NULL; | |
184 | hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op); | |
185 | if (reg) | |
186 | return (hsa_op_reg **) insn->get_op_addr (i); | |
187 | hsa_op_address *addr = dyn_cast <hsa_op_address *> (op); | |
188 | if (addr && addr->m_reg) | |
189 | return &addr->m_reg; | |
190 | return NULL; | |
191 | } | |
192 | ||
193 | struct m_reg_class_desc | |
194 | { | |
195 | unsigned next_avail, max_num; | |
196 | unsigned used_num, max_used; | |
197 | uint64_t used[2]; | |
198 | char cl_char; | |
199 | }; | |
200 | ||
201 | /* Rewrite the instructions in BB to observe spilled live ranges. | |
202 | CLASSES is the global register class state. */ | |
203 | ||
204 | static void | |
205 | rewrite_code_bb (basic_block bb, struct m_reg_class_desc *classes) | |
206 | { | |
207 | hsa_bb *hbb = hsa_bb_for_bb (bb); | |
208 | hsa_insn_basic *insn, *next_insn; | |
209 | ||
210 | for (insn = hbb->m_first_insn; insn; insn = next_insn) | |
211 | { | |
212 | next_insn = insn->m_next; | |
213 | unsigned count = insn->operand_count (); | |
214 | for (unsigned i = 0; i < count; i++) | |
215 | { | |
216 | gcc_checking_assert (insn->get_op (i)); | |
217 | hsa_op_reg **regaddr = insn_reg_addr (insn, i); | |
218 | ||
219 | if (regaddr) | |
220 | { | |
221 | hsa_op_reg *reg = *regaddr; | |
222 | if (reg->m_reg_class) | |
223 | continue; | |
224 | gcc_assert (reg->m_spill_sym); | |
225 | ||
226 | int cl = m_reg_class_for_type (reg->m_type); | |
227 | hsa_op_reg *tmp, *tmp2; | |
228 | if (insn->op_output_p (i)) | |
229 | tmp = hsa_spill_out (insn, reg, &tmp2); | |
230 | else | |
231 | tmp = hsa_spill_in (insn, reg, &tmp2); | |
232 | ||
233 | *regaddr = tmp; | |
234 | ||
235 | tmp->m_reg_class = classes[cl].cl_char; | |
236 | tmp->m_hard_num = (char) (classes[cl].max_num + i); | |
237 | if (tmp2) | |
238 | { | |
239 | gcc_assert (cl == 0); | |
240 | tmp2->m_reg_class = classes[1].cl_char; | |
241 | tmp2->m_hard_num = (char) (classes[1].max_num + i); | |
242 | } | |
243 | } | |
244 | } | |
245 | } | |
246 | } | |
247 | ||
248 | /* Dump current function to dump file F, with info specific | |
249 | to register allocation. */ | |
250 | ||
251 | void | |
252 | dump_hsa_cfun_regalloc (FILE *f) | |
253 | { | |
254 | basic_block bb; | |
255 | ||
256 | fprintf (f, "\nHSAIL IL for %s\n", hsa_cfun->m_name); | |
257 | ||
258 | FOR_ALL_BB_FN (bb, cfun) | |
259 | { | |
99b1c316 | 260 | hsa_bb *hbb = (class hsa_bb *) bb->aux; |
b2b40051 MJ |
261 | bitmap_print (dump_file, hbb->m_livein, "m_livein ", "\n"); |
262 | dump_hsa_bb (f, hbb); | |
263 | bitmap_print (dump_file, hbb->m_liveout, "m_liveout ", "\n"); | |
264 | } | |
265 | } | |
266 | ||
267 | /* Given the global register allocation state CLASSES and a | |
268 | register REG, try to give it a hardware register. If successful, | |
269 | store that hardreg in REG and return it, otherwise return -1. | |
270 | Also changes CLASSES to accommodate for the allocated register. */ | |
271 | ||
272 | static int | |
273 | try_alloc_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg) | |
274 | { | |
275 | int cl = m_reg_class_for_type (reg->m_type); | |
276 | int ret = -1; | |
277 | if (classes[1].used_num + classes[2].used_num * 2 + classes[3].used_num * 4 | |
278 | >= 128 - 5) | |
279 | return -1; | |
280 | if (classes[cl].used_num < classes[cl].max_num) | |
281 | { | |
282 | unsigned int i; | |
283 | classes[cl].used_num++; | |
284 | if (classes[cl].used_num > classes[cl].max_used) | |
285 | classes[cl].max_used = classes[cl].used_num; | |
286 | for (i = 0; i < classes[cl].used_num; i++) | |
287 | if (! (classes[cl].used[i / 64] & (((uint64_t)1) << (i & 63)))) | |
288 | break; | |
289 | ret = i; | |
290 | classes[cl].used[i / 64] |= (((uint64_t)1) << (i & 63)); | |
291 | reg->m_reg_class = classes[cl].cl_char; | |
292 | reg->m_hard_num = i; | |
293 | } | |
294 | return ret; | |
295 | } | |
296 | ||
297 | /* Free up hardregs used by REG, into allocation state CLASSES. */ | |
298 | ||
299 | static void | |
300 | free_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg) | |
301 | { | |
302 | int cl = m_reg_class_for_type (reg->m_type); | |
303 | int ret = reg->m_hard_num; | |
304 | gcc_assert (reg->m_reg_class == classes[cl].cl_char); | |
305 | classes[cl].used_num--; | |
306 | classes[cl].used[ret / 64] &= ~(((uint64_t)1) << (ret & 63)); | |
307 | } | |
308 | ||
309 | /* Note that the live range for REG ends at least at END. */ | |
310 | ||
311 | static void | |
312 | note_lr_end (hsa_op_reg *reg, int end) | |
313 | { | |
314 | if (reg->m_lr_end < end) | |
315 | reg->m_lr_end = end; | |
316 | } | |
317 | ||
318 | /* Note that the live range for REG starts at least at BEGIN. */ | |
319 | ||
320 | static void | |
321 | note_lr_begin (hsa_op_reg *reg, int begin) | |
322 | { | |
323 | if (reg->m_lr_begin > begin) | |
324 | reg->m_lr_begin = begin; | |
325 | } | |
326 | ||
327 | /* Given two registers A and B, return -1, 0 or 1 if A's live range | |
328 | starts before, at or after B's live range. */ | |
329 | ||
330 | static int | |
331 | cmp_begin (const void *a, const void *b) | |
332 | { | |
333 | const hsa_op_reg * const *rega = (const hsa_op_reg * const *)a; | |
334 | const hsa_op_reg * const *regb = (const hsa_op_reg * const *)b; | |
335 | int ret; | |
336 | if (rega == regb) | |
337 | return 0; | |
338 | ret = (*rega)->m_lr_begin - (*regb)->m_lr_begin; | |
339 | if (ret) | |
340 | return ret; | |
341 | return ((*rega)->m_order - (*regb)->m_order); | |
342 | } | |
343 | ||
344 | /* Given two registers REGA and REGB, return true if REGA's | |
345 | live range ends after REGB's. This results in a sorting order | |
346 | with earlier end points at the end. */ | |
347 | ||
348 | static bool | |
349 | cmp_end (hsa_op_reg * const ®a, hsa_op_reg * const ®b) | |
350 | { | |
351 | int ret; | |
352 | if (rega == regb) | |
353 | return false; | |
354 | ret = (regb)->m_lr_end - (rega)->m_lr_end; | |
355 | if (ret) | |
356 | return ret < 0; | |
357 | return (((regb)->m_order - (rega)->m_order)) < 0; | |
358 | } | |
359 | ||
360 | /* Expire all old intervals in ACTIVE (a per-regclass vector), | |
361 | that is, those that end before the interval REG starts. Give | |
362 | back resources freed so into the state CLASSES. */ | |
363 | ||
364 | static void | |
365 | expire_old_intervals (hsa_op_reg *reg, vec<hsa_op_reg*> *active, | |
366 | struct m_reg_class_desc *classes) | |
367 | { | |
368 | for (int i = 0; i < 4; i++) | |
369 | while (!active[i].is_empty ()) | |
370 | { | |
371 | hsa_op_reg *a = active[i].pop (); | |
372 | if (a->m_lr_end > reg->m_lr_begin) | |
373 | { | |
374 | active[i].quick_push (a); | |
375 | break; | |
376 | } | |
377 | free_reg (classes, a); | |
378 | } | |
379 | } | |
380 | ||
381 | /* The interval REG didn't get a hardreg. Spill it or one of those | |
382 | from ACTIVE (if the latter, then REG will become allocated to the | |
383 | hardreg that formerly was used by it). */ | |
384 | ||
385 | static void | |
386 | spill_at_interval (hsa_op_reg *reg, vec<hsa_op_reg*> *active) | |
387 | { | |
388 | int cl = m_reg_class_for_type (reg->m_type); | |
389 | gcc_assert (!active[cl].is_empty ()); | |
390 | hsa_op_reg *cand = active[cl][0]; | |
391 | if (cand->m_lr_end > reg->m_lr_end) | |
392 | { | |
393 | reg->m_reg_class = cand->m_reg_class; | |
394 | reg->m_hard_num = cand->m_hard_num; | |
395 | active[cl].ordered_remove (0); | |
396 | unsigned place = active[cl].lower_bound (reg, cmp_end); | |
397 | active[cl].quick_insert (place, reg); | |
398 | } | |
399 | else | |
400 | cand = reg; | |
401 | ||
402 | gcc_assert (!cand->m_spill_sym); | |
403 | BrigType16_t type = cand->m_type; | |
404 | if (type == BRIG_TYPE_B1) | |
405 | type = BRIG_TYPE_U8; | |
406 | cand->m_reg_class = 0; | |
407 | cand->m_spill_sym = hsa_get_spill_symbol (type); | |
408 | cand->m_spill_sym->m_name_number = cand->m_order; | |
409 | } | |
410 | ||
411 | /* Given the global register state CLASSES allocate all HSA virtual | |
412 | registers either to hardregs or to a spill symbol. */ | |
413 | ||
414 | static void | |
415 | linear_scan_regalloc (struct m_reg_class_desc *classes) | |
416 | { | |
417 | /* Compute liveness. */ | |
418 | bool changed; | |
419 | int i, n; | |
420 | int insn_order; | |
421 | int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); | |
422 | bitmap work = BITMAP_ALLOC (NULL); | |
423 | vec<hsa_op_reg*> ind2reg = vNULL; | |
424 | vec<hsa_op_reg*> active[4] = {vNULL, vNULL, vNULL, vNULL}; | |
425 | hsa_insn_basic *m_last_insn; | |
426 | ||
427 | /* We will need the reverse post order for linearization, | |
428 | and the post order for liveness analysis, which is the same | |
429 | backward. */ | |
430 | n = pre_and_rev_post_order_compute (NULL, bbs, true); | |
431 | ind2reg.safe_grow_cleared (hsa_cfun->m_reg_count); | |
432 | ||
433 | /* Give all instructions a linearized number, at the same time | |
434 | build a mapping from register index to register. */ | |
435 | insn_order = 1; | |
436 | for (i = 0; i < n; i++) | |
437 | { | |
438 | basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]); | |
439 | hsa_bb *hbb = hsa_bb_for_bb (bb); | |
440 | hsa_insn_basic *insn; | |
441 | for (insn = hbb->m_first_insn; insn; insn = insn->m_next) | |
442 | { | |
443 | unsigned opi; | |
444 | insn->m_number = insn_order++; | |
445 | for (opi = 0; opi < insn->operand_count (); opi++) | |
446 | { | |
447 | gcc_checking_assert (insn->get_op (opi)); | |
448 | hsa_op_reg **regaddr = insn_reg_addr (insn, opi); | |
449 | if (regaddr) | |
450 | ind2reg[(*regaddr)->m_order] = *regaddr; | |
451 | } | |
452 | } | |
453 | } | |
454 | ||
455 | /* Initialize all live ranges to [after-end, 0). */ | |
456 | for (i = 0; i < hsa_cfun->m_reg_count; i++) | |
457 | if (ind2reg[i]) | |
458 | ind2reg[i]->m_lr_begin = insn_order, ind2reg[i]->m_lr_end = 0; | |
459 | ||
460 | /* Classic liveness analysis, as long as something changes: | |
461 | m_liveout is union (m_livein of successors) | |
462 | m_livein is m_liveout minus defs plus uses. */ | |
463 | do | |
464 | { | |
465 | changed = false; | |
466 | for (i = n - 1; i >= 0; i--) | |
467 | { | |
468 | edge e; | |
469 | edge_iterator ei; | |
470 | basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]); | |
471 | hsa_bb *hbb = hsa_bb_for_bb (bb); | |
472 | ||
473 | /* Union of successors m_livein (or empty if none). */ | |
474 | bool first = true; | |
475 | FOR_EACH_EDGE (e, ei, bb->succs) | |
476 | if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
477 | { | |
478 | hsa_bb *succ = hsa_bb_for_bb (e->dest); | |
479 | if (first) | |
480 | { | |
481 | bitmap_copy (work, succ->m_livein); | |
482 | first = false; | |
483 | } | |
484 | else | |
485 | bitmap_ior_into (work, succ->m_livein); | |
486 | } | |
487 | if (first) | |
488 | bitmap_clear (work); | |
489 | ||
490 | bitmap_copy (hbb->m_liveout, work); | |
491 | ||
492 | /* Remove defs, include uses in a backward insn walk. */ | |
493 | hsa_insn_basic *insn; | |
494 | for (insn = hbb->m_last_insn; insn; insn = insn->m_prev) | |
495 | { | |
496 | unsigned opi; | |
497 | unsigned ndefs = insn->input_count (); | |
498 | for (opi = 0; opi < ndefs && insn->get_op (opi); opi++) | |
499 | { | |
500 | gcc_checking_assert (insn->get_op (opi)); | |
501 | hsa_op_reg **regaddr = insn_reg_addr (insn, opi); | |
502 | if (regaddr) | |
503 | bitmap_clear_bit (work, (*regaddr)->m_order); | |
504 | } | |
505 | for (; opi < insn->operand_count (); opi++) | |
506 | { | |
507 | gcc_checking_assert (insn->get_op (opi)); | |
508 | hsa_op_reg **regaddr = insn_reg_addr (insn, opi); | |
509 | if (regaddr) | |
510 | bitmap_set_bit (work, (*regaddr)->m_order); | |
511 | } | |
512 | } | |
513 | ||
514 | /* Note if that changed something. */ | |
515 | if (bitmap_ior_into (hbb->m_livein, work)) | |
516 | changed = true; | |
517 | } | |
518 | } | |
519 | while (changed); | |
520 | ||
521 | /* Make one pass through all instructions in linear order, | |
522 | noting and merging possible live range start and end points. */ | |
523 | m_last_insn = NULL; | |
524 | for (i = n - 1; i >= 0; i--) | |
525 | { | |
526 | basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]); | |
527 | hsa_bb *hbb = hsa_bb_for_bb (bb); | |
528 | hsa_insn_basic *insn; | |
529 | int after_end_number; | |
530 | unsigned bit; | |
531 | bitmap_iterator bi; | |
532 | ||
533 | if (m_last_insn) | |
534 | after_end_number = m_last_insn->m_number; | |
535 | else | |
536 | after_end_number = insn_order; | |
537 | /* Everything live-out in this BB has at least an end point | |
538 | after us. */ | |
539 | EXECUTE_IF_SET_IN_BITMAP (hbb->m_liveout, 0, bit, bi) | |
540 | note_lr_end (ind2reg[bit], after_end_number); | |
541 | ||
542 | for (insn = hbb->m_last_insn; insn; insn = insn->m_prev) | |
543 | { | |
544 | unsigned opi; | |
545 | unsigned ndefs = insn->input_count (); | |
546 | for (opi = 0; opi < insn->operand_count (); opi++) | |
547 | { | |
548 | gcc_checking_assert (insn->get_op (opi)); | |
549 | hsa_op_reg **regaddr = insn_reg_addr (insn, opi); | |
550 | if (regaddr) | |
551 | { | |
552 | hsa_op_reg *reg = *regaddr; | |
553 | if (opi < ndefs) | |
554 | note_lr_begin (reg, insn->m_number); | |
555 | else | |
556 | note_lr_end (reg, insn->m_number); | |
557 | } | |
558 | } | |
559 | } | |
560 | ||
561 | /* Everything live-in in this BB has a start point before | |
562 | our first insn. */ | |
563 | int before_start_number; | |
564 | if (hbb->m_first_insn) | |
565 | before_start_number = hbb->m_first_insn->m_number; | |
566 | else | |
567 | before_start_number = after_end_number; | |
568 | before_start_number--; | |
569 | EXECUTE_IF_SET_IN_BITMAP (hbb->m_livein, 0, bit, bi) | |
570 | note_lr_begin (ind2reg[bit], before_start_number); | |
571 | ||
572 | if (hbb->m_first_insn) | |
573 | m_last_insn = hbb->m_first_insn; | |
574 | } | |
575 | ||
576 | for (i = 0; i < hsa_cfun->m_reg_count; i++) | |
577 | if (ind2reg[i]) | |
578 | { | |
579 | /* All regs that have still their start at after all code actually | |
580 | are defined at the start of the routine (prologue). */ | |
581 | if (ind2reg[i]->m_lr_begin == insn_order) | |
582 | ind2reg[i]->m_lr_begin = 0; | |
583 | /* All regs that have no use but a def will have lr_end == 0, | |
584 | they are actually live from def until after the insn they are | |
585 | defined in. */ | |
586 | if (ind2reg[i]->m_lr_end == 0) | |
587 | ind2reg[i]->m_lr_end = ind2reg[i]->m_lr_begin + 1; | |
588 | } | |
589 | ||
590 | /* Sort all intervals by increasing start point. */ | |
591 | gcc_assert (ind2reg.length () == (size_t) hsa_cfun->m_reg_count); | |
592 | ||
ac400631 ML |
593 | if (flag_checking) |
594 | for (unsigned i = 0; i < ind2reg.length (); i++) | |
595 | gcc_assert (ind2reg[i]); | |
b2b40051 MJ |
596 | |
597 | ind2reg.qsort (cmp_begin); | |
598 | for (i = 0; i < 4; i++) | |
599 | active[i].reserve_exact (hsa_cfun->m_reg_count); | |
600 | ||
601 | /* Now comes the linear scan allocation. */ | |
602 | for (i = 0; i < hsa_cfun->m_reg_count; i++) | |
603 | { | |
604 | hsa_op_reg *reg = ind2reg[i]; | |
605 | if (!reg) | |
606 | continue; | |
607 | expire_old_intervals (reg, active, classes); | |
608 | int cl = m_reg_class_for_type (reg->m_type); | |
609 | if (try_alloc_reg (classes, reg) >= 0) | |
610 | { | |
611 | unsigned place = active[cl].lower_bound (reg, cmp_end); | |
612 | active[cl].quick_insert (place, reg); | |
613 | } | |
614 | else | |
615 | spill_at_interval (reg, active); | |
616 | ||
617 | /* Some interesting dumping as we go. */ | |
2998cb96 | 618 | if (dump_file && (dump_flags & TDF_DETAILS)) |
b2b40051 MJ |
619 | { |
620 | fprintf (dump_file, " reg%d: [%5d, %5d)->", | |
621 | reg->m_order, reg->m_lr_begin, reg->m_lr_end); | |
622 | if (reg->m_reg_class) | |
623 | fprintf (dump_file, "$%c%i", reg->m_reg_class, reg->m_hard_num); | |
624 | else | |
625 | fprintf (dump_file, "[%%__%s_%i]", | |
626 | hsa_seg_name (reg->m_spill_sym->m_segment), | |
627 | reg->m_spill_sym->m_name_number); | |
628 | for (int cl = 0; cl < 4; cl++) | |
629 | { | |
630 | bool first = true; | |
631 | hsa_op_reg *r; | |
632 | fprintf (dump_file, " {"); | |
633 | for (int j = 0; active[cl].iterate (j, &r); j++) | |
634 | if (first) | |
635 | { | |
636 | fprintf (dump_file, "%d", r->m_order); | |
637 | first = false; | |
638 | } | |
639 | else | |
640 | fprintf (dump_file, ", %d", r->m_order); | |
641 | fprintf (dump_file, "}"); | |
642 | } | |
643 | fprintf (dump_file, "\n"); | |
644 | } | |
645 | } | |
646 | ||
647 | BITMAP_FREE (work); | |
648 | free (bbs); | |
649 | ||
2998cb96 | 650 | if (dump_file && (dump_flags & TDF_DETAILS)) |
b2b40051 MJ |
651 | { |
652 | fprintf (dump_file, "------- After liveness: -------\n"); | |
653 | dump_hsa_cfun_regalloc (dump_file); | |
654 | fprintf (dump_file, " ----- Intervals:\n"); | |
655 | for (i = 0; i < hsa_cfun->m_reg_count; i++) | |
656 | { | |
657 | hsa_op_reg *reg = ind2reg[i]; | |
658 | if (!reg) | |
659 | continue; | |
660 | fprintf (dump_file, " reg%d: [%5d, %5d)->", reg->m_order, | |
661 | reg->m_lr_begin, reg->m_lr_end); | |
662 | if (reg->m_reg_class) | |
663 | fprintf (dump_file, "$%c%i\n", reg->m_reg_class, reg->m_hard_num); | |
664 | else | |
665 | fprintf (dump_file, "[%%__%s_%i]\n", | |
666 | hsa_seg_name (reg->m_spill_sym->m_segment), | |
667 | reg->m_spill_sym->m_name_number); | |
668 | } | |
669 | } | |
670 | ||
671 | for (i = 0; i < 4; i++) | |
672 | active[i].release (); | |
673 | ind2reg.release (); | |
674 | } | |
675 | ||
676 | /* Entry point for register allocation. */ | |
677 | ||
678 | static void | |
679 | regalloc (void) | |
680 | { | |
681 | basic_block bb; | |
682 | m_reg_class_desc classes[4]; | |
683 | ||
684 | /* If there are no registers used in the function, exit right away. */ | |
685 | if (hsa_cfun->m_reg_count == 0) | |
686 | return; | |
687 | ||
688 | memset (classes, 0, sizeof (classes)); | |
689 | classes[0].next_avail = 0; | |
690 | classes[0].max_num = 7; | |
691 | classes[0].cl_char = 'c'; | |
692 | classes[1].cl_char = 's'; | |
693 | classes[2].cl_char = 'd'; | |
694 | classes[3].cl_char = 'q'; | |
695 | ||
696 | for (int i = 1; i < 4; i++) | |
697 | { | |
698 | classes[i].next_avail = 0; | |
699 | classes[i].max_num = 20; | |
700 | } | |
701 | ||
702 | linear_scan_regalloc (classes); | |
703 | ||
704 | FOR_ALL_BB_FN (bb, cfun) | |
705 | rewrite_code_bb (bb, classes); | |
706 | } | |
707 | ||
708 | /* Out of SSA and register allocation on HSAIL IL. */ | |
709 | ||
710 | void | |
711 | hsa_regalloc (void) | |
712 | { | |
65e21467 | 713 | hsa_cfun->update_dominance (); |
b2b40051 MJ |
714 | naive_outof_ssa (); |
715 | ||
2998cb96 | 716 | if (dump_file && (dump_flags & TDF_DETAILS)) |
b2b40051 MJ |
717 | { |
718 | fprintf (dump_file, "------- After out-of-SSA: -------\n"); | |
719 | dump_hsa_cfun (dump_file); | |
720 | } | |
721 | ||
722 | regalloc (); | |
723 | ||
2998cb96 | 724 | if (dump_file && (dump_flags & TDF_DETAILS)) |
b2b40051 MJ |
725 | { |
726 | fprintf (dump_file, "------- After register allocation: -------\n"); | |
727 | dump_hsa_cfun (dump_file); | |
728 | } | |
729 | } |