// Early register allocation pass.
// Copyright (C) 2023-2024 Free Software Foundation, Inc.
//
// This file is part of GCC.
//
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
//
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

// This pass implements a simple form of early register allocation.
// It is restricted to FP/SIMD registers, and it only allocates
// a region of FP/SIMD usage if it can do so without any spilling.
// It punts on anything too complicated, leaving it to the real
// register allocator.
//
// There are two main purposes:
//
// (1) The pass runs before scheduling.  It therefore has a chance to
//     bag a spill-free allocation, if there is one, before scheduling
//     moves things around.
//
// (2) The pass can make use of strided register operations, such as the
//     strided forms of LD1 and ST1 in SME2.
//
// The allocator works at the level of individual FPRs, rather than whole
// pseudo registers.  It is mostly intended to help optimize ACLE code.
//
// The pass is very simplistic.  There are many things that could be improved.

#define IN_TARGET_CODE 1

#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "df.h"
#include "rtl-ssa.h"
#include "tree-pass.h"
#include "target.h"
#include "expr.h"
#include "cfgrtl.h"
#include "print-rtl.h"
#include "insn-attr.h"
#include "insn-opinit.h"
#include "reload.h"

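// A simple iterator that wraps an integral value and steps it by one in
// each direction.  It is used (via allocno_iterator below) to iterate
// over ranges of allocno indices.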
template<typename T>
class simple_iterator : public wrapper_iterator<T>
{
public:
  using wrapper_iterator<T>::wrapper_iterator;

  simple_iterator &operator-- () { --this->m_contents; return *this; }
  simple_iterator operator-- (int) { return this->m_contents--; }
  simple_iterator &operator++ () { ++this->m_contents; return *this; }
  simple_iterator operator++ (int) { return this->m_contents++; }
};

using namespace rtl_ssa;

namespace {
const pass_data pass_data_early_ra =
{
  RTL_PASS, // type
  "early_ra", // name
  OPTGROUP_NONE, // optinfo_flags
  TV_NONE, // tv_id
  0, // properties_required
  0, // properties_provided
  0, // properties_destroyed
  0, // todo_flags_start
  TODO_df_finish, // todo_flags_finish
};

using allocno_iterator = simple_iterator<unsigned int>;

// Class that represents one run of the pass.
class early_ra
{
public:
  early_ra (function *fn);
  ~early_ra ();
  void execute ();

private:
  static_assert (MAX_RECOG_OPERANDS <= 32, "Operand mask is 32 bits");
  using operand_mask = uint32_t;

  // Points in the function are represented using "program points".
  // The program points are allocated in reverse order, with smaller
  // numbers indicating later points.  These special values indicate
  // the start and end of a region.
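  // For example, point 10 denotes an earlier position in the region
  // than point 4.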
  static constexpr unsigned int START_OF_REGION = ~0U;
  static constexpr unsigned int END_OF_REGION = 0U;

  // An invalid allocno index, used to represent no allocno.
  static constexpr unsigned int INVALID_ALLOCNO = ~0U;

  // Enumerates the single FPR sizes that matter for register allocation.
  // Anything smaller than 64 bits is treated as FPR_D.
  enum fpr_size_info
  {
    FPR_D,
    FPR_Q,
    FPR_Z
  };

  // A live range for an FPR, containing program points [START_POINT,
  // END_POINT].  If ALLOCNO is not INVALID_ALLOCNO, the FPR is known
  // to be equal to ALLOCNO for the duration of the live range.
  struct fpr_range_info
  {
    unsigned int start_point;
    unsigned int end_point;
    unsigned int allocno;
  };

  // Flags used in pseudo_reg_info.
  //
  // Whether the pseudo register occurs in one instruction alternative that
  // matches (respectively) V0-V7, V0-V15, V0-V31 or a non-FP register.
  static constexpr unsigned int ALLOWS_FPR8 = 1U << 0;
  static constexpr unsigned int ALLOWS_FPR16 = 1U << 1;
  static constexpr unsigned int ALLOWS_FPR32 = 1U << 2;
  static constexpr unsigned int ALLOWS_NONFPR = 1U << 3;
  //
  // Likewise whether the register occurs in an instruction that requires
  // the associated register type.
  static constexpr unsigned int NEEDS_FPR8 = 1U << 4;
  static constexpr unsigned int NEEDS_FPR16 = 1U << 5;
  static constexpr unsigned int NEEDS_FPR32 = 1U << 6;
  static constexpr unsigned int NEEDS_NONFPR = 1U << 7;
  //
  // Whether the pseudo register is copied to or from a hard FP register.
  static constexpr unsigned int HAS_FPR_COPY = 1U << 8;
  //
  // Whether the pseudo register is copied to or from a hard non-FP register.
  static constexpr unsigned int HAS_NONFPR_COPY = 1U << 9;
  //
  // Whether the pseudo register is used as a multi-register vector operand
  // to an instruction that supports strided accesses, and whether it is used
  // as a multi-register vector operand in some other non-move instruction.
  static constexpr unsigned int HAS_FLEXIBLE_STRIDE = 1U << 10;
  static constexpr unsigned int HAS_FIXED_STRIDE = 1U << 11;

  // Flags that should be propagated across moves between pseudo registers.
  static constexpr unsigned int PSEUDO_COPY_FLAGS = ~(HAS_FLEXIBLE_STRIDE
                                                      | HAS_FIXED_STRIDE);

  // Information about a copy between two registers.
  struct reg_copy_info
  {
    // The two registers involved in the copy, in increasing order of regno.
    unsigned int regnos[2];

    // Index I gives the index of the next reg_copy_info involving REGNOS[I],
    // or 0 if none.
    unsigned int next_copies[2];
  };

  // Information about a pseudo register.
  struct pseudo_reg_info
  {
    // Flags describing how the register is used, defined above.
    unsigned int flags : 16;

    // The mode of the pseudo register, cached for convenience.
    machine_mode mode : 16;

    // The index of the first copy, or 0 if none.
    unsigned int first_copy;
  };

  // Information about a group of allocnos that have a fixed offset
  // relative to each other.  The allocnos in each group must be allocated
  // together.
  //
  // Allocnos that can share the same hard register are eventually
  // chained together.  These chains represent edges on a graph of
  // allocnos, such that two allocnos joined by an edge use the same FPR.
  // These chains are formed between individual allocnos rather than
  // whole groups, although the system is required to be self-consistent.
  // Each clique in the graph has at least one "full-width" allocno group
  // that has one allocno for every FPR that needs to be allocated to
  // the clique.
  //
  // One group of allocnos is chosen as the "color representative" of
  // each clique in the graph.  This group will be a full-width group.
  struct allocno_info;
  struct allocno_group_info
  {
    array_slice<unsigned int> chain_heads ();
    array_slice<allocno_info> allocnos ();
    allocno_group_info *color_rep ();
    allocno_info *allocno (unsigned int);

    // The color representative of the containing clique.
    allocno_group_info *m_color_rep;

    // The pseudo register associated with this allocno group,
    // or INVALID_REGNUM if none.
    unsigned int regno;

    // The offset of the first allocno (and thus this group) from the start
    // of color_rep.
    unsigned int color_rep_offset : 8;

    // The number of allocnos in the group, and thus the number of FPRs
    // that need to be allocated.
    unsigned int size : 8;

    // The gap between FPRs in the group.  This is normally 1, but can be
    // higher if we've decided to use strided multi-register accesses.
    unsigned int stride : 4;

    // Used temporarily while deciding which allocnos should have non-unit
    // strides; see find_strided_accesses for details.
    int consecutive_pref : 4;
    int strided_polarity : 2;

    // The largest size of FPR needed by references to the allocno group.
    fpr_size_info fpr_size : 2;

    // True if all non-move accesses can be converted to strided form.
    unsigned int has_flexible_stride : 1;

    // True if we've assigned a color index to this group.
    unsigned int has_color : 1;

    // The mask of FPRs that would make valid choices for the first allocno,
    // taking the requirements of all the allocnos in the group into account.
    unsigned int fpr_candidates;

    // The index of the color that has been assigned to the containing clique.
    unsigned int color;
  };

  // Represents a single FPR-sized quantity that needs to be allocated.
  // Each allocno is identified by index (for compactness).
  //
  // Quantities that span multiple FPRs are assigned groups of consecutive
  // allocnos.  Quantities that occupy a single FPR are assigned their own
  // group.
  struct allocno_info
  {
    allocno_group_info *group ();
    bool is_shared ();
    bool is_equiv_to (unsigned int);

    // The allocno's unique identifier.
    unsigned int id;

    // The offset of this allocno into the containing group.
    unsigned int offset : 8;

    // The number of allocnos in the containing group.
    unsigned int group_size : 8;

    // If the allocno has an affinity with at least one hard register
    // (so that choosing that hard register would avoid a copy), this is
    // the number of one such hard register, otherwise it is
    // FIRST_PSEUDO_REGISTER.
    unsigned int hard_regno : 8;

    // Set to 1 if the allocno has a single definition or 2 if it has more.
    unsigned int num_defs : 2;

    // True if, at START_POINT, another allocno is copied to this one.
    // See callers of record_copy for what counts as a copy.
    unsigned int is_copy_dest : 1;

    // True if, at START_POINT, another allocno is copied to this one,
    // and if the allocnos at both ends of the copy chain have an affinity
    // with the same hard register.
    unsigned int is_strong_copy_dest : 1;

    // True if, at END_POINT, this allocno is copied to another one,
    // and both allocnos have an affinity with the same hard register.
    unsigned int is_strong_copy_src : 1;

    // True if the allocno is subject to an earlyclobber at END_POINT,
    // so that it cannot be tied to the destination of the instruction.
    unsigned int is_earlyclobbered : 1;

    // True if this allocno is known to be equivalent to related_allocno
    // for the whole of this allocno's lifetime.
    unsigned int is_equiv : 1;

    // The inclusive range of program points spanned by the allocno.
    // START_POINT >= END_POINT.
    unsigned int start_point;
    unsigned int end_point;

    // If, at END_POINT, this allocno is copied to another allocno, this
    // is the index of that allocno, otherwise it is INVALID_ALLOCNO.
    // See callers of record_copy for what counts as a copy.
    unsigned int copy_dest;

    // If this field is not INVALID_ALLOCNO, it indicates one of two things:
    //
    // - if is_equiv, this allocno is equivalent to related_allocno for
    //   the whole of this allocno's lifetime.
    //
    // - if !is_equiv, this allocno's live range is a subrange of
    //   related_allocno's and we have committed to making this allocno
    //   share whatever register related_allocno uses.
    unsigned int related_allocno;

    union
    {
      // The program point at which the allocno was last defined,
      // or START_OF_REGION if none.  This is only used temporarily
      // while recording allocnos; after that, chain_next below is
      // used instead.
      unsigned int last_def_point;

      // The next chained allocno in program order (i.e. at lower program
      // points), or INVALID_ALLOCNO if none.
      unsigned int chain_next;
    };

    union
    {
      // The program point before start_point at which the allocno was
      // last used, or END_OF_REGION if none.  This is only used temporarily
      // while recording allocnos; after that, chain_prev below is used
      // instead.
      unsigned int last_use_point;

      // The previous chained allocno in program order (i.e. at higher
      // program points), or INVALID_ALLOCNO if none.
      unsigned int chain_prev;
    };
  };

  // Information about a full allocno group or a subgroup of it.
  // The subgroup can be empty to indicate "none".
  struct allocno_subgroup
  {
    array_slice<allocno_info> allocnos ();
    allocno_info *allocno (unsigned int);

    // True if a subgroup is present.
    operator bool () const { return count; }

    // The containing group.
    allocno_group_info *group;

    // The offset of the subgroup from the start of GROUP.
    unsigned int start;

    // The number of allocnos in the subgroup.
    unsigned int count;
  };

  // Represents information about a copy between an allocno and an FPR.
  // This establishes an affinity between the allocno and the FPR.
  struct allocno_copy_info
  {
    // The allocno involved in the copy.
    unsigned int allocno;

    // The FPR involved in the copy, relative to V0_REGNUM.
    unsigned int fpr : 16;

    // A measure of how strong the affinity between the allocno and FPR is.
    unsigned int weight : 16;
  };

  // Information about a possible allocno chain.
  struct chain_candidate_info
  {
    // The candidate target allocno.
    allocno_info *allocno;

    // A rating of the candidate (higher is better).
    int score;
  };

  // Information about an allocno color.
  struct color_info
  {
    // The color's unique identifier.
    int id;

    // The allocated hard register, when known.
    unsigned int hard_regno;

    // The clique's representative group.
    allocno_group_info *group;

    // The number of FPR preferences recorded in fpr_preferences.
    unsigned int num_fpr_preferences;

    // Weights in favor of choosing each FPR as the first register for GROUP.
    int8_t fpr_preferences[32];
  };

  template<typename T, typename... Ts>
  T *region_allocate (Ts...);

  allocno_info *chain_prev (allocno_info *);
  allocno_info *chain_next (allocno_info *);

  void dump_pseudo_regs ();
  void dump_fpr_ranges ();
  void dump_copies ();
  void dump_allocnos ();
  void dump_colors ();

  iterator_range<allocno_iterator> get_group_allocnos (unsigned int);

  void preprocess_move (rtx, rtx);
  void process_pseudo_reg_constraints (rtx_insn *);
  void preprocess_insns ();

  int fpr_preference (unsigned int);
  void propagate_pseudo_reg_info ();

  void choose_fpr_pseudos ();

  void start_new_region ();

  allocno_group_info *create_allocno_group (unsigned int, unsigned int);
  allocno_subgroup get_allocno_subgroup (rtx);
  void record_fpr_use (unsigned int);
  void record_fpr_def (unsigned int);
  void record_allocno_use (allocno_info *);
  void record_allocno_def (allocno_info *);
  allocno_info *find_related_start (allocno_info *, allocno_info *, bool);
  void record_copy (rtx, rtx, bool = false);
  void record_constraints (rtx_insn *);
  void record_artificial_refs (unsigned int);
  void record_insn_refs (rtx_insn *);

  bool consider_strong_copy_src_chain (allocno_info *);
  int strided_polarity_pref (allocno_info *, allocno_info *);
  void find_strided_accesses ();

  template<unsigned int allocno_info::*field>
  static int cmp_increasing (const void *, const void *);
  bool is_chain_candidate (allocno_info *, allocno_info *);
  int rate_chain (allocno_info *, allocno_info *);
  static int cmp_chain_candidates (const void *, const void *);
  void chain_allocnos (unsigned int &, unsigned int &);
  void merge_fpr_info (allocno_group_info *, allocno_group_info *,
                       unsigned int);
  void set_single_color_rep (allocno_info *, allocno_group_info *,
                             unsigned int);
  void set_color_rep (allocno_group_info *, allocno_group_info *,
                      unsigned int);
  bool try_to_chain_allocnos (allocno_info *, allocno_info *);
  void create_color (allocno_group_info *);
  void form_chains ();

  bool fpr_conflicts_with_allocno_p (unsigned int, allocno_info *);
  bool call_in_range_p (unsigned int, unsigned int, unsigned int);
  unsigned int partial_fpr_clobbers (unsigned int, fpr_size_info);

  void process_copies ();

  static int cmp_allocation_order (const void *, const void *);
  void allocate_colors ();
  allocno_info *find_independent_subchain (allocno_info *);
  color_info *find_oldest_color (unsigned int, unsigned int);
  void broaden_colors ();
  void finalize_allocation ();

  bool replace_regs (df_ref);
  int try_enforce_constraints (rtx_insn *, vec<std::pair<int, int>> &);
  void enforce_constraints (rtx_insn *);
  bool maybe_convert_to_strided_access (rtx_insn *);
  void apply_allocation ();

  void process_region ();
  bool is_dead_insn (rtx_insn *);
  void process_block (basic_block, bool);
  void process_blocks ();

  // ----------------------------------------------------------------------

  // The function we're operating on.
  function *m_fn;

  // Information about each pseudo register, indexed by REGNO.
  auto_vec<pseudo_reg_info> m_pseudo_regs;

  // All recorded register copies.
  auto_vec<reg_copy_info> m_pseudo_reg_copies;

  // The set of pseudos that we've decided to allocate an FPR to.
  auto_bitmap m_fpr_pseudos;

  // ----------------------------------------------------------------------

  // An obstack for allocating information that is referenced by the member
  // variables below.
  obstack m_region_obstack;
  void *m_region_alloc_start;

  // ----------------------------------------------------------------------

  // The basic block that we're currently processing.
  basic_block m_current_bb;

  // The lowest-numbered program point in the current basic block.
  unsigned int m_current_bb_point;

  // The program point that we're currently processing (described above).
  unsigned int m_current_point;

  // The set of allocnos that are currently live.
  auto_bitmap m_live_allocnos;

  // The set of FPRs that are currently live.
  unsigned int m_live_fprs;

  // ----------------------------------------------------------------------

  // A mask of the FPRs that have already been allocated.
  unsigned int m_allocated_fprs;

  // A mask of the FPRs that must be at least partially preserved by the
  // current function.
  unsigned int m_call_preserved_fprs;

  // True if we haven't yet failed to allocate the current region.
  bool m_allocation_successful;

  // A map from pseudo registers to the first allocno in their associated
  // allocno groups.
  hash_map<int_hash<unsigned int, INVALID_REGNUM>,
           allocno_group_info *> m_regno_to_group;

  // All recorded copies between allocnos and FPRs.
  auto_vec<allocno_copy_info> m_allocno_copies;

  // All allocnos, by index.
  auto_vec<allocno_info *> m_allocnos;

  // All allocnos, by increasing START_POINT.
  auto_vec<allocno_info *> m_sorted_allocnos;

  // Allocnos for which is_shared is true.
  auto_vec<allocno_info *> m_shared_allocnos;

  // All colors, by index.
  auto_vec<color_info *> m_colors;

  // The instruction ranges that make up the current region,
  // as half-open ranges [LAST, FIRST).
  auto_vec<std::pair<rtx_insn *, rtx_insn *>> m_insn_ranges;

  // The live ranges of each FPR, in order of increasing program point.
  auto_vec<fpr_range_info> m_fpr_ranges[32];

  // For each function call id, a list of program points at which a call
  // to such a function is made.  Each list is in order of increasing
  // program point.
  auto_vec<unsigned int> m_call_points[NUM_ABI_IDS];

  // A list of instructions that can be removed if allocation succeeds.
  auto_vec<rtx_insn *> m_dead_insns;
};

// True if PAT is something that would typically be treated as a move.
static inline bool
is_move_set (rtx pat)
{
  if (GET_CODE (pat) != SET)
    return false;

  rtx dest = SET_DEST (pat);
  if (SUBREG_P (dest))
    dest = SUBREG_REG (dest);
  if (!OBJECT_P (dest))
    return false;

  rtx src = SET_SRC (pat);
  if (SUBREG_P (src))
    src = SUBREG_REG (src);
  if (!OBJECT_P (src) && !CONSTANT_P (src))
    return false;

  return true;
}

// Return true if operand OP is likely to match OP_ALT after register
// allocation.
static bool
likely_operand_match_p (const operand_alternative &op_alt, rtx op)
{
  // Empty constraints match everything.
  const char *constraint = op_alt.constraint;
  if (constraint[0] == 0 || constraint[0] == ',')
    return true;

  for (;;)
    {
      char c = *constraint;
      int len = CONSTRAINT_LEN (c, constraint);
      if (c == 0 || c == ',')
        break;

      if (c == 'X')
        return true;

      auto cn = lookup_constraint (constraint);
      switch (get_constraint_type (cn))
        {
        case CT_REGISTER:
          if (REG_P (op) || SUBREG_P (op))
            return true;
          break;

        case CT_MEMORY:
        case CT_SPECIAL_MEMORY:
        case CT_RELAXED_MEMORY:
          if (MEM_P (op))
            return true;
          break;

        case CT_CONST_INT:
        case CT_ADDRESS:
        case CT_FIXED_FORM:
          if (constraint_satisfied_p (op, cn))
            return true;
          break;
        }

      constraint += len;
    }

  if (op_alt.matches >= 0)
    {
      rtx other = recog_data.operand[op_alt.matches];
      if ((REG_P (other) || SUBREG_P (other))
          && (REG_P (op) || SUBREG_P (op)))
        return true;
    }
  return false;
}

// Return true if the operands of the current instruction are likely to
// match OP_ALT.
static bool
likely_alternative_match_p (const operand_alternative *op_alt)
{
  for (int i = 0; i < recog_data.n_operands; ++i)
    if (!likely_operand_match_p (op_alt[i], recog_data.operand[i]))
      return false;
  return true;
}

// Return the sum of how disparaged OP_ALT is.
static int
count_rejects (const operand_alternative *op_alt)
{
  int reject = 0;
  for (int opno = 0; opno < recog_data.n_operands; ++opno)
    reject += op_alt[opno].reject;
  return reject;
}

// Allocate a T from the region obstack.
template<typename T, typename... Ts>
inline T *
early_ra::region_allocate (Ts... args)
{
  static_assert (std::is_trivially_destructible<T>::value,
                 "destructor won't be called");
  void *addr = obstack_alloc (&m_region_obstack, sizeof (T));
  return new (addr) T (std::forward<Ts> (args)...);
}

early_ra::early_ra (function *fn) : m_fn (fn), m_live_fprs (0)
{
  gcc_obstack_init (&m_region_obstack);
  m_region_alloc_start = obstack_alloc (&m_region_obstack, 0);
  bitmap_tree_view (m_live_allocnos);
}

early_ra::~early_ra ()
{
  obstack_free (&m_region_obstack, nullptr);
}

// Return an array that, for each allocno A in the group, contains the index
// of the allocno at the head of A's chain (that is, the one with the highest
// START_POINT).  The index is INVALID_ALLOCNO if the chain is empty.
inline array_slice<unsigned int>
early_ra::allocno_group_info::chain_heads ()
{
  auto *start = reinterpret_cast<unsigned int *> (this + 1);
  return { start, size };
}

// Return the array of allocnos in the group.
inline array_slice<early_ra::allocno_info>
early_ra::allocno_group_info::allocnos ()
{
  gcc_checking_assert (regno != INVALID_REGNUM);
  auto *chain_end = reinterpret_cast<unsigned int *> (this + 1) + size;
  auto *allocno_start = reinterpret_cast<allocno_info *> (chain_end);
  return { allocno_start, size };
}

// Return the group's color representative.
inline early_ra::allocno_group_info *
early_ra::allocno_group_info::color_rep ()
{
  gcc_checking_assert (m_color_rep->m_color_rep == m_color_rep);
  return m_color_rep;
}

// Return the group that contains the allocno.
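// The group header is immediately followed by the chain-heads array and
// then by the allocnos themselves (see create_allocno_group), so the
// header can be recovered by stepping back over OFFSET allocnos and
// GROUP_SIZE chain heads.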
inline early_ra::allocno_group_info *
early_ra::allocno_info::group ()
{
  auto *chain_end = reinterpret_cast<unsigned int *> (this - offset);
  return reinterpret_cast<allocno_group_info *> (chain_end - group_size) - 1;
}

// Return true if this allocno's live range is a subrange of related_allocno's
// and if we have committed to making this allocno share whatever register
// related_allocno uses.
inline bool
early_ra::allocno_info::is_shared ()
{
  return related_allocno != INVALID_ALLOCNO && !is_equiv;
}

// Return true if this allocno is known to be equivalent to ALLOCNO.
inline bool
early_ra::allocno_info::is_equiv_to (unsigned int allocno)
{
  return is_equiv && related_allocno == allocno;
}

// Return the allocnos in the subgroup.
inline array_slice<early_ra::allocno_info>
early_ra::allocno_subgroup::allocnos ()
{
  if (!count)
    return {};
  return { &group->allocnos ()[start], count };
}

// Return allocno I in the subgroup, with 0 being the first.
inline early_ra::allocno_info *
early_ra::allocno_subgroup::allocno (unsigned int i)
{
  return &group->allocnos ()[start + i];
}

// Return the previous (earlier) allocno in ALLOCNO's chain, or null if none.
inline early_ra::allocno_info *
early_ra::chain_prev (allocno_info *allocno)
{
  if (allocno->chain_prev != INVALID_ALLOCNO)
    return m_allocnos[allocno->chain_prev];
  return nullptr;
}

// Return the next (later) allocno in ALLOCNO's chain, or null if none.
inline early_ra::allocno_info *
early_ra::chain_next (allocno_info *allocno)
{
  if (allocno->chain_next != INVALID_ALLOCNO)
    return m_allocnos[allocno->chain_next];
  return nullptr;
}

// Dump the information in m_pseudo_regs.
void
early_ra::dump_pseudo_regs ()
{
  fprintf (dump_file, "\nPseudos:\n");
  fprintf (dump_file, " %6s %6s %6s %6s %6s %6s %8s %s\n",
           "Id", "FPR8", "FPR16", "FPR32", "NONFPR", "Stride",
           "FPRness", "Copies");
  pseudo_reg_info unused_reg = {};
  for (unsigned int regno = FIRST_PSEUDO_REGISTER;
       regno < m_pseudo_regs.length (); ++regno)
    {
      const auto &reg = m_pseudo_regs[regno];
      if (memcmp (&reg, &unused_reg, sizeof (reg)) == 0)
        continue;

      fprintf (dump_file, " %6d %6s %6s %6s %6s %6s %8d", regno,
               reg.flags & NEEDS_FPR8 ? "Req"
               : reg.flags & ALLOWS_FPR8 ? "OK" : "-",
               reg.flags & NEEDS_FPR16 ? "Req"
               : reg.flags & ALLOWS_FPR16 ? "OK" : "-",
               reg.flags & NEEDS_FPR32 ? "Req"
               : reg.flags & ALLOWS_FPR32 ? "OK" : "-",
               reg.flags & NEEDS_NONFPR ? "Req"
               : reg.flags & ALLOWS_NONFPR ? "OK" : "-",
               ~reg.flags & HAS_FLEXIBLE_STRIDE ? "-"
               : reg.flags & HAS_FIXED_STRIDE ? "Some" : "All",
               fpr_preference (regno));
      if (reg.flags & HAS_FPR_COPY)
        fprintf (dump_file, " FPR");
      if (reg.flags & HAS_NONFPR_COPY)
        fprintf (dump_file, " Non-FPR");
      unsigned int copyi = reg.first_copy;
      while (copyi)
        {
          const auto &copy = m_pseudo_reg_copies[copyi];
          if (copy.regnos[0] == regno)
            {
              fprintf (dump_file, " r%d", copy.regnos[1]);
              copyi = copy.next_copies[0];
            }
          else
            {
              fprintf (dump_file, " r%d", copy.regnos[0]);
              copyi = copy.next_copies[1];
            }
        }
      fprintf (dump_file, "\n");
    }
}

// Dump the information in m_fpr_ranges.
void
early_ra::dump_fpr_ranges ()
{
  fprintf (dump_file, "\nFPR live ranges:\n");
  for (unsigned int fpr = 0; fpr < 32; ++fpr)
    {
      auto &intervals = m_fpr_ranges[fpr];
      if (intervals.is_empty ())
        continue;

      fprintf (dump_file, " %2d", fpr);
      for (unsigned int i = 0; i < intervals.length (); ++i)
        {
          auto &interval = intervals[i];
          if (i && (i % 4) == 0)
            fprintf (dump_file, "\n ");
          fprintf (dump_file, " [ %6d %6d ]", interval.start_point,
                   interval.end_point);
        }
      fprintf (dump_file, "\n");
    }
}

// Dump the information in m_allocno_copies.
void
early_ra::dump_copies ()
{
  fprintf (dump_file, "\nCopies:\n");
  fprintf (dump_file, " %8s %3s %6s\n",
           "Allocno", "FPR", "Weight");
  for (const auto &copy : m_allocno_copies)
    fprintf (dump_file, " %8d %3d %6d\n", copy.allocno,
             copy.fpr, copy.weight);
}

// Dump the information in m_allocnos.
void
early_ra::dump_allocnos ()
{
  char buffer[sizeof ("r[:]") + 3 * 3 * sizeof (int) + 1];
  fprintf (dump_file, "\nAllocno groups:\n");
  fprintf (dump_file,
           " %12s %12s %4s %6s %8s %s\n",
           "Ids", "Regno", "Size", "Stride", "Cands", "Heads");
  for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
    {
      auto *allocno = m_allocnos[ai];
      if (allocno->offset != 0)
        continue;
      auto *group = allocno->group ();
      snprintf (buffer, sizeof (buffer), "[%d:%d]", allocno->id,
                allocno->id + group->size - 1);
      fprintf (dump_file, " %12s", buffer);
      snprintf (buffer, sizeof (buffer), "r%d[0:%d]", group->regno,
                group->size - 1);
      fprintf (dump_file, " %12s %4s %6d %08x", buffer,
               group->fpr_size == FPR_D ? "D"
               : group->fpr_size == FPR_Q ? "Q" : "Z",
               group->stride,
               group->fpr_candidates);
      for (auto head : group->chain_heads ())
        if (head == INVALID_ALLOCNO)
          fprintf (dump_file, " -");
        else
          fprintf (dump_file, " %d", head);
      fprintf (dump_file, "\n");
    }

  fprintf (dump_file, "\nAllocno chains:\n");
  fprintf (dump_file, " %5s %12s %12s %6s %5s %5s %6s %5s\n",
           "Id", "Regno", "Range ", "Src", "Dest", "Equiv", "Shared", "FPR");
  for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
    {
      auto *allocno = m_allocnos[ai];
      if (allocno->chain_prev != INVALID_ALLOCNO)
        continue;
      const char *prefix = "=>";
      for (;;)
        {
          auto *group = allocno->group ();
          fprintf (dump_file, " %2s", prefix);
          fprintf (dump_file, " %5d", allocno->id);
          snprintf (buffer, sizeof (buffer), "r%d[%d]", group->regno,
                    allocno->offset);
          fprintf (dump_file, " %12s", buffer);
          snprintf (buffer, sizeof (buffer), "[%d,%d]",
                    allocno->start_point, allocno->end_point);
          fprintf (dump_file, " %11s%s %6s", buffer,
                   allocno->is_earlyclobbered ? "*" : " ",
                   allocno->is_strong_copy_dest ? "Strong"
                   : allocno->is_copy_dest ? "Yes" : "-");
          if (allocno->copy_dest == INVALID_ALLOCNO)
            fprintf (dump_file, " %5s", "-");
          else
            fprintf (dump_file, " %5d", allocno->copy_dest);
          if (allocno->is_equiv)
            fprintf (dump_file, " %5d", allocno->related_allocno);
          else
            fprintf (dump_file, " %5s", "-");
          if (allocno->is_shared ())
            fprintf (dump_file, " %6d", allocno->related_allocno);
          else
            fprintf (dump_file, " %6s", "-");
          if (allocno->hard_regno == FIRST_PSEUDO_REGISTER)
            fprintf (dump_file, " %5s", "-");
          else
            fprintf (dump_file, " %5s", reg_names[allocno->hard_regno]);
          fprintf (dump_file, "\n");
          if (allocno->chain_next == INVALID_ALLOCNO)
            break;
          allocno = m_allocnos[allocno->chain_next];
          prefix = "";
        }
    }
}

// Dump the information in m_colors.
void
early_ra::dump_colors ()
{
  fprintf (dump_file, "\nColors:\n");
  for (unsigned int i = 0; i < m_colors.length (); ++i)
    {
      auto *color = m_colors[i];
      if (!color->group)
        continue;

      fprintf (dump_file, " color %d:\n", i);
      fprintf (dump_file, " chains:\n");
      auto heads = color->group->chain_heads ();
      for (unsigned int i = 0; i < color->group->size; ++i)
        {
          fprintf (dump_file, " %2d:", i);
          auto ai = heads[i];
          while (ai != INVALID_ALLOCNO)
            {
              auto *allocno = m_allocnos[ai];
              fprintf (dump_file, " r%d[%d]", allocno->group ()->regno,
                       allocno->offset);
              ai = allocno->chain_next;
            }
          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, " FPR candidates:");
      for (unsigned int fpr = 0; fpr < 32; ++fpr)
        fprintf (dump_file, "%s%c", fpr % 8 ? "" : " ",
                 color->group->fpr_candidates & (1U << fpr) ? 'Y' : '-');
      fprintf (dump_file, "\n");
      fprintf (dump_file, " FPR preferences:");
      for (unsigned int fpr = 0; fpr < 32; ++fpr)
        if (color->fpr_preferences[fpr])
          fprintf (dump_file, " %d(%d)", fpr, color->fpr_preferences[fpr]);
      fprintf (dump_file, "\n");
    }
}

// Record any necessary information about a move from SRC to DEST.
void
early_ra::preprocess_move (rtx dest, rtx src)
{
  if (SUBREG_P (dest))
    dest = SUBREG_REG (dest);
  if (!REG_P (dest))
    return;

  if (SUBREG_P (src))
    src = SUBREG_REG (src);
  if (!REG_P (src))
    return;

  // Sort the registers by increasing REGNO.
  rtx regs[] = { dest, src };
  if (REGNO (dest) > REGNO (src))
    std::swap (regs[0], regs[1]);
  unsigned int regno0 = REGNO (regs[0]);
  unsigned int regno1 = REGNO (regs[1]);

  // Ignore moves between hard registers.
  if (HARD_REGISTER_NUM_P (regno1))
    return;

  // For moves between hard registers and pseudos, just record the type
  // of hard register involved.
  auto &reg1 = m_pseudo_regs[regno1];
  reg1.mode = GET_MODE (regs[1]);
  if (HARD_REGISTER_NUM_P (regno0))
    {
      reg1.flags |= (FP_REGNUM_P (regno0) ? HAS_FPR_COPY : HAS_NONFPR_COPY);
      return;
    }

  // Record a move between two pseudo registers.
  auto &reg0 = m_pseudo_regs[regno0];
  reg0.mode = GET_MODE (regs[0]);

  reg_copy_info copy;
  copy.regnos[0] = regno0;
  copy.regnos[1] = regno1;
  copy.next_copies[0] = reg0.first_copy;
  copy.next_copies[1] = reg1.first_copy;

  reg0.first_copy = reg1.first_copy = m_pseudo_reg_copies.length ();
  m_pseudo_reg_copies.safe_push (copy);
}

// Return true if INSN has a multi-vector operand and if that operand
// could be converted to strided form.
static bool
is_stride_candidate (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  auto stride_type = get_attr_stride_type (insn);
  return (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE
          || stride_type == STRIDE_TYPE_LD1_CONSECUTIVE
          || stride_type == STRIDE_TYPE_ST1_CONSECUTIVE);
}

// Go through the constraints of INSN, which has already been extracted,
// and record any relevant information about pseudo registers.
void
early_ra::process_pseudo_reg_constraints (rtx_insn *insn)
{
  extract_insn (insn);
  preprocess_constraints (insn);

  // Flags that describe any multi-register vector operands.
  unsigned int insn_flags = (is_stride_candidate (insn)
                             ? HAS_FLEXIBLE_STRIDE
                             : HAS_FIXED_STRIDE);

  auto alts = get_preferred_alternatives (insn);

  int operand_matches[MAX_RECOG_OPERANDS];
  unsigned int operand_flags[MAX_RECOG_OPERANDS];
  for (int i = 0; i < recog_data.n_operands; ++i)
    {
      operand_matches[i] = -1;
      operand_flags[i] = 0;
    }

  // Extract information from the constraints, considering all plausible
  // alternatives.
  for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
    {
      if (!(alts & ALTERNATIVE_BIT (altno)))
        continue;

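      // recog_op_alt is a flat array indexed by
      // [alternative number * n_operands + operand number].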
      auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
      if (!likely_alternative_match_p (op_alt))
        continue;

      // Use SRC_OPNO's constraints to derive information about DEST_OPNO.
      auto record_operand = [&](int src_opno, int dest_opno)
        {
          int matches = op_alt[src_opno].matches;
          if (matches >= 0)
            operand_matches[dest_opno] = matches;

          auto cl = alternative_class (op_alt, src_opno);
          if (cl != NO_REGS)
            {
              if (reg_class_subset_p (cl, FP_REGS))
                operand_flags[dest_opno] |= ALLOWS_FPR32;
              if (reg_class_subset_p (cl, FP_LO_REGS))
                operand_flags[dest_opno] |= ALLOWS_FPR16;
              if (reg_class_subset_p (cl, FP_LO8_REGS))
                operand_flags[dest_opno] |= ALLOWS_FPR8;
              if (!reg_classes_intersect_p (cl, FP_REGS))
                operand_flags[dest_opno] |= ALLOWS_NONFPR;
            }
        };

      for (int i = 0; i < recog_data.n_operands; ++i)
        {
          record_operand (i, i);
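          // A '%' constraint marks operands I and I + 1 as commutative.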
          if (recog_data.constraints[i][0] == '%')
            {
              record_operand (i, i + 1);
              record_operand (i + 1, i);
            }
        }
    }

  // Process the information we collected above.
  for (int i = 0; i < recog_data.n_operands; ++i)
    {
      rtx op = recog_data.operand[i];
      machine_mode orig_mode = GET_MODE (op);
      if (SUBREG_P (op))
        op = SUBREG_REG (op);

      // Record the accumulated information in m_pseudo_regs.
      if (REG_P (op) && !HARD_REGISTER_P (op))
        {
          // The flags so far just describe what at least one alternative
          // would accept.  Calculate the associated NEEDS_* information.
          auto flags = operand_flags[i];
          if (!(flags & ALLOWS_FPR32) && (flags & ALLOWS_NONFPR))
            flags |= NEEDS_NONFPR;
          else if ((flags & ALLOWS_FPR32) && !(flags & ALLOWS_NONFPR))
            {
              if (flags & ALLOWS_FPR8)
                flags |= NEEDS_FPR8;
              if (flags & ALLOWS_FPR16)
                flags |= NEEDS_FPR16;
              flags |= NEEDS_FPR32;
            }

          // Look for multi-register vector operands.
          if (VECTOR_MODE_P (orig_mode)
              && targetm.hard_regno_mode_ok (V0_REGNUM, orig_mode)
              && hard_regno_nregs (V0_REGNUM, orig_mode) > 1)
            flags |= insn_flags;

          m_pseudo_regs[REGNO (op)].flags |= flags;
          m_pseudo_regs[REGNO (op)].mode = GET_MODE (op);
        }

      // Treat matching constraints as equivalent to moves.
      if (operand_matches[i] >= 0)
        preprocess_move (recog_data.operand[operand_matches[i]], op);
    }
}

// Make one pass through the instructions, collecting information that
// will be needed later.
void
early_ra::preprocess_insns ()
{
  m_pseudo_regs.safe_grow_cleared (max_reg_num ());
  m_pseudo_reg_copies.safe_push (reg_copy_info ());
  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
        continue;

      if (GET_CODE (PATTERN (insn)) == USE
          || GET_CODE (PATTERN (insn)) == CLOBBER)
        continue;

      rtx set = single_set (insn);
      if (set && is_move_set (set))
        preprocess_move (SET_DEST (set), SET_SRC (set));
      else
        process_pseudo_reg_constraints (insn);
    }
}

// Return a signed integer that says (roughly) how strong an affinity
// pseudo register REGNO has with FPRs.  A positive value indicates
// that we should try to allocate an FPR, a negative value indicates
// that we shouldn't, and 0 indicates neutrality.
int
early_ra::fpr_preference (unsigned int regno)
{
  auto mode = m_pseudo_regs[regno].mode;
  auto flags = m_pseudo_regs[regno].flags;
  if (mode == VOIDmode || !targetm.hard_regno_mode_ok (V0_REGNUM, mode))
    return -3;
  else if (flags & HAS_FLEXIBLE_STRIDE)
    return 3;
  else if (flags & NEEDS_FPR32)
    return 2;
  else if (!(flags & ALLOWS_FPR32) && (flags & ALLOWS_NONFPR))
    return -2;
  else if ((flags & HAS_FPR_COPY) && !(flags & HAS_NONFPR_COPY))
    return 1;
  else if ((flags & HAS_NONFPR_COPY) && !(flags & HAS_FPR_COPY))
    return -1;
  else
    return 0;
}

// Propagate information about pseudo-registers along copy edges,
// while doing so doesn't create conflicting FPR preferences.
void
early_ra::propagate_pseudo_reg_info ()
{
  struct stack_entry { unsigned int regno, copyi; };
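  // Each stack entry names a pseudo register and the next copy record to
  // visit for it, giving an iterative depth-first walk over the copy graph.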

  auto_vec<stack_entry, 32> stack;
  for (unsigned int i = FIRST_PSEUDO_REGISTER;
       i < m_pseudo_regs.length (); ++i)
    {
      auto start = m_pseudo_regs[i].first_copy;
      if (!start)
        continue;

      stack.quick_push ({ i, start });
      while (!stack.is_empty ())
        {
          auto entry = stack.pop ();
          auto &copy = m_pseudo_reg_copies[entry.copyi];
          auto src_regno = entry.regno;
          auto dest_regno = (src_regno == copy.regnos[1]
                             ? copy.regnos[0]
                             : copy.regnos[1]);
          auto next_copyi = (src_regno == copy.regnos[1]
                             ? copy.next_copies[1]
                             : copy.next_copies[0]);
          if (next_copyi)
            stack.safe_push ({ src_regno, next_copyi });

          auto &src_reg = m_pseudo_regs[src_regno];
          auto &dest_reg = m_pseudo_regs[dest_regno];

          if (src_reg.flags & ~dest_reg.flags & PSEUDO_COPY_FLAGS)
            {
              auto src_preference = fpr_preference (src_regno);
              auto dest_preference = fpr_preference (dest_regno);
              if ((src_preference >= 0 && dest_preference >= 0)
                  || (src_preference <= 0 && dest_preference <= 0))
                {
                  dest_reg.flags |= (src_reg.flags & PSEUDO_COPY_FLAGS);
                  stack.safe_push ({ dest_regno, dest_reg.first_copy });
                }
            }
        }
    }
}

// Decide which pseudos should be allocated an FPR, setting m_fpr_pseudos
// accordingly.
void
early_ra::choose_fpr_pseudos ()
{
  for (unsigned int i = FIRST_PSEUDO_REGISTER;
       i < m_pseudo_regs.length (); ++i)
    if (fpr_preference (i) > 0)
      bitmap_set_bit (m_fpr_pseudos, i);
}

// Clear out information about the previous CFG region (if any)
// and set up the data for a new region.
void
early_ra::start_new_region ()
{
  obstack_free (&m_region_obstack, m_region_alloc_start);
  m_regno_to_group.empty ();
  m_allocno_copies.truncate (0);
  m_allocnos.truncate (0);
  m_sorted_allocnos.truncate (0);
  m_shared_allocnos.truncate (0);
  m_colors.truncate (0);
  m_insn_ranges.truncate (0);
  for (auto &fpr_ranges : m_fpr_ranges)
    fpr_ranges.truncate (0);
  for (auto &call_points : m_call_points)
    call_points.truncate (0);
  gcc_assert (bitmap_empty_p (m_live_allocnos) && m_live_fprs == 0);
  m_dead_insns.truncate (0);
  m_allocated_fprs = 0;
  m_call_preserved_fprs = 0;
  m_allocation_successful = true;
}

// Create and return an allocno group of size SIZE for register REGNO.
// REGNO can be INVALID_REGNUM if the group just exists to allow
// other groups to be chained together, and does not have any new
// allocnos of its own.
early_ra::allocno_group_info *
early_ra::create_allocno_group (unsigned int regno, unsigned int size)
{
  static_assert (alignof (unsigned int) == alignof (allocno_info),
                 "allocno_info alignment");
  unsigned int num_allocnos = (regno != INVALID_REGNUM ? size : 0);

  // Allocate an allocno_group_info, followed by an array of chain heads,
  // followed by the allocnos themselves.
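  // This matches the layout that chain_heads (), allocnos () and
  // allocno_info::group () decode with pointer arithmetic.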
  size_t alloc_size = (sizeof (allocno_group_info)
                       + size * sizeof (unsigned int)
                       + num_allocnos * sizeof (allocno_info));
  void *data = obstack_alloc (&m_region_obstack, alloc_size);

  // Initialize the group.
  auto *group = reinterpret_cast<allocno_group_info *> (data);
  memset (group, 0, sizeof (*group));
  group->m_color_rep = group;
  group->regno = regno;
  group->size = size;
  group->stride = 1;
  group->fpr_size = FPR_D;
  group->fpr_candidates = ~0U;

  // Initialize the chain heads.
  auto heads = group->chain_heads ();
  for (unsigned int i = 0; i < heads.size (); ++i)
    heads[i] = (i < num_allocnos ? m_allocnos.length () + i : INVALID_ALLOCNO);

  // Initialize the allocnos.
  if (num_allocnos > 0)
    {
      auto allocnos = group->allocnos ();
      memset (allocnos.begin (), 0, num_allocnos * sizeof (allocno_info));
      for (unsigned int i = 0; i < num_allocnos; ++i)
        {
          auto *allocno = &allocnos[i];
          allocno->id = m_allocnos.length ();
          allocno->offset = i;
          allocno->group_size = size;
          allocno->hard_regno = FIRST_PSEUDO_REGISTER;
          allocno->start_point = END_OF_REGION;
          allocno->end_point = START_OF_REGION;
          allocno->copy_dest = INVALID_ALLOCNO;
          allocno->related_allocno = INVALID_ALLOCNO;
          allocno->chain_next = INVALID_ALLOCNO;
          allocno->chain_prev = INVALID_ALLOCNO;
          m_allocnos.safe_push (allocno);
        }
    }
  return group;
}

// If REG refers to a pseudo register that might be allocated to FPRs,
// return the associated range of allocnos, creating new ones if necessary.
// Return an empty range otherwise.
early_ra::allocno_subgroup
early_ra::get_allocno_subgroup (rtx reg)
{
  if (GET_CODE (reg) == SUBREG)
    {
      allocno_subgroup inner = get_allocno_subgroup (SUBREG_REG (reg));
      if (!inner)
        return {};

      if (!targetm.modes_tieable_p (GET_MODE (SUBREG_REG (reg)),
                                    GET_MODE (reg)))
        {
          m_allocation_successful = false;
          return {};
        }

      subreg_info info;
      subreg_get_info (V0_REGNUM, GET_MODE (SUBREG_REG (reg)),
                       SUBREG_BYTE (reg), GET_MODE (reg), &info);
      if (!info.representable_p)
        {
          m_allocation_successful = false;
          return {};
        }

      inner.start += info.offset;
      inner.count = info.nregs;
      return inner;
    }

  if (!REG_P (reg) || HARD_REGISTER_P (reg))
    return {};

  unsigned int regno = REGNO (reg);
  if (fpr_preference (regno) <= 0)
    return {};

  unsigned int count = hard_regno_nregs (V0_REGNUM, GET_MODE (reg));
  bool existed;
  auto &entry = m_regno_to_group.get_or_insert (regno, &existed);
  if (!existed)
    {
      auto *group = create_allocno_group (regno, count);
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          auto allocnos = group->allocnos ();
          fprintf (dump_file, "Creating allocnos [%d:%d] for r%d\n",
                   allocnos.front ().id, allocnos.back ().id, regno);
        }

      auto reg_bits = GET_MODE_BITSIZE (GET_MODE (reg));
      auto fpr_bits = exact_div (reg_bits, count);
      auto flags = m_pseudo_regs[regno].flags;

      // Punt for now if there is a choice to be made between using an
      // FPR and a non-FPR.
      if ((flags & NEEDS_NONFPR)
          || ((flags & ALLOWS_NONFPR)
              && !FLOAT_MODE_P (GET_MODE (reg))
              && !VECTOR_MODE_P (GET_MODE (reg))))
        m_allocation_successful = false;

      if (flags & ALLOWS_FPR8)
        group->fpr_candidates &= 0xff;
      else if (flags & ALLOWS_FPR16)
        group->fpr_candidates &= 0xffff;
      group->fpr_candidates &= ~0U >> (count - 1);

      group->has_flexible_stride = ((flags & HAS_FLEXIBLE_STRIDE) != 0
                                    && (flags & HAS_FIXED_STRIDE) == 0);

      group->fpr_size = (maybe_gt (fpr_bits, 128) ? FPR_Z
                         : maybe_gt (fpr_bits, 64) ? FPR_Q : FPR_D);

      entry = group;
    }
  return { entry, 0, count };
}

1435 | // Record a use of FPR REGNO at the current program point, as part of | |
1436 | // a backwards walk over a block. | |
1437 | void | |
1438 | early_ra::record_fpr_use (unsigned int regno) | |
1439 | { | |
1440 | gcc_assert (IN_RANGE (regno, V0_REGNUM, V31_REGNUM)); | |
1441 | unsigned int offset = regno - V0_REGNUM; | |
1442 | if (!(m_live_fprs & (1U << offset))) | |
1443 | { | |
1444 | m_fpr_ranges[offset].safe_push ({ START_OF_REGION, m_current_point, | |
1445 | INVALID_ALLOCNO }); | |
1446 | m_live_fprs |= 1U << offset; | |
1447 | } | |
1448 | } | |
1449 | ||
1450 | // Record a definition of FPR REGNO at the current program point, as part of | |
1451 | // a backwards walk over a block. | |
1452 | void | |
1453 | early_ra::record_fpr_def (unsigned int regno) | |
1454 | { | |
1455 | gcc_assert (IN_RANGE (regno, V0_REGNUM, V31_REGNUM)); | |
1456 | unsigned int offset = regno - V0_REGNUM; | |
1457 | ||
1458 | // The definition completes the current live range. If the result | |
1459 | // of the definition is used, the live range extends to the last use. | |
1460 | // Otherwise the live range is just a momentary blip at the current point. | |
1461 | auto &ranges = m_fpr_ranges[offset]; | |
1462 | if (m_live_fprs & (1U << offset)) | |
1463 | { | |
1464 | ranges.last ().start_point = m_current_point; | |
1465 | m_live_fprs &= ~(1U << offset); | |
1466 | } | |
1467 | else | |
1468 | ranges.safe_push ({ m_current_point, m_current_point, INVALID_ALLOCNO }); | |
1469 | } | |
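// A note on the numbering used by these walks: m_current_point only
// increases as the walk moves backwards, so numerically larger points
// correspond to earlier positions in the program.  A live range therefore
// runs from its (numerically larger) start_point at the definition down to
// its (numerically smaller) end_point at the last use.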
1470 | ||
1471 | // Record a use of allocno ALLOCNO at the current program point, as part | |
1472 | // of a backwards walk over a block. | |
1473 | void | |
1474 | early_ra::record_allocno_use (allocno_info *allocno) | |
1475 | { | |
2f46e357 RS |
1476 | if (allocno->start_point == m_current_point) |
1477 | return; | |
1478 | ||
1479 | gcc_checking_assert (!allocno->is_shared ()); | |
9f0f7d80 RS |
1480 | bitmap_set_bit (m_live_allocnos, allocno->id); |
1481 | if (allocno->end_point > m_current_point) | |
8b5cd6c4 RS |
1482 | { |
1483 | allocno->end_point = m_current_point; | |
1484 | allocno->last_def_point = START_OF_REGION; | |
2f46e357 | 1485 | allocno->last_use_point = END_OF_REGION; |
8b5cd6c4 | 1486 | } |
2f46e357 RS |
1487 | else |
1488 | allocno->last_use_point = allocno->start_point; | |
9f0f7d80 RS |
1489 | allocno->start_point = m_current_point; |
1490 | allocno->is_copy_dest = false; | |
2f46e357 RS |
1491 | allocno->is_strong_copy_src = false; |
1492 | allocno->related_allocno = INVALID_ALLOCNO; | |
1493 | allocno->is_equiv = false; | |
9f0f7d80 RS |
1494 | } |
1495 | ||
1496 | // Record a definition of ALLOCNO at the current program point, |
1497 | // as part of a backwards walk over a block.  The allocno is known |
1498 | // to be live. |
1499 | void | |
1500 | early_ra::record_allocno_def (allocno_info *allocno) | |
1501 | { | |
2f46e357 RS |
1502 | gcc_checking_assert (!allocno->is_shared ()); |
1503 | allocno->last_use_point = allocno->start_point; | |
8b5cd6c4 | 1504 | allocno->last_def_point = m_current_point; |
9f0f7d80 RS |
1505 | allocno->start_point = m_current_point; |
1506 | allocno->num_defs = MIN (allocno->num_defs + 1, 2); | |
1507 | gcc_checking_assert (!allocno->is_copy_dest | |
2f46e357 | 1508 | && !allocno->is_strong_copy_src); |
9f0f7d80 RS |
1509 | if (!bitmap_clear_bit (m_live_allocnos, allocno->id)) |
1510 | gcc_unreachable (); | |
1511 | } | |
1512 | ||
2f46e357 RS |
1513 | // SRC_ALLOCNO is copied or tied to DEST_ALLOCNO; IS_EQUIV is true if the |
1514 | // two allocnos are known to be equal. See whether we can mark a chain of | |
1515 | // allocnos ending at DEST_ALLOCNO as related to SRC_ALLOCNO. Return the | |
1516 | // start of the chain if so, otherwise return null. | |
1517 | // | |
1518 | // If IS_EQUIV, a chain that contains just DEST_ALLOCNO should be treated | |
1519 | // as an equivalence. Otherwise the chain should be shared with SRC_ALLOCNO. | |
1520 | // | |
1521 | // Sharing chains are a rather hacky workaround for the fact that we | |
1522 | // don't collect segmented live ranges, and that in the end we want to do | |
1523 | // simple interval graph coloring. | |
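// As an illustration (the pseudos here are made up): after a copy p2 = p1,
// if p2's live range is contained in p1's, p1 is not redefined during that
// range, and (unless the two are known to hold the same value) p1's value
// is not also needed during that range, then p2 can simply be placed in
// the same FPR as p1, even though the combined behaviour is no longer a
// single simple interval.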
1524 | early_ra::allocno_info * | |
1525 | early_ra::find_related_start (allocno_info *dest_allocno, | |
1526 | allocno_info *src_allocno, bool is_equiv) | |
8b5cd6c4 | 1527 | { |
2f46e357 RS |
1528 | allocno_info *res = nullptr; |
1529 | for (;;) | |
8b5cd6c4 | 1530 | { |
2f46e357 RS |
1531 | if (src_allocno->end_point > dest_allocno->end_point) |
1532 | // The src allocno dies first. | |
1533 | return res; | |
8b5cd6c4 | 1534 | |
2f46e357 RS |
1535 | if (src_allocno->num_defs != 0) |
1536 | { | |
1537 | if (dest_allocno->end_point < m_current_bb_point) | |
1538 | // We don't currently track enough information to handle multiple | |
1539 | // definitions across basic block boundaries. | |
1540 | return res; | |
1541 | ||
1542 | if (src_allocno->last_def_point >= dest_allocno->end_point) | |
1543 | // There is another definition during the destination's live range. | |
1544 | return res; | |
1545 | } | |
1546 | if (is_equiv) | |
1547 | { | |
1548 | if (dest_allocno->num_defs == 1) | |
1549 | // dest_allocno is equivalent to src_allocno for dest_allocno's | |
1550 | // entire live range. Fall back to that if we can't establish | |
1551 | // a sharing chain. | |
1552 | res = dest_allocno; | |
1553 | } | |
1554 | else | |
1555 | { | |
1556 | if (src_allocno->last_use_point >= dest_allocno->end_point) | |
1557 | // src_allocno is live during dest_allocno's live range, | |
1558 | // and the two allocnos do not necessarily have the same value. | |
1559 | return res; | |
1560 | } | |
1561 | ||
1562 | if (dest_allocno->group_size != 1 | |
1563 | || DF_REG_DEF_COUNT (dest_allocno->group ()->regno) != 1) | |
1564 | // Currently only single allocnos that are defined once can | |
1565 | // share registers with non-equivalent allocnos. This could be | |
1566 | // relaxed, but at the time of writing, aggregates are not valid | |
1567 | // SSA names and so generally only use a single pseudo throughout | |
1568 | // their lifetime. | |
1569 | return res; | |
1570 | ||
1571 | if (dest_allocno->copy_dest == src_allocno->id) | |
1572 | // We've found a complete and valid sharing chain. | |
1573 | return dest_allocno; | |
1574 | ||
1575 | if (dest_allocno->copy_dest == INVALID_ALLOCNO) | |
1576 | return res; | |
1577 | ||
1578 | auto *next_allocno = m_allocnos[dest_allocno->copy_dest]; | |
1579 | if (!is_chain_candidate (dest_allocno, next_allocno)) | |
1580 | return res; | |
1581 | ||
1582 | dest_allocno = next_allocno; | |
1583 | is_equiv = false; | |
8b5cd6c4 | 1584 | } |
8b5cd6c4 RS |
1585 | } |
1586 | ||
9f0f7d80 RS |
1587 | // Record any relevant allocno-related information for an actual or imagined |
1588 | // copy from SRC to DEST. FROM_MOVE_P is true if the copy was an explicit | |
1589 | // move instruction, false if it represents one way of satisfying the previous | |
1590 | // instruction's constraints. | |
1591 | void | |
1592 | early_ra::record_copy (rtx dest, rtx src, bool from_move_p) | |
1593 | { | |
1594 | auto dest_range = get_allocno_subgroup (dest); | |
1595 | auto src_range = get_allocno_subgroup (src); | |
1596 | if (from_move_p | |
1597 | && dest_range | |
1598 | && REG_P (src) | |
1599 | && FP_REGNUM_P (REGNO (src))) | |
1600 | { | |
1601 | // A copy from an FPR to an allocno group. | |
1602 | unsigned int fpr = REGNO (src) - V0_REGNUM; | |
1603 | m_allocno_copies.safe_push ({ dest_range.allocno (0)->id, fpr, | |
1604 | dest_range.count }); | |
1605 | ||
1606 | // If the allocno at the other end of the chain of copies from DEST | |
1607 | // has a copy to the same FPR, record that all intervening copy chains | |
1608 | // could become "strong" ones. This indicates that picking the FPR | |
1609 | // avoids a copy at both ends. | |
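// As an illustration (the pseudos and register are made up): for
//   p1 = v0;  ...;  p2 = <op> (p1, ...) with p1 tied to p2;  ...;  v0 = p2;
// the backwards walk first sees v0 = p2 and remembers v0 against p2,
// the tie then propagates that register to p1, and on reaching p1 = v0
// the same register is found at this end too, so the intervening copies
// are candidates for becoming "strong".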
1610 | unsigned int hard_regno = REGNO (src); | |
1611 | for (auto &dest_allocno : dest_range.allocnos ()) | |
1612 | if (dest_allocno.hard_regno == hard_regno++) | |
1613 | dest_allocno.is_strong_copy_src = true; | |
1614 | } | |
1615 | else if (from_move_p | |
1616 | && src_range | |
1617 | && REG_P (dest) | |
1618 | && FP_REGNUM_P (REGNO (dest))) | |
1619 | { | |
1620 | // A copy from an allocno group to an FPR. | |
1621 | unsigned int fpr = REGNO (dest) - V0_REGNUM; | |
1622 | m_allocno_copies.safe_push ({ src_range.allocno (0)->id, fpr, | |
1623 | src_range.count }); | |
1624 | for (auto &src_allocno : src_range.allocnos ()) | |
1625 | { | |
1626 | // If the copy comes from a move, see whether the destination | |
1627 | // FPR is known to be equal to the source allocno for the FPR's | |
1628 | // last live range. | |
1629 | if (from_move_p && src_allocno.num_defs == 0) | |
1630 | { | |
1631 | auto &last_range = m_fpr_ranges[fpr].last (); | |
1632 | if (last_range.end_point >= src_allocno.end_point) | |
1633 | last_range.allocno = src_allocno.id; | |
1634 | } | |
1635 | src_allocno.hard_regno = V0_REGNUM + fpr; | |
1636 | fpr += 1; | |
1637 | } | |
1638 | } | |
1639 | else if (src_range && dest_range) | |
1640 | { | |
1641 | // A copy between two allocno groups. We can only have a mismatched | |
1642 | // number of FPRs for imaginary, non-move copies. In that case | |
1643 | // the matching happens on the common lowparts. | |
1644 | gcc_assert (!from_move_p || src_range.count == dest_range.count); | |
1645 | unsigned int count = std::min (src_range.count, dest_range.count); | |
1646 | if (WORDS_BIG_ENDIAN) | |
1647 | { | |
1648 | src_range.start += src_range.count - count; | |
1649 | dest_range.start += dest_range.count - count; | |
1650 | } | |
1651 | src_range.count = count; | |
1652 | dest_range.count = count; | |
1653 | ||
1654 | // Ignore (imaginary non-move) copies if the destination is still live. | |
1655 | for (auto &dest_allocno : dest_range.allocnos ()) | |
1656 | if (bitmap_bit_p (m_live_allocnos, dest_allocno.id)) | |
1657 | return; | |
1658 | ||
1659 | for (unsigned int i = 0; i < src_range.count; ++i) | |
1660 | { | |
1661 | auto *dest_allocno = dest_range.allocno (i); | |
1662 | auto *src_allocno = src_range.allocno (i); | |
1663 | if (src_allocno->end_point > dest_allocno->start_point) | |
1664 | { | |
1665 | gcc_assert (src_allocno->copy_dest == INVALID_ALLOCNO | |
1666 | || src_allocno->copy_dest == dest_allocno->id); | |
1667 | src_allocno->copy_dest = dest_allocno->id; | |
1668 | src_allocno->hard_regno = dest_allocno->hard_regno; | |
1669 | dest_allocno->is_copy_dest = 1; | |
1670 | } | |
2f46e357 RS |
1671 | else if (auto *start_allocno = find_related_start (dest_allocno, |
1672 | src_allocno, | |
1673 | from_move_p)) | |
1674 | { | |
1675 | auto *next_allocno = dest_allocno; | |
1676 | for (;;) | |
1677 | { | |
1678 | next_allocno->related_allocno = src_allocno->id; | |
1679 | next_allocno->is_equiv = (start_allocno == dest_allocno | |
1680 | && from_move_p); | |
1681 | if (next_allocno == start_allocno) | |
1682 | break; | |
1683 | next_allocno = m_allocnos[next_allocno->copy_dest]; | |
1684 | } | |
1685 | } | |
9f0f7d80 RS |
1686 | } |
1687 | } | |
1688 | } | |
1689 | ||
1690 | // Record any relevant allocno-related information about the constraints | |
1691 | // on INSN, which has already been extracted. | |
1692 | void | |
1693 | early_ra::record_constraints (rtx_insn *insn) | |
1694 | { | |
1695 | preprocess_constraints (insn); | |
1696 | ||
1697 | int operand_matches[MAX_RECOG_OPERANDS]; | |
1698 | for (int i = 0; i < recog_data.n_operands; ++i) | |
1699 | operand_matches[i] = -1; | |
1700 | ||
1701 | auto alts = get_preferred_alternatives (insn); | |
1702 | bool any_ok = recog_data.n_alternatives == 0; | |
1703 | ||
1704 | // The set of output operands that are earlyclobber in at least one | |
1705 | // alternative. | |
1706 | operand_mask earlyclobber_operands = 0; | |
1707 | ||
1708 | // The set of output operands that are matched to inputs in at least | |
1709 | // one alternative. | |
1710 | operand_mask matched_operands = 0; | |
1711 | ||
1712 | // The set of output operands that are not matched to inputs in at least | |
1713 | // one alternative. | |
1714 | operand_mask unmatched_operands = 0; | |
1715 | ||
1716 | // The set of input operands that are matched to outputs in at least one | |
1717 | // alternative, or that overlap with such an input if the output is not | |
1718 | // earlyclobber. The latter part of the condition copes with things | |
1719 | // like y = x * x, where the first x is tied to the destination, and where | |
1720 | // y is not earlyclobber. | |
1721 | operand_mask matches_operands = 0; | |
1722 | ||
1723 | for (int altno = 0; altno < recog_data.n_alternatives; ++altno) | |
1724 | { | |
1725 | if (!(alts & ALTERNATIVE_BIT (altno))) | |
1726 | continue; | |
1727 | ||
1728 | auto *op_alt = &recog_op_alt[altno * recog_data.n_operands]; | |
1729 | if (!likely_alternative_match_p (op_alt)) | |
1730 | continue; | |
1731 | ||
1732 | any_ok = true; | |
1733 | ||
1734 | // Update the information for operand DEST_OPNO based on the constraint | |
1735 | // information for operand SRC_OPNO. The numbers can be different for | |
1736 | // swapped commutative operands. | |
1737 | auto record_operand = [&](int src_opno, int dest_opno) | |
1738 | { | |
1739 | int matches = op_alt[src_opno].matches; | |
1740 | // A matched earlyclobber cannot be used if the same operand value | |
1741 | // occurs in an unmatched operand. E.g. for y = x * x, a matched | |
1742 | // earlyclobber on the first input does not cover the second input. | |
1743 | if (matches >= 0) | |
1744 | { | |
1745 | rtx op = recog_data.operand[dest_opno]; | |
1746 | operand_mask overlaps = 0; | |
1747 | for (int i = 0; i < recog_data.n_operands; ++i) | |
1748 | if (i != dest_opno | |
1749 | && !recog_data.is_operator[i] | |
1750 | && recog_data.operand_type[i] != OP_OUT | |
1751 | && reg_overlap_mentioned_p (op, recog_data.operand[i])) | |
1752 | overlaps |= 1U << i; | |
1753 | if (!op_alt[matches].earlyclobber || overlaps == 0) | |
1754 | { | |
1755 | operand_matches[dest_opno] = matches; | |
1756 | matches_operands |= (1U << dest_opno) | overlaps; | |
1757 | } | |
1758 | } | |
1759 | }; | |
1760 | ||
1761 | auto reject = count_rejects (op_alt); | |
1762 | for (int opno = 0; opno < recog_data.n_operands; ++opno) | |
1763 | { | |
1764 | operand_mask op_mask = operand_mask (1) << opno; | |
1765 | ||
1766 | if (recog_data.operand_type[opno] != OP_IN) | |
1767 | { | |
1768 | if (reject == 0 && op_alt[opno].matched >= 0) | |
1769 | matched_operands |= op_mask; | |
1770 | else | |
1771 | unmatched_operands |= op_mask; | |
1772 | } | |
1773 | ||
1774 | if (op_alt[opno].earlyclobber) | |
1775 | earlyclobber_operands |= op_mask; | |
1776 | ||
1777 | // Punt for now on scratches. If we wanted to handle them, | |
1778 | // we'd need to create allocnos for them, like IRA does. | |
1779 | rtx op = recog_data.operand[opno]; | |
1780 | if (GET_CODE (op) == SCRATCH | |
1781 | && reg_classes_intersect_p (op_alt[opno].cl, FP_REGS)) | |
1782 | m_allocation_successful = false; | |
1783 | ||
1784 | // Record filter information, which applies to the first register | |
1785 | // in the operand. | |
1786 | if (auto filters = alternative_register_filters (op_alt, opno)) | |
1787 | if (auto range = get_allocno_subgroup (recog_data.operand[opno])) | |
1788 | for (unsigned int fpr = range.start; fpr < 32; ++fpr) | |
1789 | if (!test_register_filters (filters, fpr)) | |
1790 | range.group->fpr_candidates &= ~(1U << (fpr - range.start)); | |
1791 | ||
1792 | if (reject == 0) | |
1793 | { | |
1794 | // Record possible matched operands. | |
1795 | record_operand (opno, opno); | |
1796 | if (recog_data.constraints[opno][0] == '%') | |
1797 | { | |
1798 | record_operand (opno, opno + 1); | |
1799 | record_operand (opno + 1, opno); | |
1800 | } | |
1801 | } | |
1802 | } | |
1803 | } | |
1804 | ||
1805 | if (!any_ok) | |
1806 | { | |
1807 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1808 | fprintf (dump_file, " -- no match\n"); | |
1809 | m_allocation_successful = false; | |
1810 | } | |
1811 | ||
1812 | // Record if there is an output operand that is never earlyclobber and never | |
1813 | // matched to an input. See the comment below for how this is used. | |
1814 | rtx dest_op = NULL_RTX; | |
1815 | for (int opno = 0; opno < recog_data.n_operands; ++opno) | |
1816 | { | |
1817 | auto op_mask = operand_mask (1) << opno; | |
1818 | if (recog_data.operand_type[opno] == OP_OUT | |
1819 | && (earlyclobber_operands & op_mask) == 0 | |
1820 | && (matched_operands & op_mask) == 0) | |
1821 | { | |
1822 | dest_op = recog_data.operand[opno]; | |
1823 | break; | |
1824 | } | |
1825 | } | |
1826 | ||
1827 | for (int opno = 0; opno < recog_data.n_operands; ++opno) | |
1828 | { | |
1829 | auto op_mask = operand_mask (1) << opno; | |
1830 | rtx op = recog_data.operand[opno]; | |
1831 | int matches = operand_matches[opno]; | |
1832 | ||
1833 | // Punt for now on operands that already have a fixed choice of | |
1834 | // register, since we don't have IRA's ability to find an alternative. | |
1835 | // It's better if earlier passes don't create this kind of situation. | |
1836 | if (REG_P (op) && FP_REGNUM_P (REGNO (op))) | |
1837 | m_allocation_successful = false; | |
1838 | ||
1839 | // Treat input operands as being earlyclobbered if an output is | |
1840 | // sometimes earlyclobber and if the input never matches an output. | |
1841 | // Do the same if there is an output that is always matched to an | |
1842 | // input, and if this operand doesn't match that input. In both | |
1843 | // cases, tying the input and the output would lead to an impossible | |
1844 | // combination (or at least one that is difficult to reload). | |
1845 | if (recog_data.operand_type[opno] != OP_OUT | |
1846 | && ((earlyclobber_operands && matches < 0) | |
1847 | || ((matched_operands & ~unmatched_operands) | |
1848 | && !(matches_operands & op_mask)))) | |
1849 | for (auto &allocno : get_allocno_subgroup (op).allocnos ()) | |
1850 | if (allocno.end_point + 1 == m_current_point) | |
1851 | allocno.is_earlyclobbered = true; | |
1852 | ||
1853 | // Create copies between operands that can be tied. This (deliberately) | |
1854 | // might add several copies to the same destination register; later code | |
1855 | // can then choose between them based on other criteria. | |
1856 | // | |
1857 | // If there is an output operand that is never matched or earlyclobber, | |
1858 | // and an input operand that never matches an output operand, create | |
1859 | // a tentative copy between them. This allows hard register preferences | |
1860 | // to be transmitted along the copy chains. | |
1861 | if (matches >= 0) | |
1862 | record_copy (recog_data.operand[matches], op); | |
1863 | else if (dest_op && recog_data.operand_type[opno] == OP_IN) | |
1864 | record_copy (dest_op, op); | |
1865 | } | |
1866 | } | |
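// As a concrete illustration (the instruction is made up): for a fused
// multiply-add whose accumulator input carries a "0" constraint matching
// the output, the loop above records an imagined copy from that input to
// the output, so that later chaining can try to keep both in the same FPR.
// Inputs that cannot be tied to the instruction's outputs (because the
// outputs are sometimes earlyclobber, or are always matched to some other
// input) are instead marked is_earlyclobbered, which is_chain_candidate
// later uses to avoid tying them to a value defined by the same instruction.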
1867 | ||
1868 | // If FLAGS is DF_REF_AT_TOP, model the artificial uses and defs at the | |
1869 | // start of the current basic block, otherwise model the artificial uses | |
1870 | // and defs at the end of the basic block. This is done as part of a | |
1871 | // backwards walk, so defs should be processed before uses. | |
1872 | void | |
1873 | early_ra::record_artificial_refs (unsigned int flags) | |
1874 | { | |
1875 | df_ref ref; | |
1876 | ||
1877 | FOR_EACH_ARTIFICIAL_DEF (ref, m_current_bb->index) | |
1878 | if ((DF_REF_FLAGS (ref) & DF_REF_AT_TOP) == flags | |
1879 | && IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM)) | |
1880 | record_fpr_def (DF_REF_REGNO (ref)); | |
1881 | m_current_point += 1; | |
1882 | ||
1883 | FOR_EACH_ARTIFICIAL_USE (ref, m_current_bb->index) | |
1884 | if ((DF_REF_FLAGS (ref) & DF_REF_AT_TOP) == flags | |
1885 | && IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM)) | |
1886 | record_fpr_use (DF_REF_REGNO (ref)); | |
1887 | m_current_point += 1; | |
1888 | } | |
1889 | ||
1890 | // Model the register references in INSN as part of a backwards walk. | |
1891 | void | |
1892 | early_ra::record_insn_refs (rtx_insn *insn) | |
1893 | { | |
1894 | df_ref ref; | |
1895 | ||
1896 | // Record all definitions, excluding partial call clobbers. | |
1897 | FOR_EACH_INSN_DEF (ref, insn) | |
1898 | if (IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM)) | |
1899 | record_fpr_def (DF_REF_REGNO (ref)); | |
1900 | else | |
1901 | { | |
1902 | auto range = get_allocno_subgroup (DF_REF_REG (ref)); | |
1903 | for (auto &allocno : range.allocnos ()) | |
1904 | { | |
1905 | // If the destination is unused, record a momentary blip | |
1906 | // in its live range. | |
1907 | if (!bitmap_bit_p (m_live_allocnos, allocno.id)) | |
1908 | record_allocno_use (&allocno); | |
1909 | record_allocno_def (&allocno); | |
1910 | } | |
1911 | } | |
1912 | m_current_point += 1; | |
1913 | ||
1914 | // Model the call made by a call insn as a separate phase in the | |
1915 | // evaluation of the insn. Any partial call clobbers happen at that | |
1916 | // point, rather than in the definition or use phase of the insn. | |
1917 | if (auto *call_insn = dyn_cast<rtx_call_insn *> (insn)) | |
1918 | { | |
1919 | function_abi abi = insn_callee_abi (call_insn); | |
1920 | m_call_points[abi.id ()].safe_push (m_current_point); | |
1921 | m_current_point += 1; | |
1922 | } | |
1923 | ||
1924 | // Record all uses. We can ignore READ_MODIFY_WRITE uses of plain subregs, | |
1925 | // since we track the FPR-sized parts of them individually. | |
1926 | FOR_EACH_INSN_USE (ref, insn) | |
1927 | if (IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM)) | |
1928 | record_fpr_use (DF_REF_REGNO (ref)); | |
1929 | else if (!DF_REF_FLAGS_IS_SET (ref, DF_REF_READ_WRITE) | |
1930 | || DF_REF_FLAGS_IS_SET (ref, DF_REF_STRICT_LOW_PART) | |
1931 | || DF_REF_FLAGS_IS_SET (ref, DF_REF_ZERO_EXTRACT)) | |
1932 | { | |
1933 | auto range = get_allocno_subgroup (DF_REF_REG (ref)); | |
1934 | for (auto &allocno : range.allocnos ()) | |
1935 | record_allocno_use (&allocno); | |
1936 | } | |
1937 | m_current_point += 1; | |
1938 | } | |
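// A note on the point accounting above: each instruction occupies two
// consecutive points (three for calls).  Its definitions are modelled at
// one point, any partial call clobber at the next, and its uses at the
// point after that; since the numbering runs backwards, the uses end up
// at a numerically higher point than the definitions.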
1939 | ||
1940 | // ALLOCNO->is_strong_copy_src is true. See whether ALLOCNO heads a | |
1941 | // natural chain that has an affinity with the same hard register at | |
1942 | // both ends. | |
1943 | bool | |
1944 | early_ra::consider_strong_copy_src_chain (allocno_info *allocno) | |
1945 | { | |
1946 | auto *src_allocno = allocno; | |
1947 | while (src_allocno->copy_dest != INVALID_ALLOCNO) | |
1948 | { | |
1949 | auto *dest_allocno = m_allocnos[src_allocno->copy_dest]; | |
1950 | if (dest_allocno->start_point > src_allocno->end_point | |
1951 | || dest_allocno->hard_regno != src_allocno->hard_regno) | |
1952 | return false; | |
1953 | gcc_checking_assert (dest_allocno->is_copy_dest); | |
1954 | src_allocno = dest_allocno; | |
1955 | } | |
1956 | ||
1957 | while (allocno->copy_dest != INVALID_ALLOCNO) | |
1958 | { | |
1959 | allocno->is_strong_copy_src = 1; | |
1960 | allocno = m_allocnos[allocno->copy_dest]; | |
1961 | allocno->is_strong_copy_dest = 1; | |
1962 | } | |
1963 | return true; | |
1964 | } | |
1965 | ||
1966 | // ALLOCNO1 and ALLOCNO2 are linked in some way, and might end up being | |
1967 | // chained together. See whether chaining them requires the containing | |
1968 | // groups to have the same stride, or whether it requires them to have | |
1969 | // different strides. Return 1 if they should have the same stride, | |
1970 | // -1 if they should have different strides, or 0 if it doesn't matter. | |
1971 | int | |
1972 | early_ra::strided_polarity_pref (allocno_info *allocno1, | |
1973 | allocno_info *allocno2) | |
1974 | { | |
1975 | if (allocno1->offset + 1 < allocno1->group_size | |
1976 | && allocno2->offset + 1 < allocno2->group_size) | |
1977 | { | |
1978 | if (is_chain_candidate (allocno1 + 1, allocno2 + 1)) | |
1979 | return 1; | |
1980 | else | |
1981 | return -1; | |
1982 | } | |
1983 | ||
1984 | if (allocno1->offset > 0 && allocno2->offset > 0) | |
1985 | { | |
1986 | if (is_chain_candidate (allocno1 - 1, allocno2 - 1)) | |
1987 | return 1; | |
1988 | else | |
1989 | return -1; | |
1990 | } | |
1991 | ||
1992 | return 0; | |
1993 | } | |
1994 | ||
1995 | // Decide which groups should be strided. Also complete "strong" copy chains. | |
1996 | void | |
1997 | early_ra::find_strided_accesses () | |
1998 | { | |
1999 | // This function forms a graph of allocnos, linked by equivalences and | |
2000 | // natural copy chains. It temporarily uses chain_next to record the | |
2f46e357 | 2001 | // reverse of equivalence edges (related_allocno) and chain_prev to record |
9f0f7d80 RS |
2002 | // the reverse of copy edges (copy_dest). |
2003 | unsigned int allocno_info::*links[] = { | |
2004 | &allocno_info::chain_next, | |
2005 | &allocno_info::chain_prev, | |
2006 | &allocno_info::copy_dest, | |
2f46e357 | 2007 | &allocno_info::related_allocno |
9f0f7d80 RS |
2008 | }; |
2009 | ||
2010 | // Set up the temporary reverse edges. Check for strong copy chains. | |
2011 | for (unsigned int i = m_allocnos.length (); i-- > 0; ) | |
2012 | { | |
2013 | auto *allocno1 = m_allocnos[i]; | |
2014 | if (allocno1->copy_dest != INVALID_ALLOCNO) | |
2015 | m_allocnos[allocno1->copy_dest]->chain_prev = allocno1->id; | |
2f46e357 RS |
2016 | if (allocno1->related_allocno != INVALID_ALLOCNO) |
2017 | m_allocnos[allocno1->related_allocno]->chain_next = allocno1->id; | |
9f0f7d80 RS |
2018 | |
2019 | if (allocno1->is_strong_copy_src | |
2f46e357 RS |
2020 | && !allocno1->is_copy_dest |
2021 | && !consider_strong_copy_src_chain (allocno1)) | |
9f0f7d80 RS |
2022 | allocno1->is_strong_copy_src = false; |
2023 | } | |
2024 | ||
2025 | // Partition the graph into cliques based on edges that have the following | |
2026 | // properties: | |
2027 | // | |
2028 | // - the edge joins two allocnos whose groups have a free choice between | |
2029 | // consecutive and strided allocations. | |
2030 | // | |
2031 | // - the two groups have a relative strided polarity preference (that is | |
2032 | // they should make the same choice between consecutive and strided, | |
2033 | // or they should make opposite choices). | |
2034 | // | |
2035 | // Assign relative polarities to each group connected in this way. | |
2036 | // | |
2037 | // The aim is to discover natural move-free striding choices, which will | |
2038 | // often exist in carefully written ACLE code. | |
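// As a small illustration of the propagation below (the groups are made
// up): if group G1 has already been given polarity 1 and the preference
// between G1 and G2 is -1, then G2 is assigned polarity -1, recording that
// the two groups should make opposite choices between consecutive and
// strided allocation.  Which polarity ends up meaning "consecutive" is
// only decided later, from consecutive_pref.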
2039 | unsigned int num_edges = m_allocnos.length () * ARRAY_SIZE (links); | |
2040 | auto_sbitmap visited_edges (num_edges); | |
2041 | bitmap_clear (visited_edges); | |
2042 | ||
2043 | auto_vec<unsigned int, 32> worklist; | |
2044 | for (unsigned int i = 0; i < num_edges; ++i) | |
2045 | { | |
2046 | if (!bitmap_set_bit (visited_edges, i)) | |
2047 | continue; | |
2048 | worklist.quick_push (i); | |
2049 | while (!worklist.is_empty ()) | |
2050 | { | |
2051 | auto ei = worklist.pop (); | |
2052 | auto *allocno1 = m_allocnos[ei / ARRAY_SIZE (links)]; | |
2053 | auto ai2 = allocno1->*links[ei % ARRAY_SIZE (links)]; | |
2054 | if (ai2 == INVALID_ALLOCNO) | |
2055 | continue; | |
2056 | ||
2057 | auto *allocno2 = m_allocnos[ai2]; | |
2058 | auto *group1 = allocno1->group (); | |
2059 | auto *group2 = allocno2->group (); | |
2060 | if (!group1->has_flexible_stride || !group2->has_flexible_stride) | |
2061 | continue; | |
2062 | ||
2063 | int pref = strided_polarity_pref (allocno1, allocno2); | |
2064 | if (pref == 0) | |
2065 | continue; | |
2066 | ||
2067 | for (auto *group : { group1, group2 }) | |
2068 | for (auto &allocno : group->allocnos ()) | |
2069 | for (unsigned int j = 0; j < ARRAY_SIZE (links); ++j) | |
2070 | if (bitmap_set_bit (visited_edges, allocno.id * 4 + j)) | |
2071 | worklist.safe_push (allocno.id * 4 + j); | |
2072 | ||
2073 | if (group1->strided_polarity) | |
2074 | group2->strided_polarity = group1->strided_polarity * pref; | |
81dfa84e RS |
2075 | else if (group2->strided_polarity) |
2076 | group1->strided_polarity = group2->strided_polarity * pref; | |
9f0f7d80 RS |
2077 | else |
2078 | { | |
2079 | group1->strided_polarity = 1; | |
2080 | group2->strided_polarity = pref; | |
2081 | } | |
2082 | } | |
2083 | } | |
2084 | ||
2085 | // Now look for edges between allocnos in multi-register groups where: | |
2086 | // | |
2087 | // - the two groups have a relative strided polarity preference (as above). | |
2088 | // | |
2089 | // - one group (G1) has a free choice between consecutive and strided | |
2090 | // allocations. | |
2091 | // | |
2092 | // - the other group (G2) must use consecutive allocations. | |
2093 | // | |
2094 | // Update G1's individual preference for strided or consecutive allocations | |
2095 | // based on G2. If the previous loop chose a polarity for G1, work out | |
2096 | // whether it is better for polarity 1 or -1 to correspond to consecutive | |
2097 | // allocation. | |
2098 | int consecutive_pref = 0; | |
2099 | for (unsigned int i = m_allocnos.length (); i-- > 0; ) | |
2100 | { | |
2101 | auto *allocno1 = m_allocnos[i]; | |
2102 | for (auto link : links) | |
2103 | { | |
2104 | auto ai2 = allocno1->*link; | |
2105 | if (ai2 == INVALID_ALLOCNO) | |
2106 | continue; | |
2107 | ||
2108 | auto *allocno2 = m_allocnos[ai2]; | |
2109 | auto *group1 = allocno1->group (); | |
2110 | auto *group2 = allocno2->group (); | |
2111 | if (group1->has_flexible_stride == group2->has_flexible_stride) | |
2112 | continue; | |
2113 | ||
2114 | int pref = strided_polarity_pref (allocno1, allocno2); | |
2115 | if (pref == 0) | |
2116 | continue; | |
2117 | ||
2118 | auto *group = (group1->has_flexible_stride ? group1 : group2); | |
2119 | consecutive_pref += group->strided_polarity * pref; | |
2120 | group->consecutive_pref += pref; | |
2121 | } | |
2122 | } | |
2123 | ||
2124 | // If it doesn't matter whether polarity 1 or -1 corresponds to consecutive | |
2125 | // allocation, arbitrarily pick 1. | |
2126 | if (consecutive_pref == 0) | |
2127 | consecutive_pref = 1; | |
2128 | ||
2129 | // Record which multi-register groups should use strided allocations. | |
2130 | // Clear out the temporary edges. | |
2131 | for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai) | |
2132 | { | |
2133 | auto *allocno = m_allocnos[ai]; | |
2134 | allocno->chain_prev = INVALID_ALLOCNO; | |
2135 | allocno->chain_next = INVALID_ALLOCNO; | |
2136 | ||
2137 | if (allocno->offset != 0) | |
2138 | continue; | |
2139 | ||
2140 | auto *group = allocno->group (); | |
2141 | if (!group->has_flexible_stride) | |
2142 | continue; | |
2143 | ||
2144 | bool make_strided = (group->strided_polarity | |
2145 | ? (consecutive_pref * group->strided_polarity) < 0 | |
2146 | : group->consecutive_pref < 0); | |
2147 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2148 | fprintf (dump_file, "Allocno [%d:%d]: strided polarity %d," | |
2149 | " consecutive pref %d, %s\n", | |
2150 | allocno->id, allocno->id + group->size - 1, | |
2151 | group->strided_polarity, group->consecutive_pref, | |
2152 | make_strided ? "making strided" : "keeping consecutive"); | |
2153 | if (!make_strided) | |
2154 | continue; | |
2155 | ||
2156 | // 2-register groups have a stride of 8 FPRs and must start in | |
2157 | // registers matching the mask 0x17. 4-register groups have a stride | |
2158 | // of 4 FPRs and must start in registers matching the mask 0x13. | |
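// In other words, a 2-register group can start in V0-V7 or V16-V23
// ((regno & ~0x17) == 0, giving the candidate mask 0xff00ff below) and a
// 4-register group can start in V0-V3 or V16-V19 ((regno & ~0x13) == 0,
// giving the candidate mask 0xf000f).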
2159 | group->stride = group->size == 2 ? 8 : 4; | |
2160 | gcc_checking_assert (group->fpr_candidates | |
2161 | == (group->size == 2 ? 0x55555555 : 0x11111111)); | |
2162 | group->fpr_candidates = (group->size == 2 ? 0xff00ff : 0xf000f); | |
2163 | } | |
2164 | } | |
2165 | ||
2166 | // Compare the allocnos at *ALLOCNO1_PTR and *ALLOCNO2_PTR and return a <=> | |
2167 | // result that puts allocnos in order of increasing FIELD. | |
2168 | template<unsigned int early_ra::allocno_info::*field> | |
2169 | int | |
2170 | early_ra::cmp_increasing (const void *allocno1_ptr, const void *allocno2_ptr) | |
2171 | { | |
2172 | auto *allocno1 = *(allocno_info *const *) allocno1_ptr; | |
2173 | auto *allocno2 = *(allocno_info *const *) allocno2_ptr; | |
2174 | ||
2175 | if (allocno1->*field != allocno2->*field) | |
2176 | return allocno1->*field < allocno2->*field ? -1 : 1; | |
2177 | return (allocno1->id < allocno2->id ? -1 | |
2178 | : allocno1->id == allocno2->id ? 0 : 1); | |
2179 | } | |
2180 | ||
2181 | // Return true if we should consider chaining ALLOCNO1 onto the head | |
2182 | // of ALLOCNO2. This is just a local test of the two allocnos; it doesn't | |
2183 | // guarantee that chaining them would give a self-consistent system. | |
2184 | bool | |
2185 | early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2) | |
2186 | { | |
2f46e357 RS |
2187 | if (allocno2->is_shared ()) |
2188 | return false; | |
2189 | ||
2190 | if (allocno1->is_equiv) | |
2191 | allocno1 = m_allocnos[allocno1->related_allocno]; | |
9f0f7d80 RS |
2192 | |
2193 | if (allocno2->start_point >= allocno1->end_point | |
2f46e357 | 2194 | && !allocno2->is_equiv_to (allocno1->id)) |
9f0f7d80 RS |
2195 | return false; |
2196 | ||
2197 | if (allocno2->is_strong_copy_dest) | |
2198 | { | |
2199 | if (!allocno1->is_strong_copy_src | |
2200 | || allocno1->copy_dest != allocno2->id) | |
2201 | return false; | |
2202 | } | |
2203 | else if (allocno2->is_copy_dest) | |
2204 | { | |
2205 | if (allocno1->copy_dest != allocno2->id) | |
2206 | return false; | |
2207 | } | |
2208 | else if (allocno1->is_earlyclobbered) | |
2209 | { | |
2210 | if (allocno1->end_point == allocno2->start_point + 1) | |
2211 | return false; | |
2212 | } | |
2213 | ||
2214 | return true; | |
2215 | } | |
2216 | ||
2217 | // We're trying to chain allocno ALLOCNO1 to a later allocno. | |
2218 | // Rate how good a choice ALLOCNO2 would be, with higher being better. | |
2219 | int | |
2220 | early_ra::rate_chain (allocno_info *allocno1, allocno_info *allocno2) | |
2221 | { | |
2222 | int score = 0; | |
2223 | if (allocno2->is_strong_copy_dest) | |
2224 | score += 256; | |
2225 | else if (allocno2->is_copy_dest) | |
2226 | score += 128; | |
2227 | ||
2228 | // Prefer well-aligned matches. | |
2229 | auto *group1 = allocno1->group (); | |
2230 | auto *group2 = allocno2->group (); | |
2231 | if (group1->stride == 1 && group2->stride == 1) | |
2232 | { | |
2233 | unsigned int min_size = std::min (group1->color_rep ()->size, | |
2234 | group2->color_rep ()->size); | |
2235 | if ((group1->color_rep_offset + allocno1->offset) % min_size | |
2236 | == (group2->color_rep_offset + allocno2->offset) % min_size) | |
2237 | score += min_size; | |
2238 | else | |
2239 | score -= min_size; | |
2240 | } | |
2241 | return score; | |
2242 | } | |
2243 | ||
2244 | // Compare the chain_candidate_infos at ARG1 and ARG2 and return a <=> |
2245 | // result that puts candidates in order of decreasing score. |
2246 | int | |
2247 | early_ra::cmp_chain_candidates (const void *arg1, const void *arg2) | |
2248 | { | |
2249 | auto &candidate1 = *(const chain_candidate_info *) arg1; | |
2250 | auto &candidate2 = *(const chain_candidate_info *) arg2; | |
2251 | if (candidate1.score != candidate2.score) | |
2252 | return candidate1.score > candidate2.score ? -1 : 1; | |
2253 | ||
2254 | // Prefer to increase the gap between uses of the allocated register, | |
2255 | // to give the scheduler more freedom. | |
2256 | auto *allocno1 = candidate1.allocno; | |
2257 | auto *allocno2 = candidate2.allocno; | |
2258 | if (allocno1->start_point != allocno2->start_point) | |
2259 | return allocno1->start_point < allocno2->start_point ? -1 : 1; | |
2260 | ||
2261 | if (allocno1 != allocno2) | |
2262 | return allocno1->id < allocno2->id ? -1 : 1; | |
2263 | ||
2264 | return 0; | |
2265 | } | |
2266 | ||
2267 | // Join the chains of allocnos that start at HEADI1 and HEADI2. | |
2268 | // HEADI1 is either empty or a single allocno. | |
2269 | void | |
2270 | early_ra::chain_allocnos (unsigned int &headi1, unsigned int &headi2) | |
2271 | { | |
2272 | if (headi1 == INVALID_ALLOCNO) | |
2273 | headi1 = headi2; | |
2274 | else if (headi2 == INVALID_ALLOCNO) | |
2275 | headi2 = headi1; | |
2276 | else | |
2277 | { | |
2278 | auto *head1 = m_allocnos[headi1]; | |
2279 | auto *head2 = m_allocnos[headi2]; | |
2280 | gcc_checking_assert (head1->chain_next == INVALID_ALLOCNO | |
2281 | && head1->chain_prev == INVALID_ALLOCNO | |
2282 | && head2->chain_prev == INVALID_ALLOCNO); | |
2283 | ||
2f46e357 RS |
2284 | if (head1->is_equiv |
2285 | && m_allocnos[head1->related_allocno]->copy_dest == headi2) | |
9f0f7d80 RS |
2286 | { |
2287 | head1->is_copy_dest = head2->is_copy_dest; | |
2288 | head1->is_strong_copy_dest = head2->is_strong_copy_dest; | |
2f46e357 | 2289 | m_allocnos[head1->related_allocno]->copy_dest = headi1; |
9f0f7d80 RS |
2290 | } |
2291 | head1->chain_next = headi2; | |
2292 | head2->chain_prev = headi1; | |
2293 | ||
2294 | headi2 = headi1; | |
2295 | } | |
2296 | } | |
2297 | ||
2f46e357 RS |
2298 | // Add GROUP2's FPR information to GROUP1's, given that GROUP2 starts |
2299 | // OFFSET allocnos into GROUP1. |
2300 | void | |
2301 | early_ra::merge_fpr_info (allocno_group_info *group1, | |
2302 | allocno_group_info *group2, | |
2303 | unsigned int offset) | |
2304 | { | |
2305 | group1->fpr_size = std::max (group1->fpr_size, group2->fpr_size); | |
2306 | group1->fpr_candidates &= (group2->fpr_candidates | |
2307 | >> (offset * group1->stride)); | |
2308 | } | |
2309 | ||
9f0f7d80 RS |
2310 | // Set the color representative of ALLOCNO's group to REP, such that ALLOCNO |
2311 | // ends up at allocno offset REP_OFFSET from the start of REP. |
2312 | void | |
2313 | early_ra::set_single_color_rep (allocno_info *allocno, allocno_group_info *rep, | |
2314 | unsigned int rep_offset) | |
2315 | { | |
2316 | auto *group = allocno->group (); | |
2317 | if (group->m_color_rep == rep) | |
2318 | return; | |
2319 | ||
2320 | group->m_color_rep = rep; | |
2321 | gcc_checking_assert (multiple_p (group->stride, rep->stride)); | |
2322 | unsigned int factor = group->stride / rep->stride; | |
2323 | gcc_checking_assert (rep_offset >= allocno->offset * factor); | |
2324 | group->color_rep_offset = rep_offset - allocno->offset * factor; | |
2f46e357 | 2325 | merge_fpr_info (rep, group, group->color_rep_offset); |
9f0f7d80 RS |
2326 | } |
2327 | ||
2328 | // REP1 and REP2 are color representatives. Change REP1's color representative | |
2329 | // to REP2, with REP1 starting at allocno offset REP2_OFFSET into REP2. | |
2330 | void | |
2331 | early_ra::set_color_rep (allocno_group_info *rep1, allocno_group_info *rep2, | |
2332 | unsigned int rep2_offset) | |
2333 | { | |
2334 | gcc_checking_assert (rep1 != rep2 | |
2335 | && rep2->m_color_rep == rep2 | |
2336 | && multiple_p (rep1->stride, rep2->stride)); | |
2337 | ||
2338 | auto heads1 = rep1->chain_heads (); | |
2339 | auto heads2 = rep2->chain_heads (); | |
2340 | for (unsigned int i1 = 0; i1 < heads1.size (); ++i1) | |
2341 | if (heads1[i1] != INVALID_ALLOCNO) | |
2342 | { | |
2343 | unsigned int i2 = rep2_offset + i1 * rep1->stride / rep2->stride; | |
2344 | if (heads2[i2] == INVALID_ALLOCNO) | |
2345 | heads2[i2] = heads1[i1]; | |
2346 | else | |
2347 | gcc_checking_assert (heads2[i2] == heads1[i1]); | |
2348 | set_single_color_rep (m_allocnos[heads1[i1]], rep2, i2); | |
2349 | } | |
2350 | } | |
2351 | ||
2352 | // Try to chain ALLOCNO1 to the head of the chain starting at ALLOCNO2. | |
2353 | // Return true on success. | |
2354 | bool | |
2355 | early_ra::try_to_chain_allocnos (allocno_info *allocno1, | |
2356 | allocno_info *allocno2) | |
2357 | { | |
2358 | auto *group1 = allocno1->group ()->color_rep (); | |
2359 | auto *group2 = allocno2->group ()->color_rep (); | |
2360 | ||
2361 | // Avoid trying to tie different subgroups of the same group. This can | |
2362 | // happen if the parts of a register are defined and used piecemeal. | |
2363 | if (group1 == group2) | |
2364 | return false; | |
2365 | ||
2366 | // The stride (in FPRs) between allocnos of each color representative. | |
2367 | auto fpr_stride1 = group1->stride; | |
2368 | auto fpr_stride2 = group2->stride; | |
2369 | ||
2370 | // The offset (in FPRs) of each allocno group from its color representative. | |
2371 | auto fpr_offset1 = allocno1->group ()->color_rep_offset * fpr_stride1; | |
2372 | auto fpr_offset2 = allocno2->group ()->color_rep_offset * fpr_stride2; | |
2373 | ||
2374 | // The offset (in FPRs) of each allocno from its color representative. | |
2375 | fpr_offset1 += allocno1->offset * allocno1->group ()->stride; | |
2376 | fpr_offset2 += allocno2->offset * allocno2->group ()->stride; | |
2377 | ||
2378 | // The FPR overlap is in multiples of the larger stride. | |
2379 | auto max_fpr_stride = std::max (fpr_stride1, fpr_stride2); | |
2380 | auto min_fpr_offset = std::min (fpr_offset1, fpr_offset2); | |
2381 | auto fpr_overlap_offset = ROUND_DOWN (min_fpr_offset, max_fpr_stride); | |
2382 | ||
2383 | // The offset (in FPRs) of the start of the overlapping region from | |
2384 | // each color representative. | |
2385 | fpr_offset1 -= fpr_overlap_offset; | |
2386 | fpr_offset2 -= fpr_overlap_offset; | |
2387 | ||
2388 | // The number of FPRs in each color representative after the start | |
2389 | // of the overlapping region. | |
2390 | auto fpr_after1 = (group1->size - 1) * fpr_stride1 - fpr_offset1; | |
2391 | auto fpr_after2 = (group2->size - 1) * fpr_stride2 - fpr_offset2; | |
2392 | ||
2393 | auto min_fpr_after = std::min (fpr_after1, fpr_after2); | |
2394 | ||
2395 | // The number of overlapping allocnos. | |
2396 | auto allocno_overlap_size = min_fpr_after / max_fpr_stride + 1; | |
2397 | ||
2398 | // The offset (in allocnos) of the overlapping region from the start | |
2399 | // of each color representative. | |
2400 | auto allocno_offset1 = fpr_offset1 / fpr_stride1; | |
2401 | auto allocno_offset2 = fpr_offset2 / fpr_stride2; | |
2402 | ||
2403 | // The stride (in allocnos) between overlapping allocnos. | |
2404 | auto allocno_stride1 = max_fpr_stride / fpr_stride1; | |
2405 | auto allocno_stride2 = max_fpr_stride / fpr_stride2; | |
2406 | ||
2407 | // Reject combinations that are impossible to allocate. | |
2408 | auto fprs1 = group1->fpr_candidates; | |
2409 | auto fprs2 = group2->fpr_candidates; | |
2410 | if (fpr_offset1 > fpr_offset2) | |
2411 | fprs2 >>= (fpr_offset1 - fpr_offset2); | |
2412 | else | |
2413 | fprs1 >>= (fpr_offset2 - fpr_offset1); | |
2414 | if ((fprs1 & fprs2) == 0) | |
2415 | { | |
2416 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2417 | fprintf (dump_file, " - cannot chain %d->%d, no FPRs in common" | |
2418 | " (%08x@%d and %08x@%d)\n", allocno1->id, allocno2->id, | |
2419 | group1->fpr_candidates, fpr_offset1, | |
2420 | group2->fpr_candidates, fpr_offset2); | |
2421 | return false; | |
2422 | } | |
2423 | ||
2424 | // Check whether the chain can be formed. | |
2425 | auto heads1 = group1->chain_heads (); | |
2426 | auto heads2 = group2->chain_heads (); | |
2427 | for (unsigned int i = 0; i < allocno_overlap_size; ++i) | |
2428 | { | |
2429 | auto headi1 = heads1[allocno_offset1 + i * allocno_stride1]; | |
2430 | auto headi2 = heads2[allocno_offset2 + i * allocno_stride2]; | |
2431 | if (headi1 != INVALID_ALLOCNO && headi2 != INVALID_ALLOCNO) | |
2432 | { | |
2433 | auto *head1 = m_allocnos[headi1]; | |
2434 | auto *head2 = m_allocnos[headi2]; | |
2435 | if (head1->chain_next != INVALID_ALLOCNO) | |
2436 | return false; | |
2f46e357 | 2437 | if (!head2->is_equiv_to (head1->id) |
9f0f7d80 RS |
2438 | && head1->end_point <= head2->start_point) |
2439 | return false; | |
2440 | } | |
2441 | } | |
2442 | ||
2443 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2444 | { | |
2445 | fprintf (dump_file, " - chaining allocnos ["); | |
2446 | for (unsigned int i = 0; i < allocno_overlap_size; ++i) | |
2447 | fprintf (dump_file, "%s%d", i ? "," : "", | |
2448 | heads1[allocno_offset1 + i * allocno_stride1]); | |
2449 | fprintf (dump_file, "] and ["); | |
2450 | for (unsigned int i = 0; i < allocno_overlap_size; ++i) | |
2451 | fprintf (dump_file, "%s%d", i ? "," : "", | |
2452 | heads2[allocno_offset2 + i * allocno_stride2]); | |
2453 | fprintf (dump_file, "]\n"); | |
2454 | } | |
2455 | ||
2456 | // Chain the allocnos, updating the chain heads. | |
2457 | for (unsigned int i = 0; i < allocno_overlap_size; ++i) | |
2458 | chain_allocnos (heads1[allocno_offset1 + i * allocno_stride1], | |
2459 | heads2[allocno_offset2 + i * allocno_stride2]); | |
2460 | ||
2461 | // Pick a color representative for the merged groups. | |
2462 | allocno_group_info *new_rep; | |
2463 | if (allocno_offset1 == 0 | |
2464 | && group1->size == allocno_overlap_size * allocno_stride1 | |
2465 | && multiple_p (fpr_stride1, fpr_stride2)) | |
2466 | { | |
2467 | // The first group fits within the second. | |
2468 | set_color_rep (group1, group2, allocno_offset2); | |
2469 | new_rep = group2; | |
2470 | } | |
2471 | else if (allocno_offset2 == 0 | |
2472 | && group2->size == allocno_overlap_size * allocno_stride2 | |
2473 | && multiple_p (fpr_stride2, fpr_stride1)) | |
2474 | { | |
2475 | // The second group fits within the first. | |
2476 | set_color_rep (group2, group1, allocno_offset1); | |
2477 | new_rep = group1; | |
2478 | } | |
2479 | else | |
2480 | { | |
2481 | // We need a new group that is big enough to span both groups. | |
2482 | // The new group always has an FPR stride of 1. | |
2483 | auto max_fpr_offset = std::max (fpr_offset1, fpr_offset2); | |
2484 | auto max_fpr_after = std::max (fpr_after1, fpr_after2); | |
2485 | auto new_size = max_fpr_offset + max_fpr_after + 1; | |
2486 | new_rep = create_allocno_group (INVALID_REGNUM, new_size); | |
2487 | ||
2488 | set_color_rep (group1, new_rep, max_fpr_offset - fpr_offset1); | |
2489 | set_color_rep (group2, new_rep, max_fpr_offset - fpr_offset2); | |
2490 | } | |
2491 | ||
2492 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2493 | { | |
2494 | fprintf (dump_file, " - new frontier ["); | |
2495 | auto new_heads = new_rep->chain_heads (); | |
2496 | for (unsigned int i = 0; i < new_heads.size (); ++i) | |
2497 | { | |
2498 | if (i) | |
2499 | fprintf (dump_file, ","); | |
2500 | if (new_heads[i] == INVALID_ALLOCNO) | |
2501 | fprintf (dump_file, "-"); | |
2502 | else | |
2503 | fprintf (dump_file, "%d", new_heads[i]); | |
2504 | } | |
2505 | fprintf (dump_file, "]\n"); | |
2506 | } | |
2507 | ||
2508 | return true; | |
2509 | } | |
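// A worked example of the arithmetic above (the groups are made up):
// suppose allocno A1 is the second allocno (offset 1) of a 2-allocno group
// G1 and allocno A2 is the fourth allocno (offset 3) of a 4-allocno group
// G2, where both groups have stride 1 and are their own color
// representatives.  Then fpr_offset1 and fpr_offset2 start as 1 and 3, the
// overlap starts at FPR offset 1, and allocno_overlap_size is 2, so
// chaining A1 to A2 also chains G1's first allocno to G2's third.  G1 then
// fits entirely within G2, so G2 becomes the merged color representative,
// with G1 at allocno offset 2.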
2510 | ||
2511 | // Create a color_info for color representative GROUP. | |
2512 | void | |
2513 | early_ra::create_color (allocno_group_info *group) | |
2514 | { | |
2515 | auto *color = region_allocate<color_info> (); | |
2516 | color->id = m_colors.length (); | |
2517 | color->hard_regno = FIRST_PSEUDO_REGISTER; | |
2518 | color->group = group; | |
2519 | ||
2520 | gcc_checking_assert (group->m_color_rep == group); | |
2521 | group->has_color = true; | |
2522 | group->color = m_colors.length (); | |
2523 | ||
2524 | m_colors.safe_push (color); | |
2525 | } | |
2526 | ||
2527 | // Form allocnos into chains. Create colors for each resulting clique. | |
2528 | void | |
2529 | early_ra::form_chains () | |
2530 | { | |
2531 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2532 | fprintf (dump_file, "\nChaining allocnos:\n"); | |
2533 | ||
2534 | // Perform (modified) interval graph coloring. First sort by | |
2535 | // increasing start point. | |
2536 | m_sorted_allocnos.reserve (m_allocnos.length ()); | |
2537 | m_sorted_allocnos.splice (m_allocnos); | |
2538 | m_sorted_allocnos.qsort (cmp_increasing<&allocno_info::start_point>); | |
2539 | ||
2540 | // During this phase, color representatives are only correct for | |
2541 | // unprocessed allocno groups (where the color representative is | |
2542 | // the group itself) and for groups that contain a current chain head. | |
2543 | unsigned int ti = 0; | |
2544 | auto_vec<chain_candidate_info> candidates; | |
2545 | for (unsigned int hi = 0; hi < m_sorted_allocnos.length (); ++hi) | |
2546 | { | |
2547 | auto *allocno1 = m_sorted_allocnos[hi]; | |
2548 | if (allocno1->chain_next != INVALID_ALLOCNO) | |
2549 | continue; | |
2550 | ||
2551 | // Record conflicts with direct uses for FPR hard registers. | |
2552 | auto *group1 = allocno1->group (); | |
2553 | for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr) | |
2554 | if (fpr_conflicts_with_allocno_p (fpr, allocno1)) | |
2555 | group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset)); | |
2556 | ||
2557 | // Record conflicts due to partially call-clobbered registers. | |
2558 | // (Full clobbers are handled by the previous loop.) | |
2559 | for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id) | |
2560 | if (call_in_range_p (abi_id, allocno1->start_point, | |
2561 | allocno1->end_point)) | |
2562 | { | |
2563 | auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size); | |
2564 | group1->fpr_candidates &= ~fprs >> allocno1->offset; | |
2565 | } | |
2566 | ||
2f46e357 RS |
2567 | if (allocno1->is_shared ()) |
2568 | { | |
2569 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2570 | fprintf (dump_file, " Allocno %d shares the same hard register" | |
2571 | " as allocno %d\n", allocno1->id, | |
2572 | allocno1->related_allocno); | |
2573 | auto *allocno2 = m_allocnos[allocno1->related_allocno]; | |
2574 | merge_fpr_info (allocno2->group (), group1, allocno2->offset); | |
2575 | m_shared_allocnos.safe_push (allocno1); | |
2576 | continue; | |
2577 | } | |
2578 | ||
9f0f7d80 RS |
2579 | // Find earlier allocnos (in processing order) that could be chained |
2580 | // to this one. | |
2581 | candidates.truncate (0); | |
2582 | for (unsigned int sci = ti; sci < hi; ++sci) | |
2583 | { | |
2584 | auto *allocno2 = m_sorted_allocnos[sci]; | |
2585 | if (allocno2->chain_prev == INVALID_ALLOCNO) | |
2586 | { | |
2587 | if (!is_chain_candidate (allocno1, allocno2)) | |
2588 | continue; | |
2589 | chain_candidate_info candidate; | |
2590 | candidate.allocno = allocno2; | |
2591 | candidate.score = rate_chain (allocno1, allocno2); | |
2592 | candidates.safe_push (candidate); | |
2593 | } | |
2594 | else if (sci == ti) | |
2595 | ++ti; | |
2596 | } | |
2597 | ||
2598 | // Sort the candidates by decreasing score. | |
2599 | candidates.qsort (cmp_chain_candidates); | |
2600 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2601 | { | |
2602 | fprintf (dump_file, " Chain candidates for %d:", allocno1->id); | |
2603 | for (auto &candidate : candidates) | |
2604 | fprintf (dump_file, " %d(%d)", candidate.allocno->id, | |
2605 | candidate.score); | |
2606 | fprintf (dump_file, "\n"); | |
2607 | } | |
2608 | ||
2609 | // Pick the first candidate that works. | |
2610 | for (auto &candidate : candidates) | |
2611 | if (try_to_chain_allocnos (allocno1, candidate.allocno)) | |
2612 | break; | |
2613 | } | |
2614 | ||
2615 | // Create color_infos for each group. Make sure that each group's | |
2616 | // color representative is up to date. | |
2617 | for (unsigned int hi = m_sorted_allocnos.length (); hi-- > 0; ) | |
2618 | { | |
2619 | auto *allocno = m_sorted_allocnos[hi]; | |
2f46e357 RS |
2620 | if (allocno->is_shared ()) |
2621 | continue; | |
2622 | ||
9f0f7d80 RS |
2623 | auto *rep = allocno->group ()->color_rep (); |
2624 | if (rep->has_color) | |
2625 | continue; | |
2626 | ||
2627 | create_color (rep); | |
2628 | auto heads = rep->chain_heads (); | |
2629 | for (unsigned int i = 0; i < heads.size (); ++i) | |
2630 | { | |
2631 | unsigned int ai = heads[i]; | |
2632 | while (ai != INVALID_ALLOCNO) | |
2633 | { | |
2634 | allocno = m_allocnos[ai]; | |
2635 | set_single_color_rep (allocno, rep, i * rep->stride); | |
2636 | ai = allocno->chain_next; | |
2637 | } | |
2638 | } | |
2639 | } | |
2640 | } | |
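// A note on the processing order above: because program points are
// numbered backwards, sorting by increasing start_point processes allocnos
// from the one defined last in the region towards the one defined first,
// and each allocno is only ever chained onto an allocno that was processed
// earlier (and so is defined later).  The window starting at TI simply
// skips over allocnos that already have a predecessor in some chain.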
2641 | ||
2642 | // Return true if the given FPR (starting at 0) conflicts with allocno | |
2643 | // ALLOCNO. | |
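// (The entries in m_fpr_ranges[fpr] were pushed during the backwards walk,
// so they are sorted by increasing program point, i.e. from the end of the
// region towards its start; the binary search below relies on that
// ordering.)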
2644 | bool | |
2645 | early_ra::fpr_conflicts_with_allocno_p (unsigned int fpr, | |
2646 | allocno_info *allocno) | |
2647 | { | |
2648 | auto &ranges = m_fpr_ranges[fpr]; | |
2649 | unsigned int start_i = 0; | |
2650 | unsigned int end_i = ranges.length (); | |
2651 | while (start_i < end_i) | |
2652 | { | |
2653 | unsigned int mid_i = (start_i + end_i) / 2; | |
2654 | auto &range = ranges[mid_i]; | |
2655 | if (allocno->end_point > range.start_point) | |
2656 | start_i = mid_i + 1; | |
2657 | else if (allocno->start_point < range.end_point) | |
2658 | end_i = mid_i; | |
2659 | else | |
2660 | { | |
2661 | if (range.allocno != allocno->id) | |
2662 | return true; | |
2663 | // The FPR is equivalent to ALLOCNO for this particular range. | |
2664 | // See whether ALLOCNO conflicts with a neighboring range. | |
2665 | if (mid_i > 0 | |
2666 | && ranges[mid_i - 1].start_point >= allocno->end_point) | |
2667 | return true; | |
2668 | if (mid_i + 1 < ranges.length () | |
2669 | && ranges[mid_i + 1].end_point <= allocno->start_point) | |
2670 | return true; | |
2671 | return false; | |
2672 | } | |
2673 | } | |
2674 | return false; | |
2675 | } | |
2676 | ||
2677 | // Return true if there is a call with ABI identifier ABI_ID in the inclusive | |
2678 | // program point range [START_POINT, END_POINT]. | |
2679 | bool | |
2680 | early_ra::call_in_range_p (unsigned int abi_id, unsigned int start_point, | |
2681 | unsigned int end_point) | |
2682 | { | |
2683 | auto &points = m_call_points[abi_id]; | |
2684 | unsigned int start_i = 0; | |
2685 | unsigned int end_i = points.length (); | |
2686 | while (start_i < end_i) | |
2687 | { | |
2688 | unsigned int mid_i = (start_i + end_i) / 2; | |
2689 | auto point = points[mid_i]; | |
2690 | if (end_point > point) | |
2691 | start_i = mid_i + 1; | |
2692 | else if (start_point < point) | |
2693 | end_i = mid_i; | |
2694 | else | |
2695 | return true; | |
2696 | } | |
2697 | return false; | |
2698 | } | |
2699 | ||
2700 | // Return the set of FPRs for which a value of size SIZE will be clobbered | |
2701 | // by a call to a function with ABI identifier ABI_ID, but would not be | |
2702 | // for some smaller size. The set therefore excludes FPRs that are | |
2703 | // fully-clobbered, like V0 in the base ABI. | |
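// For instance, under the base PCS, where only the low 64 bits of V8-V15
// are preserved across calls, a query for FPR_Q or FPR_Z would include
// V8-V15 (mask 0xff00), whereas a query for FPR_D would not.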
2704 | unsigned int | |
2705 | early_ra::partial_fpr_clobbers (unsigned int abi_id, fpr_size_info size) | |
2706 | { | |
2707 | auto &abi = function_abis[abi_id]; | |
2708 | unsigned int clobbers = 0; | |
2709 | machine_mode mode = (size == FPR_D ? V8QImode | |
2710 | : size == FPR_Q ? V16QImode : VNx16QImode); | |
2711 | for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno) | |
2712 | if (!abi.clobbers_full_reg_p (regno) | |
2713 | && abi.clobbers_reg_p (mode, regno)) | |
2714 | clobbers |= 1U << (regno - V0_REGNUM); | |
2715 | return clobbers; | |
2716 | } | |
2717 | ||
2718 | // Process copies between pseudo registers and hard registers and update | |
2719 | // the FPR preferences for the associated colors. | |
2720 | void | |
2721 | early_ra::process_copies () | |
2722 | { | |
2723 | for (auto © : m_allocno_copies) | |
2724 | { | |
2725 | auto *allocno = m_allocnos[copy.allocno]; | |
2726 | auto *group = allocno->group (); | |
2727 | auto offset = group->color_rep_offset + allocno->offset; | |
2728 | if (offset > copy.fpr) | |
2729 | continue; | |
2730 | ||
2731 | unsigned int fpr = copy.fpr - offset; | |
2732 | auto *color = m_colors[group->color_rep ()->color]; | |
2733 | color->fpr_preferences[fpr] = MIN (color->fpr_preferences[fpr] | |
2734 | + copy.weight, 127); | |
2f46e357 | 2735 | color->num_fpr_preferences += copy.weight; |
9f0f7d80 RS |
2736 | } |
2737 | } | |
2738 | ||
2739 | // Compare the colors at *COLOR1_PTR and *COLOR2_PTR and return a <=> | |
2f46e357 | 2740 | // result that puts colors in allocation order. |
9f0f7d80 | 2741 | int |
2f46e357 | 2742 | early_ra::cmp_allocation_order (const void *color1_ptr, const void *color2_ptr) |
9f0f7d80 RS |
2743 | { |
2744 | auto *color1 = *(color_info *const *) color1_ptr; | |
2745 | auto *color2 = *(color_info *const *) color2_ptr; | |
2746 | ||
2f46e357 | 2747 | // Allocate bigger groups before smaller groups. |
9f0f7d80 RS |
2748 | if (color1->group->size != color2->group->size) |
2749 | return color1->group->size > color2->group->size ? -1 : 1; | |
2f46e357 RS |
2750 | |
2751 | // Allocate groups with stronger FPR preferences before groups with weaker | |
2752 | // FPR preferences. | |
2753 | if (color1->num_fpr_preferences != color2->num_fpr_preferences) | |
2754 | return color1->num_fpr_preferences > color2->num_fpr_preferences ? -1 : 1; | |
2755 | ||
9f0f7d80 RS |
2756 | return (color1->id < color2->id ? -1 |
2757 | : color1->id == color2->id ? 0 : 1); | |
2758 | } | |
2759 | ||
2760 | // Allocate a register to each color. If we run out of registers, | |
2761 | // give up on doing a full allocation of the FPR-based pseudos in the | |
2762 | // region. | |
2763 | void | |
2764 | early_ra::allocate_colors () | |
2765 | { | |
2766 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2767 | fprintf (dump_file, "\nAllocating registers:\n"); | |
2768 | ||
2769 | auto_vec<color_info *> sorted_colors; | |
2770 | sorted_colors.safe_splice (m_colors); | |
2f46e357 | 2771 | sorted_colors.qsort (cmp_allocation_order); |
9f0f7d80 RS |
2772 | |
2773 | for (unsigned int i = 0; i < 32; ++i) | |
2774 | if (!crtl->abi->clobbers_full_reg_p (V0_REGNUM + i)) | |
2775 | m_call_preserved_fprs |= 1U << i; | |
2776 | ||
2777 | for (auto *color : sorted_colors) | |
2778 | { | |
2779 | unsigned int candidates = color->group->fpr_candidates; | |
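// Clear every start position whose span [fpr, fpr + size) would overlap
// an already-allocated FPR.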
2780 | for (unsigned int i = 0; i < color->group->size; ++i) | |
2781 | candidates &= ~(m_allocated_fprs >> i); | |
2782 | unsigned int best = INVALID_REGNUM; | |
2783 | int best_weight = 0; | |
2784 | for (unsigned int fpr = 0; fpr <= 32U - color->group->size; ++fpr) | |
2785 | { | |
2786 | if ((candidates & (1U << fpr)) == 0) | |
2787 | continue; | |
2788 | int weight = color->fpr_preferences[fpr]; | |
2789 | // Account for registers that the current function must preserve. | |
2790 | for (unsigned int i = 0; i < color->group->size; ++i) | |
2791 | if (m_call_preserved_fprs & (1U << (fpr + i))) | |
2792 | weight -= 1; | |
2793 | if (best == INVALID_REGNUM || best_weight <= weight) | |
2794 | { | |
2795 | best = fpr; | |
2796 | best_weight = weight; | |
2797 | } | |
2798 | } | |
2799 | ||
2800 | if (best == INVALID_REGNUM) | |
2801 | { | |
2802 | m_allocation_successful = false; | |
2803 | return; | |
2804 | } | |
2805 | ||
2806 | color->hard_regno = best + V0_REGNUM; | |
2807 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2808 | fprintf (dump_file, " Allocating [v%d:v%d] to color %d\n", | |
2809 | best, best + color->group->size - 1, color->id); | |
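// Reserve the group's whole span of FPRs.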
2810 | m_allocated_fprs |= ((1U << color->group->size) - 1) << best; | |
2811 | } | |
2812 | } | |
2813 | ||
2814 | // See if ALLOCNO ends a subchain of single registers that can be split | |
2815 | // off without affecting the rest of the chain, and without introducing | |
2816 | // any moves. Return the start of the chain if so (which might be ALLOCNO | |
2817 | // itself), otherwise return null. | |
2818 | early_ra::allocno_info * | |
2819 | early_ra::find_independent_subchain (allocno_info *allocno) | |
2820 | { | |
2821 | // Make sure ALLOCNO ends a natural subchain. | |
2822 | if (auto *next_allocno = chain_next (allocno)) | |
2823 | if (next_allocno->start_point + 1 >= allocno->end_point) | |
2824 | return nullptr; | |
2825 | ||
2826 | // Check the allocnos in the purported subchain and find the other end. | |
2827 | for (;;) | |
2828 | { | |
2829 | auto *group = allocno->group (); | |
2830 | if (group->m_color_rep == group) | |
2831 | return nullptr; | |
2832 | if (group->size != 1) | |
2833 | return nullptr; | |
2834 | ||
2835 | auto *prev_allocno = chain_prev (allocno); | |
2836 | if (!prev_allocno || allocno->start_point + 1 < prev_allocno->end_point) | |
2837 | return allocno; | |
2838 | ||
2839 | allocno = prev_allocno; | |
2840 | } | |
2841 | } | |
2842 | ||
2843 | // Search the colors starting at index FIRST_COLOR whose FPRs do not belong | |
2844 | // to FPR_CONFLICTS. Return the first such color that has no group. If all | |
2845 | // such colors have groups, instead return the color with the latest | |
2846 | // (smallest) start point. | |
2847 | early_ra::color_info * | |
2848 | early_ra::find_oldest_color (unsigned int first_color, | |
2849 | unsigned int fpr_conflicts) | |
2850 | { | |
2851 | color_info *best = nullptr; | |
2852 | unsigned int best_start_point = ~0U; | |
2853 | for (unsigned int ci = first_color; ci < m_colors.length (); ++ci) | |
2854 | { | |
2855 | auto *color = m_colors[ci]; | |
2856 | if (fpr_conflicts & (1U << (color->hard_regno - V0_REGNUM))) | |
2857 | continue; | |
2858 | if (!color->group) | |
2859 | return color; | |
2860 | auto chain_head = color->group->chain_heads ()[0]; | |
2861 | auto start_point = m_allocnos[chain_head]->start_point; | |
2862 | if (!best || best_start_point > start_point) | |
2863 | { | |
2864 | best = color; | |
2865 | best_start_point = start_point; | |
2866 | } | |
2867 | } | |
2868 | return best; | |
2869 | } | |
2870 | ||
2871 | // If there are some spare FPRs that can be reused without introducing saves, | |
2872 | // restores, or moves, use them to "broaden" the allocation, in order to give | |
2873 | // the scheduler more freedom. This is particularly useful for forming LDPs | |
2874 | // and STPs. | |
2875 | void | |
2876 | early_ra::broaden_colors () | |
2877 | { | |
2878 | // Create dummy colors for every leftover FPR that can be used cheaply. | |
2879 | unsigned int first_color = m_colors.length (); | |
2880 | for (unsigned int fpr = 0; fpr < 32; ++fpr) | |
2881 | if (((m_allocated_fprs | m_call_preserved_fprs) & (1U << fpr)) == 0) | |
2882 | { | |
2883 | auto *color = region_allocate<color_info> (); | |
2884 | color->id = m_colors.length (); | |
2885 | color->hard_regno = V0_REGNUM + fpr; | |
2886 | color->group = nullptr; | |
2887 | m_colors.safe_push (color); | |
2888 | } | |
2889 | ||
2890 | // Exit early if there are no spare FPRs. | |
2891 | if (first_color == m_colors.length ()) | |
2892 | return; | |
2893 | ||
2894 | // Go through the allocnos in order, seeing if there is a subchain of | |
2895 | // single-FPR allocnos that can be split off from the containing clique. | |
2896 | // Allocate such subchains to the new colors on an oldest-first basis. | |
2897 | for (auto *allocno : m_sorted_allocnos) | |
2898 | if (auto *start_allocno = find_independent_subchain (allocno)) | |
2899 | { | |
2900 | unsigned int fpr_conflicts = 0; | |
2901 | auto *member = allocno; | |
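// Collect the FPRs that at least one allocno in the subchain cannot use.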
2902 | for (;;) | |
2903 | { | |
2904 | fpr_conflicts |= ~member->group ()->fpr_candidates; | |
2905 | if (member == start_allocno) | |
2906 | break; | |
2907 | member = m_allocnos[member->chain_prev]; | |
2908 | } | |
2909 | ||
2910 | auto *color = find_oldest_color (first_color, fpr_conflicts); | |
2911 | if (!color) | |
2912 | continue; | |
2913 | ||
2914 | if (!color->group) | |
2915 | { | |
2916 | auto *group = allocno->group (); | |
2917 | color->group = group; | |
2918 | group->color = color->id; | |
2919 | group->chain_heads ()[0] = INVALID_ALLOCNO; | |
2920 | } | |
2921 | else | |
2922 | { | |
2923 | auto chain_head = color->group->chain_heads ()[0]; | |
2924 | auto start_point = m_allocnos[chain_head]->start_point; | |
2925 | if (start_point >= allocno->end_point) | |
2926 | // Allocating to COLOR isn't viable, and it was the best | |
2927 | // option available. | |
2928 | continue; | |
2929 | ||
2930 | auto *next_allocno = chain_next (allocno); | |
2931 | if (!next_allocno || next_allocno->start_point <= start_point) | |
2932 | // The current allocation gives at least as much scheduling | |
2933 | // freedom as COLOR would. | |
2934 | continue; | |
2935 | } | |
2936 | ||
2937 | // Unlink the chain. | |
2938 | if (auto *next_allocno = chain_next (allocno)) | |
2939 | next_allocno->chain_prev = start_allocno->chain_prev; | |
2940 | if (auto *prev_allocno = chain_prev (start_allocno)) | |
2941 | prev_allocno->chain_next = allocno->chain_next; | |
2942 | ||
2943 | // Make the subchain use COLOR. | |
2944 | allocno->chain_next = color->group->chain_heads ()[0]; | |
2945 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2946 | fprintf (dump_file, "Moving to optional color %d (register %s):", | |
2947 | color->id, reg_names[color->hard_regno]); | |
2948 | for (;;) | |
2949 | { | |
2950 | auto *group = allocno->group (); | |
2951 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2952 | fprintf (dump_file, " r%d", group->regno); | |
2953 | group->m_color_rep = color->group; | |
2954 | group->color_rep_offset = 0; | |
2955 | if (allocno == start_allocno) | |
2956 | break; | |
2957 | allocno = m_allocnos[allocno->chain_prev]; | |
2958 | } | |
2959 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2960 | fprintf (dump_file, "\n"); | |
2961 | color->group->chain_heads ()[0] = start_allocno->id; | |
2962 | } | |
2963 | } | |
2964 | ||
2965 | // Record the final choice of hard register for each allocno. | |
2966 | void | |
2967 | early_ra::finalize_allocation () | |
2968 | { | |
2969 | for (auto *allocno : m_allocnos) | |
2970 | { | |
2971 | if (allocno->is_shared ()) |
2972 | continue; | |
2973 | auto *group = allocno->group (); |
2974 | auto *rep = group->color_rep (); | |
2975 | auto rep_regno = m_colors[rep->color]->hard_regno; | |
2976 | auto group_regno = rep_regno + group->color_rep_offset; | |
2977 | allocno->hard_regno = group_regno + allocno->offset * group->stride; | |
2978 | } | |
2979 | for (auto *allocno : m_shared_allocnos) |
2980 | allocno->hard_regno = m_allocnos[allocno->related_allocno]->hard_regno; | |
2981 | } |
2982 | ||
2983 | // Replace any allocno references in REFS with the allocated register. | |
2984 | bool | |
2985 | early_ra::replace_regs (df_ref refs) | |
2986 | { | |
2987 | bool changed = false; | |
2988 | for (df_ref ref = refs; ref; ref = DF_REF_NEXT_LOC (ref)) | |
2989 | { | |
2990 | auto range = get_allocno_subgroup (DF_REF_REG (ref)); | |
2991 | if (!range) | |
2992 | continue; | |
2993 | ||
2994 | auto new_regno = range.allocno (0)->hard_regno; | |
2995 | *DF_REF_LOC (ref) = gen_rtx_REG (GET_MODE (DF_REF_REG (ref)), new_regno); | |
2996 | changed = true; | |
2997 | } | |
2998 | return changed; | |
2999 | } | |
3000 | ||
3001 | // Try to make INSN match its FPR-related constraints. If this needs | |
3002 | // a source operand (SRC) to be copied to a destination operand (DEST) | |
3003 | // before INSN, add the associated (DEST, SRC) pairs to MOVES. | |
3004 | // | |
3005 | // Return -1 on failure, otherwise return a ?/!-style reject count. | |
3006 | // The reject count doesn't model the moves, just the internal alternative | |
3007 | // preferences. | |
3008 | int | |
3009 | early_ra::try_enforce_constraints (rtx_insn *insn, | |
3010 | vec<std::pair<int, int>> &moves) | |
3011 | { | |
3012 | if (!constrain_operands (0, get_preferred_alternatives (insn))) | |
3013 | return -1; | |
3014 | ||
3015 | // Pick the alternative with the lowest cost. | |
3016 | int best = -1; | |
3017 | auto alts = get_preferred_alternatives (insn); | |
3018 | for (int altno = 0; altno < recog_data.n_alternatives; ++altno) | |
3019 | { | |
3020 | if (!(alts & ALTERNATIVE_BIT (altno))) | |
3021 | continue; | |
3022 | ||
3023 | auto *op_alt = &recog_op_alt[altno * recog_data.n_operands]; | |
3024 | if (!likely_alternative_match_p (op_alt)) | |
3025 | continue; | |
3026 | ||
3027 | auto_vec<std::pair<int, int>, 4> new_moves; | |
3028 | for (int opno = 0; opno < recog_data.n_operands; ++opno) | |
3029 | { | |
3030 | rtx op = recog_data.operand[opno]; | |
3031 | if (REG_P (op) | |
3032 | && FP_REGNUM_P (REGNO (op)) | |
3033 | && op_alt[opno].matched >= 0) | |
3034 | { | |
3035 | rtx old_src = recog_data.operand[op_alt[opno].matched]; | |
3036 | if (!operands_match_p (op, old_src)) | |
3037 | { | |
3038 | for (int i = 0; i < recog_data.n_operands; ++i) | |
3039 | if (i != opno) | |
3040 | { | |
3041 | rtx other = recog_data.operand[i]; | |
3042 | if (reg_overlap_mentioned_p (op, other)) | |
3043 | { | |
3044 | old_src = NULL_RTX; | |
3045 | break; | |
3046 | } | |
3047 | } | |
3048 | if (!old_src) | |
3049 | continue; | |
3050 | new_moves.safe_push ({ opno, op_alt[opno].matched }); | |
3051 | } | |
3052 | } | |
3053 | } | |
3054 | int cost = count_rejects (op_alt) + new_moves.length () * 7; | |
3055 | if (best < 0 || cost < best) | |
3056 | { | |
3057 | best = cost; | |
3058 | moves.truncate (0); | |
3059 | moves.safe_splice (new_moves); | |
3060 | } | |
3061 | } | |
3062 | return best; | |
3063 | } | |
3064 | ||
3065 | // Make INSN match its FPR-related constraints. | |
3066 | void | |
3067 | early_ra::enforce_constraints (rtx_insn *insn) | |
3068 | { | |
3069 | extract_insn (insn); | |
3070 | preprocess_constraints (insn); | |
3071 | ||
3072 | // First try with the operands as they are. | |
3073 | auto_vec<std::pair<int, int>, 4> moves; | |
3074 | int cost = try_enforce_constraints (insn, moves); | |
3075 | ||
3076 | // Next try taking advantage of commutativity. | |
3077 | for (int opno = 0; opno < recog_data.n_operands - 1; ++opno) | |
3078 | if (recog_data.constraints[opno][0] == '%') | |
3079 | { | |
3080 | std::swap (*recog_data.operand_loc[opno], | |
3081 | *recog_data.operand_loc[opno + 1]); | |
3082 | std::swap (recog_data.operand[opno], | |
3083 | recog_data.operand[opno + 1]); | |
3084 | auto_vec<std::pair<int, int>, 4> swapped_moves; | |
3085 | int swapped_cost = try_enforce_constraints (insn, swapped_moves); | |
3086 | if (swapped_cost >= 0 && (cost < 0 || swapped_cost < cost)) | |
3087 | { | |
3088 | cost = swapped_cost; | |
3089 | moves.truncate (0); | |
3090 | moves.safe_splice (swapped_moves); | |
3091 | } | |
3092 | else | |
3093 | { | |
3094 | std::swap (*recog_data.operand_loc[opno], | |
3095 | *recog_data.operand_loc[opno + 1]); | |
3096 | std::swap (recog_data.operand[opno], | |
3097 | recog_data.operand[opno + 1]); | |
3098 | } | |
3099 | } | |
3100 | ||
3101 | // The allocation should ensure that there is at least one valid combination. | |
3102 | // It's too late to back out now if not. | |
3103 | gcc_assert (cost >= 0); | |
3104 | for (int i = 0; i < recog_data.n_dups; ++i) | |
3105 | { | |
3106 | int dup_of = recog_data.dup_num[i]; | |
3107 | rtx new_op = *recog_data.operand_loc[dup_of]; | |
3108 | if (new_op != recog_data.operand[dup_of]) | |
3109 | *recog_data.dup_loc[i] = copy_rtx (new_op); | |
3110 | } | |
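// Emit the copies needed by matched operands: move the old source value
// into the destination register and rewrite the source operand to match.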
3111 | for (auto move : moves) | |
3112 | { | |
3113 | int dest_opno = move.first; | |
3114 | int src_opno = move.second; | |
3115 | rtx dest = recog_data.operand[dest_opno]; | |
3116 | rtx old_src = recog_data.operand[src_opno]; | |
3117 | rtx new_src = lowpart_subreg (GET_MODE (old_src), dest, GET_MODE (dest)); | |
3118 | emit_insn_before (gen_move_insn (new_src, old_src), insn); | |
3119 | *recog_data.operand_loc[src_opno] = new_src; | |
3120 | } | |
3121 | } | |
3122 | ||
3123 | // See whether INSN is an instruction that operates on multi-register vectors, | |
3124 | // and if we have decided to make it use strided rather than consecutive | |
3125 | // accesses. Update the pattern and return true if so. | |
3126 | bool | |
3127 | early_ra::maybe_convert_to_strided_access (rtx_insn *insn) | |
3128 | { | |
3129 | if (!NONJUMP_INSN_P (insn) || recog_memoized (insn) < 0) | |
3130 | return false; | |
3131 | ||
3132 | auto stride_type = get_attr_stride_type (insn); | |
3133 | rtx pat = PATTERN (insn); | |
3134 | rtx op; | |
3135 | if (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE | |
3136 | || stride_type == STRIDE_TYPE_LD1_CONSECUTIVE) | |
3137 | op = SET_DEST (pat); | |
3138 | else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE) | |
3139 | op = XVECEXP (SET_SRC (pat), 0, 1); | |
3140 | else | |
3141 | return false; | |
3142 | ||
3143 | auto range = get_allocno_subgroup (op); | |
3144 | if (!range || range.group->stride == 1) | |
3145 | return false; | |
3146 | ||
3147 | gcc_assert (range.start == 0 && range.count == range.group->size); | |
3148 | auto elt_mode = GET_MODE_INNER (GET_MODE (op)); | |
3149 | auto single_mode = aarch64_full_sve_mode (elt_mode).require (); | |
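// Build a single-vector REG for each FPR allocated to the group; these
// replace the original multi-register operand in the strided pattern.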
3150 | auto_vec<rtx, 4> regs; | |
3151 | for (unsigned int i = 0; i < range.count; ++i) | |
3152 | regs.quick_push (gen_rtx_REG (single_mode, range.allocno (i)->hard_regno)); | |
3153 | ||
3154 | extract_insn (insn); | |
3155 | if (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE) | |
3156 | { | |
3157 | auto unspec = XINT (SET_SRC (pat), 1); | |
3158 | if (range.count == 2) | |
3159 | pat = gen_aarch64_strided2 (unspec, GET_MODE (op), regs[0], regs[1], | |
3160 | recog_data.operand[1], | |
3161 | recog_data.operand[2]); | |
3162 | else | |
3163 | pat = gen_aarch64_strided4 (unspec, GET_MODE (op), | |
3164 | regs[0], regs[1], regs[2], regs[3], | |
3165 | recog_data.operand[1], | |
3166 | recog_data.operand[2]); | |
3167 | } | |
3168 | else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE) | |
3169 | { | |
3170 | auto unspec = XINT (SET_SRC (pat), 1); | |
3171 | if (range.count == 2) | |
3172 | pat = gen_aarch64_strided2 (unspec, GET_MODE (op), | |
3173 | recog_data.operand[0], | |
3174 | recog_data.operand[2], regs[0], regs[1]); | |
3175 | else | |
3176 | pat = gen_aarch64_strided4 (unspec, GET_MODE (op), | |
3177 | recog_data.operand[0], | |
3178 | recog_data.operand[2], | |
3179 | regs[0], regs[1], regs[2], regs[3]); | |
3180 | // Ensure correct sharing for the source memory. | |
3181 | // | |
3182 | // ??? Why doesn't the generator get this right? | |
3183 | XVECEXP (SET_SRC (pat), 0, XVECLEN (SET_SRC (pat), 0) - 1) | |
3184 | = *recog_data.dup_loc[0]; | |
3185 | } | |
3186 | else if (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE) | |
3187 | { | |
3188 | auto bits = INTVAL (XVECEXP (SET_SRC (pat), 0, 4)); | |
3189 | if (range.count == 2) | |
3190 | pat = gen_aarch64_sme_lut_strided2 (bits, single_mode, | |
3191 | regs[0], regs[1], | |
3192 | recog_data.operand[1], | |
3193 | recog_data.operand[2]); | |
3194 | else | |
3195 | pat = gen_aarch64_sme_lut_strided4 (bits, single_mode, | |
3196 | regs[0], regs[1], regs[2], regs[3], | |
3197 | recog_data.operand[1], | |
3198 | recog_data.operand[2]); | |
3199 | } | |
3200 | else | |
3201 | gcc_unreachable (); | |
3202 | PATTERN (insn) = pat; | |
3203 | INSN_CODE (insn) = -1; | |
3204 | df_insn_rescan (insn); | |
3205 | return true; | |
3206 | } | |
3207 | ||
3208 | // We've successfully allocated the current region. Apply the allocation | |
3209 | // to the instructions. | |
3210 | void | |
3211 | early_ra::apply_allocation () | |
3212 | { | |
3213 | for (auto *insn : m_dead_insns) |
3214 | set_insn_deleted (insn); | |
3215 | ||
3216 | rtx_insn *prev; |
3217 | for (auto insn_range : m_insn_ranges) | |
3218 | for (rtx_insn *insn = insn_range.first; | |
3219 | insn != insn_range.second; | |
3220 | insn = prev) | |
3221 | { | |
3222 | prev = PREV_INSN (insn); | |
3223 | if (!INSN_P (insn)) | |
3224 | continue; | |
3225 | ||
3226 | bool changed = maybe_convert_to_strided_access (insn); | |
3227 | changed |= replace_regs (DF_INSN_DEFS (insn)); | |
3228 | changed |= replace_regs (DF_INSN_USES (insn)); | |
3229 | if (changed && NONDEBUG_INSN_P (insn)) | |
3230 | { | |
3231 | if (GET_CODE (PATTERN (insn)) != USE | |
3232 | && GET_CODE (PATTERN (insn)) != CLOBBER | |
3233 | && !is_move_set (PATTERN (insn))) | |
3234 | enforce_constraints (insn); | |
3235 | ||
3236 | // A REG_EQUIV note establishes an equivalence throughout | |
3237 | // the function, but here we're reusing hard registers for | |
3238 | // multiple pseudo registers. We also no longer need REG_EQUIV | |
3239 | // notes that record potential spill locations, since we've | |
3240 | // allocated the pseudo register without spilling. | |
3241 | rtx *ptr = &REG_NOTES (insn); | |
3242 | while (*ptr) | |
3243 | if (REG_NOTE_KIND (*ptr) == REG_EQUIV) | |
3244 | *ptr = XEXP (*ptr, 1); | |
3245 | else | |
3246 | ptr = &XEXP (*ptr, 1); | |
3247 | } | |
3248 | changed |= replace_regs (DF_INSN_EQ_USES (insn)); | |
3249 | if (changed) | |
3250 | df_insn_rescan (insn); | |
3251 | } | |
3252 | ||
3253 | for (auto *insn : m_dead_insns) | |
3254 | delete_insn (insn); | |
3255 | } | |
3256 | ||
3257 | // Try to allocate the current region. Update the instructions if successful. | |
3258 | void | |
3259 | early_ra::process_region () | |
3260 | { | |
8b5cd6c4 | 3261 | for (auto *allocno : m_allocnos) |
3262 | { |
3263 | allocno->chain_next = INVALID_ALLOCNO; | |
3264 | allocno->chain_prev = INVALID_ALLOCNO; | |
3265 | } | |
8b5cd6c4 | 3266 | |
3267 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3268 | { | |
3269 | dump_fpr_ranges (); | |
3270 | dump_copies (); | |
3271 | dump_allocnos (); | |
3272 | } | |
3273 | ||
3274 | find_strided_accesses (); | |
3275 | ||
3276 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3277 | dump_allocnos (); | |
3278 | ||
3279 | form_chains (); | |
3280 | ||
3281 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3282 | dump_allocnos (); | |
3283 | ||
3284 | process_copies (); | |
3285 | ||
3286 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3287 | dump_colors (); | |
3288 | ||
3289 | allocate_colors (); | |
3290 | if (!m_allocation_successful) | |
3291 | return; | |
3292 | ||
3293 | broaden_colors (); | |
3294 | finalize_allocation (); | |
3295 | ||
3296 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3297 | { | |
3298 | fprintf (dump_file, "\nAllocation successful\nFinal allocation:\n"); | |
3299 | dump_allocnos (); | |
3300 | dump_colors (); | |
3301 | } | |
3302 | ||
3303 | apply_allocation (); | |
3304 | } | |
3305 | ||
3306 | // Return true if INSN would become dead if we successfully allocate the | |
3307 | // current region. | |
3308 | bool | |
3309 | early_ra::is_dead_insn (rtx_insn *insn) | |
3310 | { | |
3311 | rtx set = single_set (insn); | |
3312 | if (!set) | |
3313 | return false; | |
3314 | ||
3315 | rtx dest = SET_DEST (set); | |
3316 | auto dest_range = get_allocno_subgroup (dest); | |
3317 | if (!dest_range) | |
3318 | return false; | |
3319 | ||
3320 | for (auto &allocno : dest_range.allocnos ()) | |
3321 | if (bitmap_bit_p (m_live_allocnos, allocno.id)) | |
3322 | return false; | |
3323 | ||
3324 | if (side_effects_p (set)) | |
3325 | return false; | |
3326 | ||
3327 | return true; | |
3328 | } | |
3329 | ||
3330 | // Build up information about block BB. IS_ISOLATED is true if the | |
3331 | // block is not part of a larger region. | |
3332 | void | |
3333 | early_ra::process_block (basic_block bb, bool is_isolated) | |
3334 | { | |
3335 | m_current_bb = bb; | |
3336 | m_current_point += 1; |
3337 | m_current_bb_point = m_current_point; | |
3338 | |
3339 | // Process live-out FPRs. | |
3340 | bitmap live_out = df_get_live_out (bb); | |
3341 | for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno) | |
3342 | if (bitmap_bit_p (live_out, regno)) | |
3343 | record_fpr_use (regno); | |
3344 | ||
3345 | // Process live-out allocnos. We don't track individual FPR liveness | |
3346 | // across block boundaries, so we have to assume that the whole pseudo | |
3347 | // register is live. | |
3348 | bitmap_iterator bi; | |
3349 | unsigned int regno; | |
3350 | EXECUTE_IF_AND_IN_BITMAP (df_get_live_out (bb), m_fpr_pseudos, | |
3351 | FIRST_PSEUDO_REGISTER, regno, bi) | |
3352 | { | |
3353 | auto range = get_allocno_subgroup (regno_reg_rtx[regno]); | |
3354 | for (auto &allocno : range.allocnos ()) | |
3355 | record_allocno_use (&allocno); | |
3356 | } | |
3357 | ||
3358 | m_current_point += 1; | |
3359 | ||
3360 | record_artificial_refs (0); | |
3361 | ||
3362 | bool is_first = true; | |
3363 | rtx_insn *start_insn = BB_END (bb); | |
3364 | rtx_insn *insn; | |
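// Walk the block in reverse.  m_current_point increases as we go, so later
// program points end up with smaller point numbers than earlier ones.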
3365 | FOR_BB_INSNS_REVERSE (bb, insn) | |
3366 | { | |
3367 | if (!NONDEBUG_INSN_P (insn)) | |
3368 | continue; | |
3369 | ||
3370 | // CLOBBERs are used to prevent pseudos from being upwards exposed. | |
3371 | // We can ignore them if allocation is successful. | |
3372 | if (GET_CODE (PATTERN (insn)) == CLOBBER) | |
3373 | { | |
3374 | if (get_allocno_subgroup (XEXP (PATTERN (insn), 0))) | |
3375 | m_dead_insns.safe_push (insn); | |
3376 | continue; | |
3377 | } | |
3378 | ||
3379 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3380 | { | |
3381 | if (is_first) | |
3382 | fprintf (dump_file, "\nBlock %d:\n", bb->index); | |
3383 | fprintf (dump_file, "%6d:", m_current_point); | |
3384 | pretty_printer rtl_slim_pp; | |
3385 | rtl_slim_pp.buffer->stream = dump_file; | |
3386 | print_insn (&rtl_slim_pp, insn, 1); | |
3387 | pp_flush (&rtl_slim_pp); | |
3388 | fprintf (dump_file, "\n"); | |
3389 | } | |
3390 | is_first = false; | |
3391 | ||
3392 | if (is_dead_insn (insn)) | |
3393 | { | |
3394 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3395 | fprintf (dump_file, "%14s -- dead\n", ""); | |
3396 | m_dead_insns.safe_push (insn); | |
3397 | } | |
3398 | else | |
3399 | { | |
3400 | record_insn_refs (insn); | |
3401 | rtx pat = PATTERN (insn); | |
3402 | if (is_move_set (pat)) | |
3403 | record_copy (SET_DEST (pat), SET_SRC (pat), true); | |
3404 | else | |
3405 | { | |
3406 | extract_insn (insn); | |
3407 | record_constraints (insn); | |
3408 | } | |
3409 | } | |
3410 | ||
3411 | // See whether we have a complete region, with no remaining live | |
3412 | // allocnos. | |
3413 | if (is_isolated | |
3414 | && bitmap_empty_p (m_live_allocnos) | |
3415 | && m_live_fprs == 0 | |
3416 | && m_allocation_successful | |
3417 | && !m_allocnos.is_empty ()) | |
3418 | { | |
3419 | rtx_insn *prev_insn = PREV_INSN (insn); | |
3420 | m_insn_ranges.safe_push ({ start_insn, prev_insn }); | |
3421 | process_region (); | |
3422 | start_new_region (); | |
3423 | is_first = true; | |
3424 | start_insn = prev_insn; | |
3425 | } | |
3426 | } | |
3427 | m_insn_ranges.safe_push ({ start_insn, BB_HEAD (bb) }); | |
3428 | ||
3429 | record_artificial_refs (DF_REF_AT_TOP); | |
3430 | ||
3431 | // Process live-in FPRs. | |
3432 | bitmap live_in = df_get_live_in (bb); | |
3433 | for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno) | |
3434 | if (bitmap_bit_p (live_in, regno) | |
3435 | && (m_live_fprs & (1U << (regno - V0_REGNUM)))) | |
3436 | record_fpr_def (regno); | |
3437 | ||
3438 | // Process live-in allocnos. | |
3439 | EXECUTE_IF_AND_IN_BITMAP (live_in, m_fpr_pseudos, | |
3440 | FIRST_PSEUDO_REGISTER, regno, bi) | |
3441 | { | |
3442 | auto range = get_allocno_subgroup (regno_reg_rtx[regno]); | |
3443 | for (auto &allocno : range.allocnos ()) | |
3444 | if (bitmap_bit_p (m_live_allocnos, allocno.id)) | |
3445 | record_allocno_def (&allocno); | |
3446 | } | |
3447 | ||
3448 | m_current_point += 1; | |
3449 | ||
3450 | bitmap_clear (m_live_allocnos); | |
3451 | m_live_fprs = 0; | |
3452 | } | |
3453 | ||
3454 | // Divide the function into regions, such that no edges into or out | |
3455 | // of a region have live "FPR pseudos". | |
3456 | void | |
3457 | early_ra::process_blocks () | |
3458 | { | |
3459 | auto_sbitmap visited (last_basic_block_for_fn (m_fn)); | |
3460 | auto_sbitmap fpr_pseudos_live_out (last_basic_block_for_fn (m_fn)); | |
3461 | auto_sbitmap fpr_pseudos_live_in (last_basic_block_for_fn (m_fn)); | |
3462 | ||
3463 | bitmap_clear (visited); | |
3464 | bitmap_clear (fpr_pseudos_live_out); | |
3465 | bitmap_clear (fpr_pseudos_live_in); | |
3466 | ||
3467 | // Record which blocks have live FPR pseudos on entry and exit. | |
3468 | basic_block bb; | |
3469 | FOR_EACH_BB_FN (bb, m_fn) | |
3470 | { | |
3471 | if (bitmap_intersect_p (df_get_live_out (bb), m_fpr_pseudos)) | |
3472 | bitmap_set_bit (fpr_pseudos_live_out, bb->index); | |
3473 | if (bitmap_intersect_p (df_get_live_in (bb), m_fpr_pseudos)) | |
3474 | bitmap_set_bit (fpr_pseudos_live_in, bb->index); | |
3475 | } | |
3476 | ||
3477 | struct stack_node { edge_iterator ei; basic_block bb; }; | |
3478 | ||
3479 | auto_vec<stack_node, 32> stack; | |
3480 | auto_vec<basic_block, 32> region; | |
3481 | ||
3482 | // Go through the function in reverse postorder and process the region | |
3483 | // containing each block. | |
3484 | unsigned int n_blocks = df_get_n_blocks (DF_FORWARD); | |
3485 | int *order = df_get_postorder (DF_FORWARD); | |
3486 | for (unsigned int bbi = 0; bbi < n_blocks; ++bbi) | |
3487 | { | |
3488 | basic_block bb = BASIC_BLOCK_FOR_FN (m_fn, order[bbi]); | |
3489 | if (bb->index < NUM_FIXED_BLOCKS) | |
3490 | continue; | |
3491 | ||
3492 | if (!bitmap_set_bit (visited, bb->index)) | |
3493 | continue; | |
3494 | ||
3495 | // Process forward edges before backward edges (so push backward | |
3496 | // edges first). Build the region in an approximation of reverse | |
3497 | // program order. | |
3498 | if (bitmap_bit_p (fpr_pseudos_live_in, bb->index)) | |
3499 | stack.quick_push ({ ei_start (bb->preds), nullptr }); | |
3500 | if (bitmap_bit_p (fpr_pseudos_live_out, bb->index)) | |
3501 | stack.quick_push ({ ei_start (bb->succs), bb }); | |
3502 | else | |
3503 | region.safe_push (bb); | |
3504 | while (!stack.is_empty ()) | |
3505 | { | |
3506 | auto &node = stack.last (); | |
3507 | if (ei_end_p (node.ei)) | |
3508 | { | |
3509 | if (node.bb) | |
3510 | region.safe_push (node.bb); | |
3511 | stack.pop (); | |
3512 | continue; | |
3513 | } | |
3514 | edge e = ei_edge (node.ei); | |
3515 | if (node.bb) | |
3516 | { | |
3517 | // A forward edge from a node that has not yet been added | |
3518 | // to the region. | |
3519 | if (bitmap_bit_p (fpr_pseudos_live_in, e->dest->index) | |
3520 | && bitmap_set_bit (visited, e->dest->index)) | |
3521 | { | |
3522 | stack.safe_push ({ ei_start (e->dest->preds), nullptr }); | |
3523 | if (bitmap_bit_p (fpr_pseudos_live_out, e->dest->index)) | |
3524 | stack.safe_push ({ ei_start (e->dest->succs), e->dest }); | |
3525 | else | |
3526 | region.safe_push (e->dest); | |
3527 | } | |
3528 | else | |
3529 | ei_next (&node.ei); | |
3530 | } | |
3531 | else | |
3532 | { | |
3533 | // A backward edge from a node that has already been added | |
3534 | // to the region. | |
3535 | if (bitmap_bit_p (fpr_pseudos_live_out, e->src->index) | |
3536 | && bitmap_set_bit (visited, e->src->index)) | |
3537 | { | |
3538 | if (bitmap_bit_p (fpr_pseudos_live_in, e->src->index)) | |
3539 | stack.safe_push ({ ei_start (e->src->preds), nullptr }); | |
3540 | stack.safe_push ({ ei_start (e->src->succs), e->src }); | |
3541 | } | |
3542 | else | |
3543 | ei_next (&node.ei); | |
3544 | } | |
3545 | } | |
3546 | ||
3547 | m_current_point = 2; | |
3548 | start_new_region (); | |
3549 | ||
3550 | if (region.is_empty ()) | |
3551 | process_block (bb, true); | |
3552 | else | |
3553 | { | |
3554 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3555 | { | |
3556 | fprintf (dump_file, "\nRegion (from %d):", bb->index); | |
3557 | for (unsigned int j = 0; j < region.length (); ++j) | |
3558 | fprintf (dump_file, " %d", region[j]->index); | |
3559 | fprintf (dump_file, "\n"); | |
3560 | } | |
3561 | for (unsigned int j = 0; j < region.length (); ++j) | |
3562 | { | |
3563 | basic_block bb = region[j]; | |
3564 | bool is_isolated | |
3565 | = ((j == 0 && !bitmap_bit_p (fpr_pseudos_live_out, bb->index)) | |
3566 | || (j == region.length () - 1 | |
3567 | && !bitmap_bit_p (fpr_pseudos_live_in, bb->index))); | |
3568 | process_block (bb, is_isolated); | |
3569 | } | |
3570 | } | |
3571 | region.truncate (0); | |
3572 | ||
3573 | if (!m_allocnos.is_empty () && m_allocation_successful) | |
3574 | process_region (); | |
3575 | } | |
3576 | } | |
3577 | ||
3578 | // Run the pass on the current function. | |
3579 | void | |
3580 | early_ra::execute () | |
3581 | { | |
3582 | df_analyze (); | |
3583 | ||
3584 | preprocess_insns (); | |
3585 | propagate_pseudo_reg_info (); | |
3586 | choose_fpr_pseudos (); | |
3587 | if (bitmap_empty_p (m_fpr_pseudos)) | |
3588 | return; | |
3589 | ||
3590 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3591 | dump_pseudo_regs (); | |
3592 | ||
3593 | process_blocks (); | |
3594 | df_verify (); | |
3595 | } | |
3596 | ||
3597 | class pass_early_ra : public rtl_opt_pass | |
3598 | { | |
3599 | public: | |
3600 | pass_early_ra (gcc::context *ctxt) | |
3601 | : rtl_opt_pass (pass_data_early_ra, ctxt) | |
3602 | {} | |
3603 | ||
3604 | // opt_pass methods: | |
3605 | virtual bool gate (function *); | |
3606 | virtual unsigned int execute (function *); | |
3607 | }; | |
3608 | ||
3609 | bool | |
3610 | pass_early_ra::gate (function *) | |
3611 | { | |
3612 | // Require a vector ISA to be enabled. | |
3613 | if (!TARGET_SIMD && !TARGET_SVE) | |
3614 | return false; | |
3615 | ||
3616 | if (aarch64_early_ra == AARCH64_EARLY_RA_NONE) | |
3617 | return false; | |
3618 | ||
3619 | if (aarch64_early_ra == AARCH64_EARLY_RA_STRIDED | |
3620 | && !TARGET_STREAMING_SME2) | |
3621 | return false; | |
3622 | ||
3623 | return true; | |
3624 | } | |
3625 | ||
3626 | unsigned int | |
3627 | pass_early_ra::execute (function *fn) | |
3628 | { | |
3629 | early_ra (fn).execute (); | |
3630 | return 0; | |
3631 | } | |
3632 | ||
3633 | } // end namespace | |
3634 | ||
8b5cd6c4 | 3635 | // Create a new instance of the pass. |
3636 | rtl_opt_pass * |
3637 | make_pass_aarch64_early_ra (gcc::context *ctxt) | |
3638 | { | |
3639 | return new pass_early_ra (ctxt); | |
3640 | } |