1 // Early register allocation pass.
2 // Copyright (C) 2023 Free Software Foundation, Inc.
4 // This file is part of GCC.
6 // GCC is free software; you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation; either version 3, or (at your option) any later
9 // version.
11 // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 // WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 // for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with GCC; see the file COPYING3. If not see
18 // <http://www.gnu.org/licenses/>.
20 // This pass implements a simple form of early register allocation.
21 // It is restricted to FP/SIMD registers, and it only allocates
22 // a region of FP/SIMD usage if it can do so without any spilling.
23 // It punts on anything too complicated, leaving it to the real
24 // register allocator.
26 // There are two main purposes:
28 // (1) The pass runs before scheduling. It therefore has a chance to
29 // bag a spill-free allocation, if there is one, before scheduling
30 // moves things around.
32 // (2) The pass can make use of strided register operations, such as the
33 // strided forms of LD1 and ST1 in SME2.
35 // The allocator works at the level of individual FPRs, rather than whole
36 // pseudo registers. It is mostly intended to help optimize ACLE code.
38 // The pass is very simplistic. There are many things that could be improved.
39 #define IN_TARGET_CODE 1
41 #define INCLUDE_ALGORITHM
42 #define INCLUDE_FUNCTIONAL
45 #include "coretypes.h"
50 #include "tree-pass.h"
54 #include "print-rtl.h"
55 #include "insn-attr.h"
56 #include "insn-opinit.h"
60 class simple_iterator
: public wrapper_iterator
<T
>
63 using wrapper_iterator
<T
>::wrapper_iterator
;
65 simple_iterator
&operator-- () { --this->m_contents
; return *this; }
66 simple_iterator
operator-- (int) { return this->m_contents
--; }
67 simple_iterator
&operator++ () { ++this->m_contents
; return *this; }
68 simple_iterator
operator++ (int) { return this->m_contents
++; }
71 using namespace rtl_ssa
;
74 const pass_data pass_data_early_ra
=
78 OPTGROUP_NONE
, // optinfo_flags
80 0, // properties_required
81 0, // properties_provided
82 0, // properties_destroyed
83 0, // todo_flags_start
84 TODO_df_finish
, // todo_flags_finish
87 using allocno_iterator
= simple_iterator
<unsigned int>;
89 // Class that represents one run of the pass.
93 early_ra (function
*fn
);
98 static_assert (MAX_RECOG_OPERANDS
<= 32, "Operand mask is 32 bits");
99 using operand_mask
= uint32_t;
101 // Points in the function are represented using "program points".
102 // The program points are allocated in reverse order, with smaller
103 // numbers indicating later points. These special values indicate
104 // the start and end of a region.
105 static constexpr unsigned int START_OF_REGION
= ~0U;
106 static constexpr unsigned int END_OF_REGION
= 0U;
108 // An invalid allocno index, used to represent no allocno.
109 static constexpr unsigned int INVALID_ALLOCNO
= ~0U;
111 // Enumerates the single FPR sizes that matter for register allocation.
112 // Anything smaller than 64 bits is treated as FPR_D.
120 // A live range for an FPR, containing program points [START_POINT,
121 // END_POINT]. If ALLOCNO is not INVALID_ALLOCNO, the FPR is known
122 // to be equal to ALLOCNO for the duration of the live range.
123 struct fpr_range_info
125 unsigned int start_point
;
126 unsigned int end_point
;
127 unsigned int allocno
;
130 // Flags used in pseudo_reg_info.
132 // Whether the pseudo register occurs in one instruction alternative that
133 // matches (respectively) V0-V7, V0-V15, V0-V31 or a non-FP register.
134 static constexpr unsigned int ALLOWS_FPR8
= 1U << 0;
135 static constexpr unsigned int ALLOWS_FPR16
= 1U << 1;
136 static constexpr unsigned int ALLOWS_FPR32
= 1U << 2;
137 static constexpr unsigned int ALLOWS_NONFPR
= 1U << 3;
139 // Likewise whether the register occurs in an instruction that requires
140 // the associated register type.
141 static constexpr unsigned int NEEDS_FPR8
= 1U << 4;
142 static constexpr unsigned int NEEDS_FPR16
= 1U << 5;
143 static constexpr unsigned int NEEDS_FPR32
= 1U << 6;
144 static constexpr unsigned int NEEDS_NONFPR
= 1U << 7;
146 // Whether the pseudo register is copied to or from a hard FP register.
147 static constexpr unsigned int HAS_FPR_COPY
= 1U << 8;
149 // Whether the pseudo register is copied to or from a hard non-FP register.
150 static constexpr unsigned int HAS_NONFPR_COPY
= 1U << 9;
152 // Whether the pseudo register is used as a multi-register vector operand
153 // to an instruction that supports strided accesses, and whether it is used
154 // as a multi-register vector operand in some other non-move instruction.
155 static constexpr unsigned int HAS_FLEXIBLE_STRIDE
= 1U << 10;
156 static constexpr unsigned int HAS_FIXED_STRIDE
= 1U << 11;
158 // Flags that should be propagated across moves between pseudo registers.
159 static constexpr unsigned int PSEUDO_COPY_FLAGS
= ~(HAS_FLEXIBLE_STRIDE
162 // Information about a copy between two registers.
165 // The two registers, in order.
166 unsigned int regnos
[2];
168 // Index I gives the index of the next reg_copy_info involving REGNOS[I],
170 unsigned int next_copies
[2];
173 // Information about a pseudo register.
174 struct pseudo_reg_info
176 // Flags describing how the register is used, defined above.
177 unsigned int flags
: 16;
179 // The mode of the pseudo register, cached for convenience.
180 machine_mode mode
: 16;
182 // The index of the first copy, or 0 if none.
183 unsigned int first_copy
;
186 // Information about a group of allocnos that have a fixed offset
187 // relative to each other. The allocnos in each group must be allocated
190 // Allocnos that can share the same hard register are eventually
191 // chained together. These chains represent edges on a graph of
192 // allocnos, such that two allocnos joined by an edge use the same FPR.
193 // These chains are formed between individual allocnos rather than
194 // whole groups, although the system is required to be self-consistent.
195 // Each clique in the graph has at least one "full-width" allocno group
196 // that has one allocno for every FPR that needs to be allocated to
199 // One group of allocnos is chosen as the "color representative" of
200 // each clique in the graph. This group will be a full-width group.
202 struct allocno_group_info
204 array_slice
<unsigned int> chain_heads ();
205 array_slice
<allocno_info
> allocnos ();
206 allocno_group_info
*color_rep ();
207 allocno_info
*allocno (unsigned int);
209 // The color representative of the containing clique.
210 allocno_group_info
*m_color_rep
;
212 // The pseudo register associated with this allocno, or INVALID_REGNUM
216 // The offset of the first allocno (and thus this group) from the start
218 unsigned int color_rep_offset
: 8;
220 // The number of allocnos in the group, and thus the number of FPRs
221 // that need to be allocated.
222 unsigned int size
: 8;
224 // The gap between FPRs in the group. This is normally 1, but can be
225 // higher if we've decided to use strided multi-register accesses.
226 unsigned int stride
: 4;
228 // Used temporarily while deciding which allocnos should have non-unit
229 // strides; see find_strided_accesses for details.
230 int consecutive_pref
: 4;
231 int strided_polarity
: 2;
233 // The largest size of FPR needed by references to the allocno group.
234 fpr_size_info fpr_size
: 2;
236 // True if all non-move accesses can be converted to strided form.
237 unsigned int has_flexible_stride
: 1;
239 // True if we've assigned a color index to this group.
240 unsigned int has_color
: 1;
242 // The mask of FPRs that would make valid choices for the first allocno,
243 // taking the requirements of all the allocnos in the group into account.
244 unsigned int fpr_candidates
;
246 // The index of the color that has been assigned to the containing clique.
250 // Represents a single FPR-sized quantity that needs to be allocated.
251 // Each allocno is identified by index (for compactness).
253 // Quantities that span multiple FPRs are assigned groups of consecutive
254 // allocnos. Quantities that occupy a single FPR are assigned their own
258 allocno_group_info
*group ();
260 // The allocno's unique identifier.
263 // The offset of this allocno into the containing group.
264 unsigned int offset
: 8;
266 // The number of allocnos in the containing group.
267 unsigned int group_size
: 8;
269 // If the allocno has an affinity with at least one hard register
270 // (so that choosing that hard register would avoid a copy), this is
271 // the number of one such hard register, otherwise it is
272 // FIRST_PSEUDO_REGISTER.
273 unsigned int hard_regno
: 8;
275 // Set to 1 if the allocno has a single definition or 2 if it has more.
276 unsigned int num_defs
: 2;
278 // True if, at START_POINT, another allocno is copied to this one.
279 // See callers of record_copy for what counts as a copy.
280 unsigned int is_copy_dest
: 1;
282 // True if, at START_POINT, another allocno is copied to this one,
283 // and if the allocnos at both ends of the copy chain have an affinity
284 // with the same hard register.
285 unsigned int is_strong_copy_dest
: 1;
287 // True if, at END_POINT, this allocno is copied to another one,
288 // and both allocnos have an affinity with the same hard register.
289 unsigned int is_strong_copy_src
: 1;
291 // True if the allocno is subject to an earlyclobber at END_POINT,
292 // so that it cannot be tied to the destination of the instruction.
293 unsigned int is_earlyclobbered
: 1;
295 // The inclusive range of program points spanned by the allocno.
296 // START_POINT >= END_POINT.
297 unsigned int start_point
;
298 unsigned int end_point
;
300 // If, at END_POINT, this allocno is copied to another allocno, this
301 // is the index of that allocno, otherwise it is INVALID_ALLOCNO.
302 // See callers of record_copy for what counts as a copy.
303 unsigned int copy_dest
;
305 // If this field is not INVALID_ALLOCNO, this allocno is known to be
306 // equivalent to EQUIV_ALLOCNO for the whole of this allocno's lifetime.
307 unsigned int equiv_allocno
;
311 // The program point at which the allocno was last defined,
312 // or START_OF_REGION if none. This is only used temporarily
313 // while recording allocnos; after that, chain_next below is
315 unsigned int last_def_point
;
317 // The next chained allocno in program order (i.e. at lower program
318 // points), or INVALID_ALLOCNO if none.
319 unsigned int chain_next
;
322 // The previous chained allocno in program order (i.e. at higher
323 // program points), or INVALID_ALLOCNO if none.
324 unsigned int chain_prev
;
327 // Information about a full allocno group or a subgroup of it.
328 // The subgroup can be empty to indicate "none".
329 struct allocno_subgroup
331 array_slice
<allocno_info
> allocnos ();
332 allocno_info
*allocno (unsigned int);
334 // True if a subgroup is present.
335 operator bool () const { return count
; }
337 // The containing group.
338 allocno_group_info
*group
;
340 // The offset of the subgroup from the start of GROUP.
343 // The number of allocnos in the subgroup.
347 // Represents information about a copy between an allocno and an FPR.
348 // This establishes an affinity between the allocno and the FPR.
349 struct allocno_copy_info
351 // The allocno involved in the copy.
352 unsigned int allocno
;
354 // The FPR involved in the copy, relative to V0_REGNUM.
355 unsigned int fpr
: 16;
357 // A measure of how strong the affinity between the allocno and FPR is.
358 unsigned int weight
: 16;
361 // Information about a possible allocno chain.
362 struct chain_candidate_info
364 // The candidate target allocno.
365 allocno_info
*allocno
;
367 // A rating of the candidate (higher is better).
371 // Information about an allocno color.
374 // The color's unique identifier.
377 // The allocated hard register, when known.
378 unsigned int hard_regno
;
380 // The clique's representative group.
381 allocno_group_info
*group
;
383 // Weights in favor of choosing each FPR as the first register for GROUP.
384 int8_t fpr_preferences
[32];
387 template<typename T
, typename
... Ts
>
388 T
*region_allocate (Ts
...);
390 allocno_info
*chain_prev (allocno_info
*);
391 allocno_info
*chain_next (allocno_info
*);
393 void dump_pseudo_regs ();
394 void dump_fpr_ranges ();
396 void dump_allocnos ();
399 iterator_range
<allocno_iterator
> get_group_allocnos (unsigned int);
401 void preprocess_move (rtx
, rtx
);
402 void process_pseudo_reg_constraints (rtx_insn
*);
403 void preprocess_insns ();
405 int fpr_preference (unsigned int);
406 void propagate_pseudo_reg_info ();
408 void choose_fpr_pseudos ();
410 void start_new_region ();
412 allocno_group_info
*create_allocno_group (unsigned int, unsigned int);
413 allocno_subgroup
get_allocno_subgroup (rtx
);
414 void record_fpr_use (unsigned int);
415 void record_fpr_def (unsigned int);
416 void record_allocno_use (allocno_info
*);
417 void record_allocno_def (allocno_info
*);
418 bool valid_equivalence_p (allocno_info
*, allocno_info
*);
419 void record_copy (rtx
, rtx
, bool = false);
420 void record_constraints (rtx_insn
*);
421 void record_artificial_refs (unsigned int);
422 void record_insn_refs (rtx_insn
*);
424 bool consider_strong_copy_src_chain (allocno_info
*);
425 int strided_polarity_pref (allocno_info
*, allocno_info
*);
426 void find_strided_accesses ();
428 template<unsigned int allocno_info::*field
>
429 static int cmp_increasing (const void *, const void *);
430 bool is_chain_candidate (allocno_info
*, allocno_info
*);
431 int rate_chain (allocno_info
*, allocno_info
*);
432 static int cmp_chain_candidates (const void *, const void *);
433 void chain_allocnos (unsigned int &, unsigned int &);
434 void set_single_color_rep (allocno_info
*, allocno_group_info
*,
436 void set_color_rep (allocno_group_info
*, allocno_group_info
*,
438 bool try_to_chain_allocnos (allocno_info
*, allocno_info
*);
439 void create_color (allocno_group_info
*);
442 bool fpr_conflicts_with_allocno_p (unsigned int, allocno_info
*);
443 bool call_in_range_p (unsigned int, unsigned int, unsigned int);
444 unsigned int partial_fpr_clobbers (unsigned int, fpr_size_info
);
446 void process_copies ();
448 static int cmp_decreasing_size (const void *, const void *);
449 void allocate_colors ();
450 allocno_info
*find_independent_subchain (allocno_info
*);
451 color_info
*find_oldest_color (unsigned int, unsigned int);
452 void broaden_colors ();
453 void finalize_allocation ();
455 bool replace_regs (df_ref
);
456 int try_enforce_constraints (rtx_insn
*, vec
<std::pair
<int, int>> &);
457 void enforce_constraints (rtx_insn
*);
458 bool maybe_convert_to_strided_access (rtx_insn
*);
459 void apply_allocation ();
461 void process_region ();
462 bool is_dead_insn (rtx_insn
*);
463 void process_block (basic_block
, bool);
464 void process_blocks ();
466 // ----------------------------------------------------------------------
468 // The function we're operating on.
471 // Information about each pseudo register, indexed by REGNO.
472 auto_vec
<pseudo_reg_info
> m_pseudo_regs
;
474 // All recorded register copies.
475 auto_vec
<reg_copy_info
> m_pseudo_reg_copies
;
477 // The set of pseudos that we've decided to allocate an FPR to.
478 auto_bitmap m_fpr_pseudos
;
480 // ----------------------------------------------------------------------
482 // An obstack for allocating information that is referenced by the member
484 obstack m_region_obstack
;
485 void *m_region_alloc_start
;
487 // ----------------------------------------------------------------------
489 // The basic block that we're currently processing.
490 basic_block m_current_bb
;
492 // The lowest-numbered program point in the current basic block.
493 unsigned int m_current_bb_point
;
495 // The program point that we're currently processing (described above).
496 unsigned int m_current_point
;
498 // The set of allocnos that are currently live.
499 auto_bitmap m_live_allocnos
;
501 // The set of FPRs that are currently live.
502 unsigned int m_live_fprs
;
504 // ----------------------------------------------------------------------
506 // A mask of the FPRs that have already been allocated.
507 unsigned int m_allocated_fprs
;
509 // A mask of the FPRs that must be at least partially preserved by the
511 unsigned int m_call_preserved_fprs
;
513 // True if we haven't yet failed to allocate the current region.
514 bool m_allocation_successful
;
516 // A map from pseudo registers to the first allocno in their associated
518 hash_map
<int_hash
<unsigned int, INVALID_REGNUM
>,
519 allocno_group_info
*> m_regno_to_group
;
521 // All recorded copies between allocnos and FPRs.
522 auto_vec
<allocno_copy_info
> m_allocno_copies
;
524 // All allocnos, by index.
525 auto_vec
<allocno_info
*> m_allocnos
;
527 // All allocnos, by increasing START_POINT.
528 auto_vec
<allocno_info
*> m_sorted_allocnos
;
530 // All colors, by index.
531 auto_vec
<color_info
*> m_colors
;
533 // The instruction ranges that make up the current region,
534 // as half-open ranges [LAST, FIRST).
535 auto_vec
<std::pair
<rtx_insn
*, rtx_insn
*>> m_insn_ranges
;
537 // The live ranges of each FPR, in order of increasing program point.
538 auto_vec
<fpr_range_info
> m_fpr_ranges
[32];
540 // For each function call id, a list of program points at which a call
541 // to such a function is made. Each list is in order of increasing
543 auto_vec
<unsigned int> m_call_points
[NUM_ABI_IDS
];
545 // A list of instructions that can be removed if allocation succeeds.
546 auto_vec
<rtx_insn
*> m_dead_insns
;
549 // True if PAT is something that would typically be treated as a move.
551 is_move_set (rtx pat
)
553 if (GET_CODE (pat
) != SET
)
556 rtx dest
= SET_DEST (pat
);
558 dest
= SUBREG_REG (dest
);
559 if (!OBJECT_P (dest
))
562 rtx src
= SET_SRC (pat
);
564 src
= SUBREG_REG (src
);
565 if (!OBJECT_P (src
) && !CONSTANT_P (src
))
571 // Return true if operand OP is likely to match OP_ALT after register
574 likely_operand_match_p (const operand_alternative
&op_alt
, rtx op
)
576 // Empty constraints match everything.
577 const char *constraint
= op_alt
.constraint
;
578 if (constraint
[0] == 0 || constraint
[0] == ',')
583 char c
= *constraint
;
584 int len
= CONSTRAINT_LEN (c
, constraint
);
585 if (c
== 0 || c
== ',')
591 auto cn
= lookup_constraint (constraint
);
592 switch (get_constraint_type (cn
))
595 if (REG_P (op
) || SUBREG_P (op
))
600 case CT_SPECIAL_MEMORY
:
601 case CT_RELAXED_MEMORY
:
609 if (constraint_satisfied_p (op
, cn
))
617 if (op_alt
.matches
>= 0)
619 rtx other
= recog_data
.operand
[op_alt
.matches
];
620 if ((REG_P (other
) || SUBREG_P (other
))
621 && (REG_P (op
) || SUBREG_P (op
)))
627 // Return true if the operands of the current instruction are likely to
630 likely_alternative_match_p (const operand_alternative
*op_alt
)
632 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
633 if (!likely_operand_match_p (op_alt
[i
], recog_data
.operand
[i
]))
638 // Return the sum of how disparaged OP_ALT is.
640 count_rejects (const operand_alternative
*op_alt
)
643 for (int opno
= 0; opno
< recog_data
.n_operands
; ++opno
)
644 reject
+= op_alt
[opno
].reject
;
648 // Allocate a T from the region obstack.
649 template<typename T
, typename
... Ts
>
651 early_ra::region_allocate (Ts
... args
)
653 static_assert (std::is_trivially_destructible
<T
>::value
,
654 "destructor won't be called");
655 void *addr
= obstack_alloc (&m_region_obstack
, sizeof (T
));
656 return new (addr
) T (std::forward
<Ts
> (args
)...);
659 early_ra::early_ra (function
*fn
) : m_fn (fn
), m_live_fprs (0)
661 gcc_obstack_init (&m_region_obstack
);
662 m_region_alloc_start
= obstack_alloc (&m_region_obstack
, 0);
663 bitmap_tree_view (m_live_allocnos
);
666 early_ra::~early_ra ()
668 obstack_free (&m_region_obstack
, nullptr);
671 // Return an array that, for each allocno A in the group, contains the index
672 // of the allocno at the head of A's chain (that is, the one with the highest
673 // START_POINT). The index is INVALID_ALLOCNO if the chain is empty.
674 inline array_slice
<unsigned int>
675 early_ra::allocno_group_info::chain_heads ()
677 auto *start
= reinterpret_cast<unsigned int *> (this + 1);
678 return { start
, size
};
681 // Return the array of allocnos in the group.
682 inline array_slice
<early_ra::allocno_info
>
683 early_ra::allocno_group_info::allocnos ()
685 gcc_checking_assert (regno
!= INVALID_REGNUM
);
686 auto *chain_end
= reinterpret_cast<unsigned int *> (this + 1) + size
;
687 auto *allocno_start
= reinterpret_cast<allocno_info
*> (chain_end
);
688 return { allocno_start
, size
};
691 // Return the group's color representative.
692 inline early_ra::allocno_group_info
*
693 early_ra::allocno_group_info::color_rep ()
695 gcc_checking_assert (m_color_rep
->m_color_rep
== m_color_rep
);
699 // Return the group that contains the allocno.
700 inline early_ra::allocno_group_info
*
701 early_ra::allocno_info::group ()
703 auto *chain_end
= reinterpret_cast<unsigned int *> (this - offset
);
704 return reinterpret_cast<allocno_group_info
*> (chain_end
- group_size
) - 1;
707 // Return the allocnos in the subgroup.
708 inline array_slice
<early_ra::allocno_info
>
709 early_ra::allocno_subgroup::allocnos ()
713 return { &group
->allocnos ()[start
], count
};
716 // Return allocno I in the subgroup, with 0 being the first.
717 inline early_ra::allocno_info
*
718 early_ra::allocno_subgroup::allocno (unsigned int i
)
720 return &group
->allocnos ()[start
+ i
];
723 // Return the previous (earlier) allocno in ALLOCNO's chain, or null if none.
724 inline early_ra::allocno_info
*
725 early_ra::chain_prev (allocno_info
*allocno
)
727 if (allocno
->chain_prev
!= INVALID_ALLOCNO
)
728 return m_allocnos
[allocno
->chain_prev
];
732 // Return the next (later) allocno in ALLOCNO's chain, or null if none.
733 inline early_ra::allocno_info
*
734 early_ra::chain_next (allocno_info
*allocno
)
736 if (allocno
->chain_next
!= INVALID_ALLOCNO
)
737 return m_allocnos
[allocno
->chain_next
];
741 // Dump the information in m_pseudo_regs.
743 early_ra::dump_pseudo_regs ()
745 fprintf (dump_file
, "\nPseudos:\n");
746 fprintf (dump_file
, " %6s %6s %6s %6s %6s %6s %8s %s\n",
747 "Id", "FPR8", "FPR16", "FPR32", "NONFPR", "Stride",
748 "FPRness", "Copies");
749 pseudo_reg_info unused_reg
= {};
750 for (unsigned int regno
= FIRST_PSEUDO_REGISTER
;
751 regno
< m_pseudo_regs
.length (); ++regno
)
753 const auto ®
= m_pseudo_regs
[regno
];
754 if (memcmp (®
, &unused_reg
, sizeof (reg
)) == 0)
757 fprintf (dump_file
, " %6d %6s %6s %6s %6s %6s %8d", regno
,
758 reg
.flags
& NEEDS_FPR8
? "Req"
759 : reg
.flags
& ALLOWS_FPR8
? "OK" : "-",
760 reg
.flags
& NEEDS_FPR16
? "Req"
761 : reg
.flags
& ALLOWS_FPR16
? "OK" : "-",
762 reg
.flags
& NEEDS_FPR32
? "Req"
763 : reg
.flags
& ALLOWS_FPR32
? "OK" : "-",
764 reg
.flags
& NEEDS_NONFPR
? "Req"
765 : reg
.flags
& ALLOWS_NONFPR
? "OK" : "-",
766 ~reg
.flags
& HAS_FLEXIBLE_STRIDE
? "-"
767 : reg
.flags
& HAS_FIXED_STRIDE
? "Some" : "All",
768 fpr_preference (regno
));
769 if (reg
.flags
& HAS_FPR_COPY
)
770 fprintf (dump_file
, " FPR");
771 if (reg
.flags
& HAS_NONFPR_COPY
)
772 fprintf (dump_file
, " Non-FPR");
773 unsigned int copyi
= reg
.first_copy
;
776 const auto ©
= m_pseudo_reg_copies
[copyi
];
777 if (copy
.regnos
[0] == regno
)
779 fprintf (dump_file
, " r%d", copy
.regnos
[1]);
780 copyi
= copy
.next_copies
[0];
784 fprintf (dump_file
, " r%d", copy
.regnos
[0]);
785 copyi
= copy
.next_copies
[1];
788 fprintf (dump_file
, "\n");
792 // Dump the information in m_fpr_ranges.
794 early_ra::dump_fpr_ranges ()
796 fprintf (dump_file
, "\nFPR live ranges:\n");
797 for (unsigned int fpr
= 0; fpr
< 32; ++fpr
)
799 auto &intervals
= m_fpr_ranges
[fpr
];
800 if (intervals
.is_empty ())
803 fprintf (dump_file
, " %2d", fpr
);
804 for (unsigned int i
= 0; i
< intervals
.length (); ++i
)
806 auto &interval
= intervals
[i
];
807 if (i
&& (i
% 4) == 0)
808 fprintf (dump_file
, "\n ");
809 fprintf (dump_file
, " [ %6d %6d ]", interval
.start_point
,
812 fprintf (dump_file
, "\n");
816 // Dump the information in m_allocno_copies.
818 early_ra::dump_copies ()
820 fprintf (dump_file
, "\nCopies:\n");
821 fprintf (dump_file
, " %8s %3s %6s\n",
822 "Allocno", "FPR", "Weight");
823 for (const auto ©
: m_allocno_copies
)
824 fprintf (dump_file
, " %8d %3d %6d\n", copy
.allocno
,
825 copy
.fpr
, copy
.weight
);
828 // Dump the information in m_allocnos.
830 early_ra::dump_allocnos ()
832 char buffer
[sizeof ("r[:]") + 3 * 3 * sizeof (int) + 1];
833 fprintf (dump_file
, "\nAllocno groups:\n");
835 " %12s %12s %4s %6s %8s %s\n",
836 "Ids", "Regno", "Size", "Stride", "Cands", "Heads");
837 for (unsigned int ai
= 0; ai
< m_allocnos
.length (); ++ai
)
839 auto *allocno
= m_allocnos
[ai
];
840 if (allocno
->offset
!= 0)
842 auto *group
= allocno
->group ();
843 snprintf (buffer
, sizeof (buffer
), "[%d:%d]", allocno
->id
,
844 allocno
->id
+ group
->size
- 1);
845 fprintf (dump_file
, " %12s", buffer
);
846 snprintf (buffer
, sizeof (buffer
), "r%d[0:%d]", group
->regno
,
848 fprintf (dump_file
, " %12s %4s %6d %08x", buffer
,
849 group
->fpr_size
== FPR_D
? "D"
850 : group
->fpr_size
== FPR_Q
? "Q" : "Z",
852 group
->fpr_candidates
);
853 for (auto head
: group
->chain_heads ())
854 if (head
== INVALID_ALLOCNO
)
855 fprintf (dump_file
, " -");
857 fprintf (dump_file
, " %d", head
);
858 fprintf (dump_file
, "\n");
861 fprintf (dump_file
, "\nAllocno chains:\n");
862 fprintf (dump_file
, " %5s %12s %12s %5s %5s %5s %5s\n",
863 "Id", "Regno", "Range ", "Src", "Dest", "Equiv", "FPR");
864 for (unsigned int ai
= 0; ai
< m_allocnos
.length (); ++ai
)
866 auto *allocno
= m_allocnos
[ai
];
867 if (allocno
->chain_prev
!= INVALID_ALLOCNO
)
869 const char *prefix
= "=>";
872 auto *group
= allocno
->group ();
873 fprintf (dump_file
, " %2s", prefix
);
874 fprintf (dump_file
, " %5d", allocno
->id
);
875 snprintf (buffer
, sizeof (buffer
), "r%d[%d]", group
->regno
,
877 fprintf (dump_file
, " %12s", buffer
);
878 snprintf (buffer
, sizeof (buffer
), "[%d,%d]",
879 allocno
->start_point
, allocno
->end_point
);
880 fprintf (dump_file
, " %11s%s %5s", buffer
,
881 allocno
->is_earlyclobbered
? "*" : " ",
882 allocno
->is_strong_copy_dest
? "Strong"
883 : allocno
->is_copy_dest
? "Yes" : "-");
884 if (allocno
->copy_dest
== INVALID_ALLOCNO
)
885 fprintf (dump_file
, " %5s", "-");
887 fprintf (dump_file
, " %5d", allocno
->copy_dest
);
888 if (allocno
->equiv_allocno
!= INVALID_ALLOCNO
)
889 fprintf (dump_file
, " %5d", allocno
->equiv_allocno
);
891 fprintf (dump_file
, " %5s", "-");
892 if (allocno
->hard_regno
== FIRST_PSEUDO_REGISTER
)
893 fprintf (dump_file
, " %5s", "-");
895 fprintf (dump_file
, " %5s", reg_names
[allocno
->hard_regno
]);
896 fprintf (dump_file
, "\n");
897 if (allocno
->chain_next
== INVALID_ALLOCNO
)
899 allocno
= m_allocnos
[allocno
->chain_next
];
905 // Dump the information in m_colors.
907 early_ra::dump_colors ()
909 fprintf (dump_file
, "\nColors:\n");
910 for (unsigned int i
= 0; i
< m_colors
.length (); ++i
)
912 auto *color
= m_colors
[i
];
916 fprintf (dump_file
, " color %d:\n", i
);
917 fprintf (dump_file
, " chains:\n");
918 auto heads
= color
->group
->chain_heads ();
919 for (unsigned int i
= 0; i
< color
->group
->size
; ++i
)
921 fprintf (dump_file
, " %2d:", i
);
923 while (ai
!= INVALID_ALLOCNO
)
925 auto *allocno
= m_allocnos
[ai
];
926 fprintf (dump_file
, " r%d[%d]", allocno
->group ()->regno
,
928 ai
= allocno
->chain_next
;
930 fprintf (dump_file
, "\n");
932 fprintf (dump_file
, " FPR candidates:");
933 for (unsigned int fpr
= 0; fpr
< 32; ++fpr
)
934 fprintf (dump_file
, "%s%c", fpr
% 8 ? "" : " ",
935 color
->group
->fpr_candidates
& (1U << fpr
) ? 'Y' : '-');
936 fprintf (dump_file
, "\n");
937 fprintf (dump_file
, " FPR preferences:");
938 for (unsigned int fpr
= 0; fpr
< 32; ++fpr
)
939 if (color
->fpr_preferences
[fpr
])
940 fprintf (dump_file
, " %d(%d)", fpr
, color
->fpr_preferences
[fpr
]);
941 fprintf (dump_file
, "\n");
945 // Record any necessary information about a move from SRC to DEST.
947 early_ra::preprocess_move (rtx dest
, rtx src
)
950 dest
= SUBREG_REG (dest
);
955 src
= SUBREG_REG (src
);
959 // Sort the registers by increasing REGNO.
960 rtx regs
[] = { dest
, src
};
961 if (REGNO (dest
) > REGNO (src
))
962 std::swap (regs
[0], regs
[1]);
963 unsigned int regno0
= REGNO (regs
[0]);
964 unsigned int regno1
= REGNO (regs
[1]);
966 // Ignore moves between hard registers.
967 if (HARD_REGISTER_NUM_P (regno1
))
970 // For moves between hard registers and pseudos, just record the type
971 // of hard register involved.
972 auto ®1
= m_pseudo_regs
[regno1
];
973 reg1
.mode
= GET_MODE (regs
[1]);
974 if (HARD_REGISTER_NUM_P (regno0
))
976 reg1
.flags
|= (FP_REGNUM_P (regno0
) ? HAS_FPR_COPY
: HAS_NONFPR_COPY
);
980 // Record a move between two pseudo registers.
981 auto ®0
= m_pseudo_regs
[regno0
];
982 reg0
.mode
= GET_MODE (regs
[0]);
985 copy
.regnos
[0] = regno0
;
986 copy
.regnos
[1] = regno1
;
987 copy
.next_copies
[0] = reg0
.first_copy
;
988 copy
.next_copies
[1] = reg1
.first_copy
;
990 reg0
.first_copy
= reg1
.first_copy
= m_pseudo_reg_copies
.length ();
991 m_pseudo_reg_copies
.safe_push (copy
);
994 // Return true if INSN has a multi-vector operand and if that operand
995 // could be converted to strided form.
997 is_stride_candidate (rtx_insn
*insn
)
999 if (recog_memoized (insn
) < 0)
1002 auto stride_type
= get_attr_stride_type (insn
);
1003 return (stride_type
== STRIDE_TYPE_LUTI_CONSECUTIVE
1004 || stride_type
== STRIDE_TYPE_LD1_CONSECUTIVE
1005 || stride_type
== STRIDE_TYPE_ST1_CONSECUTIVE
);
1008 // Go through the constraints of INSN, which has already been extracted,
1009 // and record any relevant information about pseudo registers.
1011 early_ra::process_pseudo_reg_constraints (rtx_insn
*insn
)
1013 extract_insn (insn
);
1014 preprocess_constraints (insn
);
1016 // Flags that describe any multi-register vector operands.
1017 unsigned int insn_flags
= (is_stride_candidate (insn
)
1018 ? HAS_FLEXIBLE_STRIDE
1019 : HAS_FIXED_STRIDE
);
1021 auto alts
= get_preferred_alternatives (insn
);
1023 int operand_matches
[MAX_RECOG_OPERANDS
];
1024 unsigned int operand_flags
[MAX_RECOG_OPERANDS
];
1025 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
1027 operand_matches
[i
] = -1;
1028 operand_flags
[i
] = 0;
1031 // Extract information from the constraints, considering all plausible
1033 for (int altno
= 0; altno
< recog_data
.n_alternatives
; ++altno
)
1035 if (!(alts
& ALTERNATIVE_BIT (altno
)))
1038 auto *op_alt
= &recog_op_alt
[altno
* recog_data
.n_operands
];
1039 if (!likely_alternative_match_p (op_alt
))
1042 // Use SRC_OPNO's constraints to derive information about DEST_OPNO.
1043 auto record_operand
= [&](int src_opno
, int dest_opno
)
1045 int matches
= op_alt
[src_opno
].matches
;
1047 operand_matches
[dest_opno
] = matches
;
1049 auto cl
= alternative_class (op_alt
, src_opno
);
1052 if (reg_class_subset_p (cl
, FP_REGS
))
1053 operand_flags
[dest_opno
] |= ALLOWS_FPR32
;
1054 if (reg_class_subset_p (cl
, FP_LO_REGS
))
1055 operand_flags
[dest_opno
] |= ALLOWS_FPR16
;
1056 if (reg_class_subset_p (cl
, FP_LO8_REGS
))
1057 operand_flags
[dest_opno
] |= ALLOWS_FPR8
;
1058 if (!reg_classes_intersect_p (cl
, FP_REGS
))
1059 operand_flags
[dest_opno
] |= ALLOWS_NONFPR
;
1063 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
1065 record_operand (i
, i
);
1066 if (recog_data
.constraints
[i
][0] == '%')
1068 record_operand (i
, i
+ 1);
1069 record_operand (i
+ 1, i
);
1074 // Process the information we collected above.
1075 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
1077 rtx op
= recog_data
.operand
[i
];
1078 machine_mode orig_mode
= GET_MODE (op
);
1080 op
= SUBREG_REG (op
);
1082 // Record the accumulated information in m_pseudo_regs.
1083 if (REG_P (op
) && !HARD_REGISTER_P (op
))
1085 // The flags so far just describe what at least one alternative
1086 // would accept. Calculate the associated NEEDS_* information.
1087 auto flags
= operand_flags
[i
];
1088 if (!(flags
& ALLOWS_FPR32
) && (flags
& ALLOWS_NONFPR
))
1089 flags
|= NEEDS_NONFPR
;
1090 else if ((flags
& ALLOWS_FPR32
) && !(flags
& ALLOWS_NONFPR
))
1092 if (flags
& ALLOWS_FPR8
)
1093 flags
|= NEEDS_FPR8
;
1094 if (flags
& ALLOWS_FPR16
)
1095 flags
|= NEEDS_FPR16
;
1096 flags
|= NEEDS_FPR32
;
1099 // Look for multi-register vector operands.
1100 if (VECTOR_MODE_P (orig_mode
)
1101 && targetm
.hard_regno_mode_ok (V0_REGNUM
, orig_mode
)
1102 && hard_regno_nregs (V0_REGNUM
, orig_mode
) > 1)
1103 flags
|= insn_flags
;
1105 m_pseudo_regs
[REGNO (op
)].flags
|= flags
;
1106 m_pseudo_regs
[REGNO (op
)].mode
= GET_MODE (op
);
1109 // Treat matching constraints as equivalent to moves.
1110 if (operand_matches
[i
] >= 0)
1111 preprocess_move (recog_data
.operand
[operand_matches
[i
]], op
);
1115 // Make one pass through the instructions, collecting information that
1116 // will be needed later.
1118 early_ra::preprocess_insns ()
1120 m_pseudo_regs
.safe_grow_cleared (max_reg_num ());
1121 m_pseudo_reg_copies
.safe_push (reg_copy_info ());
1122 for (rtx_insn
*insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
1124 if (!NONDEBUG_INSN_P (insn
))
1127 if (GET_CODE (PATTERN (insn
)) == USE
1128 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
1131 rtx set
= single_set (insn
);
1132 if (set
&& is_move_set (set
))
1133 preprocess_move (SET_DEST (set
), SET_SRC (set
));
1135 process_pseudo_reg_constraints (insn
);
1139 // Return a signed integer that says (roughly) how strong an affinity
1140 // pseudo register REGNO has with FPRs. A positive value indicates
1141 // that we should try to allocate an FPR, a negative value indicates
1142 // that we shouldn't, and 0 indicates neutrality.
1144 early_ra::fpr_preference (unsigned int regno
)
1146 auto mode
= m_pseudo_regs
[regno
].mode
;
1147 auto flags
= m_pseudo_regs
[regno
].flags
;
1148 if (mode
== VOIDmode
|| !targetm
.hard_regno_mode_ok (V0_REGNUM
, mode
))
1150 else if (flags
& HAS_FLEXIBLE_STRIDE
)
1152 else if (flags
& NEEDS_FPR32
)
1154 else if (!(flags
& ALLOWS_FPR32
))
1156 else if ((flags
& HAS_FPR_COPY
) && !(flags
& HAS_NONFPR_COPY
))
1158 else if ((flags
& HAS_NONFPR_COPY
) && !(flags
& HAS_FPR_COPY
))
1164 // Propagate information about pseudo-registers along copy edges,
1165 // while doing so doesn't create conflicting FPR preferences.
1167 early_ra::propagate_pseudo_reg_info ()
1169 struct stack_entry
{ unsigned int regno
, copyi
; };
1171 auto_vec
<stack_entry
, 32> stack
;
1172 for (unsigned int i
= FIRST_PSEUDO_REGISTER
;
1173 i
< m_pseudo_regs
.length (); ++i
)
1175 auto start
= m_pseudo_regs
[i
].first_copy
;
1179 stack
.quick_push ({ i
, start
});
1180 while (!stack
.is_empty ())
1182 auto entry
= stack
.pop ();
1183 auto ©
= m_pseudo_reg_copies
[entry
.copyi
];
1184 auto src_regno
= entry
.regno
;
1185 auto dest_regno
= (src_regno
== copy
.regnos
[1]
1188 auto next_copyi
= (src_regno
== copy
.regnos
[1]
1189 ? copy
.next_copies
[1]
1190 : copy
.next_copies
[0]);
1192 stack
.safe_push ({ src_regno
, next_copyi
});
1194 auto &src_reg
= m_pseudo_regs
[src_regno
];
1195 auto &dest_reg
= m_pseudo_regs
[dest_regno
];
1197 if (src_reg
.flags
& ~dest_reg
.flags
& PSEUDO_COPY_FLAGS
)
1199 auto src_preference
= fpr_preference (src_regno
);
1200 auto dest_preference
= fpr_preference (dest_regno
);
1201 if ((src_preference
>= 0 && dest_preference
>= 0)
1202 || (src_preference
<= 0 && dest_preference
<= 0))
1204 dest_reg
.flags
|= (src_reg
.flags
& PSEUDO_COPY_FLAGS
);
1205 stack
.safe_push ({ dest_regno
, dest_reg
.first_copy
});
1212 // Decide which pseudos should be allocated an FPR, setting m_fpr_pseudos
1215 early_ra::choose_fpr_pseudos ()
1217 for (unsigned int i
= FIRST_PSEUDO_REGISTER
;
1218 i
< m_pseudo_regs
.length (); ++i
)
1219 if (fpr_preference (i
) > 0)
1220 bitmap_set_bit (m_fpr_pseudos
, i
);
1223 // Clear out information about the previous CFG region (if any)
1224 // and set up the data for a new region.
1226 early_ra::start_new_region ()
1228 obstack_free (&m_region_obstack
, m_region_alloc_start
);
1229 m_regno_to_group
.empty ();
1230 m_allocno_copies
.truncate (0);
1231 m_allocnos
.truncate (0);
1232 m_sorted_allocnos
.truncate (0);
1233 m_colors
.truncate (0);
1234 m_insn_ranges
.truncate (0);
1235 for (auto &fpr_ranges
: m_fpr_ranges
)
1236 fpr_ranges
.truncate (0);
1237 for (auto &call_points
: m_call_points
)
1238 call_points
.truncate (0);
1239 gcc_assert (bitmap_empty_p (m_live_allocnos
) && m_live_fprs
== 0);
1240 m_dead_insns
.truncate (0);
1241 m_allocated_fprs
= 0;
1242 m_call_preserved_fprs
= 0;
1243 m_allocation_successful
= true;
1246 // Create and return an allocno group of size SIZE for register REGNO.
1247 // REGNO can be INVALID_REGNUM if the group just exists to allow
1248 // other groups to be chained together, and does not have any new
1249 // allocnos of its own.
1250 early_ra::allocno_group_info
*
1251 early_ra::create_allocno_group (unsigned int regno
, unsigned int size
)
1253 static_assert (alignof (unsigned int) == alignof (allocno_info
),
1254 "allocno_info alignment");
1255 unsigned int num_allocnos
= (regno
!= INVALID_REGNUM
? size
: 0);
1257 // Allocate an allocno_group_info, followed by an array of chain heads,
1258 // followed by the allocnos themselves.
1259 size_t alloc_size
= (sizeof (allocno_group_info
)
1260 + size
* sizeof (unsigned int)
1261 + num_allocnos
* sizeof (allocno_info
));
1262 void *data
= obstack_alloc (&m_region_obstack
, alloc_size
);
1264 // Initialize the group.
1265 auto *group
= reinterpret_cast<allocno_group_info
*> (data
);
1266 memset (group
, 0, sizeof (*group
));
1267 group
->m_color_rep
= group
;
1268 group
->regno
= regno
;
1271 group
->fpr_size
= FPR_D
;
1272 group
->fpr_candidates
= ~0U;
1274 // Initialize the chain heads.
1275 auto heads
= group
->chain_heads ();
1276 for (unsigned int i
= 0; i
< heads
.size (); ++i
)
1277 heads
[i
] = (i
< num_allocnos
? m_allocnos
.length () + i
: INVALID_ALLOCNO
);
1279 // Initialize the allocnos.
1280 if (num_allocnos
> 0)
1282 auto allocnos
= group
->allocnos ();
1283 memset (allocnos
.begin (), 0, num_allocnos
* sizeof (allocno_info
));
1284 for (unsigned int i
= 0; i
< num_allocnos
; ++i
)
1286 auto *allocno
= &allocnos
[i
];
1287 allocno
->id
= m_allocnos
.length ();
1288 allocno
->offset
= i
;
1289 allocno
->group_size
= size
;
1290 allocno
->hard_regno
= FIRST_PSEUDO_REGISTER
;
1291 allocno
->start_point
= END_OF_REGION
;
1292 allocno
->end_point
= START_OF_REGION
;
1293 allocno
->copy_dest
= INVALID_ALLOCNO
;
1294 allocno
->equiv_allocno
= INVALID_ALLOCNO
;
1295 allocno
->chain_next
= INVALID_ALLOCNO
;
1296 allocno
->chain_prev
= INVALID_ALLOCNO
;
1297 m_allocnos
.safe_push (allocno
);
1303 // If REG refers to a pseudo register that might be allocated to FPRs,
1304 // return the associated range of allocnos, creating new ones if necessary.
1305 // Return an empty range otherwise.
1306 early_ra::allocno_subgroup
1307 early_ra::get_allocno_subgroup (rtx reg
)
1309 if (GET_CODE (reg
) == SUBREG
)
1311 allocno_subgroup inner
= get_allocno_subgroup (SUBREG_REG (reg
));
1315 if (!targetm
.modes_tieable_p (GET_MODE (SUBREG_REG (reg
)),
1318 m_allocation_successful
= false;
1323 subreg_get_info (V0_REGNUM
, GET_MODE (SUBREG_REG (reg
)),
1324 SUBREG_BYTE (reg
), GET_MODE (reg
), &info
);
1325 if (!info
.representable_p
)
1327 m_allocation_successful
= false;
1331 inner
.start
+= info
.offset
;
1332 inner
.count
= info
.nregs
;
1336 if (!REG_P (reg
) || HARD_REGISTER_P (reg
))
1339 unsigned int regno
= REGNO (reg
);
1340 if (fpr_preference (regno
) <= 0)
1343 unsigned int count
= hard_regno_nregs (V0_REGNUM
, GET_MODE (reg
));
1345 auto &entry
= m_regno_to_group
.get_or_insert (regno
, &existed
);
1348 auto *group
= create_allocno_group (regno
, count
);
1349 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1351 auto allocnos
= group
->allocnos ();
1352 fprintf (dump_file
, "Creating allocnos [%d:%d] for r%d\n",
1353 allocnos
.front ().id
, allocnos
.back ().id
, regno
);
1356 auto reg_bits
= GET_MODE_BITSIZE (GET_MODE (reg
));
1357 auto fpr_bits
= exact_div (reg_bits
, count
);
1358 auto flags
= m_pseudo_regs
[regno
].flags
;
1360 // Punt for now if there is a choice to be made between using an
1361 // FPR and a non-FPR.
1362 if ((flags
& NEEDS_NONFPR
)
1363 || ((flags
& ALLOWS_NONFPR
)
1364 && !FLOAT_MODE_P (GET_MODE (reg
))
1365 && !VECTOR_MODE_P (GET_MODE (reg
))))
1366 m_allocation_successful
= false;
1368 if (flags
& ALLOWS_FPR8
)
1369 group
->fpr_candidates
&= 0xff;
1370 else if (flags
& ALLOWS_FPR16
)
1371 group
->fpr_candidates
&= 0xffff;
1372 group
->fpr_candidates
&= ~0U >> (count
- 1);
1374 group
->has_flexible_stride
= ((flags
& HAS_FLEXIBLE_STRIDE
) != 0
1375 && (flags
& HAS_FIXED_STRIDE
) == 0);
1377 group
->fpr_size
= (maybe_gt (fpr_bits
, 128) ? FPR_Z
1378 : maybe_gt (fpr_bits
, 64) ? FPR_Q
: FPR_D
);
1382 return { entry
, 0, count
};
1385 // Record a use of FPR REGNO at the current program point, as part of
1386 // a backwards walk over a block.
1388 early_ra::record_fpr_use (unsigned int regno
)
1390 gcc_assert (IN_RANGE (regno
, V0_REGNUM
, V31_REGNUM
));
1391 unsigned int offset
= regno
- V0_REGNUM
;
1392 if (!(m_live_fprs
& (1U << offset
)))
1394 m_fpr_ranges
[offset
].safe_push ({ START_OF_REGION
, m_current_point
,
1396 m_live_fprs
|= 1U << offset
;
1400 // Record a definition of FPR REGNO at the current program point, as part of
1401 // a backwards walk over a block.
1403 early_ra::record_fpr_def (unsigned int regno
)
1405 gcc_assert (IN_RANGE (regno
, V0_REGNUM
, V31_REGNUM
));
1406 unsigned int offset
= regno
- V0_REGNUM
;
1408 // The definition completes the current live range. If the result
1409 // of the definition is used, the live range extends to the last use.
1410 // Otherwise the live range is just a momentary blip at the current point.
1411 auto &ranges
= m_fpr_ranges
[offset
];
1412 if (m_live_fprs
& (1U << offset
))
1414 ranges
.last ().start_point
= m_current_point
;
1415 m_live_fprs
&= ~(1U << offset
);
1418 ranges
.safe_push ({ m_current_point
, m_current_point
, INVALID_ALLOCNO
});
1421 // Record a use of allocno ALLOCNO at the current program point, as part
1422 // of a backwards walk over a block.
1424 early_ra::record_allocno_use (allocno_info
*allocno
)
1426 bitmap_set_bit (m_live_allocnos
, allocno
->id
);
1427 if (allocno
->end_point
> m_current_point
)
1429 allocno
->end_point
= m_current_point
;
1430 allocno
->last_def_point
= START_OF_REGION
;
1432 allocno
->start_point
= m_current_point
;
1433 allocno
->is_copy_dest
= false;
1434 allocno
->is_strong_copy_dest
= false;
1435 allocno
->equiv_allocno
= INVALID_ALLOCNO
;
1438 // Record a definition of the allocno with index AI at the current program
1439 // point, as part of a backwards walk over a block. The allocno is known
1442 early_ra::record_allocno_def (allocno_info
*allocno
)
1444 allocno
->last_def_point
= m_current_point
;
1445 allocno
->start_point
= m_current_point
;
1446 allocno
->num_defs
= MIN (allocno
->num_defs
+ 1, 2);
1447 gcc_checking_assert (!allocno
->is_copy_dest
1448 && !allocno
->is_strong_copy_dest
);
1449 if (!bitmap_clear_bit (m_live_allocnos
, allocno
->id
))
1453 // Return true if a move from SRC_ALLOCNO to DEST_ALLOCNO could be treated
1454 // as an equivalence.
1456 early_ra::valid_equivalence_p (allocno_info
*dest_allocno
,
1457 allocno_info
*src_allocno
)
1459 if (src_allocno
->end_point
> dest_allocno
->end_point
)
1460 // The src allocno dies first.
1463 if (src_allocno
->num_defs
!= 0)
1465 if (dest_allocno
->end_point
< m_current_bb_point
)
1466 // We don't currently track enough information to handle multiple
1467 // definitions across basic block boundaries.
1470 if (src_allocno
->last_def_point
>= dest_allocno
->end_point
)
1471 // There is another definition during the destination's live range.
1474 return dest_allocno
->num_defs
== 1;
1477 // Record any relevant allocno-related information for an actual or imagined
1478 // copy from SRC to DEST. FROM_MOVE_P is true if the copy was an explicit
1479 // move instruction, false if it represents one way of satisfying the previous
1480 // instruction's constraints.
1482 early_ra::record_copy (rtx dest
, rtx src
, bool from_move_p
)
1484 auto dest_range
= get_allocno_subgroup (dest
);
1485 auto src_range
= get_allocno_subgroup (src
);
1489 && FP_REGNUM_P (REGNO (src
)))
1491 // A copy from an FPR to an allocno group.
1492 unsigned int fpr
= REGNO (src
) - V0_REGNUM
;
1493 m_allocno_copies
.safe_push ({ dest_range
.allocno (0)->id
, fpr
,
1494 dest_range
.count
});
1496 // If the allocno at the other end of the chain of copies from DEST
1497 // has a copy to the same FPR, record that all intervening copy chains
1498 // could become "strong" ones. This indicates that picking the FPR
1499 // avoids a copy at both ends.
1500 unsigned int hard_regno
= REGNO (src
);
1501 for (auto &dest_allocno
: dest_range
.allocnos ())
1502 if (dest_allocno
.hard_regno
== hard_regno
++)
1503 dest_allocno
.is_strong_copy_src
= true;
1505 else if (from_move_p
1508 && FP_REGNUM_P (REGNO (dest
)))
1510 // A copy from an allocno group to an FPR.
1511 unsigned int fpr
= REGNO (dest
) - V0_REGNUM
;
1512 m_allocno_copies
.safe_push ({ src_range
.allocno (0)->id
, fpr
,
1514 for (auto &src_allocno
: src_range
.allocnos ())
1516 // If the copy comes from a move, see whether the destination
1517 // FPR is known to be equal to the source allocno for the FPR's
1519 if (from_move_p
&& src_allocno
.num_defs
== 0)
1521 auto &last_range
= m_fpr_ranges
[fpr
].last ();
1522 if (last_range
.end_point
>= src_allocno
.end_point
)
1523 last_range
.allocno
= src_allocno
.id
;
1525 src_allocno
.hard_regno
= V0_REGNUM
+ fpr
;
1529 else if (src_range
&& dest_range
)
1531 // A copy between two allocno groups. We can only have a mismatched
1532 // number of FPRs for imaginary, non-move copies. In that case
1533 // the matching happens on the common lowparts.
1534 gcc_assert (!from_move_p
|| src_range
.count
== dest_range
.count
);
1535 unsigned int count
= std::min (src_range
.count
, dest_range
.count
);
1536 if (WORDS_BIG_ENDIAN
)
1538 src_range
.start
+= src_range
.count
- count
;
1539 dest_range
.start
+= dest_range
.count
- count
;
1541 src_range
.count
= count
;
1542 dest_range
.count
= count
;
1544 // Ignore (imaginary non-move) copies if the destination is still live.
1545 for (auto &dest_allocno
: dest_range
.allocnos ())
1546 if (bitmap_bit_p (m_live_allocnos
, dest_allocno
.id
))
1549 for (unsigned int i
= 0; i
< src_range
.count
; ++i
)
1551 auto *dest_allocno
= dest_range
.allocno (i
);
1552 auto *src_allocno
= src_range
.allocno (i
);
1553 if (src_allocno
->end_point
> dest_allocno
->start_point
)
1555 gcc_assert (src_allocno
->copy_dest
== INVALID_ALLOCNO
1556 || src_allocno
->copy_dest
== dest_allocno
->id
);
1557 src_allocno
->copy_dest
= dest_allocno
->id
;
1558 src_allocno
->hard_regno
= dest_allocno
->hard_regno
;
1559 dest_allocno
->is_copy_dest
= 1;
1561 else if (from_move_p
1562 && valid_equivalence_p (dest_allocno
, src_allocno
))
1563 dest_allocno
->equiv_allocno
= src_allocno
->id
;
1568 // Record any relevant allocno-related information about the constraints
1569 // on INSN, which has already been extracted.
1571 early_ra::record_constraints (rtx_insn
*insn
)
1573 preprocess_constraints (insn
);
1575 int operand_matches
[MAX_RECOG_OPERANDS
];
1576 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
1577 operand_matches
[i
] = -1;
1579 auto alts
= get_preferred_alternatives (insn
);
1580 bool any_ok
= recog_data
.n_alternatives
== 0;
1582 // The set of output operands that are earlyclobber in at least one
1584 operand_mask earlyclobber_operands
= 0;
1586 // The set of output operands that are matched to inputs in at least
1588 operand_mask matched_operands
= 0;
1590 // The set of output operands that are not matched to inputs in at least
1592 operand_mask unmatched_operands
= 0;
1594 // The set of input operands that are matched to outputs in at least one
1595 // alternative, or that overlap with such an input if the output is not
1596 // earlyclobber. The latter part of the condition copes with things
1597 // like y = x * x, where the first x is tied to the destination, and where
1598 // y is not earlyclobber.
1599 operand_mask matches_operands
= 0;
1601 for (int altno
= 0; altno
< recog_data
.n_alternatives
; ++altno
)
1603 if (!(alts
& ALTERNATIVE_BIT (altno
)))
1606 auto *op_alt
= &recog_op_alt
[altno
* recog_data
.n_operands
];
1607 if (!likely_alternative_match_p (op_alt
))
1612 // Update the information for operand DEST_OPNO based on the constraint
1613 // information for operand SRC_OPNO. The numbers can be different for
1614 // swapped commutative operands.
1615 auto record_operand
= [&](int src_opno
, int dest_opno
)
1617 int matches
= op_alt
[src_opno
].matches
;
1618 // A matched earlyclobber cannot be used if the same operand value
1619 // occurs in an unmatched operand. E.g. for y = x * x, a matched
1620 // earlyclobber on the first input does not cover the second input.
1623 rtx op
= recog_data
.operand
[dest_opno
];
1624 operand_mask overlaps
= 0;
1625 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
1627 && !recog_data
.is_operator
[i
]
1628 && recog_data
.operand_type
[i
] != OP_OUT
1629 && reg_overlap_mentioned_p (op
, recog_data
.operand
[i
]))
1630 overlaps
|= 1U << i
;
1631 if (!op_alt
[matches
].earlyclobber
|| overlaps
== 0)
1633 operand_matches
[dest_opno
] = matches
;
1634 matches_operands
|= (1U << dest_opno
) | overlaps
;
1639 auto reject
= count_rejects (op_alt
);
1640 for (int opno
= 0; opno
< recog_data
.n_operands
; ++opno
)
1642 operand_mask op_mask
= operand_mask (1) << opno
;
1644 if (recog_data
.operand_type
[opno
] != OP_IN
)
1646 if (reject
== 0 && op_alt
[opno
].matched
>= 0)
1647 matched_operands
|= op_mask
;
1649 unmatched_operands
|= op_mask
;
1652 if (op_alt
[opno
].earlyclobber
)
1653 earlyclobber_operands
|= op_mask
;
1655 // Punt for now on scratches. If we wanted to handle them,
1656 // we'd need to create allocnos for them, like IRA does.
1657 rtx op
= recog_data
.operand
[opno
];
1658 if (GET_CODE (op
) == SCRATCH
1659 && reg_classes_intersect_p (op_alt
[opno
].cl
, FP_REGS
))
1660 m_allocation_successful
= false;
1662 // Record filter information, which applies to the first register
1664 if (auto filters
= alternative_register_filters (op_alt
, opno
))
1665 if (auto range
= get_allocno_subgroup (recog_data
.operand
[opno
]))
1666 for (unsigned int fpr
= range
.start
; fpr
< 32; ++fpr
)
1667 if (!test_register_filters (filters
, fpr
))
1668 range
.group
->fpr_candidates
&= ~(1U << (fpr
- range
.start
));
1672 // Record possible matched operands.
1673 record_operand (opno
, opno
);
1674 if (recog_data
.constraints
[opno
][0] == '%')
1676 record_operand (opno
, opno
+ 1);
1677 record_operand (opno
+ 1, opno
);
1685 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1686 fprintf (dump_file
, " -- no match\n");
1687 m_allocation_successful
= false;
1690 // Record if there is an output operand that is never earlyclobber and never
1691 // matched to an input. See the comment below for how this is used.
1692 rtx dest_op
= NULL_RTX
;
1693 for (int opno
= 0; opno
< recog_data
.n_operands
; ++opno
)
1695 auto op_mask
= operand_mask (1) << opno
;
1696 if (recog_data
.operand_type
[opno
] == OP_OUT
1697 && (earlyclobber_operands
& op_mask
) == 0
1698 && (matched_operands
& op_mask
) == 0)
1700 dest_op
= recog_data
.operand
[opno
];
1705 for (int opno
= 0; opno
< recog_data
.n_operands
; ++opno
)
1707 auto op_mask
= operand_mask (1) << opno
;
1708 rtx op
= recog_data
.operand
[opno
];
1709 int matches
= operand_matches
[opno
];
1711 // Punt for now on operands that already have a fixed choice of
1712 // register, since we don't have IRA's ability to find an alternative.
1713 // It's better if earlier passes don't create this kind of situation.
1714 if (REG_P (op
) && FP_REGNUM_P (REGNO (op
)))
1715 m_allocation_successful
= false;
1717 // Treat input operands as being earlyclobbered if an output is
1718 // sometimes earlyclobber and if the input never matches an output.
1719 // Do the same if there is an output that is always matched to an
1720 // input, and if this operand doesn't match that input. In both
1721 // cases, tying the input and the output would lead to an impossible
1722 // combination (or at least one that is difficult to reload).
1723 if (recog_data
.operand_type
[opno
] != OP_OUT
1724 && ((earlyclobber_operands
&& matches
< 0)
1725 || ((matched_operands
& ~unmatched_operands
)
1726 && !(matches_operands
& op_mask
))))
1727 for (auto &allocno
: get_allocno_subgroup (op
).allocnos ())
1728 if (allocno
.end_point
+ 1 == m_current_point
)
1729 allocno
.is_earlyclobbered
= true;
1731 // Create copies between operands that can be tied. This (deliberately)
1732 // might add several copies to the same destination register; later code
1733 // can then choose between them based on other criteria.
1735 // If there is an output operand that is never matched or earlyclobber,
1736 // and an input operand that never matches an output operand, create
1737 // a tentative copy between them. This allows hard register preferences
1738 // to be transmitted along the copy chains.
1740 record_copy (recog_data
.operand
[matches
], op
);
1741 else if (dest_op
&& recog_data
.operand_type
[opno
] == OP_IN
)
1742 record_copy (dest_op
, op
);
1746 // If FLAGS is DF_REF_AT_TOP, model the artificial uses and defs at the
1747 // start of the current basic block, otherwise model the artificial uses
1748 // and defs at the end of the basic block. This is done as part of a
1749 // backwards walk, so defs should be processed before uses.
1751 early_ra::record_artificial_refs (unsigned int flags
)
1755 FOR_EACH_ARTIFICIAL_DEF (ref
, m_current_bb
->index
)
1756 if ((DF_REF_FLAGS (ref
) & DF_REF_AT_TOP
) == flags
1757 && IN_RANGE (DF_REF_REGNO (ref
), V0_REGNUM
, V31_REGNUM
))
1758 record_fpr_def (DF_REF_REGNO (ref
));
1759 m_current_point
+= 1;
1761 FOR_EACH_ARTIFICIAL_USE (ref
, m_current_bb
->index
)
1762 if ((DF_REF_FLAGS (ref
) & DF_REF_AT_TOP
) == flags
1763 && IN_RANGE (DF_REF_REGNO (ref
), V0_REGNUM
, V31_REGNUM
))
1764 record_fpr_use (DF_REF_REGNO (ref
));
1765 m_current_point
+= 1;
1768 // Model the register references in INSN as part of a backwards walk.
1770 early_ra::record_insn_refs (rtx_insn
*insn
)
1774 // Record all definitions, excluding partial call clobbers.
1775 FOR_EACH_INSN_DEF (ref
, insn
)
1776 if (IN_RANGE (DF_REF_REGNO (ref
), V0_REGNUM
, V31_REGNUM
))
1777 record_fpr_def (DF_REF_REGNO (ref
));
1780 auto range
= get_allocno_subgroup (DF_REF_REG (ref
));
1781 for (auto &allocno
: range
.allocnos ())
1783 // If the destination is unused, record a momentary blip
1784 // in its live range.
1785 if (!bitmap_bit_p (m_live_allocnos
, allocno
.id
))
1786 record_allocno_use (&allocno
);
1787 record_allocno_def (&allocno
);
1790 m_current_point
+= 1;
1792 // Model the call made by a call insn as a separate phase in the
1793 // evaluation of the insn. Any partial call clobbers happen at that
1794 // point, rather than in the definition or use phase of the insn.
1795 if (auto *call_insn
= dyn_cast
<rtx_call_insn
*> (insn
))
1797 function_abi abi
= insn_callee_abi (call_insn
);
1798 m_call_points
[abi
.id ()].safe_push (m_current_point
);
1799 m_current_point
+= 1;
1802 // Record all uses. We can ignore READ_MODIFY_WRITE uses of plain subregs,
1803 // since we track the FPR-sized parts of them individually.
1804 FOR_EACH_INSN_USE (ref
, insn
)
1805 if (IN_RANGE (DF_REF_REGNO (ref
), V0_REGNUM
, V31_REGNUM
))
1806 record_fpr_use (DF_REF_REGNO (ref
));
1807 else if (!DF_REF_FLAGS_IS_SET (ref
, DF_REF_READ_WRITE
)
1808 || DF_REF_FLAGS_IS_SET (ref
, DF_REF_STRICT_LOW_PART
)
1809 || DF_REF_FLAGS_IS_SET (ref
, DF_REF_ZERO_EXTRACT
))
1811 auto range
= get_allocno_subgroup (DF_REF_REG (ref
));
1812 for (auto &allocno
: range
.allocnos ())
1813 record_allocno_use (&allocno
);
1815 m_current_point
+= 1;
1818 // ALLOCNO->is_strong_copy_src is true. See whether ALLOCNO heads a
1819 // natural chain that has an affinity with the same hard register at
1822 early_ra::consider_strong_copy_src_chain (allocno_info
*allocno
)
1824 auto *src_allocno
= allocno
;
1825 while (src_allocno
->copy_dest
!= INVALID_ALLOCNO
)
1827 auto *dest_allocno
= m_allocnos
[src_allocno
->copy_dest
];
1828 if (dest_allocno
->start_point
> src_allocno
->end_point
1829 || dest_allocno
->hard_regno
!= src_allocno
->hard_regno
)
1831 gcc_checking_assert (dest_allocno
->is_copy_dest
);
1832 src_allocno
= dest_allocno
;
1835 while (allocno
->copy_dest
!= INVALID_ALLOCNO
)
1837 allocno
->is_strong_copy_src
= 1;
1838 allocno
= m_allocnos
[allocno
->copy_dest
];
1839 allocno
->is_strong_copy_dest
= 1;
1844 // ALLOCNO1 and ALLOCNO2 are linked in some way, and might end up being
1845 // chained together. See whether chaining them requires the containing
1846 // groups to have the same stride, or whether it requires them to have
1847 // different strides. Return 1 if they should have the same stride,
1848 // -1 if they should have different strides, or 0 if it doesn't matter.
1850 early_ra::strided_polarity_pref (allocno_info
*allocno1
,
1851 allocno_info
*allocno2
)
1853 if (allocno1
->offset
+ 1 < allocno1
->group_size
1854 && allocno2
->offset
+ 1 < allocno2
->group_size
)
1856 if (is_chain_candidate (allocno1
+ 1, allocno2
+ 1))
1862 if (allocno1
->offset
> 0 && allocno2
->offset
> 0)
1864 if (is_chain_candidate (allocno1
- 1, allocno2
- 1))
1873 // Decide which groups should be strided. Also complete "strong" copy chains.
1875 early_ra::find_strided_accesses ()
1877 // This function forms a graph of allocnos, linked by equivalences and
1878 // natural copy chains. It temporarily uses chain_next to record the
1879 // reverse of equivalence edges (equiv_allocno) and chain_prev to record
1880 // the reverse of copy edges (copy_dest).
1881 unsigned int allocno_info::*links
[] = {
1882 &allocno_info::chain_next
,
1883 &allocno_info::chain_prev
,
1884 &allocno_info::copy_dest
,
1885 &allocno_info::equiv_allocno
1888 // Set up the temporary reverse edges. Check for strong copy chains.
1889 for (unsigned int i
= m_allocnos
.length (); i
-- > 0; )
1891 auto *allocno1
= m_allocnos
[i
];
1892 if (allocno1
->copy_dest
!= INVALID_ALLOCNO
)
1893 m_allocnos
[allocno1
->copy_dest
]->chain_prev
= allocno1
->id
;
1894 if (allocno1
->equiv_allocno
!= INVALID_ALLOCNO
)
1895 m_allocnos
[allocno1
->equiv_allocno
]->chain_next
= allocno1
->id
;
1897 if (allocno1
->is_strong_copy_src
1898 && (allocno1
->is_copy_dest
1899 || !consider_strong_copy_src_chain (allocno1
)))
1900 allocno1
->is_strong_copy_src
= false;
1903 // Partition the graph into cliques based on edges that have the following
1906 // - the edge joins two allocnos whose groups have a free choice between
1907 // consecutive and strided allocations.
1909 // - the two groups have a relative strided polarity preference (that is
1910 // they should make the same choice between consecutive and strided,
1911 // or they should make opposite choices).
1913 // Assign relative polarities to each group connected in this way.
1915 // The aim is to discover natural move-free striding choices, which will
1916 // often exist in carefully written ACLE code.
1917 unsigned int num_edges
= m_allocnos
.length () * ARRAY_SIZE (links
);
1918 auto_sbitmap
visited_edges (num_edges
);
1919 bitmap_clear (visited_edges
);
1921 auto_vec
<unsigned int, 32> worklist
;
1922 for (unsigned int i
= 0; i
< num_edges
; ++i
)
1924 if (!bitmap_set_bit (visited_edges
, i
))
1926 worklist
.quick_push (i
);
1927 while (!worklist
.is_empty ())
1929 auto ei
= worklist
.pop ();
1930 auto *allocno1
= m_allocnos
[ei
/ ARRAY_SIZE (links
)];
1931 auto ai2
= allocno1
->*links
[ei
% ARRAY_SIZE (links
)];
1932 if (ai2
== INVALID_ALLOCNO
)
1935 auto *allocno2
= m_allocnos
[ai2
];
1936 auto *group1
= allocno1
->group ();
1937 auto *group2
= allocno2
->group ();
1938 if (!group1
->has_flexible_stride
|| !group2
->has_flexible_stride
)
1941 int pref
= strided_polarity_pref (allocno1
, allocno2
);
1945 for (auto *group
: { group1
, group2
})
1946 for (auto &allocno
: group
->allocnos ())
1947 for (unsigned int j
= 0; j
< ARRAY_SIZE (links
); ++j
)
1948 if (bitmap_set_bit (visited_edges
, allocno
.id
* 4 + j
))
1949 worklist
.safe_push (allocno
.id
* 4 + j
);
1951 if (group1
->strided_polarity
)
1952 group2
->strided_polarity
= group1
->strided_polarity
* pref
;
1953 else if (group1
->strided_polarity
)
1954 group2
->strided_polarity
= group1
->strided_polarity
* pref
;
1957 group1
->strided_polarity
= 1;
1958 group2
->strided_polarity
= pref
;
1963 // Now look for edges between allocnos in multi-register groups where:
1965 // - the two groups have a relative strided polarity preference (as above).
1967 // - one group (G1) has a free choice between consecutive and strided
1970 // - the other group (G2) must use consecutive allocations.
1972 // Update G1's individual preference for strided or consecutive allocations
1973 // based on G2. If the previous loop chose a polarity for G1, work out
1974 // whether it is better for polarity 1 or -1 to correspond to consecutive
1976 int consecutive_pref
= 0;
1977 for (unsigned int i
= m_allocnos
.length (); i
-- > 0; )
1979 auto *allocno1
= m_allocnos
[i
];
1980 for (auto link
: links
)
1982 auto ai2
= allocno1
->*link
;
1983 if (ai2
== INVALID_ALLOCNO
)
1986 auto *allocno2
= m_allocnos
[ai2
];
1987 auto *group1
= allocno1
->group ();
1988 auto *group2
= allocno2
->group ();
1989 if (group1
->has_flexible_stride
== group2
->has_flexible_stride
)
1992 int pref
= strided_polarity_pref (allocno1
, allocno2
);
1996 auto *group
= (group1
->has_flexible_stride
? group1
: group2
);
1997 consecutive_pref
+= group
->strided_polarity
* pref
;
1998 group
->consecutive_pref
+= pref
;
2002 // If it doesn't matter whether polarity 1 or -1 corresponds to consecutive
2003 // allocation, arbitrarily pick 1.
2004 if (consecutive_pref
== 0)
2005 consecutive_pref
= 1;
2007 // Record which multi-register groups should use strided allocations.
2008 // Clear out the temporary edges.
2009 for (unsigned int ai
= 0; ai
< m_allocnos
.length (); ++ai
)
2011 auto *allocno
= m_allocnos
[ai
];
2012 allocno
->chain_prev
= INVALID_ALLOCNO
;
2013 allocno
->chain_next
= INVALID_ALLOCNO
;
2015 if (allocno
->offset
!= 0)
2018 auto *group
= allocno
->group ();
2019 if (!group
->has_flexible_stride
)
2022 bool make_strided
= (group
->strided_polarity
2023 ? (consecutive_pref
* group
->strided_polarity
) < 0
2024 : group
->consecutive_pref
< 0);
2025 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2026 fprintf (dump_file
, "Allocno [%d:%d]: strided polarity %d,"
2027 " consecutive pref %d, %s\n",
2028 allocno
->id
, allocno
->id
+ group
->size
- 1,
2029 group
->strided_polarity
, group
->consecutive_pref
,
2030 make_strided
? "making strided" : "keeping consecutive");
2034 // 2-register groups have a stride of 8 FPRs and must start in
2035 // registers matching the mask 0x17. 4-register groups have a stride
2036 // of 4 FPRs and must start in registers matching the mask 0x13.
2037 group
->stride
= group
->size
== 2 ? 8 : 4;
2038 gcc_checking_assert (group
->fpr_candidates
2039 == (group
->size
== 2 ? 0x55555555 : 0x11111111));
2040 group
->fpr_candidates
= (group
->size
== 2 ? 0xff00ff : 0xf000f);
2044 // Compare the allocnos at *ALLOCNO1_PTR and *ALLOCNO2_PTR and return a <=>
2045 // result that puts allocnos in order of increasing FIELD.
2046 template<unsigned int early_ra::allocno_info::*field
>
2048 early_ra::cmp_increasing (const void *allocno1_ptr
, const void *allocno2_ptr
)
2050 auto *allocno1
= *(allocno_info
*const *) allocno1_ptr
;
2051 auto *allocno2
= *(allocno_info
*const *) allocno2_ptr
;
2053 if (allocno1
->*field
!= allocno2
->*field
)
2054 return allocno1
->*field
< allocno2
->*field
? -1 : 1;
2055 return (allocno1
->id
< allocno2
->id
? -1
2056 : allocno1
->id
== allocno2
->id
? 0 : 1);
2059 // Return true if we should consider chaining ALLOCNO1 onto the head
2060 // of ALLOCNO2. This is just a local test of the two allocnos; it doesn't
2061 // guarantee that chaining them would give a self-consistent system.
2063 early_ra::is_chain_candidate (allocno_info
*allocno1
, allocno_info
*allocno2
)
2065 if (allocno1
->equiv_allocno
!= INVALID_ALLOCNO
)
2066 allocno1
= m_allocnos
[allocno1
->equiv_allocno
];
2068 if (allocno2
->start_point
>= allocno1
->end_point
2069 && allocno2
->equiv_allocno
!= allocno1
->id
)
2072 if (allocno2
->is_strong_copy_dest
)
2074 if (!allocno1
->is_strong_copy_src
2075 || allocno1
->copy_dest
!= allocno2
->id
)
2078 else if (allocno2
->is_copy_dest
)
2080 if (allocno1
->copy_dest
!= allocno2
->id
)
2083 else if (allocno1
->is_earlyclobbered
)
2085 if (allocno1
->end_point
== allocno2
->start_point
+ 1)
2092 // We're trying to chain allocno ALLOCNO1 to a later allocno.
2093 // Rate how good a choice ALLOCNO2 would be, with higher being better.
2095 early_ra::rate_chain (allocno_info
*allocno1
, allocno_info
*allocno2
)
2098 if (allocno2
->is_strong_copy_dest
)
2100 else if (allocno2
->is_copy_dest
)
2103 // Prefer well-aligned matches.
2104 auto *group1
= allocno1
->group ();
2105 auto *group2
= allocno2
->group ();
2106 if (group1
->stride
== 1 && group2
->stride
== 1)
2108 unsigned int min_size
= std::min (group1
->color_rep ()->size
,
2109 group2
->color_rep ()->size
);
2110 if ((group1
->color_rep_offset
+ allocno1
->offset
) % min_size
2111 == (group2
->color_rep_offset
+ allocno2
->offset
) % min_size
)
2119 // Sort the chain_candidate_infos at ARG1 and ARG2 in order of decreasing
2122 early_ra::cmp_chain_candidates (const void *arg1
, const void *arg2
)
2124 auto &candidate1
= *(const chain_candidate_info
*) arg1
;
2125 auto &candidate2
= *(const chain_candidate_info
*) arg2
;
2126 if (candidate1
.score
!= candidate2
.score
)
2127 return candidate1
.score
> candidate2
.score
? -1 : 1;
2129 // Prefer to increase the gap between uses of the allocated register,
2130 // to give the scheduler more freedom.
2131 auto *allocno1
= candidate1
.allocno
;
2132 auto *allocno2
= candidate2
.allocno
;
2133 if (allocno1
->start_point
!= allocno2
->start_point
)
2134 return allocno1
->start_point
< allocno2
->start_point
? -1 : 1;
2136 if (allocno1
!= allocno2
)
2137 return allocno1
->id
< allocno2
->id
? -1 : 1;
2142 // Join the chains of allocnos that start at HEADI1 and HEADI2.
2143 // HEADI1 is either empty or a single allocno.
2145 early_ra::chain_allocnos (unsigned int &headi1
, unsigned int &headi2
)
2147 if (headi1
== INVALID_ALLOCNO
)
2149 else if (headi2
== INVALID_ALLOCNO
)
2153 auto *head1
= m_allocnos
[headi1
];
2154 auto *head2
= m_allocnos
[headi2
];
2155 gcc_checking_assert (head1
->chain_next
== INVALID_ALLOCNO
2156 && head1
->chain_prev
== INVALID_ALLOCNO
2157 && head2
->chain_prev
== INVALID_ALLOCNO
);
2159 if (head1
->equiv_allocno
!= INVALID_ALLOCNO
2160 && m_allocnos
[head1
->equiv_allocno
]->copy_dest
== headi2
)
2162 head1
->is_copy_dest
= head2
->is_copy_dest
;
2163 head1
->is_strong_copy_dest
= head2
->is_strong_copy_dest
;
2164 m_allocnos
[head1
->equiv_allocno
]->copy_dest
= headi1
;
2166 head1
->chain_next
= headi2
;
2167 head2
->chain_prev
= headi1
;
2173 // Set the color representative of ALLOCNO's group to REP, such that ALLOCNO
2174 // ends being at allocno offset REP_OFFSET from the start of REP.
2176 early_ra::set_single_color_rep (allocno_info
*allocno
, allocno_group_info
*rep
,
2177 unsigned int rep_offset
)
2179 auto *group
= allocno
->group ();
2180 if (group
->m_color_rep
== rep
)
2183 group
->m_color_rep
= rep
;
2184 gcc_checking_assert (multiple_p (group
->stride
, rep
->stride
));
2185 unsigned int factor
= group
->stride
/ rep
->stride
;
2186 gcc_checking_assert (rep_offset
>= allocno
->offset
* factor
);
2187 group
->color_rep_offset
= rep_offset
- allocno
->offset
* factor
;
2188 rep
->fpr_size
= std::max (rep
->fpr_size
, group
->fpr_size
);
2189 rep
->fpr_candidates
&= (group
->fpr_candidates
2190 >> (group
->color_rep_offset
* rep
->stride
));
2193 // REP1 and REP2 are color representatives. Change REP1's color representative
2194 // to REP2, with REP1 starting at allocno offset REP2_OFFSET into REP2.
2196 early_ra::set_color_rep (allocno_group_info
*rep1
, allocno_group_info
*rep2
,
2197 unsigned int rep2_offset
)
2199 gcc_checking_assert (rep1
!= rep2
2200 && rep2
->m_color_rep
== rep2
2201 && multiple_p (rep1
->stride
, rep2
->stride
));
2203 auto heads1
= rep1
->chain_heads ();
2204 auto heads2
= rep2
->chain_heads ();
2205 for (unsigned int i1
= 0; i1
< heads1
.size (); ++i1
)
2206 if (heads1
[i1
] != INVALID_ALLOCNO
)
2208 unsigned int i2
= rep2_offset
+ i1
* rep1
->stride
/ rep2
->stride
;
2209 if (heads2
[i2
] == INVALID_ALLOCNO
)
2210 heads2
[i2
] = heads1
[i1
];
2212 gcc_checking_assert (heads2
[i2
] == heads1
[i1
]);
2213 set_single_color_rep (m_allocnos
[heads1
[i1
]], rep2
, i2
);
2217 // Try to chain ALLOCNO1 to the head of the chain starting at ALLOCNO2.
2218 // Return true on success.
2220 early_ra::try_to_chain_allocnos (allocno_info
*allocno1
,
2221 allocno_info
*allocno2
)
2223 auto *group1
= allocno1
->group ()->color_rep ();
2224 auto *group2
= allocno2
->group ()->color_rep ();
2226 // Avoid trying to tie different subgroups of the same group. This can
2227 // happen if the parts of a register are defined and used piecemeal.
2228 if (group1
== group2
)
2231 // The stride (in FPRs) between allocnos of each color representative.
2232 auto fpr_stride1
= group1
->stride
;
2233 auto fpr_stride2
= group2
->stride
;
2235 // The offset (in FPRs) of each allocno group from its color representative.
2236 auto fpr_offset1
= allocno1
->group ()->color_rep_offset
* fpr_stride1
;
2237 auto fpr_offset2
= allocno2
->group ()->color_rep_offset
* fpr_stride2
;
2239 // The offset (in FPRs) of each allocno from its color representative.
2240 fpr_offset1
+= allocno1
->offset
* allocno1
->group ()->stride
;
2241 fpr_offset2
+= allocno2
->offset
* allocno2
->group ()->stride
;
2243 // The FPR overlap is in multiples of the larger stride.
2244 auto max_fpr_stride
= std::max (fpr_stride1
, fpr_stride2
);
2245 auto min_fpr_offset
= std::min (fpr_offset1
, fpr_offset2
);
2246 auto fpr_overlap_offset
= ROUND_DOWN (min_fpr_offset
, max_fpr_stride
);
2248 // The offset (in FPRs) of the start of the overlapping region from
2249 // each color representative.
2250 fpr_offset1
-= fpr_overlap_offset
;
2251 fpr_offset2
-= fpr_overlap_offset
;
2253 // The number of FPRs in each color representative after the start
2254 // of the overlapping region.
2255 auto fpr_after1
= (group1
->size
- 1) * fpr_stride1
- fpr_offset1
;
2256 auto fpr_after2
= (group2
->size
- 1) * fpr_stride2
- fpr_offset2
;
2258 auto min_fpr_after
= std::min (fpr_after1
, fpr_after2
);
2260 // The number of overlapping allocnos.
2261 auto allocno_overlap_size
= min_fpr_after
/ max_fpr_stride
+ 1;
2263 // The offset (in allocnos) of the overlapping region from the start
2264 // of each color representative.
2265 auto allocno_offset1
= fpr_offset1
/ fpr_stride1
;
2266 auto allocno_offset2
= fpr_offset2
/ fpr_stride2
;
2268 // The stride (in allocnos) between overlapping allocnos.
2269 auto allocno_stride1
= max_fpr_stride
/ fpr_stride1
;
2270 auto allocno_stride2
= max_fpr_stride
/ fpr_stride2
;
2272 // Reject combinations that are impossible to allocate.
2273 auto fprs1
= group1
->fpr_candidates
;
2274 auto fprs2
= group2
->fpr_candidates
;
2275 if (fpr_offset1
> fpr_offset2
)
2276 fprs2
>>= (fpr_offset1
- fpr_offset2
);
2278 fprs1
>>= (fpr_offset2
- fpr_offset1
);
2279 if ((fprs1
& fprs2
) == 0)
2281 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2282 fprintf (dump_file
, " - cannot chain %d->%d, no FPRs in common"
2283 " (%08x@%d and %08x@%d)\n", allocno1
->id
, allocno2
->id
,
2284 group1
->fpr_candidates
, fpr_offset1
,
2285 group2
->fpr_candidates
, fpr_offset2
);
2289 // Check whether the chain can be formed.
2290 auto heads1
= group1
->chain_heads ();
2291 auto heads2
= group2
->chain_heads ();
2292 for (unsigned int i
= 0; i
< allocno_overlap_size
; ++i
)
2294 auto headi1
= heads1
[allocno_offset1
+ i
* allocno_stride1
];
2295 auto headi2
= heads2
[allocno_offset2
+ i
* allocno_stride2
];
2296 if (headi1
!= INVALID_ALLOCNO
&& headi2
!= INVALID_ALLOCNO
)
2298 auto *head1
= m_allocnos
[headi1
];
2299 auto *head2
= m_allocnos
[headi2
];
2300 if (head1
->chain_next
!= INVALID_ALLOCNO
)
2302 if (head2
->equiv_allocno
!= head1
->id
2303 && head1
->end_point
<= head2
->start_point
)
2308 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2310 fprintf (dump_file
, " - chaining allocnos [");
2311 for (unsigned int i
= 0; i
< allocno_overlap_size
; ++i
)
2312 fprintf (dump_file
, "%s%d", i
? "," : "",
2313 heads1
[allocno_offset1
+ i
* allocno_stride1
]);
2314 fprintf (dump_file
, "] and [");
2315 for (unsigned int i
= 0; i
< allocno_overlap_size
; ++i
)
2316 fprintf (dump_file
, "%s%d", i
? "," : "",
2317 heads2
[allocno_offset2
+ i
* allocno_stride2
]);
2318 fprintf (dump_file
, "]\n");
2321 // Chain the allocnos, updating the chain heads.
2322 for (unsigned int i
= 0; i
< allocno_overlap_size
; ++i
)
2323 chain_allocnos (heads1
[allocno_offset1
+ i
* allocno_stride1
],
2324 heads2
[allocno_offset2
+ i
* allocno_stride2
]);
2326 // Pick a color representative for the merged groups.
2327 allocno_group_info
*new_rep
;
2328 if (allocno_offset1
== 0
2329 && group1
->size
== allocno_overlap_size
* allocno_stride1
2330 && multiple_p (fpr_stride1
, fpr_stride2
))
2332 // The first group fits within the second.
2333 set_color_rep (group1
, group2
, allocno_offset2
);
2336 else if (allocno_offset2
== 0
2337 && group2
->size
== allocno_overlap_size
* allocno_stride2
2338 && multiple_p (fpr_stride2
, fpr_stride1
))
2340 // The second group fits within the first.
2341 set_color_rep (group2
, group1
, allocno_offset1
);
2346 // We need a new group that is big enough to span both groups.
2347 // The new group always has an FPR stride of 1.
2348 auto max_fpr_offset
= std::max (fpr_offset1
, fpr_offset2
);
2349 auto max_fpr_after
= std::max (fpr_after1
, fpr_after2
);
2350 auto new_size
= max_fpr_offset
+ max_fpr_after
+ 1;
2351 new_rep
= create_allocno_group (INVALID_REGNUM
, new_size
);
2353 set_color_rep (group1
, new_rep
, max_fpr_offset
- fpr_offset1
);
2354 set_color_rep (group2
, new_rep
, max_fpr_offset
- fpr_offset2
);
2357 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2359 fprintf (dump_file
, " - new frontier [");
2360 auto new_heads
= new_rep
->chain_heads ();
2361 for (unsigned int i
= 0; i
< new_heads
.size (); ++i
)
2364 fprintf (dump_file
, ",");
2365 if (new_heads
[i
] == INVALID_ALLOCNO
)
2366 fprintf (dump_file
, "-");
2368 fprintf (dump_file
, "%d", new_heads
[i
]);
2370 fprintf (dump_file
, "]\n");
2376 // Create a color_info for color representative GROUP.
2378 early_ra::create_color (allocno_group_info
*group
)
2380 auto *color
= region_allocate
<color_info
> ();
2381 color
->id
= m_colors
.length ();
2382 color
->hard_regno
= FIRST_PSEUDO_REGISTER
;
2383 color
->group
= group
;
2385 gcc_checking_assert (group
->m_color_rep
== group
);
2386 group
->has_color
= true;
2387 group
->color
= m_colors
.length ();
2389 m_colors
.safe_push (color
);
2392 // Form allocnos into chains. Create colors for each resulting clique.
2394 early_ra::form_chains ()
2396 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2397 fprintf (dump_file
, "\nChaining allocnos:\n");
2399 // Perform (modified) interval graph coloring. First sort by
2400 // increasing start point.
2401 m_sorted_allocnos
.reserve (m_allocnos
.length ());
2402 m_sorted_allocnos
.splice (m_allocnos
);
2403 m_sorted_allocnos
.qsort (cmp_increasing
<&allocno_info::start_point
>);
2405 // During this phase, color representatives are only correct for
2406 // unprocessed allocno groups (where the color representative is
2407 // the group itself) and for groups that contain a current chain head.
2408 unsigned int ti
= 0;
2409 auto_vec
<chain_candidate_info
> candidates
;
2410 for (unsigned int hi
= 0; hi
< m_sorted_allocnos
.length (); ++hi
)
2412 auto *allocno1
= m_sorted_allocnos
[hi
];
2413 if (allocno1
->chain_next
!= INVALID_ALLOCNO
)
2416 // Record conflicts with direct uses for FPR hard registers.
2417 auto *group1
= allocno1
->group ();
2418 for (unsigned int fpr
= allocno1
->offset
; fpr
< 32; ++fpr
)
2419 if (fpr_conflicts_with_allocno_p (fpr
, allocno1
))
2420 group1
->fpr_candidates
&= ~(1U << (fpr
- allocno1
->offset
));
2422 // Record conflicts due to partially call-clobbered registers.
2423 // (Full clobbers are handled by the previous loop.)
2424 for (unsigned int abi_id
= 0; abi_id
< NUM_ABI_IDS
; ++abi_id
)
2425 if (call_in_range_p (abi_id
, allocno1
->start_point
,
2426 allocno1
->end_point
))
2428 auto fprs
= partial_fpr_clobbers (abi_id
, group1
->fpr_size
);
2429 group1
->fpr_candidates
&= ~fprs
>> allocno1
->offset
;
2432 // Find earlier allocnos (in processing order) that could be chained
2434 candidates
.truncate (0);
2435 for (unsigned int sci
= ti
; sci
< hi
; ++sci
)
2437 auto *allocno2
= m_sorted_allocnos
[sci
];
2438 if (allocno2
->chain_prev
== INVALID_ALLOCNO
)
2440 if (!is_chain_candidate (allocno1
, allocno2
))
2442 chain_candidate_info candidate
;
2443 candidate
.allocno
= allocno2
;
2444 candidate
.score
= rate_chain (allocno1
, allocno2
);
2445 candidates
.safe_push (candidate
);
2451 // Sort the candidates by decreasing score.
2452 candidates
.qsort (cmp_chain_candidates
);
2453 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2455 fprintf (dump_file
, " Chain candidates for %d:", allocno1
->id
);
2456 for (auto &candidate
: candidates
)
2457 fprintf (dump_file
, " %d(%d)", candidate
.allocno
->id
,
2459 fprintf (dump_file
, "\n");
2462 // Pick the first candidate that works.
2463 for (auto &candidate
: candidates
)
2464 if (try_to_chain_allocnos (allocno1
, candidate
.allocno
))
2468 // Create color_infos for each group. Make sure that each group's
2469 // color representative is up to date.
2470 for (unsigned int hi
= m_sorted_allocnos
.length (); hi
-- > 0; )
2472 auto *allocno
= m_sorted_allocnos
[hi
];
2473 auto *rep
= allocno
->group ()->color_rep ();
2478 auto heads
= rep
->chain_heads ();
2479 for (unsigned int i
= 0; i
< heads
.size (); ++i
)
2481 unsigned int ai
= heads
[i
];
2482 while (ai
!= INVALID_ALLOCNO
)
2484 allocno
= m_allocnos
[ai
];
2485 set_single_color_rep (allocno
, rep
, i
* rep
->stride
);
2486 ai
= allocno
->chain_next
;
2492 // Return true if the given FPR (starting at 0) conflicts with allocno
2495 early_ra::fpr_conflicts_with_allocno_p (unsigned int fpr
,
2496 allocno_info
*allocno
)
2498 auto &ranges
= m_fpr_ranges
[fpr
];
2499 unsigned int start_i
= 0;
2500 unsigned int end_i
= ranges
.length ();
2501 while (start_i
< end_i
)
2503 unsigned int mid_i
= (start_i
+ end_i
) / 2;
2504 auto &range
= ranges
[mid_i
];
2505 if (allocno
->end_point
> range
.start_point
)
2506 start_i
= mid_i
+ 1;
2507 else if (allocno
->start_point
< range
.end_point
)
2511 if (range
.allocno
!= allocno
->id
)
2513 // The FPR is equivalent to ALLOCNO for this particular range.
2514 // See whether ALLOCNO conflicts with a neighboring range.
2516 && ranges
[mid_i
- 1].start_point
>= allocno
->end_point
)
2518 if (mid_i
+ 1 < ranges
.length ()
2519 && ranges
[mid_i
+ 1].end_point
<= allocno
->start_point
)
2527 // Return true if there is a call with ABI identifier ABI_ID in the inclusive
2528 // program point range [START_POINT, END_POINT].
2530 early_ra::call_in_range_p (unsigned int abi_id
, unsigned int start_point
,
2531 unsigned int end_point
)
2533 auto &points
= m_call_points
[abi_id
];
2534 unsigned int start_i
= 0;
2535 unsigned int end_i
= points
.length ();
2536 while (start_i
< end_i
)
2538 unsigned int mid_i
= (start_i
+ end_i
) / 2;
2539 auto point
= points
[mid_i
];
2540 if (end_point
> point
)
2541 start_i
= mid_i
+ 1;
2542 else if (start_point
< point
)
2550 // Return the set of FPRs for which a value of size SIZE will be clobbered
2551 // by a call to a function with ABI identifier ABI_ID, but would not be
2552 // for some smaller size. The set therefore excludes FPRs that are
2553 // fully-clobbered, like V0 in the base ABI.
2555 early_ra::partial_fpr_clobbers (unsigned int abi_id
, fpr_size_info size
)
2557 auto &abi
= function_abis
[abi_id
];
2558 unsigned int clobbers
= 0;
2559 machine_mode mode
= (size
== FPR_D
? V8QImode
2560 : size
== FPR_Q
? V16QImode
: VNx16QImode
);
2561 for (unsigned int regno
= V0_REGNUM
; regno
<= V31_REGNUM
; ++regno
)
2562 if (!abi
.clobbers_full_reg_p (regno
)
2563 && abi
.clobbers_reg_p (mode
, regno
))
2564 clobbers
|= 1U << (regno
- V0_REGNUM
);
2568 // Process copies between pseudo registers and hard registers and update
2569 // the FPR preferences for the associated colors.
2571 early_ra::process_copies ()
2573 for (auto ©
: m_allocno_copies
)
2575 auto *allocno
= m_allocnos
[copy
.allocno
];
2576 auto *group
= allocno
->group ();
2577 auto offset
= group
->color_rep_offset
+ allocno
->offset
;
2578 if (offset
> copy
.fpr
)
2581 unsigned int fpr
= copy
.fpr
- offset
;
2582 auto *color
= m_colors
[group
->color_rep ()->color
];
2583 color
->fpr_preferences
[fpr
] = MIN (color
->fpr_preferences
[fpr
]
2584 + copy
.weight
, 127);
2588 // Compare the colors at *COLOR1_PTR and *COLOR2_PTR and return a <=>
2589 // result that puts colors in order of decreasing size.
2591 early_ra::cmp_decreasing_size (const void *color1_ptr
, const void *color2_ptr
)
2593 auto *color1
= *(color_info
*const *) color1_ptr
;
2594 auto *color2
= *(color_info
*const *) color2_ptr
;
2596 if (color1
->group
->size
!= color2
->group
->size
)
2597 return color1
->group
->size
> color2
->group
->size
? -1 : 1;
2598 return (color1
->id
< color2
->id
? -1
2599 : color1
->id
== color2
->id
? 0 : 1);
2602 // Allocate a register to each color. If we run out of registers,
2603 // give up on doing a full allocation of the FPR-based pseudos in the
2606 early_ra::allocate_colors ()
2608 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2609 fprintf (dump_file
, "\nAllocating registers:\n");
2611 auto_vec
<color_info
*> sorted_colors
;
2612 sorted_colors
.safe_splice (m_colors
);
2613 sorted_colors
.qsort (cmp_decreasing_size
);
2615 for (unsigned int i
= 0; i
< 32; ++i
)
2616 if (!crtl
->abi
->clobbers_full_reg_p (V0_REGNUM
+ i
))
2617 m_call_preserved_fprs
|= 1U << i
;
2619 for (auto *color
: sorted_colors
)
2621 unsigned int candidates
= color
->group
->fpr_candidates
;
2622 for (unsigned int i
= 0; i
< color
->group
->size
; ++i
)
2623 candidates
&= ~(m_allocated_fprs
>> i
);
2624 unsigned int best
= INVALID_REGNUM
;
2625 int best_weight
= 0;
2626 for (unsigned int fpr
= 0; fpr
<= 32U - color
->group
->size
; ++fpr
)
2628 if ((candidates
& (1U << fpr
)) == 0)
2630 int weight
= color
->fpr_preferences
[fpr
];
2631 // Account for registers that the current function must preserve.
2632 for (unsigned int i
= 0; i
< color
->group
->size
; ++i
)
2633 if (m_call_preserved_fprs
& (1U << (fpr
+ i
)))
2635 if (best
== INVALID_REGNUM
|| best_weight
<= weight
)
2638 best_weight
= weight
;
2642 if (best
== INVALID_REGNUM
)
2644 m_allocation_successful
= false;
2648 color
->hard_regno
= best
+ V0_REGNUM
;
2649 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2650 fprintf (dump_file
, " Allocating [v%d:v%d] to color %d\n",
2651 best
, best
+ color
->group
->size
- 1, color
->id
);
2652 m_allocated_fprs
|= ((1U << color
->group
->size
) - 1) << best
;
2656 // See if ALLOCNO ends a subchain of single registers that can be split
2657 // off without affecting the rest of the chain, and without introducing
2658 // any moves. Return the start of the chain if so (which might be ALLOCNO
2659 // itself), otherwise return null.
2660 early_ra::allocno_info
*
2661 early_ra::find_independent_subchain (allocno_info
*allocno
)
2663 // Make sure ALLOCNO ends a natural subchain.
2664 if (auto *next_allocno
= chain_next (allocno
))
2665 if (next_allocno
->start_point
+ 1 >= allocno
->end_point
)
2668 // Check the allocnos in the purported subchain and find the other end.
2671 auto *group
= allocno
->group ();
2672 if (group
->m_color_rep
== group
)
2674 if (group
->size
!= 1)
2677 auto *prev_allocno
= chain_prev (allocno
);
2678 if (!prev_allocno
|| allocno
->start_point
+ 1 < prev_allocno
->end_point
)
2681 allocno
= prev_allocno
;
2685 // Search the colors starting at index FIRST_COLOR whose FPRs do not belong
2686 // to FPR_CONFLICTS. Return the first such color that has no group. If all
2687 // such colors have groups, instead return the color with the latest
2688 // (smallest) start point.
2689 early_ra::color_info
*
2690 early_ra::find_oldest_color (unsigned int first_color
,
2691 unsigned int fpr_conflicts
)
2693 color_info
*best
= nullptr;
2694 unsigned int best_start_point
= ~0U;
2695 for (unsigned int ci
= first_color
; ci
< m_colors
.length (); ++ci
)
2697 auto *color
= m_colors
[ci
];
2698 if (fpr_conflicts
& (1U << (color
->hard_regno
- V0_REGNUM
)))
2702 auto chain_head
= color
->group
->chain_heads ()[0];
2703 auto start_point
= m_allocnos
[chain_head
]->start_point
;
2704 if (!best
|| best_start_point
> start_point
)
2707 best_start_point
= start_point
;
2713 // If there are some spare FPRs that can be reused without introducing saves,
2714 // restores, or moves, use them to "broaden" the allocation, in order to give
2715 // the scheduler more freedom. This is particularly useful for forming LDPs
2718 early_ra::broaden_colors ()
2720 // Create dummy colors for every leftover FPR that can be used cheaply.
2721 unsigned int first_color
= m_colors
.length ();
2722 for (unsigned int fpr
= 0; fpr
< 32; ++fpr
)
2723 if (((m_allocated_fprs
| m_call_preserved_fprs
) & (1U << fpr
)) == 0)
2725 auto *color
= region_allocate
<color_info
> ();
2726 color
->id
= m_colors
.length ();
2727 color
->hard_regno
= V0_REGNUM
+ fpr
;
2728 color
->group
= nullptr;
2729 m_colors
.safe_push (color
);
2732 // Exit early if there are no spare FPRs.
2733 if (first_color
== m_colors
.length ())
2736 // Go through the allocnos in order, seeing if there is a subchain of
2737 // single-FPR allocnos that can be split off from the containingg clique.
2738 // Allocate such subchains to the new colors on an oldest-first basis.
2739 for (auto *allocno
: m_sorted_allocnos
)
2740 if (auto *start_allocno
= find_independent_subchain (allocno
))
2742 unsigned int fpr_conflicts
= 0;
2743 auto *member
= allocno
;
2746 fpr_conflicts
|= ~member
->group ()->fpr_candidates
;
2747 if (member
== start_allocno
)
2749 member
= m_allocnos
[member
->chain_prev
];
2752 auto *color
= find_oldest_color (first_color
, fpr_conflicts
);
2758 auto *group
= allocno
->group ();
2759 color
->group
= group
;
2760 group
->color
= color
->id
;
2761 group
->chain_heads ()[0] = INVALID_ALLOCNO
;
2765 auto chain_head
= color
->group
->chain_heads ()[0];
2766 auto start_point
= m_allocnos
[chain_head
]->start_point
;
2767 if (start_point
>= allocno
->end_point
)
2768 // Allocating to COLOR isn't viable, and it was the best
2769 // option available.
2772 auto *next_allocno
= chain_next (allocno
);
2773 if (!next_allocno
|| next_allocno
->start_point
<= start_point
)
2774 // The current allocation gives at least as much scheduling
2775 // freedom as COLOR would.
2779 // Unlink the chain.
2780 if (auto *next_allocno
= chain_next (allocno
))
2781 next_allocno
->chain_prev
= start_allocno
->chain_prev
;
2782 if (auto *prev_allocno
= chain_prev (start_allocno
))
2783 prev_allocno
->chain_next
= allocno
->chain_next
;
2785 // Make the subchain use COLOR.
2786 allocno
->chain_next
= color
->group
->chain_heads ()[0];
2787 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2788 fprintf (dump_file
, "Moving to optional color %d (register %s):",
2789 color
->id
, reg_names
[color
->hard_regno
]);
2792 auto *group
= allocno
->group ();
2793 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2794 fprintf (dump_file
, " r%d", group
->regno
);
2795 group
->m_color_rep
= color
->group
;
2796 group
->color_rep_offset
= 0;
2797 if (allocno
== start_allocno
)
2799 allocno
= m_allocnos
[allocno
->chain_prev
];
2801 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2802 fprintf (dump_file
, "\n");
2803 color
->group
->chain_heads ()[0] = start_allocno
->id
;
2807 // Record the final choice of hard register for each allocno.
2809 early_ra::finalize_allocation ()
2811 for (auto *allocno
: m_allocnos
)
2813 auto *group
= allocno
->group ();
2814 auto *rep
= group
->color_rep ();
2815 auto rep_regno
= m_colors
[rep
->color
]->hard_regno
;
2816 auto group_regno
= rep_regno
+ group
->color_rep_offset
;
2817 allocno
->hard_regno
= group_regno
+ allocno
->offset
* group
->stride
;
2821 // Replace any allocno references in REFS with the allocated register.
2823 early_ra::replace_regs (df_ref refs
)
2825 bool changed
= false;
2826 for (df_ref ref
= refs
; ref
; ref
= DF_REF_NEXT_LOC (ref
))
2828 auto range
= get_allocno_subgroup (DF_REF_REG (ref
));
2832 auto new_regno
= range
.allocno (0)->hard_regno
;
2833 *DF_REF_LOC (ref
) = gen_rtx_REG (GET_MODE (DF_REF_REG (ref
)), new_regno
);
2839 // Try to make INSN match its FPR-related constraints. If this needs
2840 // a source operand (SRC) to be copied to a destination operand (DEST)
2841 // before INSN, add the associated (DEST, SRC) pairs to MOVES.
2843 // Return -1 on failure, otherwise return a ?/!-style reject count.
2844 // The reject count doesn't model the moves, just the internal alternative
2847 early_ra::try_enforce_constraints (rtx_insn
*insn
,
2848 vec
<std::pair
<int, int>> &moves
)
2850 if (!constrain_operands (0, get_preferred_alternatives (insn
)))
2853 // Pick the alternative with the lowest cost.
2855 auto alts
= get_preferred_alternatives (insn
);
2856 for (int altno
= 0; altno
< recog_data
.n_alternatives
; ++altno
)
2858 if (!(alts
& ALTERNATIVE_BIT (altno
)))
2861 auto *op_alt
= &recog_op_alt
[altno
* recog_data
.n_operands
];
2862 if (!likely_alternative_match_p (op_alt
))
2865 auto_vec
<std::pair
<int, int>, 4> new_moves
;
2866 for (int opno
= 0; opno
< recog_data
.n_operands
; ++opno
)
2868 rtx op
= recog_data
.operand
[opno
];
2870 && FP_REGNUM_P (REGNO (op
))
2871 && op_alt
[opno
].matched
>= 0)
2873 rtx old_src
= recog_data
.operand
[op_alt
[opno
].matched
];
2874 if (!operands_match_p (op
, old_src
))
2876 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
2879 rtx other
= recog_data
.operand
[i
];
2880 if (reg_overlap_mentioned_p (op
, other
))
2888 new_moves
.safe_push ({ opno
, op_alt
[opno
].matched
});
2892 int cost
= count_rejects (op_alt
) + new_moves
.length () * 7;
2893 if (best
< 0 || cost
< best
)
2897 moves
.safe_splice (new_moves
);
2903 // Make INSN matches its FPR-related constraints.
2905 early_ra::enforce_constraints (rtx_insn
*insn
)
2907 extract_insn (insn
);
2908 preprocess_constraints (insn
);
2910 // First try with the operands they are.
2911 auto_vec
<std::pair
<int, int>, 4> moves
;
2912 int cost
= try_enforce_constraints (insn
, moves
);
2914 // Next try taking advantage of commutativity.
2915 for (int opno
= 0; opno
< recog_data
.n_operands
- 1; ++opno
)
2916 if (recog_data
.constraints
[opno
][0] == '%')
2918 std::swap (*recog_data
.operand_loc
[opno
],
2919 *recog_data
.operand_loc
[opno
+ 1]);
2920 std::swap (recog_data
.operand
[opno
],
2921 recog_data
.operand
[opno
+ 1]);
2922 auto_vec
<std::pair
<int, int>, 4> swapped_moves
;
2923 int swapped_cost
= try_enforce_constraints (insn
, swapped_moves
);
2924 if (swapped_cost
>= 0 && (cost
< 0 || swapped_cost
< cost
))
2926 cost
= swapped_cost
;
2928 moves
.safe_splice (swapped_moves
);
2932 std::swap (*recog_data
.operand_loc
[opno
],
2933 *recog_data
.operand_loc
[opno
+ 1]);
2934 std::swap (recog_data
.operand
[opno
],
2935 recog_data
.operand
[opno
+ 1]);
2939 // The allocation should ensure that there is at least one valid combination.
2940 // It's too late to back out now if not.
2941 gcc_assert (cost
>= 0);
2942 for (int i
= 0; i
< recog_data
.n_dups
; ++i
)
2944 int dup_of
= recog_data
.dup_num
[i
];
2945 rtx new_op
= *recog_data
.operand_loc
[dup_of
];
2946 if (new_op
!= recog_data
.operand
[dup_of
])
2947 *recog_data
.dup_loc
[i
] = copy_rtx (new_op
);
2949 for (auto move
: moves
)
2951 int dest_opno
= move
.first
;
2952 int src_opno
= move
.second
;
2953 rtx dest
= recog_data
.operand
[dest_opno
];
2954 rtx old_src
= recog_data
.operand
[src_opno
];
2955 rtx new_src
= lowpart_subreg (GET_MODE (old_src
), dest
, GET_MODE (dest
));
2956 emit_insn_before (gen_move_insn (new_src
, old_src
), insn
);
2957 *recog_data
.operand_loc
[src_opno
] = new_src
;
2961 // See whether INSN is an instruction that operates on multi-register vectors,
2962 // and if we have decided to make it use strided rather than consecutive
2963 // accesses. Update the pattern and return true if so.
2965 early_ra::maybe_convert_to_strided_access (rtx_insn
*insn
)
2967 if (!NONJUMP_INSN_P (insn
) || recog_memoized (insn
) < 0)
2970 auto stride_type
= get_attr_stride_type (insn
);
2971 rtx pat
= PATTERN (insn
);
2973 if (stride_type
== STRIDE_TYPE_LUTI_CONSECUTIVE
2974 || stride_type
== STRIDE_TYPE_LD1_CONSECUTIVE
)
2975 op
= SET_DEST (pat
);
2976 else if (stride_type
== STRIDE_TYPE_ST1_CONSECUTIVE
)
2977 op
= XVECEXP (SET_SRC (pat
), 0, 1);
2981 auto range
= get_allocno_subgroup (op
);
2982 if (!range
|| range
.group
->stride
== 1)
2985 gcc_assert (range
.start
== 0 && range
.count
== range
.group
->size
);
2986 auto elt_mode
= GET_MODE_INNER (GET_MODE (op
));
2987 auto single_mode
= aarch64_full_sve_mode (elt_mode
).require ();
2988 auto_vec
<rtx
, 4> regs
;
2989 for (unsigned int i
= 0; i
< range
.count
; ++i
)
2990 regs
.quick_push (gen_rtx_REG (single_mode
, range
.allocno (i
)->hard_regno
));
2992 extract_insn (insn
);
2993 if (stride_type
== STRIDE_TYPE_LD1_CONSECUTIVE
)
2995 auto unspec
= XINT (SET_SRC (pat
), 1);
2996 if (range
.count
== 2)
2997 pat
= gen_aarch64_strided2 (unspec
, GET_MODE (op
), regs
[0], regs
[1],
2998 recog_data
.operand
[1],
2999 recog_data
.operand
[2]);
3001 pat
= gen_aarch64_strided4 (unspec
, GET_MODE (op
),
3002 regs
[0], regs
[1], regs
[2], regs
[3],
3003 recog_data
.operand
[1],
3004 recog_data
.operand
[2]);
3006 else if (stride_type
== STRIDE_TYPE_ST1_CONSECUTIVE
)
3008 auto unspec
= XINT (SET_SRC (pat
), 1);
3009 if (range
.count
== 2)
3010 pat
= gen_aarch64_strided2 (unspec
, GET_MODE (op
),
3011 recog_data
.operand
[0],
3012 recog_data
.operand
[2], regs
[0], regs
[1]);
3014 pat
= gen_aarch64_strided4 (unspec
, GET_MODE (op
),
3015 recog_data
.operand
[0],
3016 recog_data
.operand
[2],
3017 regs
[0], regs
[1], regs
[2], regs
[3]);
3018 // Ensure correct sharing for the source memory.
3020 // ??? Why doesn't the generator get this right?
3021 XVECEXP (SET_SRC (pat
), 0, XVECLEN (SET_SRC (pat
), 0) - 1)
3022 = *recog_data
.dup_loc
[0];
3024 else if (stride_type
== STRIDE_TYPE_LUTI_CONSECUTIVE
)
3026 auto bits
= INTVAL (XVECEXP (SET_SRC (pat
), 0, 4));
3027 if (range
.count
== 2)
3028 pat
= gen_aarch64_sme_lut_strided2 (bits
, single_mode
,
3030 recog_data
.operand
[1],
3031 recog_data
.operand
[2]);
3033 pat
= gen_aarch64_sme_lut_strided4 (bits
, single_mode
,
3034 regs
[0], regs
[1], regs
[2], regs
[3],
3035 recog_data
.operand
[1],
3036 recog_data
.operand
[2]);
3040 PATTERN (insn
) = pat
;
3041 INSN_CODE (insn
) = -1;
3042 df_insn_rescan (insn
);
3046 // We've successfully allocated the current region. Apply the allocation
3047 // to the instructions.
3049 early_ra::apply_allocation ()
3052 for (auto insn_range
: m_insn_ranges
)
3053 for (rtx_insn
*insn
= insn_range
.first
;
3054 insn
!= insn_range
.second
;
3057 prev
= PREV_INSN (insn
);
3061 bool changed
= maybe_convert_to_strided_access (insn
);
3062 changed
|= replace_regs (DF_INSN_DEFS (insn
));
3063 changed
|= replace_regs (DF_INSN_USES (insn
));
3064 if (changed
&& NONDEBUG_INSN_P (insn
))
3066 if (GET_CODE (PATTERN (insn
)) != USE
3067 && GET_CODE (PATTERN (insn
)) != CLOBBER
3068 && !is_move_set (PATTERN (insn
)))
3069 enforce_constraints (insn
);
3071 // A REG_EQUIV note establishes an equivalence throughout
3072 // the function, but here we're reusing hard registers for
3073 // multiple pseudo registers. We also no longer need REG_EQUIV
3074 // notes that record potential spill locations, since we've
3075 // allocated the pseudo register without spilling.
3076 rtx
*ptr
= ®_NOTES (insn
);
3078 if (REG_NOTE_KIND (*ptr
) == REG_EQUIV
)
3079 *ptr
= XEXP (*ptr
, 1);
3081 ptr
= &XEXP (*ptr
, 1);
3083 changed
|= replace_regs (DF_INSN_EQ_USES (insn
));
3085 df_insn_rescan (insn
);
3088 for (auto *insn
: m_dead_insns
)
// Try to allocate the current region.  Update the instructions if successful.
//
// NOTE(review): the extraction dropped most statements between the dump
// guards in this routine; the pipeline calls and dump helpers below are
// reconstructed and MUST be verified against upstream aarch64-early-ra.cc.
void
early_ra::process_region ()
{
  // Reset per-allocno chain information left over from any previous
  // region or allocation attempt.
  for (auto *allocno : m_allocnos)
    allocno->chain_next = INVALID_ALLOCNO;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      dump_fpr_ranges ();
      dump_copies ();
      dump_allocnos ();
    }

  find_strided_accesses ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_allocnos ();

  form_chains ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_chains ();

  process_copies ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_colors ();

  allocate_colors ();
  // Punt if a spill-free allocation could not be found; the real
  // register allocator will handle the region instead.
  if (!m_allocation_successful)
    return;

  broaden_colors ();
  finalize_allocation ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nAllocation successful\nFinal allocation:\n");
      dump_allocnos ();
      dump_fprs ();
    }

  apply_allocation ();
}
// Return true if INSN would become dead if we successfully allocate the
// current region.
bool
early_ra::is_dead_insn (rtx_insn *insn)
{
  // Only simple single-SET insns can be proved dead here.
  rtx set = single_set (insn);
  if (!set)
    return false;

  // The destination must be something this pass tracks (an FPR
  // pseudo subgroup); otherwise assume the value is needed.
  rtx dest = SET_DEST (set);
  auto dest_range = get_allocno_subgroup (dest);
  if (!dest_range)
    return false;

  // The pass scans backwards, so "live" here means "read later".
  // The insn is only dead if no allocno of the destination is live.
  for (auto &allocno : dest_range.allocnos ())
    if (bitmap_bit_p (m_live_allocnos, allocno.id))
      return false;

  // Even an unread SET must be kept if it has other side effects.
  if (side_effects_p (set))
    return false;

  return true;
}
// Build up information about block BB.  IS_ISOLATED is true if the
// block is not part of a larger region.
//
// NOTE(review): reconstructed from a lossy extraction; dropped lines
// (declarations, braces, one region-completeness condition) were filled
// in — verify against upstream aarch64-early-ra.cc.
void
early_ra::process_block (basic_block bb, bool is_isolated)
{
  // The scan is backwards: start from the block end and the registers
  // that are live on exit, and work towards the head.
  m_current_point += 1;
  m_current_bb_point = m_current_point;

  // Process live-out FPRs.
  bitmap live_out = df_get_live_out (bb);
  for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
    if (bitmap_bit_p (live_out, regno))
      record_fpr_use (regno);

  // Process live-out allocnos.  We don't track individual FPR liveness
  // across block boundaries, so we have to assume that the whole pseudo
  // register is live.
  unsigned int regno;
  bitmap_iterator bi;
  EXECUTE_IF_AND_IN_BITMAP (df_get_live_out (bb), m_fpr_pseudos,
			    FIRST_PSEUDO_REGISTER, regno, bi)
    {
      auto range = get_allocno_subgroup (regno_reg_rtx[regno]);
      for (auto &allocno : range.allocnos ())
	record_allocno_use (&allocno);
    }

  m_current_point += 1;

  record_artificial_refs (0);

  // is_first controls the "Block N:" dump header; start_insn marks the
  // (exclusive) top of the insn range that will be recorded for the
  // current region.
  bool is_first = true;
  rtx_insn *start_insn = BB_END (bb);
  rtx_insn *insn;
  FOR_BB_INSNS_REVERSE (bb, insn)
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;

      // CLOBBERs are used to prevent pseudos from being upwards exposed.
      // We can ignore them if allocation is successful.
      if (GET_CODE (PATTERN (insn)) == CLOBBER)
	{
	  if (get_allocno_subgroup (XEXP (PATTERN (insn), 0)))
	    m_dead_insns.safe_push (insn);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (is_first)
	    fprintf (dump_file, "\nBlock %d:\n", bb->index);
	  fprintf (dump_file, "%6d:", m_current_point);
	  // Stream a slim one-line form of the insn to the dump file.
	  pretty_printer rtl_slim_pp;
	  rtl_slim_pp.buffer->stream = dump_file;
	  print_insn (&rtl_slim_pp, insn, 1);
	  pp_flush (&rtl_slim_pp);
	  fprintf (dump_file, "\n");
	}
      is_first = false;

      if (is_dead_insn (insn))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "%14s -- dead\n", "");
	  m_dead_insns.safe_push (insn);
	}
      else
	{
	  record_insn_refs (insn);
	  rtx pat = PATTERN (insn);
	  // Moves are recorded as copies (for chain formation); any
	  // other pattern contributes its operand constraints.
	  if (is_move_set (pat))
	    record_copy (SET_DEST (pat), SET_SRC (pat), true);
	  else
	    {
	      extract_insn (insn);
	      record_constraints (insn);
	    }
	}

      // See whether we have a complete region, with no remaining live
      // allocnos.  If so, allocate it now and start a fresh region above
      // this insn.
      if (is_isolated
	  && bitmap_empty_p (m_live_allocnos)
	  && m_live_fprs == 0
	  && m_allocation_successful
	  && !m_allocnos.is_empty ())
	{
	  rtx_insn *prev_insn = PREV_INSN (insn);
	  m_insn_ranges.safe_push ({ start_insn, prev_insn });
	  process_region ();
	  start_new_region ();
	  is_first = true;
	  start_insn = prev_insn;
	}
    }
  m_insn_ranges.safe_push ({ start_insn, BB_HEAD (bb) });

  record_artificial_refs (DF_REF_AT_TOP);

  // Process live-in FPRs.  Only FPRs that are still live at this point
  // (tracked in the m_live_fprs mask) need a def recorded.
  bitmap live_in = df_get_live_in (bb);
  for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
    if (bitmap_bit_p (live_in, regno)
	&& (m_live_fprs & (1U << (regno - V0_REGNUM))))
      record_fpr_def (regno);

  // Process live-in allocnos.
  EXECUTE_IF_AND_IN_BITMAP (live_in, m_fpr_pseudos,
			    FIRST_PSEUDO_REGISTER, regno, bi)
    {
      auto range = get_allocno_subgroup (regno_reg_rtx[regno]);
      for (auto &allocno : range.allocnos ())
	if (bitmap_bit_p (m_live_allocnos, allocno.id))
	  record_allocno_def (&allocno);
    }

  m_current_point += 1;

  // Liveness state must not leak into the next block processed.
  bitmap_clear (m_live_allocnos);
  m_live_fprs = 0;
}
// Divide the function into regions, such that there no edges into or out
// of the region have live "FPR pseudos".
//
// NOTE(review): reconstructed from a lossy extraction; dropped lines
// (braces, `else` lines, ei_next calls, the final process_region call)
// were filled in — verify against upstream aarch64-early-ra.cc.
void
early_ra::process_blocks ()
{
  auto_sbitmap visited (last_basic_block_for_fn (m_fn));
  auto_sbitmap fpr_pseudos_live_out (last_basic_block_for_fn (m_fn));
  auto_sbitmap fpr_pseudos_live_in (last_basic_block_for_fn (m_fn));

  bitmap_clear (visited);
  bitmap_clear (fpr_pseudos_live_out);
  bitmap_clear (fpr_pseudos_live_in);

  // Record which blocks have live FPR pseudos on entry and exit.
  basic_block bb;
  FOR_EACH_BB_FN (bb, m_fn)
    {
      if (bitmap_intersect_p (df_get_live_out (bb), m_fpr_pseudos))
	bitmap_set_bit (fpr_pseudos_live_out, bb->index);
      if (bitmap_intersect_p (df_get_live_in (bb), m_fpr_pseudos))
	bitmap_set_bit (fpr_pseudos_live_in, bb->index);
    }

  // A DFS worklist entry: an edge iterator to walk, plus the block (if
  // any) that should be appended to the region once its successor edges
  // have all been visited.  bb is null for predecessor-edge entries.
  struct stack_node { edge_iterator ei; basic_block bb; };

  auto_vec<stack_node, 32> stack;
  auto_vec<basic_block, 32> region;

  // Go through the function in reverse postorder and process the region
  // containing each block.
  unsigned int n_blocks = df_get_n_blocks (DF_FORWARD);
  int *order = df_get_postorder (DF_FORWARD);
  for (unsigned int bbi = 0; bbi < n_blocks; ++bbi)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (m_fn, order[bbi]);
      if (bb->index < NUM_FIXED_BLOCKS)
	continue;

      // Skip blocks already swept into an earlier block's region.
      if (!bitmap_set_bit (visited, bb->index))
	continue;

      // Process forward edges before backward edges (so push backward
      // edges first).  Build the region in an approximation of reverse
      // program order.
      if (bitmap_bit_p (fpr_pseudos_live_in, bb->index))
	stack.quick_push ({ ei_start (bb->preds), nullptr });
      if (bitmap_bit_p (fpr_pseudos_live_out, bb->index))
	stack.quick_push ({ ei_start (bb->succs), bb });
      else
	region.safe_push (bb);
      while (!stack.is_empty ())
	{
	  auto &node = stack.last ();
	  if (ei_end_p (node.ei))
	    {
	      // All edges of this entry walked; now the block (if this
	      // was a successor-edge entry) takes its place in the region.
	      if (node.bb)
		region.safe_push (node.bb);
	      stack.pop ();
	      continue;
	    }
	  edge e = ei_edge (node.ei);
	  ei_next (&node.ei);
	  if (node.bb)
	    {
	      // A forward edge from a node that has not yet been added
	      // to the region.  Follow it only if FPR pseudos are live
	      // across the edge (i.e. into the destination).
	      if (bitmap_bit_p (fpr_pseudos_live_in, e->dest->index)
		  && bitmap_set_bit (visited, e->dest->index))
		{
		  stack.safe_push ({ ei_start (e->dest->preds), nullptr });
		  if (bitmap_bit_p (fpr_pseudos_live_out, e->dest->index))
		    stack.safe_push ({ ei_start (e->dest->succs), e->dest });
		  else
		    region.safe_push (e->dest);
		}
	    }
	  else
	    {
	      // A backward edge from a node that has already been added
	      // to the region.  Pull in the source block if FPR pseudos
	      // are live out of it.
	      if (bitmap_bit_p (fpr_pseudos_live_out, e->src->index)
		  && bitmap_set_bit (visited, e->src->index))
		{
		  if (bitmap_bit_p (fpr_pseudos_live_in, e->src->index))
		    stack.safe_push ({ ei_start (e->src->preds), nullptr });
		  stack.safe_push ({ ei_start (e->src->succs), e->src });
		}
	    }
	}
      m_current_point = 2;
      start_new_region ();

      if (region.is_empty ())
	process_block (bb, true);
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "\nRegion (from %d):", bb->index);
	      for (unsigned int j = 0; j < region.length (); ++j)
		fprintf (dump_file, " %d", region[j]->index);
	      fprintf (dump_file, "\n");
	    }
	  for (unsigned int j = 0; j < region.length (); ++j)
	    {
	      basic_block bb = region[j];
	      // Only the ends of the region can be treated as isolated,
	      // and then only when no FPR pseudos are live across the
	      // corresponding region boundary.
	      bool is_isolated
		= ((j == 0 && !bitmap_bit_p (fpr_pseudos_live_out, bb->index))
		   || (j == region.length () - 1
		       && !bitmap_bit_p (fpr_pseudos_live_in, bb->index)));
	      process_block (bb, is_isolated);
	    }
	}
      region.truncate (0);

      // Allocate whatever remains of the current region.
      if (!m_allocnos.is_empty () && m_allocation_successful)
	process_region ();
    }
}
// Run the pass on the current function.
//
// NOTE(review): the extraction dropped the first and last statements of
// this routine; df_analyze/process_blocks were filled in from context —
// verify against upstream aarch64-early-ra.cc.
void
early_ra::execute ()
{
  df_analyze ();

  preprocess_insns ();
  propagate_pseudo_reg_info ();
  choose_fpr_pseudos ();
  // Nothing to do if no pseudo was selected for FPR allocation.
  if (bitmap_empty_p (m_fpr_pseudos))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_pseudo_regs ();

  process_blocks ();
  df_verify ();
}
// Pass-manager wrapper around the early_ra allocator.
class pass_early_ra : public rtl_opt_pass
{
public:
  pass_early_ra (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_early_ra, ctxt)
  {}

  // opt_pass methods:
  virtual bool gate (function *);
  virtual unsigned int execute (function *);
};
// Decide whether the pass should run at all for this compilation.
bool
pass_early_ra::gate (function *)
{
  // Require a vector ISA to be enabled.
  if (!TARGET_SIMD && !TARGET_SVE)
    return false;

  // -mearly-ra=none disables the pass entirely.
  if (aarch64_early_ra == AARCH64_EARLY_RA_NONE)
    return false;

  // -mearly-ra=strided restricts the pass to code that can use the
  // SME2 strided instructions.
  if (aarch64_early_ra == AARCH64_EARLY_RA_STRIDED
      && !TARGET_STREAMING_SME2)
    return false;

  return true;
}
// Run early register allocation on FN.  Always reports success (0):
// the allocator punts internally rather than failing the pass.
unsigned int
pass_early_ra::execute (function *fn)
{
  early_ra (fn).execute ();
  return 0;
}
// Create a new instance of the pass.  Called by the pass manager; the
// returned object is owned by the caller.
rtl_opt_pass *
make_pass_aarch64_early_ra (gcc::context *ctxt)
{
  return new pass_early_ra (ctxt);
}