1 // Early register allocation pass.
2 // Copyright (C) 2023 Free Software Foundation, Inc.
4 // This file is part of GCC.
6 // GCC is free software; you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation; either version 3, or (at your option) any later
11 // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 // WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 // You should have received a copy of the GNU General Public License
17 // along with GCC; see the file COPYING3. If not see
18 // <http://www.gnu.org/licenses/>.
20 // This pass implements a simple form of early register allocation.
21 // It is restricted to FP/SIMD registers, and it only allocates
22 // a region of FP/SIMD usage if it can do so without any spilling.
23 // It punts on anything too complicated, leaving it to the real
24 // register allocator.
26 // There are two main purposes:
28 // (1) The pass runs before scheduling. It therefore has a chance to
29 // bag a spill-free allocation, if there is one, before scheduling
30 // moves things around.
32 // (2) The pass can make use of strided register operations, such as the
33 // strided forms of LD1 and ST1 in SME2.
35 // The allocator works at the level of individual FPRs, rather than whole
36 // pseudo registers. It is mostly intended to help optimize ACLE code.
38 // The pass is very simplistic. There are many things that could be improved.
39 #define IN_TARGET_CODE 1
41 #define INCLUDE_ALGORITHM
42 #define INCLUDE_FUNCTIONAL
45 #include "coretypes.h"
50 #include "tree-pass.h"
54 #include "print-rtl.h"
55 #include "insn-attr.h"
56 #include "insn-opinit.h"
60 class simple_iterator
: public wrapper_iterator
<T
>
63 using wrapper_iterator
<T
>::wrapper_iterator
;
65 simple_iterator
&operator-- () { --this->m_contents
; return *this; }
66 simple_iterator
operator-- (int) { return this->m_contents
--; }
67 simple_iterator
&operator++ () { ++this->m_contents
; return *this; }
68 simple_iterator
operator++ (int) { return this->m_contents
++; }
71 using namespace rtl_ssa
;
74 const pass_data pass_data_early_ra
=
78 OPTGROUP_NONE
, // optinfo_flags
80 0, // properties_required
81 0, // properties_provided
82 0, // properties_destroyed
83 0, // todo_flags_start
84 TODO_df_finish
, // todo_flags_finish
87 using allocno_iterator
= simple_iterator
<unsigned int>;
89 // Class that represents one run of the pass.
93 early_ra (function
*fn
);
98 static_assert (MAX_RECOG_OPERANDS
<= 32, "Operand mask is 32 bits");
99 using operand_mask
= uint32_t;
101 // Points in the function are represented using "program points".
102 // The program points are allocated in reverse order, with smaller
103 // numbers indicating later points. These special values indicate
104 // the start and end of a region.
105 static constexpr unsigned int START_OF_REGION
= ~0U;
106 static constexpr unsigned int END_OF_REGION
= 0U;
108 // An invalid allocno index, used to represent no allocno.
109 static constexpr unsigned int INVALID_ALLOCNO
= ~0U;
111 // Enumerates the single FPR sizes that matter for register allocation.
112 // Anything smaller than 64 bits is treated as FPR_D.
120 // A live range for an FPR, containing program points [START_POINT,
121 // END_POINT]. If ALLOCNO is not INVALID_ALLOCNO, the FPR is known
122 // to be equal to ALLOCNO for the duration of the live range.
123 struct fpr_range_info
125 unsigned int start_point
;
126 unsigned int end_point
;
127 unsigned int allocno
;
130 // Flags used in pseudo_reg_info.
132 // Whether the pseudo register occurs in one instruction alternative that
133 // matches (respectively) V0-V7, V0-V15, V0-V31 or a non-FP register.
134 static constexpr unsigned int ALLOWS_FPR8
= 1U << 0;
135 static constexpr unsigned int ALLOWS_FPR16
= 1U << 1;
136 static constexpr unsigned int ALLOWS_FPR32
= 1U << 2;
137 static constexpr unsigned int ALLOWS_NONFPR
= 1U << 3;
139 // Likewise whether the register occurs in an instruction that requires
140 // the associated register type.
141 static constexpr unsigned int NEEDS_FPR8
= 1U << 4;
142 static constexpr unsigned int NEEDS_FPR16
= 1U << 5;
143 static constexpr unsigned int NEEDS_FPR32
= 1U << 6;
144 static constexpr unsigned int NEEDS_NONFPR
= 1U << 7;
146 // Whether the pseudo register is copied to or from a hard FP register.
147 static constexpr unsigned int HAS_FPR_COPY
= 1U << 8;
149 // Whether the pseudo register is copied to or from a hard non-FP register.
150 static constexpr unsigned int HAS_NONFPR_COPY
= 1U << 9;
152 // Whether the pseudo register is used as a multi-register vector operand
153 // to an instruction that supports strided accesses, and whether it is used
154 // as a multi-register vector operand in some other non-move instruction.
155 static constexpr unsigned int HAS_FLEXIBLE_STRIDE
= 1U << 10;
156 static constexpr unsigned int HAS_FIXED_STRIDE
= 1U << 11;
158 // Flags that should be propagated across moves between pseudo registers.
159 static constexpr unsigned int PSEUDO_COPY_FLAGS
= ~(HAS_FLEXIBLE_STRIDE
162 // Information about a copy between two registers.
165 // The two registers, in order.
166 unsigned int regnos
[2];
168 // Index I gives the index of the next reg_copy_info involving REGNOS[I],
170 unsigned int next_copies
[2];
173 // Information about a pseudo register.
174 struct pseudo_reg_info
176 // Flags describing how the register is used, defined above.
177 unsigned int flags
: 16;
179 // The mode of the pseudo register, cached for convenience.
180 machine_mode mode
: 16;
182 // The index of the first copy, or 0 if none.
183 unsigned int first_copy
;
186 // Information about a group of allocnos that have a fixed offset
187 // relative to each other. The allocnos in each group must be allocated
190 // Allocnos that can share the same hard register are eventually
191 // chained together. These chains represent edges on a graph of
192 // allocnos, such that two allocnos joined by an edge use the same FPR.
193 // These chains are formed between individual allocnos rather than
194 // whole groups, although the system is required to be self-consistent.
195 // Each clique in the graph has at least one "full-width" allocno group
196 // that has one allocno for every FPR that needs to be allocated to
199 // One group of allocnos is chosen as the "color representative" of
200 // each clique in the graph. This group will be a full-width group.
202 struct allocno_group_info
204 array_slice
<unsigned int> chain_heads ();
205 array_slice
<allocno_info
> allocnos ();
206 allocno_group_info
*color_rep ();
207 allocno_info
*allocno (unsigned int);
209 // The color representative of the containing clique.
210 allocno_group_info
*m_color_rep
;
212 // The pseudo register associated with this allocno, or INVALID_REGNUM
216 // The offset of the first allocno (and thus this group) from the start
218 unsigned int color_rep_offset
: 8;
220 // The number of allocnos in the group, and thus the number of FPRs
221 // that need to be allocated.
222 unsigned int size
: 8;
224 // The gap between FPRs in the group. This is normally 1, but can be
225 // higher if we've decided to use strided multi-register accesses.
226 unsigned int stride
: 4;
228 // Used temporarily while deciding which allocnos should have non-unit
229 // strides; see find_strided_accesses for details.
230 int consecutive_pref
: 4;
231 int strided_polarity
: 2;
233 // The largest size of FPR needed by references to the allocno group.
234 fpr_size_info fpr_size
: 2;
236 // True if all non-move accesses can be converted to strided form.
237 unsigned int has_flexible_stride
: 1;
239 // True if we've assigned a color index to this group.
240 unsigned int has_color
: 1;
242 // The mask of FPRs that would make valid choices for the first allocno,
243 // taking the requirements of all the allocnos in the group into account.
244 unsigned int fpr_candidates
;
246 // The index of the color that has been assigned to the containing clique.
250 // Represents a single FPR-sized quantity that needs to be allocated.
251 // Each allocno is identified by index (for compactness).
253 // Quantities that span multiple FPRs are assigned groups of consecutive
254 // allocnos. Quantities that occupy a single FPR are assigned their own
258 allocno_group_info
*group ();
260 bool is_equiv_to (unsigned int);
262 // The allocno's unique identifier.
265 // The offset of this allocno into the containing group.
266 unsigned int offset
: 8;
268 // The number of allocnos in the containing group.
269 unsigned int group_size
: 8;
271 // If the allocno has an affinity with at least one hard register
272 // (so that choosing that hard register would avoid a copy), this is
273 // the number of one such hard register, otherwise it is
274 // FIRST_PSEUDO_REGISTER.
275 unsigned int hard_regno
: 8;
277 // Set to 1 if the allocno has a single definition or 2 if it has more.
278 unsigned int num_defs
: 2;
280 // True if, at START_POINT, another allocno is copied to this one.
281 // See callers of record_copy for what counts as a copy.
282 unsigned int is_copy_dest
: 1;
284 // True if, at START_POINT, another allocno is copied to this one,
285 // and if the allocnos at both ends of the copy chain have an affinity
286 // with the same hard register.
287 unsigned int is_strong_copy_dest
: 1;
289 // True if, at END_POINT, this allocno is copied to another one,
290 // and both allocnos have an affinity with the same hard register.
291 unsigned int is_strong_copy_src
: 1;
293 // True if the allocno is subject to an earlyclobber at END_POINT,
294 // so that it cannot be tied to the destination of the instruction.
295 unsigned int is_earlyclobbered
: 1;
297 // True if this allocno is known to be equivalent to related_allocno
298 // for the whole of this allocno's lifetime.
299 unsigned int is_equiv
: 1;
301 // The inclusive range of program points spanned by the allocno.
302 // START_POINT >= END_POINT.
303 unsigned int start_point
;
304 unsigned int end_point
;
306 // If, at END_POINT, this allocno is copied to another allocno, this
307 // is the index of that allocno, otherwise it is INVALID_ALLOCNO.
308 // See callers of record_copy for what counts as a copy.
309 unsigned int copy_dest
;
311 // If this field is not INVALID_ALLOCNO, it indicates one of two things:
313 // - if is_equiv, this allocno is equivalent to related_allocno for
314 // the whole of this allocno's lifetime.
316 // - if !is_equiv, this allocno's live range is a subrange of
317 // related_allocno's and we have committed to making this allocno
318 // share whatever register related_allocno uses.
319 unsigned int related_allocno
;
323 // The program point at which the allocno was last defined,
324 // or START_OF_REGION if none. This is only used temporarily
325 // while recording allocnos; after that, chain_next below is
327 unsigned int last_def_point
;
329 // The next chained allocno in program order (i.e. at lower program
330 // points), or INVALID_ALLOCNO if none.
331 unsigned int chain_next
;
336 // The program point before start_point at which the allocno was
337 // last used, or END_OF_REGION if none. This is only used temporarily
338 // while recording allocnos; after that, chain_prev below is used
340 unsigned int last_use_point
;
342 // The previous chained allocno in program order (i.e. at higher
343 // program points), or INVALID_ALLOCNO if none.
344 unsigned int chain_prev
;
348 // Information about a full allocno group or a subgroup of it.
349 // The subgroup can be empty to indicate "none".
350 struct allocno_subgroup
352 array_slice
<allocno_info
> allocnos ();
353 allocno_info
*allocno (unsigned int);
355 // True if a subgroup is present.
356 operator bool () const { return count
; }
358 // The containing group.
359 allocno_group_info
*group
;
361 // The offset of the subgroup from the start of GROUP.
364 // The number of allocnos in the subgroup.
368 // Represents information about a copy between an allocno and an FPR.
369 // This establishes an affinity between the allocno and the FPR.
370 struct allocno_copy_info
372 // The allocno involved in the copy.
373 unsigned int allocno
;
375 // The FPR involved in the copy, relative to V0_REGNUM.
376 unsigned int fpr
: 16;
378 // A measure of how strong the affinity between the allocno and FPR is.
379 unsigned int weight
: 16;
382 // Information about a possible allocno chain.
383 struct chain_candidate_info
385 // The candidate target allocno.
386 allocno_info
*allocno
;
388 // A rating of the candidate (higher is better).
392 // Information about an allocno color.
395 // The color's unique identifier.
398 // The allocated hard register, when known.
399 unsigned int hard_regno
;
401 // The clique's representative group.
402 allocno_group_info
*group
;
404 // The number of FPR preferences recorded in fpr_preferences.
405 unsigned int num_fpr_preferences
;
407 // Weights in favor of choosing each FPR as the first register for GROUP.
408 int8_t fpr_preferences
[32];
411 template<typename T
, typename
... Ts
>
412 T
*region_allocate (Ts
...);
414 allocno_info
*chain_prev (allocno_info
*);
415 allocno_info
*chain_next (allocno_info
*);
417 void dump_pseudo_regs ();
418 void dump_fpr_ranges ();
420 void dump_allocnos ();
423 iterator_range
<allocno_iterator
> get_group_allocnos (unsigned int);
425 void preprocess_move (rtx
, rtx
);
426 void process_pseudo_reg_constraints (rtx_insn
*);
427 void preprocess_insns ();
429 int fpr_preference (unsigned int);
430 void propagate_pseudo_reg_info ();
432 void choose_fpr_pseudos ();
434 void start_new_region ();
436 allocno_group_info
*create_allocno_group (unsigned int, unsigned int);
437 allocno_subgroup
get_allocno_subgroup (rtx
);
438 void record_fpr_use (unsigned int);
439 void record_fpr_def (unsigned int);
440 void record_allocno_use (allocno_info
*);
441 void record_allocno_def (allocno_info
*);
442 allocno_info
*find_related_start (allocno_info
*, allocno_info
*, bool);
443 void record_copy (rtx
, rtx
, bool = false);
444 void record_constraints (rtx_insn
*);
445 void record_artificial_refs (unsigned int);
446 void record_insn_refs (rtx_insn
*);
448 bool consider_strong_copy_src_chain (allocno_info
*);
449 int strided_polarity_pref (allocno_info
*, allocno_info
*);
450 void find_strided_accesses ();
452 template<unsigned int allocno_info::*field
>
453 static int cmp_increasing (const void *, const void *);
454 bool is_chain_candidate (allocno_info
*, allocno_info
*);
455 int rate_chain (allocno_info
*, allocno_info
*);
456 static int cmp_chain_candidates (const void *, const void *);
457 void chain_allocnos (unsigned int &, unsigned int &);
458 void merge_fpr_info (allocno_group_info
*, allocno_group_info
*,
460 void set_single_color_rep (allocno_info
*, allocno_group_info
*,
462 void set_color_rep (allocno_group_info
*, allocno_group_info
*,
464 bool try_to_chain_allocnos (allocno_info
*, allocno_info
*);
465 void create_color (allocno_group_info
*);
468 bool fpr_conflicts_with_allocno_p (unsigned int, allocno_info
*);
469 bool call_in_range_p (unsigned int, unsigned int, unsigned int);
470 unsigned int partial_fpr_clobbers (unsigned int, fpr_size_info
);
472 void process_copies ();
474 static int cmp_allocation_order (const void *, const void *);
475 void allocate_colors ();
476 allocno_info
*find_independent_subchain (allocno_info
*);
477 color_info
*find_oldest_color (unsigned int, unsigned int);
478 void broaden_colors ();
479 void finalize_allocation ();
481 bool replace_regs (df_ref
);
482 int try_enforce_constraints (rtx_insn
*, vec
<std::pair
<int, int>> &);
483 void enforce_constraints (rtx_insn
*);
484 bool maybe_convert_to_strided_access (rtx_insn
*);
485 void apply_allocation ();
487 void process_region ();
488 bool is_dead_insn (rtx_insn
*);
489 void process_block (basic_block
, bool);
490 void process_blocks ();
492 // ----------------------------------------------------------------------
494 // The function we're operating on.
497 // Information about each pseudo register, indexed by REGNO.
498 auto_vec
<pseudo_reg_info
> m_pseudo_regs
;
500 // All recorded register copies.
501 auto_vec
<reg_copy_info
> m_pseudo_reg_copies
;
503 // The set of pseudos that we've decided to allocate an FPR to.
504 auto_bitmap m_fpr_pseudos
;
506 // ----------------------------------------------------------------------
508 // An obstack for allocating information that is referenced by the member
510 obstack m_region_obstack
;
511 void *m_region_alloc_start
;
513 // ----------------------------------------------------------------------
515 // The basic block that we're currently processing.
516 basic_block m_current_bb
;
518 // The lowest-numbered program point in the current basic block.
519 unsigned int m_current_bb_point
;
521 // The program point that we're currently processing (described above).
522 unsigned int m_current_point
;
524 // The set of allocnos that are currently live.
525 auto_bitmap m_live_allocnos
;
527 // The set of FPRs that are currently live.
528 unsigned int m_live_fprs
;
530 // ----------------------------------------------------------------------
532 // A mask of the FPRs that have already been allocated.
533 unsigned int m_allocated_fprs
;
535 // A mask of the FPRs that must be at least partially preserved by the
537 unsigned int m_call_preserved_fprs
;
539 // True if we haven't yet failed to allocate the current region.
540 bool m_allocation_successful
;
542 // A map from pseudo registers to the first allocno in their associated
544 hash_map
<int_hash
<unsigned int, INVALID_REGNUM
>,
545 allocno_group_info
*> m_regno_to_group
;
547 // All recorded copies between allocnos and FPRs.
548 auto_vec
<allocno_copy_info
> m_allocno_copies
;
550 // All allocnos, by index.
551 auto_vec
<allocno_info
*> m_allocnos
;
553 // All allocnos, by increasing START_POINT.
554 auto_vec
<allocno_info
*> m_sorted_allocnos
;
556 // Allocnos for which is_shared is true.
557 auto_vec
<allocno_info
*> m_shared_allocnos
;
559 // All colors, by index.
560 auto_vec
<color_info
*> m_colors
;
562 // The instruction ranges that make up the current region,
563 // as half-open ranges [LAST, FIRST).
564 auto_vec
<std::pair
<rtx_insn
*, rtx_insn
*>> m_insn_ranges
;
566 // The live ranges of each FPR, in order of increasing program point.
567 auto_vec
<fpr_range_info
> m_fpr_ranges
[32];
569 // For each function call id, a list of program points at which a call
570 // to such a function is made. Each list is in order of increasing
572 auto_vec
<unsigned int> m_call_points
[NUM_ABI_IDS
];
574 // A list of instructions that can be removed if allocation succeeds.
575 auto_vec
<rtx_insn
*> m_dead_insns
;
578 // True if PAT is something that would typically be treated as a move.
580 is_move_set (rtx pat
)
582 if (GET_CODE (pat
) != SET
)
585 rtx dest
= SET_DEST (pat
);
587 dest
= SUBREG_REG (dest
);
588 if (!OBJECT_P (dest
))
591 rtx src
= SET_SRC (pat
);
593 src
= SUBREG_REG (src
);
594 if (!OBJECT_P (src
) && !CONSTANT_P (src
))
600 // Return true if operand OP is likely to match OP_ALT after register
603 likely_operand_match_p (const operand_alternative
&op_alt
, rtx op
)
605 // Empty constraints match everything.
606 const char *constraint
= op_alt
.constraint
;
607 if (constraint
[0] == 0 || constraint
[0] == ',')
612 char c
= *constraint
;
613 int len
= CONSTRAINT_LEN (c
, constraint
);
614 if (c
== 0 || c
== ',')
620 auto cn
= lookup_constraint (constraint
);
621 switch (get_constraint_type (cn
))
624 if (REG_P (op
) || SUBREG_P (op
))
629 case CT_SPECIAL_MEMORY
:
630 case CT_RELAXED_MEMORY
:
638 if (constraint_satisfied_p (op
, cn
))
646 if (op_alt
.matches
>= 0)
648 rtx other
= recog_data
.operand
[op_alt
.matches
];
649 if ((REG_P (other
) || SUBREG_P (other
))
650 && (REG_P (op
) || SUBREG_P (op
)))
656 // Return true if the operands of the current instruction are likely to
659 likely_alternative_match_p (const operand_alternative
*op_alt
)
661 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
662 if (!likely_operand_match_p (op_alt
[i
], recog_data
.operand
[i
]))
667 // Return the sum of how disparaged OP_ALT is.
669 count_rejects (const operand_alternative
*op_alt
)
672 for (int opno
= 0; opno
< recog_data
.n_operands
; ++opno
)
673 reject
+= op_alt
[opno
].reject
;
677 // Allocate a T from the region obstack.
678 template<typename T
, typename
... Ts
>
680 early_ra::region_allocate (Ts
... args
)
682 static_assert (std::is_trivially_destructible
<T
>::value
,
683 "destructor won't be called");
684 void *addr
= obstack_alloc (&m_region_obstack
, sizeof (T
));
685 return new (addr
) T (std::forward
<Ts
> (args
)...);
688 early_ra::early_ra (function
*fn
) : m_fn (fn
), m_live_fprs (0)
690 gcc_obstack_init (&m_region_obstack
);
691 m_region_alloc_start
= obstack_alloc (&m_region_obstack
, 0);
692 bitmap_tree_view (m_live_allocnos
);
695 early_ra::~early_ra ()
697 obstack_free (&m_region_obstack
, nullptr);
700 // Return an array that, for each allocno A in the group, contains the index
701 // of the allocno at the head of A's chain (that is, the one with the highest
702 // START_POINT). The index is INVALID_ALLOCNO if the chain is empty.
703 inline array_slice
<unsigned int>
704 early_ra::allocno_group_info::chain_heads ()
706 auto *start
= reinterpret_cast<unsigned int *> (this + 1);
707 return { start
, size
};
710 // Return the array of allocnos in the group.
711 inline array_slice
<early_ra::allocno_info
>
712 early_ra::allocno_group_info::allocnos ()
714 gcc_checking_assert (regno
!= INVALID_REGNUM
);
715 auto *chain_end
= reinterpret_cast<unsigned int *> (this + 1) + size
;
716 auto *allocno_start
= reinterpret_cast<allocno_info
*> (chain_end
);
717 return { allocno_start
, size
};
720 // Return the group's color representative.
721 inline early_ra::allocno_group_info
*
722 early_ra::allocno_group_info::color_rep ()
724 gcc_checking_assert (m_color_rep
->m_color_rep
== m_color_rep
);
728 // Return the group that contains the allocno.
729 inline early_ra::allocno_group_info
*
730 early_ra::allocno_info::group ()
732 auto *chain_end
= reinterpret_cast<unsigned int *> (this - offset
);
733 return reinterpret_cast<allocno_group_info
*> (chain_end
- group_size
) - 1;
736 // Return true if this allocno's live range is a subrange of related_allocno's
737 // and if we have committed to making this allocno share whatever register
738 // related_allocno uses.
740 early_ra::allocno_info::is_shared ()
742 return related_allocno
!= INVALID_ALLOCNO
&& !is_equiv
;
745 // Return true if this allocno is known to be equivalent to ALLOCNO.
747 early_ra::allocno_info::is_equiv_to (unsigned int allocno
)
749 return is_equiv
&& related_allocno
== allocno
;
752 // Return the allocnos in the subgroup.
753 inline array_slice
<early_ra::allocno_info
>
754 early_ra::allocno_subgroup::allocnos ()
758 return { &group
->allocnos ()[start
], count
};
761 // Return allocno I in the subgroup, with 0 being the first.
762 inline early_ra::allocno_info
*
763 early_ra::allocno_subgroup::allocno (unsigned int i
)
765 return &group
->allocnos ()[start
+ i
];
768 // Return the previous (earlier) allocno in ALLOCNO's chain, or null if none.
769 inline early_ra::allocno_info
*
770 early_ra::chain_prev (allocno_info
*allocno
)
772 if (allocno
->chain_prev
!= INVALID_ALLOCNO
)
773 return m_allocnos
[allocno
->chain_prev
];
777 // Return the next (later) allocno in ALLOCNO's chain, or null if none.
778 inline early_ra::allocno_info
*
779 early_ra::chain_next (allocno_info
*allocno
)
781 if (allocno
->chain_next
!= INVALID_ALLOCNO
)
782 return m_allocnos
[allocno
->chain_next
];
786 // Dump the information in m_pseudo_regs.
788 early_ra::dump_pseudo_regs ()
790 fprintf (dump_file
, "\nPseudos:\n");
791 fprintf (dump_file
, " %6s %6s %6s %6s %6s %6s %8s %s\n",
792 "Id", "FPR8", "FPR16", "FPR32", "NONFPR", "Stride",
793 "FPRness", "Copies");
794 pseudo_reg_info unused_reg
= {};
795 for (unsigned int regno
= FIRST_PSEUDO_REGISTER
;
796 regno
< m_pseudo_regs
.length (); ++regno
)
798 const auto ®
= m_pseudo_regs
[regno
];
799 if (memcmp (®
, &unused_reg
, sizeof (reg
)) == 0)
802 fprintf (dump_file
, " %6d %6s %6s %6s %6s %6s %8d", regno
,
803 reg
.flags
& NEEDS_FPR8
? "Req"
804 : reg
.flags
& ALLOWS_FPR8
? "OK" : "-",
805 reg
.flags
& NEEDS_FPR16
? "Req"
806 : reg
.flags
& ALLOWS_FPR16
? "OK" : "-",
807 reg
.flags
& NEEDS_FPR32
? "Req"
808 : reg
.flags
& ALLOWS_FPR32
? "OK" : "-",
809 reg
.flags
& NEEDS_NONFPR
? "Req"
810 : reg
.flags
& ALLOWS_NONFPR
? "OK" : "-",
811 ~reg
.flags
& HAS_FLEXIBLE_STRIDE
? "-"
812 : reg
.flags
& HAS_FIXED_STRIDE
? "Some" : "All",
813 fpr_preference (regno
));
814 if (reg
.flags
& HAS_FPR_COPY
)
815 fprintf (dump_file
, " FPR");
816 if (reg
.flags
& HAS_NONFPR_COPY
)
817 fprintf (dump_file
, " Non-FPR");
818 unsigned int copyi
= reg
.first_copy
;
821 const auto ©
= m_pseudo_reg_copies
[copyi
];
822 if (copy
.regnos
[0] == regno
)
824 fprintf (dump_file
, " r%d", copy
.regnos
[1]);
825 copyi
= copy
.next_copies
[0];
829 fprintf (dump_file
, " r%d", copy
.regnos
[0]);
830 copyi
= copy
.next_copies
[1];
833 fprintf (dump_file
, "\n");
837 // Dump the information in m_fpr_ranges.
839 early_ra::dump_fpr_ranges ()
841 fprintf (dump_file
, "\nFPR live ranges:\n");
842 for (unsigned int fpr
= 0; fpr
< 32; ++fpr
)
844 auto &intervals
= m_fpr_ranges
[fpr
];
845 if (intervals
.is_empty ())
848 fprintf (dump_file
, " %2d", fpr
);
849 for (unsigned int i
= 0; i
< intervals
.length (); ++i
)
851 auto &interval
= intervals
[i
];
852 if (i
&& (i
% 4) == 0)
853 fprintf (dump_file
, "\n ");
854 fprintf (dump_file
, " [ %6d %6d ]", interval
.start_point
,
857 fprintf (dump_file
, "\n");
861 // Dump the information in m_allocno_copies.
863 early_ra::dump_copies ()
865 fprintf (dump_file
, "\nCopies:\n");
866 fprintf (dump_file
, " %8s %3s %6s\n",
867 "Allocno", "FPR", "Weight");
868 for (const auto ©
: m_allocno_copies
)
869 fprintf (dump_file
, " %8d %3d %6d\n", copy
.allocno
,
870 copy
.fpr
, copy
.weight
);
873 // Dump the information in m_allocnos.
875 early_ra::dump_allocnos ()
877 char buffer
[sizeof ("r[:]") + 3 * 3 * sizeof (int) + 1];
878 fprintf (dump_file
, "\nAllocno groups:\n");
880 " %12s %12s %4s %6s %8s %s\n",
881 "Ids", "Regno", "Size", "Stride", "Cands", "Heads");
882 for (unsigned int ai
= 0; ai
< m_allocnos
.length (); ++ai
)
884 auto *allocno
= m_allocnos
[ai
];
885 if (allocno
->offset
!= 0)
887 auto *group
= allocno
->group ();
888 snprintf (buffer
, sizeof (buffer
), "[%d:%d]", allocno
->id
,
889 allocno
->id
+ group
->size
- 1);
890 fprintf (dump_file
, " %12s", buffer
);
891 snprintf (buffer
, sizeof (buffer
), "r%d[0:%d]", group
->regno
,
893 fprintf (dump_file
, " %12s %4s %6d %08x", buffer
,
894 group
->fpr_size
== FPR_D
? "D"
895 : group
->fpr_size
== FPR_Q
? "Q" : "Z",
897 group
->fpr_candidates
);
898 for (auto head
: group
->chain_heads ())
899 if (head
== INVALID_ALLOCNO
)
900 fprintf (dump_file
, " -");
902 fprintf (dump_file
, " %d", head
);
903 fprintf (dump_file
, "\n");
906 fprintf (dump_file
, "\nAllocno chains:\n");
907 fprintf (dump_file
, " %5s %12s %12s %6s %5s %5s %6s %5s\n",
908 "Id", "Regno", "Range ", "Src", "Dest", "Equiv", "Shared", "FPR");
909 for (unsigned int ai
= 0; ai
< m_allocnos
.length (); ++ai
)
911 auto *allocno
= m_allocnos
[ai
];
912 if (allocno
->chain_prev
!= INVALID_ALLOCNO
)
914 const char *prefix
= "=>";
917 auto *group
= allocno
->group ();
918 fprintf (dump_file
, " %2s", prefix
);
919 fprintf (dump_file
, " %5d", allocno
->id
);
920 snprintf (buffer
, sizeof (buffer
), "r%d[%d]", group
->regno
,
922 fprintf (dump_file
, " %12s", buffer
);
923 snprintf (buffer
, sizeof (buffer
), "[%d,%d]",
924 allocno
->start_point
, allocno
->end_point
);
925 fprintf (dump_file
, " %11s%s %6s", buffer
,
926 allocno
->is_earlyclobbered
? "*" : " ",
927 allocno
->is_strong_copy_dest
? "Strong"
928 : allocno
->is_copy_dest
? "Yes" : "-");
929 if (allocno
->copy_dest
== INVALID_ALLOCNO
)
930 fprintf (dump_file
, " %5s", "-");
932 fprintf (dump_file
, " %5d", allocno
->copy_dest
);
933 if (allocno
->is_equiv
)
934 fprintf (dump_file
, " %5d", allocno
->related_allocno
);
936 fprintf (dump_file
, " %5s", "-");
937 if (allocno
->is_shared ())
938 fprintf (dump_file
, " %6d", allocno
->related_allocno
);
940 fprintf (dump_file
, " %6s", "-");
941 if (allocno
->hard_regno
== FIRST_PSEUDO_REGISTER
)
942 fprintf (dump_file
, " %5s", "-");
944 fprintf (dump_file
, " %5s", reg_names
[allocno
->hard_regno
]);
945 fprintf (dump_file
, "\n");
946 if (allocno
->chain_next
== INVALID_ALLOCNO
)
948 allocno
= m_allocnos
[allocno
->chain_next
];
954 // Dump the information in m_colors.
956 early_ra::dump_colors ()
958 fprintf (dump_file
, "\nColors:\n");
959 for (unsigned int i
= 0; i
< m_colors
.length (); ++i
)
961 auto *color
= m_colors
[i
];
965 fprintf (dump_file
, " color %d:\n", i
);
966 fprintf (dump_file
, " chains:\n");
967 auto heads
= color
->group
->chain_heads ();
968 for (unsigned int i
= 0; i
< color
->group
->size
; ++i
)
970 fprintf (dump_file
, " %2d:", i
);
972 while (ai
!= INVALID_ALLOCNO
)
974 auto *allocno
= m_allocnos
[ai
];
975 fprintf (dump_file
, " r%d[%d]", allocno
->group ()->regno
,
977 ai
= allocno
->chain_next
;
979 fprintf (dump_file
, "\n");
981 fprintf (dump_file
, " FPR candidates:");
982 for (unsigned int fpr
= 0; fpr
< 32; ++fpr
)
983 fprintf (dump_file
, "%s%c", fpr
% 8 ? "" : " ",
984 color
->group
->fpr_candidates
& (1U << fpr
) ? 'Y' : '-');
985 fprintf (dump_file
, "\n");
986 fprintf (dump_file
, " FPR preferences:");
987 for (unsigned int fpr
= 0; fpr
< 32; ++fpr
)
988 if (color
->fpr_preferences
[fpr
])
989 fprintf (dump_file
, " %d(%d)", fpr
, color
->fpr_preferences
[fpr
]);
990 fprintf (dump_file
, "\n");
994 // Record any necessary information about a move from SRC to DEST.
996 early_ra::preprocess_move (rtx dest
, rtx src
)
999 dest
= SUBREG_REG (dest
);
1004 src
= SUBREG_REG (src
);
1008 // Sort the registers by increasing REGNO.
1009 rtx regs
[] = { dest
, src
};
1010 if (REGNO (dest
) > REGNO (src
))
1011 std::swap (regs
[0], regs
[1]);
1012 unsigned int regno0
= REGNO (regs
[0]);
1013 unsigned int regno1
= REGNO (regs
[1]);
1015 // Ignore moves between hard registers.
1016 if (HARD_REGISTER_NUM_P (regno1
))
1019 // For moves between hard registers and pseudos, just record the type
1020 // of hard register involved.
1021 auto ®1
= m_pseudo_regs
[regno1
];
1022 reg1
.mode
= GET_MODE (regs
[1]);
1023 if (HARD_REGISTER_NUM_P (regno0
))
1025 reg1
.flags
|= (FP_REGNUM_P (regno0
) ? HAS_FPR_COPY
: HAS_NONFPR_COPY
);
1029 // Record a move between two pseudo registers.
1030 auto ®0
= m_pseudo_regs
[regno0
];
1031 reg0
.mode
= GET_MODE (regs
[0]);
1034 copy
.regnos
[0] = regno0
;
1035 copy
.regnos
[1] = regno1
;
1036 copy
.next_copies
[0] = reg0
.first_copy
;
1037 copy
.next_copies
[1] = reg1
.first_copy
;
1039 reg0
.first_copy
= reg1
.first_copy
= m_pseudo_reg_copies
.length ();
1040 m_pseudo_reg_copies
.safe_push (copy
);
1043 // Return true if INSN has a multi-vector operand and if that operand
1044 // could be converted to strided form.
1046 is_stride_candidate (rtx_insn
*insn
)
1048 if (recog_memoized (insn
) < 0)
1051 auto stride_type
= get_attr_stride_type (insn
);
1052 return (stride_type
== STRIDE_TYPE_LUTI_CONSECUTIVE
1053 || stride_type
== STRIDE_TYPE_LD1_CONSECUTIVE
1054 || stride_type
== STRIDE_TYPE_ST1_CONSECUTIVE
);
1057 // Go through the constraints of INSN, which has already been extracted,
1058 // and record any relevant information about pseudo registers.
1060 early_ra::process_pseudo_reg_constraints (rtx_insn
*insn
)
1062 extract_insn (insn
);
1063 preprocess_constraints (insn
);
1065 // Flags that describe any multi-register vector operands.
1066 unsigned int insn_flags
= (is_stride_candidate (insn
)
1067 ? HAS_FLEXIBLE_STRIDE
1068 : HAS_FIXED_STRIDE
);
1070 auto alts
= get_preferred_alternatives (insn
);
1072 int operand_matches
[MAX_RECOG_OPERANDS
];
1073 unsigned int operand_flags
[MAX_RECOG_OPERANDS
];
1074 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
1076 operand_matches
[i
] = -1;
1077 operand_flags
[i
] = 0;
1080 // Extract information from the constraints, considering all plausible
1082 for (int altno
= 0; altno
< recog_data
.n_alternatives
; ++altno
)
1084 if (!(alts
& ALTERNATIVE_BIT (altno
)))
1087 auto *op_alt
= &recog_op_alt
[altno
* recog_data
.n_operands
];
1088 if (!likely_alternative_match_p (op_alt
))
1091 // Use SRC_OPNO's constraints to derive information about DEST_OPNO.
1092 auto record_operand
= [&](int src_opno
, int dest_opno
)
1094 int matches
= op_alt
[src_opno
].matches
;
1096 operand_matches
[dest_opno
] = matches
;
1098 auto cl
= alternative_class (op_alt
, src_opno
);
1101 if (reg_class_subset_p (cl
, FP_REGS
))
1102 operand_flags
[dest_opno
] |= ALLOWS_FPR32
;
1103 if (reg_class_subset_p (cl
, FP_LO_REGS
))
1104 operand_flags
[dest_opno
] |= ALLOWS_FPR16
;
1105 if (reg_class_subset_p (cl
, FP_LO8_REGS
))
1106 operand_flags
[dest_opno
] |= ALLOWS_FPR8
;
1107 if (!reg_classes_intersect_p (cl
, FP_REGS
))
1108 operand_flags
[dest_opno
] |= ALLOWS_NONFPR
;
1112 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
1114 record_operand (i
, i
);
1115 if (recog_data
.constraints
[i
][0] == '%')
1117 record_operand (i
, i
+ 1);
1118 record_operand (i
+ 1, i
);
1123 // Process the information we collected above.
1124 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
1126 rtx op
= recog_data
.operand
[i
];
1127 machine_mode orig_mode
= GET_MODE (op
);
1129 op
= SUBREG_REG (op
);
1131 // Record the accumulated information in m_pseudo_regs.
1132 if (REG_P (op
) && !HARD_REGISTER_P (op
))
1134 // The flags so far just describe what at least one alternative
1135 // would accept. Calculate the associated NEEDS_* information.
1136 auto flags
= operand_flags
[i
];
1137 if (!(flags
& ALLOWS_FPR32
) && (flags
& ALLOWS_NONFPR
))
1138 flags
|= NEEDS_NONFPR
;
1139 else if ((flags
& ALLOWS_FPR32
) && !(flags
& ALLOWS_NONFPR
))
1141 if (flags
& ALLOWS_FPR8
)
1142 flags
|= NEEDS_FPR8
;
1143 if (flags
& ALLOWS_FPR16
)
1144 flags
|= NEEDS_FPR16
;
1145 flags
|= NEEDS_FPR32
;
1148 // Look for multi-register vector operands.
1149 if (VECTOR_MODE_P (orig_mode
)
1150 && targetm
.hard_regno_mode_ok (V0_REGNUM
, orig_mode
)
1151 && hard_regno_nregs (V0_REGNUM
, orig_mode
) > 1)
1152 flags
|= insn_flags
;
1154 m_pseudo_regs
[REGNO (op
)].flags
|= flags
;
1155 m_pseudo_regs
[REGNO (op
)].mode
= GET_MODE (op
);
1158 // Treat matching constraints as equivalent to moves.
1159 if (operand_matches
[i
] >= 0)
1160 preprocess_move (recog_data
.operand
[operand_matches
[i
]], op
);
1164 // Make one pass through the instructions, collecting information that
1165 // will be needed later.
1167 early_ra::preprocess_insns ()
1169 m_pseudo_regs
.safe_grow_cleared (max_reg_num ());
1170 m_pseudo_reg_copies
.safe_push (reg_copy_info ());
1171 for (rtx_insn
*insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
1173 if (!NONDEBUG_INSN_P (insn
))
1176 if (GET_CODE (PATTERN (insn
)) == USE
1177 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
1180 rtx set
= single_set (insn
);
1181 if (set
&& is_move_set (set
))
1182 preprocess_move (SET_DEST (set
), SET_SRC (set
));
1184 process_pseudo_reg_constraints (insn
);
1188 // Return a signed integer that says (roughly) how strong an affinity
1189 // pseudo register REGNO has with FPRs. A positive value indicates
1190 // that we should try to allocate an FPR, a negative value indicates
1191 // that we shouldn't, and 0 indicates neutrality.
1193 early_ra::fpr_preference (unsigned int regno
)
1195 auto mode
= m_pseudo_regs
[regno
].mode
;
1196 auto flags
= m_pseudo_regs
[regno
].flags
;
1197 if (mode
== VOIDmode
|| !targetm
.hard_regno_mode_ok (V0_REGNUM
, mode
))
1199 else if (flags
& HAS_FLEXIBLE_STRIDE
)
1201 else if (flags
& NEEDS_FPR32
)
1203 else if (!(flags
& ALLOWS_FPR32
) && (flags
& ALLOWS_NONFPR
))
1205 else if ((flags
& HAS_FPR_COPY
) && !(flags
& HAS_NONFPR_COPY
))
1207 else if ((flags
& HAS_NONFPR_COPY
) && !(flags
& HAS_FPR_COPY
))
1213 // Propagate information about pseudo-registers along copy edges,
1214 // while doing so doesn't create conflicting FPR preferences.
1216 early_ra::propagate_pseudo_reg_info ()
1218 struct stack_entry
{ unsigned int regno
, copyi
; };
1220 auto_vec
<stack_entry
, 32> stack
;
1221 for (unsigned int i
= FIRST_PSEUDO_REGISTER
;
1222 i
< m_pseudo_regs
.length (); ++i
)
1224 auto start
= m_pseudo_regs
[i
].first_copy
;
1228 stack
.quick_push ({ i
, start
});
1229 while (!stack
.is_empty ())
1231 auto entry
= stack
.pop ();
1232 auto ©
= m_pseudo_reg_copies
[entry
.copyi
];
1233 auto src_regno
= entry
.regno
;
1234 auto dest_regno
= (src_regno
== copy
.regnos
[1]
1237 auto next_copyi
= (src_regno
== copy
.regnos
[1]
1238 ? copy
.next_copies
[1]
1239 : copy
.next_copies
[0]);
1241 stack
.safe_push ({ src_regno
, next_copyi
});
1243 auto &src_reg
= m_pseudo_regs
[src_regno
];
1244 auto &dest_reg
= m_pseudo_regs
[dest_regno
];
1246 if (src_reg
.flags
& ~dest_reg
.flags
& PSEUDO_COPY_FLAGS
)
1248 auto src_preference
= fpr_preference (src_regno
);
1249 auto dest_preference
= fpr_preference (dest_regno
);
1250 if ((src_preference
>= 0 && dest_preference
>= 0)
1251 || (src_preference
<= 0 && dest_preference
<= 0))
1253 dest_reg
.flags
|= (src_reg
.flags
& PSEUDO_COPY_FLAGS
);
1254 stack
.safe_push ({ dest_regno
, dest_reg
.first_copy
});
1261 // Decide which pseudos should be allocated an FPR, setting m_fpr_pseudos
1264 early_ra::choose_fpr_pseudos ()
1266 for (unsigned int i
= FIRST_PSEUDO_REGISTER
;
1267 i
< m_pseudo_regs
.length (); ++i
)
1268 if (fpr_preference (i
) > 0)
1269 bitmap_set_bit (m_fpr_pseudos
, i
);
1272 // Clear out information about the previous CFG region (if any)
1273 // and set up the data for a new region.
1275 early_ra::start_new_region ()
1277 obstack_free (&m_region_obstack
, m_region_alloc_start
);
1278 m_regno_to_group
.empty ();
1279 m_allocno_copies
.truncate (0);
1280 m_allocnos
.truncate (0);
1281 m_sorted_allocnos
.truncate (0);
1282 m_shared_allocnos
.truncate (0);
1283 m_colors
.truncate (0);
1284 m_insn_ranges
.truncate (0);
1285 for (auto &fpr_ranges
: m_fpr_ranges
)
1286 fpr_ranges
.truncate (0);
1287 for (auto &call_points
: m_call_points
)
1288 call_points
.truncate (0);
1289 gcc_assert (bitmap_empty_p (m_live_allocnos
) && m_live_fprs
== 0);
1290 m_dead_insns
.truncate (0);
1291 m_allocated_fprs
= 0;
1292 m_call_preserved_fprs
= 0;
1293 m_allocation_successful
= true;
1296 // Create and return an allocno group of size SIZE for register REGNO.
1297 // REGNO can be INVALID_REGNUM if the group just exists to allow
1298 // other groups to be chained together, and does not have any new
1299 // allocnos of its own.
1300 early_ra::allocno_group_info
*
1301 early_ra::create_allocno_group (unsigned int regno
, unsigned int size
)
1303 static_assert (alignof (unsigned int) == alignof (allocno_info
),
1304 "allocno_info alignment");
1305 unsigned int num_allocnos
= (regno
!= INVALID_REGNUM
? size
: 0);
1307 // Allocate an allocno_group_info, followed by an array of chain heads,
1308 // followed by the allocnos themselves.
1309 size_t alloc_size
= (sizeof (allocno_group_info
)
1310 + size
* sizeof (unsigned int)
1311 + num_allocnos
* sizeof (allocno_info
));
1312 void *data
= obstack_alloc (&m_region_obstack
, alloc_size
);
1314 // Initialize the group.
1315 auto *group
= reinterpret_cast<allocno_group_info
*> (data
);
1316 memset (group
, 0, sizeof (*group
));
1317 group
->m_color_rep
= group
;
1318 group
->regno
= regno
;
1321 group
->fpr_size
= FPR_D
;
1322 group
->fpr_candidates
= ~0U;
1324 // Initialize the chain heads.
1325 auto heads
= group
->chain_heads ();
1326 for (unsigned int i
= 0; i
< heads
.size (); ++i
)
1327 heads
[i
] = (i
< num_allocnos
? m_allocnos
.length () + i
: INVALID_ALLOCNO
);
1329 // Initialize the allocnos.
1330 if (num_allocnos
> 0)
1332 auto allocnos
= group
->allocnos ();
1333 memset (allocnos
.begin (), 0, num_allocnos
* sizeof (allocno_info
));
1334 for (unsigned int i
= 0; i
< num_allocnos
; ++i
)
1336 auto *allocno
= &allocnos
[i
];
1337 allocno
->id
= m_allocnos
.length ();
1338 allocno
->offset
= i
;
1339 allocno
->group_size
= size
;
1340 allocno
->hard_regno
= FIRST_PSEUDO_REGISTER
;
1341 allocno
->start_point
= END_OF_REGION
;
1342 allocno
->end_point
= START_OF_REGION
;
1343 allocno
->copy_dest
= INVALID_ALLOCNO
;
1344 allocno
->related_allocno
= INVALID_ALLOCNO
;
1345 allocno
->chain_next
= INVALID_ALLOCNO
;
1346 allocno
->chain_prev
= INVALID_ALLOCNO
;
1347 m_allocnos
.safe_push (allocno
);
1353 // If REG refers to a pseudo register that might be allocated to FPRs,
1354 // return the associated range of allocnos, creating new ones if necessary.
1355 // Return an empty range otherwise.
1356 early_ra::allocno_subgroup
1357 early_ra::get_allocno_subgroup (rtx reg
)
1359 if (GET_CODE (reg
) == SUBREG
)
1361 allocno_subgroup inner
= get_allocno_subgroup (SUBREG_REG (reg
));
1365 if (!targetm
.modes_tieable_p (GET_MODE (SUBREG_REG (reg
)),
1368 m_allocation_successful
= false;
1373 subreg_get_info (V0_REGNUM
, GET_MODE (SUBREG_REG (reg
)),
1374 SUBREG_BYTE (reg
), GET_MODE (reg
), &info
);
1375 if (!info
.representable_p
)
1377 m_allocation_successful
= false;
1381 inner
.start
+= info
.offset
;
1382 inner
.count
= info
.nregs
;
1386 if (!REG_P (reg
) || HARD_REGISTER_P (reg
))
1389 unsigned int regno
= REGNO (reg
);
1390 if (fpr_preference (regno
) <= 0)
1393 unsigned int count
= hard_regno_nregs (V0_REGNUM
, GET_MODE (reg
));
1395 auto &entry
= m_regno_to_group
.get_or_insert (regno
, &existed
);
1398 auto *group
= create_allocno_group (regno
, count
);
1399 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1401 auto allocnos
= group
->allocnos ();
1402 fprintf (dump_file
, "Creating allocnos [%d:%d] for r%d\n",
1403 allocnos
.front ().id
, allocnos
.back ().id
, regno
);
1406 auto reg_bits
= GET_MODE_BITSIZE (GET_MODE (reg
));
1407 auto fpr_bits
= exact_div (reg_bits
, count
);
1408 auto flags
= m_pseudo_regs
[regno
].flags
;
1410 // Punt for now if there is a choice to be made between using an
1411 // FPR and a non-FPR.
1412 if ((flags
& NEEDS_NONFPR
)
1413 || ((flags
& ALLOWS_NONFPR
)
1414 && !FLOAT_MODE_P (GET_MODE (reg
))
1415 && !VECTOR_MODE_P (GET_MODE (reg
))))
1416 m_allocation_successful
= false;
1418 if (flags
& ALLOWS_FPR8
)
1419 group
->fpr_candidates
&= 0xff;
1420 else if (flags
& ALLOWS_FPR16
)
1421 group
->fpr_candidates
&= 0xffff;
1422 group
->fpr_candidates
&= ~0U >> (count
- 1);
1424 group
->has_flexible_stride
= ((flags
& HAS_FLEXIBLE_STRIDE
) != 0
1425 && (flags
& HAS_FIXED_STRIDE
) == 0);
1427 group
->fpr_size
= (maybe_gt (fpr_bits
, 128) ? FPR_Z
1428 : maybe_gt (fpr_bits
, 64) ? FPR_Q
: FPR_D
);
1432 return { entry
, 0, count
};
1435 // Record a use of FPR REGNO at the current program point, as part of
1436 // a backwards walk over a block.
1438 early_ra::record_fpr_use (unsigned int regno
)
1440 gcc_assert (IN_RANGE (regno
, V0_REGNUM
, V31_REGNUM
));
1441 unsigned int offset
= regno
- V0_REGNUM
;
1442 if (!(m_live_fprs
& (1U << offset
)))
1444 m_fpr_ranges
[offset
].safe_push ({ START_OF_REGION
, m_current_point
,
1446 m_live_fprs
|= 1U << offset
;
1450 // Record a definition of FPR REGNO at the current program point, as part of
1451 // a backwards walk over a block.
1453 early_ra::record_fpr_def (unsigned int regno
)
1455 gcc_assert (IN_RANGE (regno
, V0_REGNUM
, V31_REGNUM
));
1456 unsigned int offset
= regno
- V0_REGNUM
;
1458 // The definition completes the current live range. If the result
1459 // of the definition is used, the live range extends to the last use.
1460 // Otherwise the live range is just a momentary blip at the current point.
1461 auto &ranges
= m_fpr_ranges
[offset
];
1462 if (m_live_fprs
& (1U << offset
))
1464 ranges
.last ().start_point
= m_current_point
;
1465 m_live_fprs
&= ~(1U << offset
);
1468 ranges
.safe_push ({ m_current_point
, m_current_point
, INVALID_ALLOCNO
});
1471 // Record a use of allocno ALLOCNO at the current program point, as part
1472 // of a backwards walk over a block.
1474 early_ra::record_allocno_use (allocno_info
*allocno
)
1476 if (allocno
->start_point
== m_current_point
)
1479 gcc_checking_assert (!allocno
->is_shared ());
1480 bitmap_set_bit (m_live_allocnos
, allocno
->id
);
1481 if (allocno
->end_point
> m_current_point
)
1483 allocno
->end_point
= m_current_point
;
1484 allocno
->last_def_point
= START_OF_REGION
;
1485 allocno
->last_use_point
= END_OF_REGION
;
1488 allocno
->last_use_point
= allocno
->start_point
;
1489 allocno
->start_point
= m_current_point
;
1490 allocno
->is_copy_dest
= false;
1491 allocno
->is_strong_copy_src
= false;
1492 allocno
->related_allocno
= INVALID_ALLOCNO
;
1493 allocno
->is_equiv
= false;
1496 // Record a definition of the allocno with index AI at the current program
1497 // point, as part of a backwards walk over a block. The allocno is known
1500 early_ra::record_allocno_def (allocno_info
*allocno
)
1502 gcc_checking_assert (!allocno
->is_shared ());
1503 allocno
->last_use_point
= allocno
->start_point
;
1504 allocno
->last_def_point
= m_current_point
;
1505 allocno
->start_point
= m_current_point
;
1506 allocno
->num_defs
= MIN (allocno
->num_defs
+ 1, 2);
1507 gcc_checking_assert (!allocno
->is_copy_dest
1508 && !allocno
->is_strong_copy_src
);
1509 if (!bitmap_clear_bit (m_live_allocnos
, allocno
->id
))
1513 // SRC_ALLOCNO is copied or tied to DEST_ALLOCNO; IS_EQUIV is true if the
1514 // two allocnos are known to be equal. See whether we can mark a chain of
1515 // allocnos ending at DEST_ALLOCNO as related to SRC_ALLOCNO. Return the
1516 // start of the chain if so, otherwise return null.
1518 // If IS_EQUIV, a chain that contains just DEST_ALLOCNO should be treated
1519 // as an equivalence. Otherwise the chain should be shared with SRC_ALLOCNO.
1521 // Sharing chains are a rather hacky workaround for the fact that we
1522 // don't collect segmented live ranges, and that in the end we want to do
1523 // simple interval graph coloring.
1524 early_ra::allocno_info
*
1525 early_ra::find_related_start (allocno_info
*dest_allocno
,
1526 allocno_info
*src_allocno
, bool is_equiv
)
1528 allocno_info
*res
= nullptr;
1531 if (src_allocno
->end_point
> dest_allocno
->end_point
)
1532 // The src allocno dies first.
1535 if (src_allocno
->num_defs
!= 0)
1537 if (dest_allocno
->end_point
< m_current_bb_point
)
1538 // We don't currently track enough information to handle multiple
1539 // definitions across basic block boundaries.
1542 if (src_allocno
->last_def_point
>= dest_allocno
->end_point
)
1543 // There is another definition during the destination's live range.
1548 if (dest_allocno
->num_defs
== 1)
1549 // dest_allocno is equivalent to src_allocno for dest_allocno's
1550 // entire live range. Fall back to that if we can't establish
1556 if (src_allocno
->last_use_point
>= dest_allocno
->end_point
)
1557 // src_allocno is live during dest_allocno's live range,
1558 // and the two allocnos do not necessarily have the same value.
1562 if (dest_allocno
->group_size
!= 1
1563 || DF_REG_DEF_COUNT (dest_allocno
->group ()->regno
) != 1)
1564 // Currently only single allocnos that are defined once can
1565 // share registers with non-equivalent allocnos. This could be
1566 // relaxed, but at the time of writing, aggregates are not valid
1567 // SSA names and so generally only use a single pseudo throughout
1571 if (dest_allocno
->copy_dest
== src_allocno
->id
)
1572 // We've found a complete and valid sharing chain.
1573 return dest_allocno
;
1575 if (dest_allocno
->copy_dest
== INVALID_ALLOCNO
)
1578 auto *next_allocno
= m_allocnos
[dest_allocno
->copy_dest
];
1579 if (!is_chain_candidate (dest_allocno
, next_allocno
))
1582 dest_allocno
= next_allocno
;
1587 // Record any relevant allocno-related information for an actual or imagined
1588 // copy from SRC to DEST. FROM_MOVE_P is true if the copy was an explicit
1589 // move instruction, false if it represents one way of satisfying the previous
1590 // instruction's constraints.
1592 early_ra::record_copy (rtx dest
, rtx src
, bool from_move_p
)
1594 auto dest_range
= get_allocno_subgroup (dest
);
1595 auto src_range
= get_allocno_subgroup (src
);
1599 && FP_REGNUM_P (REGNO (src
)))
1601 // A copy from an FPR to an allocno group.
1602 unsigned int fpr
= REGNO (src
) - V0_REGNUM
;
1603 m_allocno_copies
.safe_push ({ dest_range
.allocno (0)->id
, fpr
,
1604 dest_range
.count
});
1606 // If the allocno at the other end of the chain of copies from DEST
1607 // has a copy to the same FPR, record that all intervening copy chains
1608 // could become "strong" ones. This indicates that picking the FPR
1609 // avoids a copy at both ends.
1610 unsigned int hard_regno
= REGNO (src
);
1611 for (auto &dest_allocno
: dest_range
.allocnos ())
1612 if (dest_allocno
.hard_regno
== hard_regno
++)
1613 dest_allocno
.is_strong_copy_src
= true;
1615 else if (from_move_p
1618 && FP_REGNUM_P (REGNO (dest
)))
1620 // A copy from an allocno group to an FPR.
1621 unsigned int fpr
= REGNO (dest
) - V0_REGNUM
;
1622 m_allocno_copies
.safe_push ({ src_range
.allocno (0)->id
, fpr
,
1624 for (auto &src_allocno
: src_range
.allocnos ())
1626 // If the copy comes from a move, see whether the destination
1627 // FPR is known to be equal to the source allocno for the FPR's
1629 if (from_move_p
&& src_allocno
.num_defs
== 0)
1631 auto &last_range
= m_fpr_ranges
[fpr
].last ();
1632 if (last_range
.end_point
>= src_allocno
.end_point
)
1633 last_range
.allocno
= src_allocno
.id
;
1635 src_allocno
.hard_regno
= V0_REGNUM
+ fpr
;
1639 else if (src_range
&& dest_range
)
1641 // A copy between two allocno groups. We can only have a mismatched
1642 // number of FPRs for imaginary, non-move copies. In that case
1643 // the matching happens on the common lowparts.
1644 gcc_assert (!from_move_p
|| src_range
.count
== dest_range
.count
);
1645 unsigned int count
= std::min (src_range
.count
, dest_range
.count
);
1646 if (WORDS_BIG_ENDIAN
)
1648 src_range
.start
+= src_range
.count
- count
;
1649 dest_range
.start
+= dest_range
.count
- count
;
1651 src_range
.count
= count
;
1652 dest_range
.count
= count
;
1654 // Ignore (imaginary non-move) copies if the destination is still live.
1655 for (auto &dest_allocno
: dest_range
.allocnos ())
1656 if (bitmap_bit_p (m_live_allocnos
, dest_allocno
.id
))
1659 for (unsigned int i
= 0; i
< src_range
.count
; ++i
)
1661 auto *dest_allocno
= dest_range
.allocno (i
);
1662 auto *src_allocno
= src_range
.allocno (i
);
1663 if (src_allocno
->end_point
> dest_allocno
->start_point
)
1665 gcc_assert (src_allocno
->copy_dest
== INVALID_ALLOCNO
1666 || src_allocno
->copy_dest
== dest_allocno
->id
);
1667 src_allocno
->copy_dest
= dest_allocno
->id
;
1668 src_allocno
->hard_regno
= dest_allocno
->hard_regno
;
1669 dest_allocno
->is_copy_dest
= 1;
1671 else if (auto *start_allocno
= find_related_start (dest_allocno
,
1675 auto *next_allocno
= dest_allocno
;
1678 next_allocno
->related_allocno
= src_allocno
->id
;
1679 next_allocno
->is_equiv
= (start_allocno
== dest_allocno
1681 if (next_allocno
== start_allocno
)
1683 next_allocno
= m_allocnos
[next_allocno
->copy_dest
];
1690 // Record any relevant allocno-related information about the constraints
1691 // on INSN, which has already been extracted.
1693 early_ra::record_constraints (rtx_insn
*insn
)
1695 preprocess_constraints (insn
);
1697 int operand_matches
[MAX_RECOG_OPERANDS
];
1698 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
1699 operand_matches
[i
] = -1;
1701 auto alts
= get_preferred_alternatives (insn
);
1702 bool any_ok
= recog_data
.n_alternatives
== 0;
1704 // The set of output operands that are earlyclobber in at least one
1706 operand_mask earlyclobber_operands
= 0;
1708 // The set of output operands that are matched to inputs in at least
1710 operand_mask matched_operands
= 0;
1712 // The set of output operands that are not matched to inputs in at least
1714 operand_mask unmatched_operands
= 0;
1716 // The set of input operands that are matched to outputs in at least one
1717 // alternative, or that overlap with such an input if the output is not
1718 // earlyclobber. The latter part of the condition copes with things
1719 // like y = x * x, where the first x is tied to the destination, and where
1720 // y is not earlyclobber.
1721 operand_mask matches_operands
= 0;
1723 for (int altno
= 0; altno
< recog_data
.n_alternatives
; ++altno
)
1725 if (!(alts
& ALTERNATIVE_BIT (altno
)))
1728 auto *op_alt
= &recog_op_alt
[altno
* recog_data
.n_operands
];
1729 if (!likely_alternative_match_p (op_alt
))
1734 // Update the information for operand DEST_OPNO based on the constraint
1735 // information for operand SRC_OPNO. The numbers can be different for
1736 // swapped commutative operands.
1737 auto record_operand
= [&](int src_opno
, int dest_opno
)
1739 int matches
= op_alt
[src_opno
].matches
;
1740 // A matched earlyclobber cannot be used if the same operand value
1741 // occurs in an unmatched operand. E.g. for y = x * x, a matched
1742 // earlyclobber on the first input does not cover the second input.
1745 rtx op
= recog_data
.operand
[dest_opno
];
1746 operand_mask overlaps
= 0;
1747 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
1749 && !recog_data
.is_operator
[i
]
1750 && recog_data
.operand_type
[i
] != OP_OUT
1751 && reg_overlap_mentioned_p (op
, recog_data
.operand
[i
]))
1752 overlaps
|= 1U << i
;
1753 if (!op_alt
[matches
].earlyclobber
|| overlaps
== 0)
1755 operand_matches
[dest_opno
] = matches
;
1756 matches_operands
|= (1U << dest_opno
) | overlaps
;
1761 auto reject
= count_rejects (op_alt
);
1762 for (int opno
= 0; opno
< recog_data
.n_operands
; ++opno
)
1764 operand_mask op_mask
= operand_mask (1) << opno
;
1766 if (recog_data
.operand_type
[opno
] != OP_IN
)
1768 if (reject
== 0 && op_alt
[opno
].matched
>= 0)
1769 matched_operands
|= op_mask
;
1771 unmatched_operands
|= op_mask
;
1774 if (op_alt
[opno
].earlyclobber
)
1775 earlyclobber_operands
|= op_mask
;
1777 // Punt for now on scratches. If we wanted to handle them,
1778 // we'd need to create allocnos for them, like IRA does.
1779 rtx op
= recog_data
.operand
[opno
];
1780 if (GET_CODE (op
) == SCRATCH
1781 && reg_classes_intersect_p (op_alt
[opno
].cl
, FP_REGS
))
1782 m_allocation_successful
= false;
1784 // Record filter information, which applies to the first register
1786 if (auto filters
= alternative_register_filters (op_alt
, opno
))
1787 if (auto range
= get_allocno_subgroup (recog_data
.operand
[opno
]))
1788 for (unsigned int fpr
= range
.start
; fpr
< 32; ++fpr
)
1789 if (!test_register_filters (filters
, fpr
))
1790 range
.group
->fpr_candidates
&= ~(1U << (fpr
- range
.start
));
1794 // Record possible matched operands.
1795 record_operand (opno
, opno
);
1796 if (recog_data
.constraints
[opno
][0] == '%')
1798 record_operand (opno
, opno
+ 1);
1799 record_operand (opno
+ 1, opno
);
1807 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1808 fprintf (dump_file
, " -- no match\n");
1809 m_allocation_successful
= false;
1812 // Record if there is an output operand that is never earlyclobber and never
1813 // matched to an input. See the comment below for how this is used.
1814 rtx dest_op
= NULL_RTX
;
1815 for (int opno
= 0; opno
< recog_data
.n_operands
; ++opno
)
1817 auto op_mask
= operand_mask (1) << opno
;
1818 if (recog_data
.operand_type
[opno
] == OP_OUT
1819 && (earlyclobber_operands
& op_mask
) == 0
1820 && (matched_operands
& op_mask
) == 0)
1822 dest_op
= recog_data
.operand
[opno
];
1827 for (int opno
= 0; opno
< recog_data
.n_operands
; ++opno
)
1829 auto op_mask
= operand_mask (1) << opno
;
1830 rtx op
= recog_data
.operand
[opno
];
1831 int matches
= operand_matches
[opno
];
1833 // Punt for now on operands that already have a fixed choice of
1834 // register, since we don't have IRA's ability to find an alternative.
1835 // It's better if earlier passes don't create this kind of situation.
1836 if (REG_P (op
) && FP_REGNUM_P (REGNO (op
)))
1837 m_allocation_successful
= false;
1839 // Treat input operands as being earlyclobbered if an output is
1840 // sometimes earlyclobber and if the input never matches an output.
1841 // Do the same if there is an output that is always matched to an
1842 // input, and if this operand doesn't match that input. In both
1843 // cases, tying the input and the output would lead to an impossible
1844 // combination (or at least one that is difficult to reload).
1845 if (recog_data
.operand_type
[opno
] != OP_OUT
1846 && ((earlyclobber_operands
&& matches
< 0)
1847 || ((matched_operands
& ~unmatched_operands
)
1848 && !(matches_operands
& op_mask
))))
1849 for (auto &allocno
: get_allocno_subgroup (op
).allocnos ())
1850 if (allocno
.end_point
+ 1 == m_current_point
)
1851 allocno
.is_earlyclobbered
= true;
1853 // Create copies between operands that can be tied. This (deliberately)
1854 // might add several copies to the same destination register; later code
1855 // can then choose between them based on other criteria.
1857 // If there is an output operand that is never matched or earlyclobber,
1858 // and an input operand that never matches an output operand, create
1859 // a tentative copy between them. This allows hard register preferences
1860 // to be transmitted along the copy chains.
1862 record_copy (recog_data
.operand
[matches
], op
);
1863 else if (dest_op
&& recog_data
.operand_type
[opno
] == OP_IN
)
1864 record_copy (dest_op
, op
);
1868 // If FLAGS is DF_REF_AT_TOP, model the artificial uses and defs at the
1869 // start of the current basic block, otherwise model the artificial uses
1870 // and defs at the end of the basic block. This is done as part of a
1871 // backwards walk, so defs should be processed before uses.
1873 early_ra::record_artificial_refs (unsigned int flags
)
1877 FOR_EACH_ARTIFICIAL_DEF (ref
, m_current_bb
->index
)
1878 if ((DF_REF_FLAGS (ref
) & DF_REF_AT_TOP
) == flags
1879 && IN_RANGE (DF_REF_REGNO (ref
), V0_REGNUM
, V31_REGNUM
))
1880 record_fpr_def (DF_REF_REGNO (ref
));
1881 m_current_point
+= 1;
1883 FOR_EACH_ARTIFICIAL_USE (ref
, m_current_bb
->index
)
1884 if ((DF_REF_FLAGS (ref
) & DF_REF_AT_TOP
) == flags
1885 && IN_RANGE (DF_REF_REGNO (ref
), V0_REGNUM
, V31_REGNUM
))
1886 record_fpr_use (DF_REF_REGNO (ref
));
1887 m_current_point
+= 1;
1890 // Model the register references in INSN as part of a backwards walk.
1892 early_ra::record_insn_refs (rtx_insn
*insn
)
1896 // Record all definitions, excluding partial call clobbers.
1897 FOR_EACH_INSN_DEF (ref
, insn
)
1898 if (IN_RANGE (DF_REF_REGNO (ref
), V0_REGNUM
, V31_REGNUM
))
1899 record_fpr_def (DF_REF_REGNO (ref
));
1902 auto range
= get_allocno_subgroup (DF_REF_REG (ref
));
1903 for (auto &allocno
: range
.allocnos ())
1905 // If the destination is unused, record a momentary blip
1906 // in its live range.
1907 if (!bitmap_bit_p (m_live_allocnos
, allocno
.id
))
1908 record_allocno_use (&allocno
);
1909 record_allocno_def (&allocno
);
1912 m_current_point
+= 1;
1914 // Model the call made by a call insn as a separate phase in the
1915 // evaluation of the insn. Any partial call clobbers happen at that
1916 // point, rather than in the definition or use phase of the insn.
1917 if (auto *call_insn
= dyn_cast
<rtx_call_insn
*> (insn
))
1919 function_abi abi
= insn_callee_abi (call_insn
);
1920 m_call_points
[abi
.id ()].safe_push (m_current_point
);
1921 m_current_point
+= 1;
1924 // Record all uses. We can ignore READ_MODIFY_WRITE uses of plain subregs,
1925 // since we track the FPR-sized parts of them individually.
1926 FOR_EACH_INSN_USE (ref
, insn
)
1927 if (IN_RANGE (DF_REF_REGNO (ref
), V0_REGNUM
, V31_REGNUM
))
1928 record_fpr_use (DF_REF_REGNO (ref
));
1929 else if (!DF_REF_FLAGS_IS_SET (ref
, DF_REF_READ_WRITE
)
1930 || DF_REF_FLAGS_IS_SET (ref
, DF_REF_STRICT_LOW_PART
)
1931 || DF_REF_FLAGS_IS_SET (ref
, DF_REF_ZERO_EXTRACT
))
1933 auto range
= get_allocno_subgroup (DF_REF_REG (ref
));
1934 for (auto &allocno
: range
.allocnos ())
1935 record_allocno_use (&allocno
);
1937 m_current_point
+= 1;
1940 // ALLOCNO->is_strong_copy_src is true. See whether ALLOCNO heads a
1941 // natural chain that has an affinity with the same hard register at
1944 early_ra::consider_strong_copy_src_chain (allocno_info
*allocno
)
1946 auto *src_allocno
= allocno
;
1947 while (src_allocno
->copy_dest
!= INVALID_ALLOCNO
)
1949 auto *dest_allocno
= m_allocnos
[src_allocno
->copy_dest
];
1950 if (dest_allocno
->start_point
> src_allocno
->end_point
1951 || dest_allocno
->hard_regno
!= src_allocno
->hard_regno
)
1953 gcc_checking_assert (dest_allocno
->is_copy_dest
);
1954 src_allocno
= dest_allocno
;
1957 while (allocno
->copy_dest
!= INVALID_ALLOCNO
)
1959 allocno
->is_strong_copy_src
= 1;
1960 allocno
= m_allocnos
[allocno
->copy_dest
];
1961 allocno
->is_strong_copy_dest
= 1;
1966 // ALLOCNO1 and ALLOCNO2 are linked in some way, and might end up being
1967 // chained together. See whether chaining them requires the containing
1968 // groups to have the same stride, or whether it requires them to have
1969 // different strides. Return 1 if they should have the same stride,
1970 // -1 if they should have different strides, or 0 if it doesn't matter.
1972 early_ra::strided_polarity_pref (allocno_info
*allocno1
,
1973 allocno_info
*allocno2
)
1975 if (allocno1
->offset
+ 1 < allocno1
->group_size
1976 && allocno2
->offset
+ 1 < allocno2
->group_size
)
1978 if (is_chain_candidate (allocno1
+ 1, allocno2
+ 1))
1984 if (allocno1
->offset
> 0 && allocno2
->offset
> 0)
1986 if (is_chain_candidate (allocno1
- 1, allocno2
- 1))
1995 // Decide which groups should be strided. Also complete "strong" copy chains.
1997 early_ra::find_strided_accesses ()
1999 // This function forms a graph of allocnos, linked by equivalences and
2000 // natural copy chains. It temporarily uses chain_next to record the
2001 // reverse of equivalence edges (related_allocno) and chain_prev to record
2002 // the reverse of copy edges (copy_dest).
2003 unsigned int allocno_info::*links
[] = {
2004 &allocno_info::chain_next
,
2005 &allocno_info::chain_prev
,
2006 &allocno_info::copy_dest
,
2007 &allocno_info::related_allocno
2010 // Set up the temporary reverse edges. Check for strong copy chains.
2011 for (unsigned int i
= m_allocnos
.length (); i
-- > 0; )
2013 auto *allocno1
= m_allocnos
[i
];
2014 if (allocno1
->copy_dest
!= INVALID_ALLOCNO
)
2015 m_allocnos
[allocno1
->copy_dest
]->chain_prev
= allocno1
->id
;
2016 if (allocno1
->related_allocno
!= INVALID_ALLOCNO
)
2017 m_allocnos
[allocno1
->related_allocno
]->chain_next
= allocno1
->id
;
2019 if (allocno1
->is_strong_copy_src
2020 && !allocno1
->is_copy_dest
2021 && !consider_strong_copy_src_chain (allocno1
))
2022 allocno1
->is_strong_copy_src
= false;
2025 // Partition the graph into cliques based on edges that have the following
2028 // - the edge joins two allocnos whose groups have a free choice between
2029 // consecutive and strided allocations.
2031 // - the two groups have a relative strided polarity preference (that is
2032 // they should make the same choice between consecutive and strided,
2033 // or they should make opposite choices).
2035 // Assign relative polarities to each group connected in this way.
2037 // The aim is to discover natural move-free striding choices, which will
2038 // often exist in carefully written ACLE code.
2039 unsigned int num_edges
= m_allocnos
.length () * ARRAY_SIZE (links
);
2040 auto_sbitmap
visited_edges (num_edges
);
2041 bitmap_clear (visited_edges
);
2043 auto_vec
<unsigned int, 32> worklist
;
2044 for (unsigned int i
= 0; i
< num_edges
; ++i
)
2046 if (!bitmap_set_bit (visited_edges
, i
))
2048 worklist
.quick_push (i
);
2049 while (!worklist
.is_empty ())
2051 auto ei
= worklist
.pop ();
2052 auto *allocno1
= m_allocnos
[ei
/ ARRAY_SIZE (links
)];
2053 auto ai2
= allocno1
->*links
[ei
% ARRAY_SIZE (links
)];
2054 if (ai2
== INVALID_ALLOCNO
)
2057 auto *allocno2
= m_allocnos
[ai2
];
2058 auto *group1
= allocno1
->group ();
2059 auto *group2
= allocno2
->group ();
2060 if (!group1
->has_flexible_stride
|| !group2
->has_flexible_stride
)
2063 int pref
= strided_polarity_pref (allocno1
, allocno2
);
2067 for (auto *group
: { group1
, group2
})
2068 for (auto &allocno
: group
->allocnos ())
2069 for (unsigned int j
= 0; j
< ARRAY_SIZE (links
); ++j
)
2070 if (bitmap_set_bit (visited_edges
, allocno
.id
* 4 + j
))
2071 worklist
.safe_push (allocno
.id
* 4 + j
);
2073 if (group1
->strided_polarity
)
2074 group2
->strided_polarity
= group1
->strided_polarity
* pref
;
2075 else if (group1
->strided_polarity
)
2076 group2
->strided_polarity
= group1
->strided_polarity
* pref
;
2079 group1
->strided_polarity
= 1;
2080 group2
->strided_polarity
= pref
;
2085 // Now look for edges between allocnos in multi-register groups where:
2087 // - the two groups have a relative strided polarity preference (as above).
2089 // - one group (G1) has a free choice between consecutive and strided
2092 // - the other group (G2) must use consecutive allocations.
2094 // Update G1's individual preference for strided or consecutive allocations
2095 // based on G2. If the previous loop chose a polarity for G1, work out
2096 // whether it is better for polarity 1 or -1 to correspond to consecutive
2098 int consecutive_pref
= 0;
2099 for (unsigned int i
= m_allocnos
.length (); i
-- > 0; )
2101 auto *allocno1
= m_allocnos
[i
];
2102 for (auto link
: links
)
2104 auto ai2
= allocno1
->*link
;
2105 if (ai2
== INVALID_ALLOCNO
)
2108 auto *allocno2
= m_allocnos
[ai2
];
2109 auto *group1
= allocno1
->group ();
2110 auto *group2
= allocno2
->group ();
2111 if (group1
->has_flexible_stride
== group2
->has_flexible_stride
)
2114 int pref
= strided_polarity_pref (allocno1
, allocno2
);
2118 auto *group
= (group1
->has_flexible_stride
? group1
: group2
);
2119 consecutive_pref
+= group
->strided_polarity
* pref
;
2120 group
->consecutive_pref
+= pref
;
2124 // If it doesn't matter whether polarity 1 or -1 corresponds to consecutive
2125 // allocation, arbitrarily pick 1.
2126 if (consecutive_pref
== 0)
2127 consecutive_pref
= 1;
2129 // Record which multi-register groups should use strided allocations.
2130 // Clear out the temporary edges.
2131 for (unsigned int ai
= 0; ai
< m_allocnos
.length (); ++ai
)
2133 auto *allocno
= m_allocnos
[ai
];
2134 allocno
->chain_prev
= INVALID_ALLOCNO
;
2135 allocno
->chain_next
= INVALID_ALLOCNO
;
2137 if (allocno
->offset
!= 0)
2140 auto *group
= allocno
->group ();
2141 if (!group
->has_flexible_stride
)
2144 bool make_strided
= (group
->strided_polarity
2145 ? (consecutive_pref
* group
->strided_polarity
) < 0
2146 : group
->consecutive_pref
< 0);
2147 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2148 fprintf (dump_file
, "Allocno [%d:%d]: strided polarity %d,"
2149 " consecutive pref %d, %s\n",
2150 allocno
->id
, allocno
->id
+ group
->size
- 1,
2151 group
->strided_polarity
, group
->consecutive_pref
,
2152 make_strided
? "making strided" : "keeping consecutive");
2156 // 2-register groups have a stride of 8 FPRs and must start in
2157 // registers matching the mask 0x17. 4-register groups have a stride
2158 // of 4 FPRs and must start in registers matching the mask 0x13.
2159 group
->stride
= group
->size
== 2 ? 8 : 4;
2160 gcc_checking_assert (group
->fpr_candidates
2161 == (group
->size
== 2 ? 0x55555555 : 0x11111111));
2162 group
->fpr_candidates
= (group
->size
== 2 ? 0xff00ff : 0xf000f);
2166 // Compare the allocnos at *ALLOCNO1_PTR and *ALLOCNO2_PTR and return a <=>
2167 // result that puts allocnos in order of increasing FIELD.
2168 template<unsigned int early_ra::allocno_info::*field
>
2170 early_ra::cmp_increasing (const void *allocno1_ptr
, const void *allocno2_ptr
)
2172 auto *allocno1
= *(allocno_info
*const *) allocno1_ptr
;
2173 auto *allocno2
= *(allocno_info
*const *) allocno2_ptr
;
2175 if (allocno1
->*field
!= allocno2
->*field
)
2176 return allocno1
->*field
< allocno2
->*field
? -1 : 1;
2177 return (allocno1
->id
< allocno2
->id
? -1
2178 : allocno1
->id
== allocno2
->id
? 0 : 1);
2181 // Return true if we should consider chaining ALLOCNO1 onto the head
2182 // of ALLOCNO2. This is just a local test of the two allocnos; it doesn't
2183 // guarantee that chaining them would give a self-consistent system.
2185 early_ra::is_chain_candidate (allocno_info
*allocno1
, allocno_info
*allocno2
)
2187 if (allocno2
->is_shared ())
2190 if (allocno1
->is_equiv
)
2191 allocno1
= m_allocnos
[allocno1
->related_allocno
];
2193 if (allocno2
->start_point
>= allocno1
->end_point
2194 && !allocno2
->is_equiv_to (allocno1
->id
))
2197 if (allocno2
->is_strong_copy_dest
)
2199 if (!allocno1
->is_strong_copy_src
2200 || allocno1
->copy_dest
!= allocno2
->id
)
2203 else if (allocno2
->is_copy_dest
)
2205 if (allocno1
->copy_dest
!= allocno2
->id
)
2208 else if (allocno1
->is_earlyclobbered
)
2210 if (allocno1
->end_point
== allocno2
->start_point
+ 1)
2217 // We're trying to chain allocno ALLOCNO1 to a later allocno.
2218 // Rate how good a choice ALLOCNO2 would be, with higher being better.
2220 early_ra::rate_chain (allocno_info
*allocno1
, allocno_info
*allocno2
)
2223 if (allocno2
->is_strong_copy_dest
)
2225 else if (allocno2
->is_copy_dest
)
2228 // Prefer well-aligned matches.
2229 auto *group1
= allocno1
->group ();
2230 auto *group2
= allocno2
->group ();
2231 if (group1
->stride
== 1 && group2
->stride
== 1)
2233 unsigned int min_size
= std::min (group1
->color_rep ()->size
,
2234 group2
->color_rep ()->size
);
2235 if ((group1
->color_rep_offset
+ allocno1
->offset
) % min_size
2236 == (group2
->color_rep_offset
+ allocno2
->offset
) % min_size
)
2244 // Sort the chain_candidate_infos at ARG1 and ARG2 in order of decreasing
2247 early_ra::cmp_chain_candidates (const void *arg1
, const void *arg2
)
2249 auto &candidate1
= *(const chain_candidate_info
*) arg1
;
2250 auto &candidate2
= *(const chain_candidate_info
*) arg2
;
2251 if (candidate1
.score
!= candidate2
.score
)
2252 return candidate1
.score
> candidate2
.score
? -1 : 1;
2254 // Prefer to increase the gap between uses of the allocated register,
2255 // to give the scheduler more freedom.
2256 auto *allocno1
= candidate1
.allocno
;
2257 auto *allocno2
= candidate2
.allocno
;
2258 if (allocno1
->start_point
!= allocno2
->start_point
)
2259 return allocno1
->start_point
< allocno2
->start_point
? -1 : 1;
2261 if (allocno1
!= allocno2
)
2262 return allocno1
->id
< allocno2
->id
? -1 : 1;
2267 // Join the chains of allocnos that start at HEADI1 and HEADI2.
2268 // HEADI1 is either empty or a single allocno.
2270 early_ra::chain_allocnos (unsigned int &headi1
, unsigned int &headi2
)
2272 if (headi1
== INVALID_ALLOCNO
)
2274 else if (headi2
== INVALID_ALLOCNO
)
2278 auto *head1
= m_allocnos
[headi1
];
2279 auto *head2
= m_allocnos
[headi2
];
2280 gcc_checking_assert (head1
->chain_next
== INVALID_ALLOCNO
2281 && head1
->chain_prev
== INVALID_ALLOCNO
2282 && head2
->chain_prev
== INVALID_ALLOCNO
);
2285 && m_allocnos
[head1
->related_allocno
]->copy_dest
== headi2
)
2287 head1
->is_copy_dest
= head2
->is_copy_dest
;
2288 head1
->is_strong_copy_dest
= head2
->is_strong_copy_dest
;
2289 m_allocnos
[head1
->related_allocno
]->copy_dest
= headi1
;
2291 head1
->chain_next
= headi2
;
2292 head2
->chain_prev
= headi1
;
2298 // Add GROUP2's FPR information to GROUP1's, given that GROUP2 starts
2299 // OFFSET allocnos into GROUP2.
2301 early_ra::merge_fpr_info (allocno_group_info
*group1
,
2302 allocno_group_info
*group2
,
2303 unsigned int offset
)
2305 group1
->fpr_size
= std::max (group1
->fpr_size
, group2
->fpr_size
);
2306 group1
->fpr_candidates
&= (group2
->fpr_candidates
2307 >> (offset
* group1
->stride
));
2310 // Set the color representative of ALLOCNO's group to REP, such that ALLOCNO
2311 // ends being at allocno offset REP_OFFSET from the start of REP.
2313 early_ra::set_single_color_rep (allocno_info
*allocno
, allocno_group_info
*rep
,
2314 unsigned int rep_offset
)
2316 auto *group
= allocno
->group ();
2317 if (group
->m_color_rep
== rep
)
2320 group
->m_color_rep
= rep
;
2321 gcc_checking_assert (multiple_p (group
->stride
, rep
->stride
));
2322 unsigned int factor
= group
->stride
/ rep
->stride
;
2323 gcc_checking_assert (rep_offset
>= allocno
->offset
* factor
);
2324 group
->color_rep_offset
= rep_offset
- allocno
->offset
* factor
;
2325 merge_fpr_info (rep
, group
, group
->color_rep_offset
);
2328 // REP1 and REP2 are color representatives. Change REP1's color representative
2329 // to REP2, with REP1 starting at allocno offset REP2_OFFSET into REP2.
2331 early_ra::set_color_rep (allocno_group_info
*rep1
, allocno_group_info
*rep2
,
2332 unsigned int rep2_offset
)
2334 gcc_checking_assert (rep1
!= rep2
2335 && rep2
->m_color_rep
== rep2
2336 && multiple_p (rep1
->stride
, rep2
->stride
));
2338 auto heads1
= rep1
->chain_heads ();
2339 auto heads2
= rep2
->chain_heads ();
2340 for (unsigned int i1
= 0; i1
< heads1
.size (); ++i1
)
2341 if (heads1
[i1
] != INVALID_ALLOCNO
)
2343 unsigned int i2
= rep2_offset
+ i1
* rep1
->stride
/ rep2
->stride
;
2344 if (heads2
[i2
] == INVALID_ALLOCNO
)
2345 heads2
[i2
] = heads1
[i1
];
2347 gcc_checking_assert (heads2
[i2
] == heads1
[i1
]);
2348 set_single_color_rep (m_allocnos
[heads1
[i1
]], rep2
, i2
);
2352 // Try to chain ALLOCNO1 to the head of the chain starting at ALLOCNO2.
2353 // Return true on success.
2355 early_ra::try_to_chain_allocnos (allocno_info
*allocno1
,
2356 allocno_info
*allocno2
)
2358 auto *group1
= allocno1
->group ()->color_rep ();
2359 auto *group2
= allocno2
->group ()->color_rep ();
2361 // Avoid trying to tie different subgroups of the same group. This can
2362 // happen if the parts of a register are defined and used piecemeal.
2363 if (group1
== group2
)
2366 // The stride (in FPRs) between allocnos of each color representative.
2367 auto fpr_stride1
= group1
->stride
;
2368 auto fpr_stride2
= group2
->stride
;
2370 // The offset (in FPRs) of each allocno group from its color representative.
2371 auto fpr_offset1
= allocno1
->group ()->color_rep_offset
* fpr_stride1
;
2372 auto fpr_offset2
= allocno2
->group ()->color_rep_offset
* fpr_stride2
;
2374 // The offset (in FPRs) of each allocno from its color representative.
2375 fpr_offset1
+= allocno1
->offset
* allocno1
->group ()->stride
;
2376 fpr_offset2
+= allocno2
->offset
* allocno2
->group ()->stride
;
2378 // The FPR overlap is in multiples of the larger stride.
2379 auto max_fpr_stride
= std::max (fpr_stride1
, fpr_stride2
);
2380 auto min_fpr_offset
= std::min (fpr_offset1
, fpr_offset2
);
2381 auto fpr_overlap_offset
= ROUND_DOWN (min_fpr_offset
, max_fpr_stride
);
2383 // The offset (in FPRs) of the start of the overlapping region from
2384 // each color representative.
2385 fpr_offset1
-= fpr_overlap_offset
;
2386 fpr_offset2
-= fpr_overlap_offset
;
2388 // The number of FPRs in each color representative after the start
2389 // of the overlapping region.
2390 auto fpr_after1
= (group1
->size
- 1) * fpr_stride1
- fpr_offset1
;
2391 auto fpr_after2
= (group2
->size
- 1) * fpr_stride2
- fpr_offset2
;
2393 auto min_fpr_after
= std::min (fpr_after1
, fpr_after2
);
2395 // The number of overlapping allocnos.
2396 auto allocno_overlap_size
= min_fpr_after
/ max_fpr_stride
+ 1;
2398 // The offset (in allocnos) of the overlapping region from the start
2399 // of each color representative.
2400 auto allocno_offset1
= fpr_offset1
/ fpr_stride1
;
2401 auto allocno_offset2
= fpr_offset2
/ fpr_stride2
;
2403 // The stride (in allocnos) between overlapping allocnos.
2404 auto allocno_stride1
= max_fpr_stride
/ fpr_stride1
;
2405 auto allocno_stride2
= max_fpr_stride
/ fpr_stride2
;
2407 // Reject combinations that are impossible to allocate.
2408 auto fprs1
= group1
->fpr_candidates
;
2409 auto fprs2
= group2
->fpr_candidates
;
2410 if (fpr_offset1
> fpr_offset2
)
2411 fprs2
>>= (fpr_offset1
- fpr_offset2
);
2413 fprs1
>>= (fpr_offset2
- fpr_offset1
);
2414 if ((fprs1
& fprs2
) == 0)
2416 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2417 fprintf (dump_file
, " - cannot chain %d->%d, no FPRs in common"
2418 " (%08x@%d and %08x@%d)\n", allocno1
->id
, allocno2
->id
,
2419 group1
->fpr_candidates
, fpr_offset1
,
2420 group2
->fpr_candidates
, fpr_offset2
);
2424 // Check whether the chain can be formed.
2425 auto heads1
= group1
->chain_heads ();
2426 auto heads2
= group2
->chain_heads ();
2427 for (unsigned int i
= 0; i
< allocno_overlap_size
; ++i
)
2429 auto headi1
= heads1
[allocno_offset1
+ i
* allocno_stride1
];
2430 auto headi2
= heads2
[allocno_offset2
+ i
* allocno_stride2
];
2431 if (headi1
!= INVALID_ALLOCNO
&& headi2
!= INVALID_ALLOCNO
)
2433 auto *head1
= m_allocnos
[headi1
];
2434 auto *head2
= m_allocnos
[headi2
];
2435 if (head1
->chain_next
!= INVALID_ALLOCNO
)
2437 if (!head2
->is_equiv_to (head1
->id
)
2438 && head1
->end_point
<= head2
->start_point
)
2443 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2445 fprintf (dump_file
, " - chaining allocnos [");
2446 for (unsigned int i
= 0; i
< allocno_overlap_size
; ++i
)
2447 fprintf (dump_file
, "%s%d", i
? "," : "",
2448 heads1
[allocno_offset1
+ i
* allocno_stride1
]);
2449 fprintf (dump_file
, "] and [");
2450 for (unsigned int i
= 0; i
< allocno_overlap_size
; ++i
)
2451 fprintf (dump_file
, "%s%d", i
? "," : "",
2452 heads2
[allocno_offset2
+ i
* allocno_stride2
]);
2453 fprintf (dump_file
, "]\n");
2456 // Chain the allocnos, updating the chain heads.
2457 for (unsigned int i
= 0; i
< allocno_overlap_size
; ++i
)
2458 chain_allocnos (heads1
[allocno_offset1
+ i
* allocno_stride1
],
2459 heads2
[allocno_offset2
+ i
* allocno_stride2
]);
2461 // Pick a color representative for the merged groups.
2462 allocno_group_info
*new_rep
;
2463 if (allocno_offset1
== 0
2464 && group1
->size
== allocno_overlap_size
* allocno_stride1
2465 && multiple_p (fpr_stride1
, fpr_stride2
))
2467 // The first group fits within the second.
2468 set_color_rep (group1
, group2
, allocno_offset2
);
2471 else if (allocno_offset2
== 0
2472 && group2
->size
== allocno_overlap_size
* allocno_stride2
2473 && multiple_p (fpr_stride2
, fpr_stride1
))
2475 // The second group fits within the first.
2476 set_color_rep (group2
, group1
, allocno_offset1
);
2481 // We need a new group that is big enough to span both groups.
2482 // The new group always has an FPR stride of 1.
2483 auto max_fpr_offset
= std::max (fpr_offset1
, fpr_offset2
);
2484 auto max_fpr_after
= std::max (fpr_after1
, fpr_after2
);
2485 auto new_size
= max_fpr_offset
+ max_fpr_after
+ 1;
2486 new_rep
= create_allocno_group (INVALID_REGNUM
, new_size
);
2488 set_color_rep (group1
, new_rep
, max_fpr_offset
- fpr_offset1
);
2489 set_color_rep (group2
, new_rep
, max_fpr_offset
- fpr_offset2
);
2492 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2494 fprintf (dump_file
, " - new frontier [");
2495 auto new_heads
= new_rep
->chain_heads ();
2496 for (unsigned int i
= 0; i
< new_heads
.size (); ++i
)
2499 fprintf (dump_file
, ",");
2500 if (new_heads
[i
] == INVALID_ALLOCNO
)
2501 fprintf (dump_file
, "-");
2503 fprintf (dump_file
, "%d", new_heads
[i
]);
2505 fprintf (dump_file
, "]\n");
2511 // Create a color_info for color representative GROUP.
2513 early_ra::create_color (allocno_group_info
*group
)
2515 auto *color
= region_allocate
<color_info
> ();
2516 color
->id
= m_colors
.length ();
2517 color
->hard_regno
= FIRST_PSEUDO_REGISTER
;
2518 color
->group
= group
;
2520 gcc_checking_assert (group
->m_color_rep
== group
);
2521 group
->has_color
= true;
2522 group
->color
= m_colors
.length ();
2524 m_colors
.safe_push (color
);
2527 // Form allocnos into chains. Create colors for each resulting clique.
2529 early_ra::form_chains ()
2531 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2532 fprintf (dump_file
, "\nChaining allocnos:\n");
2534 // Perform (modified) interval graph coloring. First sort by
2535 // increasing start point.
2536 m_sorted_allocnos
.reserve (m_allocnos
.length ());
2537 m_sorted_allocnos
.splice (m_allocnos
);
2538 m_sorted_allocnos
.qsort (cmp_increasing
<&allocno_info::start_point
>);
2540 // During this phase, color representatives are only correct for
2541 // unprocessed allocno groups (where the color representative is
2542 // the group itself) and for groups that contain a current chain head.
2543 unsigned int ti
= 0;
2544 auto_vec
<chain_candidate_info
> candidates
;
2545 for (unsigned int hi
= 0; hi
< m_sorted_allocnos
.length (); ++hi
)
2547 auto *allocno1
= m_sorted_allocnos
[hi
];
2548 if (allocno1
->chain_next
!= INVALID_ALLOCNO
)
2551 // Record conflicts with direct uses for FPR hard registers.
2552 auto *group1
= allocno1
->group ();
2553 for (unsigned int fpr
= allocno1
->offset
; fpr
< 32; ++fpr
)
2554 if (fpr_conflicts_with_allocno_p (fpr
, allocno1
))
2555 group1
->fpr_candidates
&= ~(1U << (fpr
- allocno1
->offset
));
2557 // Record conflicts due to partially call-clobbered registers.
2558 // (Full clobbers are handled by the previous loop.)
2559 for (unsigned int abi_id
= 0; abi_id
< NUM_ABI_IDS
; ++abi_id
)
2560 if (call_in_range_p (abi_id
, allocno1
->start_point
,
2561 allocno1
->end_point
))
2563 auto fprs
= partial_fpr_clobbers (abi_id
, group1
->fpr_size
);
2564 group1
->fpr_candidates
&= ~fprs
>> allocno1
->offset
;
2567 if (allocno1
->is_shared ())
2569 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2570 fprintf (dump_file
, " Allocno %d shares the same hard register"
2571 " as allocno %d\n", allocno1
->id
,
2572 allocno1
->related_allocno
);
2573 auto *allocno2
= m_allocnos
[allocno1
->related_allocno
];
2574 merge_fpr_info (allocno2
->group (), group1
, allocno2
->offset
);
2575 m_shared_allocnos
.safe_push (allocno1
);
2579 // Find earlier allocnos (in processing order) that could be chained
2581 candidates
.truncate (0);
2582 for (unsigned int sci
= ti
; sci
< hi
; ++sci
)
2584 auto *allocno2
= m_sorted_allocnos
[sci
];
2585 if (allocno2
->chain_prev
== INVALID_ALLOCNO
)
2587 if (!is_chain_candidate (allocno1
, allocno2
))
2589 chain_candidate_info candidate
;
2590 candidate
.allocno
= allocno2
;
2591 candidate
.score
= rate_chain (allocno1
, allocno2
);
2592 candidates
.safe_push (candidate
);
2598 // Sort the candidates by decreasing score.
2599 candidates
.qsort (cmp_chain_candidates
);
2600 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2602 fprintf (dump_file
, " Chain candidates for %d:", allocno1
->id
);
2603 for (auto &candidate
: candidates
)
2604 fprintf (dump_file
, " %d(%d)", candidate
.allocno
->id
,
2606 fprintf (dump_file
, "\n");
2609 // Pick the first candidate that works.
2610 for (auto &candidate
: candidates
)
2611 if (try_to_chain_allocnos (allocno1
, candidate
.allocno
))
2615 // Create color_infos for each group. Make sure that each group's
2616 // color representative is up to date.
2617 for (unsigned int hi
= m_sorted_allocnos
.length (); hi
-- > 0; )
2619 auto *allocno
= m_sorted_allocnos
[hi
];
2620 if (allocno
->is_shared ())
2623 auto *rep
= allocno
->group ()->color_rep ();
2628 auto heads
= rep
->chain_heads ();
2629 for (unsigned int i
= 0; i
< heads
.size (); ++i
)
2631 unsigned int ai
= heads
[i
];
2632 while (ai
!= INVALID_ALLOCNO
)
2634 allocno
= m_allocnos
[ai
];
2635 set_single_color_rep (allocno
, rep
, i
* rep
->stride
);
2636 ai
= allocno
->chain_next
;
2642 // Return true if the given FPR (starting at 0) conflicts with allocno
2645 early_ra::fpr_conflicts_with_allocno_p (unsigned int fpr
,
2646 allocno_info
*allocno
)
2648 auto &ranges
= m_fpr_ranges
[fpr
];
2649 unsigned int start_i
= 0;
2650 unsigned int end_i
= ranges
.length ();
2651 while (start_i
< end_i
)
2653 unsigned int mid_i
= (start_i
+ end_i
) / 2;
2654 auto &range
= ranges
[mid_i
];
2655 if (allocno
->end_point
> range
.start_point
)
2656 start_i
= mid_i
+ 1;
2657 else if (allocno
->start_point
< range
.end_point
)
2661 if (range
.allocno
!= allocno
->id
)
2663 // The FPR is equivalent to ALLOCNO for this particular range.
2664 // See whether ALLOCNO conflicts with a neighboring range.
2666 && ranges
[mid_i
- 1].start_point
>= allocno
->end_point
)
2668 if (mid_i
+ 1 < ranges
.length ()
2669 && ranges
[mid_i
+ 1].end_point
<= allocno
->start_point
)
2677 // Return true if there is a call with ABI identifier ABI_ID in the inclusive
2678 // program point range [START_POINT, END_POINT].
2680 early_ra::call_in_range_p (unsigned int abi_id
, unsigned int start_point
,
2681 unsigned int end_point
)
2683 auto &points
= m_call_points
[abi_id
];
2684 unsigned int start_i
= 0;
2685 unsigned int end_i
= points
.length ();
2686 while (start_i
< end_i
)
2688 unsigned int mid_i
= (start_i
+ end_i
) / 2;
2689 auto point
= points
[mid_i
];
2690 if (end_point
> point
)
2691 start_i
= mid_i
+ 1;
2692 else if (start_point
< point
)
2700 // Return the set of FPRs for which a value of size SIZE will be clobbered
2701 // by a call to a function with ABI identifier ABI_ID, but would not be
2702 // for some smaller size. The set therefore excludes FPRs that are
2703 // fully-clobbered, like V0 in the base ABI.
2705 early_ra::partial_fpr_clobbers (unsigned int abi_id
, fpr_size_info size
)
2707 auto &abi
= function_abis
[abi_id
];
2708 unsigned int clobbers
= 0;
2709 machine_mode mode
= (size
== FPR_D
? V8QImode
2710 : size
== FPR_Q
? V16QImode
: VNx16QImode
);
2711 for (unsigned int regno
= V0_REGNUM
; regno
<= V31_REGNUM
; ++regno
)
2712 if (!abi
.clobbers_full_reg_p (regno
)
2713 && abi
.clobbers_reg_p (mode
, regno
))
2714 clobbers
|= 1U << (regno
- V0_REGNUM
);
2718 // Process copies between pseudo registers and hard registers and update
2719 // the FPR preferences for the associated colors.
2721 early_ra::process_copies ()
2723 for (auto ©
: m_allocno_copies
)
2725 auto *allocno
= m_allocnos
[copy
.allocno
];
2726 auto *group
= allocno
->group ();
2727 auto offset
= group
->color_rep_offset
+ allocno
->offset
;
2728 if (offset
> copy
.fpr
)
2731 unsigned int fpr
= copy
.fpr
- offset
;
2732 auto *color
= m_colors
[group
->color_rep ()->color
];
2733 color
->fpr_preferences
[fpr
] = MIN (color
->fpr_preferences
[fpr
]
2734 + copy
.weight
, 127);
2735 color
->num_fpr_preferences
+= copy
.weight
;
2739 // Compare the colors at *COLOR1_PTR and *COLOR2_PTR and return a <=>
2740 // result that puts colors in allocation order.
2742 early_ra::cmp_allocation_order (const void *color1_ptr
, const void *color2_ptr
)
2744 auto *color1
= *(color_info
*const *) color1_ptr
;
2745 auto *color2
= *(color_info
*const *) color2_ptr
;
2747 // Allocate bigger groups before smaller groups.
2748 if (color1
->group
->size
!= color2
->group
->size
)
2749 return color1
->group
->size
> color2
->group
->size
? -1 : 1;
2751 // Allocate groups with stronger FPR preferences before groups with weaker
2753 if (color1
->num_fpr_preferences
!= color2
->num_fpr_preferences
)
2754 return color1
->num_fpr_preferences
> color2
->num_fpr_preferences
? -1 : 1;
2756 return (color1
->id
< color2
->id
? -1
2757 : color1
->id
== color2
->id
? 0 : 1);
2760 // Allocate a register to each color. If we run out of registers,
2761 // give up on doing a full allocation of the FPR-based pseudos in the
2764 early_ra::allocate_colors ()
2766 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2767 fprintf (dump_file
, "\nAllocating registers:\n");
2769 auto_vec
<color_info
*> sorted_colors
;
2770 sorted_colors
.safe_splice (m_colors
);
2771 sorted_colors
.qsort (cmp_allocation_order
);
2773 for (unsigned int i
= 0; i
< 32; ++i
)
2774 if (!crtl
->abi
->clobbers_full_reg_p (V0_REGNUM
+ i
))
2775 m_call_preserved_fprs
|= 1U << i
;
2777 for (auto *color
: sorted_colors
)
2779 unsigned int candidates
= color
->group
->fpr_candidates
;
2780 for (unsigned int i
= 0; i
< color
->group
->size
; ++i
)
2781 candidates
&= ~(m_allocated_fprs
>> i
);
2782 unsigned int best
= INVALID_REGNUM
;
2783 int best_weight
= 0;
2784 for (unsigned int fpr
= 0; fpr
<= 32U - color
->group
->size
; ++fpr
)
2786 if ((candidates
& (1U << fpr
)) == 0)
2788 int weight
= color
->fpr_preferences
[fpr
];
2789 // Account for registers that the current function must preserve.
2790 for (unsigned int i
= 0; i
< color
->group
->size
; ++i
)
2791 if (m_call_preserved_fprs
& (1U << (fpr
+ i
)))
2793 if (best
== INVALID_REGNUM
|| best_weight
<= weight
)
2796 best_weight
= weight
;
2800 if (best
== INVALID_REGNUM
)
2802 m_allocation_successful
= false;
2806 color
->hard_regno
= best
+ V0_REGNUM
;
2807 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2808 fprintf (dump_file
, " Allocating [v%d:v%d] to color %d\n",
2809 best
, best
+ color
->group
->size
- 1, color
->id
);
2810 m_allocated_fprs
|= ((1U << color
->group
->size
) - 1) << best
;
2814 // See if ALLOCNO ends a subchain of single registers that can be split
2815 // off without affecting the rest of the chain, and without introducing
2816 // any moves. Return the start of the chain if so (which might be ALLOCNO
2817 // itself), otherwise return null.
2818 early_ra::allocno_info
*
2819 early_ra::find_independent_subchain (allocno_info
*allocno
)
2821 // Make sure ALLOCNO ends a natural subchain.
2822 if (auto *next_allocno
= chain_next (allocno
))
2823 if (next_allocno
->start_point
+ 1 >= allocno
->end_point
)
2826 // Check the allocnos in the purported subchain and find the other end.
2829 auto *group
= allocno
->group ();
2830 if (group
->m_color_rep
== group
)
2832 if (group
->size
!= 1)
2835 auto *prev_allocno
= chain_prev (allocno
);
2836 if (!prev_allocno
|| allocno
->start_point
+ 1 < prev_allocno
->end_point
)
2839 allocno
= prev_allocno
;
2843 // Search the colors starting at index FIRST_COLOR whose FPRs do not belong
2844 // to FPR_CONFLICTS. Return the first such color that has no group. If all
2845 // such colors have groups, instead return the color with the latest
2846 // (smallest) start point.
2847 early_ra::color_info
*
2848 early_ra::find_oldest_color (unsigned int first_color
,
2849 unsigned int fpr_conflicts
)
2851 color_info
*best
= nullptr;
2852 unsigned int best_start_point
= ~0U;
2853 for (unsigned int ci
= first_color
; ci
< m_colors
.length (); ++ci
)
2855 auto *color
= m_colors
[ci
];
2856 if (fpr_conflicts
& (1U << (color
->hard_regno
- V0_REGNUM
)))
2860 auto chain_head
= color
->group
->chain_heads ()[0];
2861 auto start_point
= m_allocnos
[chain_head
]->start_point
;
2862 if (!best
|| best_start_point
> start_point
)
2865 best_start_point
= start_point
;
2871 // If there are some spare FPRs that can be reused without introducing saves,
2872 // restores, or moves, use them to "broaden" the allocation, in order to give
2873 // the scheduler more freedom. This is particularly useful for forming LDPs
2876 early_ra::broaden_colors ()
2878 // Create dummy colors for every leftover FPR that can be used cheaply.
2879 unsigned int first_color
= m_colors
.length ();
2880 for (unsigned int fpr
= 0; fpr
< 32; ++fpr
)
2881 if (((m_allocated_fprs
| m_call_preserved_fprs
) & (1U << fpr
)) == 0)
2883 auto *color
= region_allocate
<color_info
> ();
2884 color
->id
= m_colors
.length ();
2885 color
->hard_regno
= V0_REGNUM
+ fpr
;
2886 color
->group
= nullptr;
2887 m_colors
.safe_push (color
);
2890 // Exit early if there are no spare FPRs.
2891 if (first_color
== m_colors
.length ())
2894 // Go through the allocnos in order, seeing if there is a subchain of
2895 // single-FPR allocnos that can be split off from the containingg clique.
2896 // Allocate such subchains to the new colors on an oldest-first basis.
2897 for (auto *allocno
: m_sorted_allocnos
)
2898 if (auto *start_allocno
= find_independent_subchain (allocno
))
2900 unsigned int fpr_conflicts
= 0;
2901 auto *member
= allocno
;
2904 fpr_conflicts
|= ~member
->group ()->fpr_candidates
;
2905 if (member
== start_allocno
)
2907 member
= m_allocnos
[member
->chain_prev
];
2910 auto *color
= find_oldest_color (first_color
, fpr_conflicts
);
2916 auto *group
= allocno
->group ();
2917 color
->group
= group
;
2918 group
->color
= color
->id
;
2919 group
->chain_heads ()[0] = INVALID_ALLOCNO
;
2923 auto chain_head
= color
->group
->chain_heads ()[0];
2924 auto start_point
= m_allocnos
[chain_head
]->start_point
;
2925 if (start_point
>= allocno
->end_point
)
2926 // Allocating to COLOR isn't viable, and it was the best
2927 // option available.
2930 auto *next_allocno
= chain_next (allocno
);
2931 if (!next_allocno
|| next_allocno
->start_point
<= start_point
)
2932 // The current allocation gives at least as much scheduling
2933 // freedom as COLOR would.
2937 // Unlink the chain.
2938 if (auto *next_allocno
= chain_next (allocno
))
2939 next_allocno
->chain_prev
= start_allocno
->chain_prev
;
2940 if (auto *prev_allocno
= chain_prev (start_allocno
))
2941 prev_allocno
->chain_next
= allocno
->chain_next
;
2943 // Make the subchain use COLOR.
2944 allocno
->chain_next
= color
->group
->chain_heads ()[0];
2945 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2946 fprintf (dump_file
, "Moving to optional color %d (register %s):",
2947 color
->id
, reg_names
[color
->hard_regno
]);
2950 auto *group
= allocno
->group ();
2951 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2952 fprintf (dump_file
, " r%d", group
->regno
);
2953 group
->m_color_rep
= color
->group
;
2954 group
->color_rep_offset
= 0;
2955 if (allocno
== start_allocno
)
2957 allocno
= m_allocnos
[allocno
->chain_prev
];
2959 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2960 fprintf (dump_file
, "\n");
2961 color
->group
->chain_heads ()[0] = start_allocno
->id
;
2965 // Record the final choice of hard register for each allocno.
2967 early_ra::finalize_allocation ()
2969 for (auto *allocno
: m_allocnos
)
2971 if (allocno
->is_shared ())
2973 auto *group
= allocno
->group ();
2974 auto *rep
= group
->color_rep ();
2975 auto rep_regno
= m_colors
[rep
->color
]->hard_regno
;
2976 auto group_regno
= rep_regno
+ group
->color_rep_offset
;
2977 allocno
->hard_regno
= group_regno
+ allocno
->offset
* group
->stride
;
2979 for (auto *allocno
: m_shared_allocnos
)
2980 allocno
->hard_regno
= m_allocnos
[allocno
->related_allocno
]->hard_regno
;
2983 // Replace any allocno references in REFS with the allocated register.
2985 early_ra::replace_regs (df_ref refs
)
2987 bool changed
= false;
2988 for (df_ref ref
= refs
; ref
; ref
= DF_REF_NEXT_LOC (ref
))
2990 auto range
= get_allocno_subgroup (DF_REF_REG (ref
));
2994 auto new_regno
= range
.allocno (0)->hard_regno
;
2995 *DF_REF_LOC (ref
) = gen_rtx_REG (GET_MODE (DF_REF_REG (ref
)), new_regno
);
3001 // Try to make INSN match its FPR-related constraints. If this needs
3002 // a source operand (SRC) to be copied to a destination operand (DEST)
3003 // before INSN, add the associated (DEST, SRC) pairs to MOVES.
3005 // Return -1 on failure, otherwise return a ?/!-style reject count.
3006 // The reject count doesn't model the moves, just the internal alternative
3009 early_ra::try_enforce_constraints (rtx_insn
*insn
,
3010 vec
<std::pair
<int, int>> &moves
)
3012 if (!constrain_operands (0, get_preferred_alternatives (insn
)))
3015 // Pick the alternative with the lowest cost.
3017 auto alts
= get_preferred_alternatives (insn
);
3018 for (int altno
= 0; altno
< recog_data
.n_alternatives
; ++altno
)
3020 if (!(alts
& ALTERNATIVE_BIT (altno
)))
3023 auto *op_alt
= &recog_op_alt
[altno
* recog_data
.n_operands
];
3024 if (!likely_alternative_match_p (op_alt
))
3027 auto_vec
<std::pair
<int, int>, 4> new_moves
;
3028 for (int opno
= 0; opno
< recog_data
.n_operands
; ++opno
)
3030 rtx op
= recog_data
.operand
[opno
];
3032 && FP_REGNUM_P (REGNO (op
))
3033 && op_alt
[opno
].matched
>= 0)
3035 rtx old_src
= recog_data
.operand
[op_alt
[opno
].matched
];
3036 if (!operands_match_p (op
, old_src
))
3038 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
3041 rtx other
= recog_data
.operand
[i
];
3042 if (reg_overlap_mentioned_p (op
, other
))
3050 new_moves
.safe_push ({ opno
, op_alt
[opno
].matched
});
3054 int cost
= count_rejects (op_alt
) + new_moves
.length () * 7;
3055 if (best
< 0 || cost
< best
)
3059 moves
.safe_splice (new_moves
);
3065 // Make INSN matches its FPR-related constraints.
3067 early_ra::enforce_constraints (rtx_insn
*insn
)
3069 extract_insn (insn
);
3070 preprocess_constraints (insn
);
3072 // First try with the operands they are.
3073 auto_vec
<std::pair
<int, int>, 4> moves
;
3074 int cost
= try_enforce_constraints (insn
, moves
);
3076 // Next try taking advantage of commutativity.
3077 for (int opno
= 0; opno
< recog_data
.n_operands
- 1; ++opno
)
3078 if (recog_data
.constraints
[opno
][0] == '%')
3080 std::swap (*recog_data
.operand_loc
[opno
],
3081 *recog_data
.operand_loc
[opno
+ 1]);
3082 std::swap (recog_data
.operand
[opno
],
3083 recog_data
.operand
[opno
+ 1]);
3084 auto_vec
<std::pair
<int, int>, 4> swapped_moves
;
3085 int swapped_cost
= try_enforce_constraints (insn
, swapped_moves
);
3086 if (swapped_cost
>= 0 && (cost
< 0 || swapped_cost
< cost
))
3088 cost
= swapped_cost
;
3090 moves
.safe_splice (swapped_moves
);
3094 std::swap (*recog_data
.operand_loc
[opno
],
3095 *recog_data
.operand_loc
[opno
+ 1]);
3096 std::swap (recog_data
.operand
[opno
],
3097 recog_data
.operand
[opno
+ 1]);
3101 // The allocation should ensure that there is at least one valid combination.
3102 // It's too late to back out now if not.
3103 gcc_assert (cost
>= 0);
3104 for (int i
= 0; i
< recog_data
.n_dups
; ++i
)
3106 int dup_of
= recog_data
.dup_num
[i
];
3107 rtx new_op
= *recog_data
.operand_loc
[dup_of
];
3108 if (new_op
!= recog_data
.operand
[dup_of
])
3109 *recog_data
.dup_loc
[i
] = copy_rtx (new_op
);
3111 for (auto move
: moves
)
3113 int dest_opno
= move
.first
;
3114 int src_opno
= move
.second
;
3115 rtx dest
= recog_data
.operand
[dest_opno
];
3116 rtx old_src
= recog_data
.operand
[src_opno
];
3117 rtx new_src
= lowpart_subreg (GET_MODE (old_src
), dest
, GET_MODE (dest
));
3118 emit_insn_before (gen_move_insn (new_src
, old_src
), insn
);
3119 *recog_data
.operand_loc
[src_opno
] = new_src
;
3123 // See whether INSN is an instruction that operates on multi-register vectors,
3124 // and if we have decided to make it use strided rather than consecutive
3125 // accesses. Update the pattern and return true if so.
3127 early_ra::maybe_convert_to_strided_access (rtx_insn
*insn
)
3129 if (!NONJUMP_INSN_P (insn
) || recog_memoized (insn
) < 0)
3132 auto stride_type
= get_attr_stride_type (insn
);
3133 rtx pat
= PATTERN (insn
);
3135 if (stride_type
== STRIDE_TYPE_LUTI_CONSECUTIVE
3136 || stride_type
== STRIDE_TYPE_LD1_CONSECUTIVE
)
3137 op
= SET_DEST (pat
);
3138 else if (stride_type
== STRIDE_TYPE_ST1_CONSECUTIVE
)
3139 op
= XVECEXP (SET_SRC (pat
), 0, 1);
3143 auto range
= get_allocno_subgroup (op
);
3144 if (!range
|| range
.group
->stride
== 1)
3147 gcc_assert (range
.start
== 0 && range
.count
== range
.group
->size
);
3148 auto elt_mode
= GET_MODE_INNER (GET_MODE (op
));
3149 auto single_mode
= aarch64_full_sve_mode (elt_mode
).require ();
3150 auto_vec
<rtx
, 4> regs
;
3151 for (unsigned int i
= 0; i
< range
.count
; ++i
)
3152 regs
.quick_push (gen_rtx_REG (single_mode
, range
.allocno (i
)->hard_regno
));
3154 extract_insn (insn
);
3155 if (stride_type
== STRIDE_TYPE_LD1_CONSECUTIVE
)
3157 auto unspec
= XINT (SET_SRC (pat
), 1);
3158 if (range
.count
== 2)
3159 pat
= gen_aarch64_strided2 (unspec
, GET_MODE (op
), regs
[0], regs
[1],
3160 recog_data
.operand
[1],
3161 recog_data
.operand
[2]);
3163 pat
= gen_aarch64_strided4 (unspec
, GET_MODE (op
),
3164 regs
[0], regs
[1], regs
[2], regs
[3],
3165 recog_data
.operand
[1],
3166 recog_data
.operand
[2]);
3168 else if (stride_type
== STRIDE_TYPE_ST1_CONSECUTIVE
)
3170 auto unspec
= XINT (SET_SRC (pat
), 1);
3171 if (range
.count
== 2)
3172 pat
= gen_aarch64_strided2 (unspec
, GET_MODE (op
),
3173 recog_data
.operand
[0],
3174 recog_data
.operand
[2], regs
[0], regs
[1]);
3176 pat
= gen_aarch64_strided4 (unspec
, GET_MODE (op
),
3177 recog_data
.operand
[0],
3178 recog_data
.operand
[2],
3179 regs
[0], regs
[1], regs
[2], regs
[3]);
3180 // Ensure correct sharing for the source memory.
3182 // ??? Why doesn't the generator get this right?
3183 XVECEXP (SET_SRC (pat
), 0, XVECLEN (SET_SRC (pat
), 0) - 1)
3184 = *recog_data
.dup_loc
[0];
3186 else if (stride_type
== STRIDE_TYPE_LUTI_CONSECUTIVE
)
3188 auto bits
= INTVAL (XVECEXP (SET_SRC (pat
), 0, 4));
3189 if (range
.count
== 2)
3190 pat
= gen_aarch64_sme_lut_strided2 (bits
, single_mode
,
3192 recog_data
.operand
[1],
3193 recog_data
.operand
[2]);
3195 pat
= gen_aarch64_sme_lut_strided4 (bits
, single_mode
,
3196 regs
[0], regs
[1], regs
[2], regs
[3],
3197 recog_data
.operand
[1],
3198 recog_data
.operand
[2]);
3202 PATTERN (insn
) = pat
;
3203 INSN_CODE (insn
) = -1;
3204 df_insn_rescan (insn
);
3208 // We've successfully allocated the current region. Apply the allocation
3209 // to the instructions.
3211 early_ra::apply_allocation ()
3214 for (auto insn_range
: m_insn_ranges
)
3215 for (rtx_insn
*insn
= insn_range
.first
;
3216 insn
!= insn_range
.second
;
3219 prev
= PREV_INSN (insn
);
3223 bool changed
= maybe_convert_to_strided_access (insn
);
3224 changed
|= replace_regs (DF_INSN_DEFS (insn
));
3225 changed
|= replace_regs (DF_INSN_USES (insn
));
3226 if (changed
&& NONDEBUG_INSN_P (insn
))
3228 if (GET_CODE (PATTERN (insn
)) != USE
3229 && GET_CODE (PATTERN (insn
)) != CLOBBER
3230 && !is_move_set (PATTERN (insn
)))
3231 enforce_constraints (insn
);
3233 // A REG_EQUIV note establishes an equivalence throughout
3234 // the function, but here we're reusing hard registers for
3235 // multiple pseudo registers. We also no longer need REG_EQUIV
3236 // notes that record potential spill locations, since we've
3237 // allocated the pseudo register without spilling.
3238 rtx
*ptr
= ®_NOTES (insn
);
3240 if (REG_NOTE_KIND (*ptr
) == REG_EQUIV
)
3241 *ptr
= XEXP (*ptr
, 1);
3243 ptr
= &XEXP (*ptr
, 1);
3245 changed
|= replace_regs (DF_INSN_EQ_USES (insn
));
3247 df_insn_rescan (insn
);
3250 for (auto *insn
: m_dead_insns
)
3254 // Try to allocate the current region. Update the instructions if successful.
3256 early_ra::process_region ()
3258 for (auto *allocno
: m_allocnos
)
3260 allocno
->chain_next
= INVALID_ALLOCNO
;
3261 allocno
->chain_prev
= INVALID_ALLOCNO
;
3264 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3271 find_strided_accesses ();
3273 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3278 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3283 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3287 if (!m_allocation_successful
)
3291 finalize_allocation ();
3293 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3295 fprintf (dump_file
, "\nAllocation successful\nFinal allocation:\n");
3300 apply_allocation ();
3303 // Return true if INSN would become dead if we successfully allocate the
3306 early_ra::is_dead_insn (rtx_insn
*insn
)
3308 rtx set
= single_set (insn
);
3312 rtx dest
= SET_DEST (set
);
3313 auto dest_range
= get_allocno_subgroup (dest
);
3317 for (auto &allocno
: dest_range
.allocnos ())
3318 if (bitmap_bit_p (m_live_allocnos
, allocno
.id
))
3321 if (side_effects_p (set
))
3327 // Build up information about block BB. IS_ISOLATED is true if the
3328 // block is not part of a larger region.
3330 early_ra::process_block (basic_block bb
, bool is_isolated
)
3333 m_current_point
+= 1;
3334 m_current_bb_point
= m_current_point
;
3336 // Process live-out FPRs.
3337 bitmap live_out
= df_get_live_out (bb
);
3338 for (unsigned int regno
= V0_REGNUM
; regno
<= V31_REGNUM
; ++regno
)
3339 if (bitmap_bit_p (live_out
, regno
))
3340 record_fpr_use (regno
);
3342 // Process live-out allocnos. We don't track individual FPR liveness
3343 // across block boundaries, so we have to assume that the whole pseudo
3344 // register is live.
3347 EXECUTE_IF_AND_IN_BITMAP (df_get_live_out (bb
), m_fpr_pseudos
,
3348 FIRST_PSEUDO_REGISTER
, regno
, bi
)
3350 auto range
= get_allocno_subgroup (regno_reg_rtx
[regno
]);
3351 for (auto &allocno
: range
.allocnos ())
3352 record_allocno_use (&allocno
);
3355 m_current_point
+= 1;
3357 record_artificial_refs (0);
3359 bool is_first
= true;
3360 rtx_insn
*start_insn
= BB_END (bb
);
3362 FOR_BB_INSNS_REVERSE (bb
, insn
)
3364 if (!NONDEBUG_INSN_P (insn
))
3367 // CLOBBERs are used to prevent pseudos from being upwards exposed.
3368 // We can ignore them if allocation is successful.
3369 if (GET_CODE (PATTERN (insn
)) == CLOBBER
)
3371 if (get_allocno_subgroup (XEXP (PATTERN (insn
), 0)))
3372 m_dead_insns
.safe_push (insn
);
3376 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3379 fprintf (dump_file
, "\nBlock %d:\n", bb
->index
);
3380 fprintf (dump_file
, "%6d:", m_current_point
);
3381 pretty_printer rtl_slim_pp
;
3382 rtl_slim_pp
.buffer
->stream
= dump_file
;
3383 print_insn (&rtl_slim_pp
, insn
, 1);
3384 pp_flush (&rtl_slim_pp
);
3385 fprintf (dump_file
, "\n");
3389 if (is_dead_insn (insn
))
3391 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3392 fprintf (dump_file
, "%14s -- dead\n", "");
3393 m_dead_insns
.safe_push (insn
);
3397 record_insn_refs (insn
);
3398 rtx pat
= PATTERN (insn
);
3399 if (is_move_set (pat
))
3400 record_copy (SET_DEST (pat
), SET_SRC (pat
), true);
3403 extract_insn (insn
);
3404 record_constraints (insn
);
3408 // See whether we have a complete region, with no remaining live
3411 && bitmap_empty_p (m_live_allocnos
)
3413 && m_allocation_successful
3414 && !m_allocnos
.is_empty ())
3416 rtx_insn
*prev_insn
= PREV_INSN (insn
);
3417 m_insn_ranges
.safe_push ({ start_insn
, prev_insn
});
3419 start_new_region ();
3421 start_insn
= prev_insn
;
3424 m_insn_ranges
.safe_push ({ start_insn
, BB_HEAD (bb
) });
3426 record_artificial_refs (DF_REF_AT_TOP
);
3428 // Process live-in FPRs.
3429 bitmap live_in
= df_get_live_in (bb
);
3430 for (unsigned int regno
= V0_REGNUM
; regno
<= V31_REGNUM
; ++regno
)
3431 if (bitmap_bit_p (live_in
, regno
)
3432 && (m_live_fprs
& (1U << (regno
- V0_REGNUM
))))
3433 record_fpr_def (regno
);
3435 // Process live-in allocnos.
3436 EXECUTE_IF_AND_IN_BITMAP (live_in
, m_fpr_pseudos
,
3437 FIRST_PSEUDO_REGISTER
, regno
, bi
)
3439 auto range
= get_allocno_subgroup (regno_reg_rtx
[regno
]);
3440 for (auto &allocno
: range
.allocnos ())
3441 if (bitmap_bit_p (m_live_allocnos
, allocno
.id
))
3442 record_allocno_def (&allocno
);
3445 m_current_point
+= 1;
3447 bitmap_clear (m_live_allocnos
);
3451 // Divide the function into regions, such that there no edges into or out
3452 // of the region have live "FPR pseudos".
3454 early_ra::process_blocks ()
3456 auto_sbitmap
visited (last_basic_block_for_fn (m_fn
));
3457 auto_sbitmap
fpr_pseudos_live_out (last_basic_block_for_fn (m_fn
));
3458 auto_sbitmap
fpr_pseudos_live_in (last_basic_block_for_fn (m_fn
));
3460 bitmap_clear (visited
);
3461 bitmap_clear (fpr_pseudos_live_out
);
3462 bitmap_clear (fpr_pseudos_live_in
);
3464 // Record which blocks have live FPR pseudos on entry and exit.
3466 FOR_EACH_BB_FN (bb
, m_fn
)
3468 if (bitmap_intersect_p (df_get_live_out (bb
), m_fpr_pseudos
))
3469 bitmap_set_bit (fpr_pseudos_live_out
, bb
->index
);
3470 if (bitmap_intersect_p (df_get_live_in (bb
), m_fpr_pseudos
))
3471 bitmap_set_bit (fpr_pseudos_live_in
, bb
->index
);
3474 struct stack_node
{ edge_iterator ei
; basic_block bb
; };
3476 auto_vec
<stack_node
, 32> stack
;
3477 auto_vec
<basic_block
, 32> region
;
3479 // Go through the function in reverse postorder and process the region
3480 // containing each block.
3481 unsigned int n_blocks
= df_get_n_blocks (DF_FORWARD
);
3482 int *order
= df_get_postorder (DF_FORWARD
);
3483 for (unsigned int bbi
= 0; bbi
< n_blocks
; ++bbi
)
3485 basic_block bb
= BASIC_BLOCK_FOR_FN (m_fn
, order
[bbi
]);
3486 if (bb
->index
< NUM_FIXED_BLOCKS
)
3489 if (!bitmap_set_bit (visited
, bb
->index
))
3492 // Process forward edges before backward edges (so push backward
3493 // edges first). Build the region in an approximation of reverse
3495 if (bitmap_bit_p (fpr_pseudos_live_in
, bb
->index
))
3496 stack
.quick_push ({ ei_start (bb
->preds
), nullptr });
3497 if (bitmap_bit_p (fpr_pseudos_live_out
, bb
->index
))
3498 stack
.quick_push ({ ei_start (bb
->succs
), bb
});
3500 region
.safe_push (bb
);
3501 while (!stack
.is_empty ())
3503 auto &node
= stack
.last ();
3504 if (ei_end_p (node
.ei
))
3507 region
.safe_push (node
.bb
);
3511 edge e
= ei_edge (node
.ei
);
3514 // A forward edge from a node that has not yet been added
3516 if (bitmap_bit_p (fpr_pseudos_live_in
, e
->dest
->index
)
3517 && bitmap_set_bit (visited
, e
->dest
->index
))
3519 stack
.safe_push ({ ei_start (e
->dest
->preds
), nullptr });
3520 if (bitmap_bit_p (fpr_pseudos_live_out
, e
->dest
->index
))
3521 stack
.safe_push ({ ei_start (e
->dest
->succs
), e
->dest
});
3523 region
.safe_push (e
->dest
);
3530 // A backward edge from a node that has already been added
3532 if (bitmap_bit_p (fpr_pseudos_live_out
, e
->src
->index
)
3533 && bitmap_set_bit (visited
, e
->src
->index
))
3535 if (bitmap_bit_p (fpr_pseudos_live_in
, e
->src
->index
))
3536 stack
.safe_push ({ ei_start (e
->src
->preds
), nullptr });
3537 stack
.safe_push ({ ei_start (e
->src
->succs
), e
->src
});
3544 m_current_point
= 2;
3545 start_new_region ();
3547 if (region
.is_empty ())
3548 process_block (bb
, true);
3551 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3553 fprintf (dump_file
, "\nRegion (from %d):", bb
->index
);
3554 for (unsigned int j
= 0; j
< region
.length (); ++j
)
3555 fprintf (dump_file
, " %d", region
[j
]->index
);
3556 fprintf (dump_file
, "\n");
3558 for (unsigned int j
= 0; j
< region
.length (); ++j
)
3560 basic_block bb
= region
[j
];
3562 = ((j
== 0 && !bitmap_bit_p (fpr_pseudos_live_out
, bb
->index
))
3563 || (j
== region
.length () - 1
3564 && !bitmap_bit_p (fpr_pseudos_live_in
, bb
->index
)));
3565 process_block (bb
, is_isolated
);
3568 region
.truncate (0);
3570 if (!m_allocnos
.is_empty () && m_allocation_successful
)
3575 // Run the pass on the current function.
3577 early_ra::execute ()
3581 preprocess_insns ();
3582 propagate_pseudo_reg_info ();
3583 choose_fpr_pseudos ();
3584 if (bitmap_empty_p (m_fpr_pseudos
))
3587 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3588 dump_pseudo_regs ();
3594 class pass_early_ra
: public rtl_opt_pass
3597 pass_early_ra (gcc::context
*ctxt
)
3598 : rtl_opt_pass (pass_data_early_ra
, ctxt
)
3601 // opt_pass methods:
3602 virtual bool gate (function
*);
3603 virtual unsigned int execute (function
*);
3607 pass_early_ra::gate (function
*)
3609 // Require a vector ISA to be enabled.
3610 if (!TARGET_SIMD
&& !TARGET_SVE
)
3613 if (aarch64_early_ra
== AARCH64_EARLY_RA_NONE
)
3616 if (aarch64_early_ra
== AARCH64_EARLY_RA_STRIDED
3617 && !TARGET_STREAMING_SME2
)
3624 pass_early_ra::execute (function
*fn
)
3626 early_ra (fn
).execute ();
3632 // Create a new instance of the pass.
3634 make_pass_aarch64_early_ra (gcc::context
*ctxt
)
3636 return new pass_early_ra (ctxt
);