// Early register allocation pass.
// Copyright (C) 2023 Free Software Foundation, Inc.
//
// This file is part of GCC.
//
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
//
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

// This pass implements a simple form of early register allocation.
// It is restricted to FP/SIMD registers, and it only allocates
// a region of FP/SIMD usage if it can do so without any spilling.
// It punts on anything too complicated, leaving it to the real
// register allocator.
//
// There are two main purposes:
//
// (1) The pass runs before scheduling.  It therefore has a chance to
//     bag a spill-free allocation, if there is one, before scheduling
//     moves things around.
//
// (2) The pass can make use of strided register operations, such as the
//     strided forms of LD1 and ST1 in SME2.
//
// The allocator works at the level of individual FPRs, rather than whole
// pseudo registers.  It is mostly intended to help optimize ACLE code.
//
// The pass is very simplistic.  There are many things that could be improved.
#define IN_TARGET_CODE 1

#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "df.h"
#include "rtl-ssa.h"
#include "tree-pass.h"
#include "target.h"
#include "expr.h"
#include "cfgrtl.h"
#include "print-rtl.h"
#include "insn-attr.h"
#include "insn-opinit.h"
#include "reload.h"

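// A simple wrapper iterator around an integer-like value, providing the
// usual pre/post increment and decrement operations.  Used via
// allocno_iterator below to iterate over ranges of allocno indices.
// (Descriptive summary of the class that follows.)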
template<typename T>
class simple_iterator : public wrapper_iterator<T>
{
public:
  using wrapper_iterator<T>::wrapper_iterator;

  simple_iterator &operator-- () { --this->m_contents; return *this; }
  simple_iterator operator-- (int) { return this->m_contents--; }
  simple_iterator &operator++ () { ++this->m_contents; return *this; }
  simple_iterator operator++ (int) { return this->m_contents++; }
};

using namespace rtl_ssa;

namespace {
const pass_data pass_data_early_ra =
{
  RTL_PASS, // type
  "early_ra", // name
  OPTGROUP_NONE, // optinfo_flags
  TV_NONE, // tv_id
  0, // properties_required
  0, // properties_provided
  0, // properties_destroyed
  0, // todo_flags_start
  TODO_df_finish, // todo_flags_finish
};

using allocno_iterator = simple_iterator<unsigned int>;

// Class that represents one run of the pass.
class early_ra
{
public:
  early_ra (function *fn);
  ~early_ra ();
  void execute ();

private:
  static_assert (MAX_RECOG_OPERANDS <= 32, "Operand mask is 32 bits");
  using operand_mask = uint32_t;

  // Points in the function are represented using "program points".
  // The program points are allocated in reverse order, with smaller
  // numbers indicating later points.  These special values indicate
  // the start and end of a region.
  static constexpr unsigned int START_OF_REGION = ~0U;
  static constexpr unsigned int END_OF_REGION = 0U;
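  //
  // For example (illustrative only): if a region contains instructions
  // I1, I2 and I3 in program order, a backwards walk might give I3
  // point 2, I2 point 4 and I1 point 6, so a live range always has
  // START_POINT >= END_POINT.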

  // An invalid allocno index, used to represent no allocno.
  static constexpr unsigned int INVALID_ALLOCNO = ~0U;

  // Enumerates the single FPR sizes that matter for register allocation.
  // Anything smaller than 64 bits is treated as FPR_D.
  enum fpr_size_info
  {
    FPR_D,
    FPR_Q,
    FPR_Z
  };

  // A live range for an FPR, containing program points [START_POINT,
  // END_POINT].  If ALLOCNO is not INVALID_ALLOCNO, the FPR is known
  // to be equal to ALLOCNO for the duration of the live range.
  struct fpr_range_info
  {
    unsigned int start_point;
    unsigned int end_point;
    unsigned int allocno;
  };
  // Flags used in pseudo_reg_info.
  //
  // Whether the pseudo register occurs in at least one instruction
  // alternative that matches (respectively) V0-V7, V0-V15, V0-V31
  // or a non-FP register.
  static constexpr unsigned int ALLOWS_FPR8 = 1U << 0;
  static constexpr unsigned int ALLOWS_FPR16 = 1U << 1;
  static constexpr unsigned int ALLOWS_FPR32 = 1U << 2;
  static constexpr unsigned int ALLOWS_NONFPR = 1U << 3;
  //
  // Likewise whether the register occurs in an instruction that requires
  // the associated register type.
  static constexpr unsigned int NEEDS_FPR8 = 1U << 4;
  static constexpr unsigned int NEEDS_FPR16 = 1U << 5;
  static constexpr unsigned int NEEDS_FPR32 = 1U << 6;
  static constexpr unsigned int NEEDS_NONFPR = 1U << 7;
  //
  // Whether the pseudo register is copied to or from a hard FP register.
  static constexpr unsigned int HAS_FPR_COPY = 1U << 8;
  //
  // Whether the pseudo register is copied to or from a hard non-FP register.
  static constexpr unsigned int HAS_NONFPR_COPY = 1U << 9;
  //
  // Whether the pseudo register is used as a multi-register vector operand
  // to an instruction that supports strided accesses, and whether it is used
  // as a multi-register vector operand in some other non-move instruction.
  static constexpr unsigned int HAS_FLEXIBLE_STRIDE = 1U << 10;
  static constexpr unsigned int HAS_FIXED_STRIDE = 1U << 11;

  // Flags that should be propagated across moves between pseudo registers.
  static constexpr unsigned int PSEUDO_COPY_FLAGS = ~(HAS_FLEXIBLE_STRIDE
						      | HAS_FIXED_STRIDE);

  // Information about a copy between two registers.
  struct reg_copy_info
  {
    // The two registers, in order.
    unsigned int regnos[2];

    // Index I gives the index of the next reg_copy_info involving REGNOS[I],
    // or 0 if none.
    unsigned int next_copies[2];
  };
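
  // For example (illustrative): if copies C1 and C2 both involve pseudo
  // register R, and C2 was recorded after C1, then R's first_copy (see
  // pseudo_reg_info below) points at C2, and C2's next_copies entry for
  // R points at C1, forming a per-register chain through shared records.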

  // Information about a pseudo register.
  struct pseudo_reg_info
  {
    // Flags describing how the register is used, defined above.
    unsigned int flags : 16;

    // The mode of the pseudo register, cached for convenience.
    machine_mode mode : 16;

    // The index of the first copy, or 0 if none.
    unsigned int first_copy;
  };

  // Information about a group of allocnos that have a fixed offset
  // relative to each other.  The allocnos in each group must be allocated
  // together.
  //
  // Allocnos that can share the same hard register are eventually
  // chained together.  These chains represent edges on a graph of
  // allocnos, such that two allocnos joined by an edge use the same FPR.
  // These chains are formed between individual allocnos rather than
  // whole groups, although the system is required to be self-consistent.
  // Each clique in the graph has at least one "full-width" allocno group
  // that has one allocno for every FPR that needs to be allocated to
  // the clique.
  //
  // One group of allocnos is chosen as the "color representative" of
  // each clique in the graph.  This group will be a full-width group.
  struct allocno_info;
  struct allocno_group_info
  {
    array_slice<unsigned int> chain_heads ();
    array_slice<allocno_info> allocnos ();
    allocno_group_info *color_rep ();
    allocno_info *allocno (unsigned int);

    // The color representative of the containing clique.
    allocno_group_info *m_color_rep;

    // The pseudo register associated with this allocno group,
    // or INVALID_REGNUM if none.
    unsigned int regno;

    // The offset of the first allocno (and thus this group) from the start
    // of color_rep.
    unsigned int color_rep_offset : 8;

    // The number of allocnos in the group, and thus the number of FPRs
    // that need to be allocated.
    unsigned int size : 8;

    // The gap between FPRs in the group.  This is normally 1, but can be
    // higher if we've decided to use strided multi-register accesses.
    unsigned int stride : 4;

    // Used temporarily while deciding which allocnos should have non-unit
    // strides; see find_strided_accesses for details.
    int consecutive_pref : 4;
    int strided_polarity : 2;

    // The largest size of FPR needed by references to the allocno group.
    fpr_size_info fpr_size : 2;

    // True if all non-move accesses can be converted to strided form.
    unsigned int has_flexible_stride : 1;

    // True if we've assigned a color index to this group.
    unsigned int has_color : 1;

    // The mask of FPRs that would make valid choices for the first allocno,
    // taking the requirements of all the allocnos in the group into account.
    unsigned int fpr_candidates;

    // The index of the color that has been assigned to the containing clique.
    unsigned int color;
  };
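
  // In memory, each allocno_group_info is laid out with its trailing
  // arrays immediately after it (an illustrative sketch; see
  // chain_heads (), allocnos () and create_allocno_group for details):
  //
  //   +--------------------+-------------------------+------------------+
  //   | allocno_group_info | chain_heads[0..size-1]  | allocnos[0..n-1] |
  //   +--------------------+-------------------------+------------------+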

  // Represents a single FPR-sized quantity that needs to be allocated.
  // Each allocno is identified by index (for compactness).
  //
  // Quantities that span multiple FPRs are assigned groups of consecutive
  // allocnos.  Quantities that occupy a single FPR are assigned their own
  // group.
  struct allocno_info
  {
    allocno_group_info *group ();

    // The allocno's unique identifier.
    unsigned int id;

    // The offset of this allocno into the containing group.
    unsigned int offset : 8;

    // The number of allocnos in the containing group.
    unsigned int group_size : 8;

    // If the allocno has an affinity with at least one hard register
    // (so that choosing that hard register would avoid a copy), this is
    // the number of one such hard register, otherwise it is
    // FIRST_PSEUDO_REGISTER.
    unsigned int hard_regno : 8;

    // Set to 1 if the allocno has a single definition or 2 if it has more.
    unsigned int num_defs : 2;

    // True if, at START_POINT, another allocno is copied to this one.
    // See callers of record_copy for what counts as a copy.
    unsigned int is_copy_dest : 1;

    // True if, at START_POINT, another allocno is copied to this one,
    // and if the allocnos at both ends of the copy chain have an affinity
    // with the same hard register.
    unsigned int is_strong_copy_dest : 1;

    // True if, at END_POINT, this allocno is copied to another one,
    // and both allocnos have an affinity with the same hard register.
    unsigned int is_strong_copy_src : 1;

    // True if the allocno is subject to an earlyclobber at END_POINT,
    // so that it cannot be tied to the destination of the instruction.
    unsigned int is_earlyclobbered : 1;

    // The inclusive range of program points spanned by the allocno.
    // START_POINT >= END_POINT.
    unsigned int start_point;
    unsigned int end_point;

    // If, at END_POINT, this allocno is copied to another allocno, this
    // is the index of that allocno, otherwise it is INVALID_ALLOCNO.
    // See callers of record_copy for what counts as a copy.
    unsigned int copy_dest;

    // If this field is not INVALID_ALLOCNO, this allocno is known to be
    // equivalent to EQUIV_ALLOCNO for the whole of this allocno's lifetime.
    unsigned int equiv_allocno;

    union
    {
      // The program point at which the allocno was last defined,
      // or START_OF_REGION if none.  This is only used temporarily
      // while recording allocnos; after that, chain_next below is
      // used instead.
      unsigned int last_def_point;

      // The next chained allocno in program order (i.e. at lower program
      // points), or INVALID_ALLOCNO if none.
      unsigned int chain_next;
    };

    // The previous chained allocno in program order (i.e. at higher
    // program points), or INVALID_ALLOCNO if none.
    unsigned int chain_prev;
  };

  // Information about a full allocno group or a subgroup of it.
  // The subgroup can be empty to indicate "none".
  struct allocno_subgroup
  {
    array_slice<allocno_info> allocnos ();
    allocno_info *allocno (unsigned int);

    // True if a subgroup is present.
    operator bool () const { return count; }

    // The containing group.
    allocno_group_info *group;

    // The offset of the subgroup from the start of GROUP.
    unsigned int start;

    // The number of allocnos in the subgroup.
    unsigned int count;
  };

  // Represents information about a copy between an allocno and an FPR.
  // This establishes an affinity between the allocno and the FPR.
  struct allocno_copy_info
  {
    // The allocno involved in the copy.
    unsigned int allocno;

    // The FPR involved in the copy, relative to V0_REGNUM.
    unsigned int fpr : 16;

    // A measure of how strong the affinity between the allocno and FPR is.
    unsigned int weight : 16;
  };

  // Information about a possible allocno chain.
  struct chain_candidate_info
  {
    // The candidate target allocno.
    allocno_info *allocno;

    // A rating of the candidate (higher is better).
    int score;
  };

  // Information about an allocno color.
  struct color_info
  {
    // The color's unique identifier.
    int id;

    // The allocated hard register, when known.
    unsigned int hard_regno;

    // The clique's representative group.
    allocno_group_info *group;

    // Weights in favor of choosing each FPR as the first register for GROUP.
    int8_t fpr_preferences[32];
  };

  template<typename T, typename... Ts>
  T *region_allocate (Ts...);

  allocno_info *chain_prev (allocno_info *);
  allocno_info *chain_next (allocno_info *);

  void dump_pseudo_regs ();
  void dump_fpr_ranges ();
  void dump_copies ();
  void dump_allocnos ();
  void dump_colors ();

  iterator_range<allocno_iterator> get_group_allocnos (unsigned int);

  void preprocess_move (rtx, rtx);
  void process_pseudo_reg_constraints (rtx_insn *);
  void preprocess_insns ();

  int fpr_preference (unsigned int);
  void propagate_pseudo_reg_info ();

  void choose_fpr_pseudos ();

  void start_new_region ();

  allocno_group_info *create_allocno_group (unsigned int, unsigned int);
  allocno_subgroup get_allocno_subgroup (rtx);
  void record_fpr_use (unsigned int);
  void record_fpr_def (unsigned int);
  void record_allocno_use (allocno_info *);
  void record_allocno_def (allocno_info *);
  bool valid_equivalence_p (allocno_info *, allocno_info *);
  void record_copy (rtx, rtx, bool = false);
  void record_constraints (rtx_insn *);
  void record_artificial_refs (unsigned int);
  void record_insn_refs (rtx_insn *);

  bool consider_strong_copy_src_chain (allocno_info *);
  int strided_polarity_pref (allocno_info *, allocno_info *);
  void find_strided_accesses ();

  template<unsigned int allocno_info::*field>
  static int cmp_increasing (const void *, const void *);
  bool is_chain_candidate (allocno_info *, allocno_info *);
  int rate_chain (allocno_info *, allocno_info *);
  static int cmp_chain_candidates (const void *, const void *);
  void chain_allocnos (unsigned int &, unsigned int &);
  void set_single_color_rep (allocno_info *, allocno_group_info *,
			     unsigned int);
  void set_color_rep (allocno_group_info *, allocno_group_info *,
		      unsigned int);
  bool try_to_chain_allocnos (allocno_info *, allocno_info *);
  void create_color (allocno_group_info *);
  void form_chains ();

  bool fpr_conflicts_with_allocno_p (unsigned int, allocno_info *);
  bool call_in_range_p (unsigned int, unsigned int, unsigned int);
  unsigned int partial_fpr_clobbers (unsigned int, fpr_size_info);

  void process_copies ();

  static int cmp_decreasing_size (const void *, const void *);
  void allocate_colors ();
  allocno_info *find_independent_subchain (allocno_info *);
  color_info *find_oldest_color (unsigned int, unsigned int);
  void broaden_colors ();
  void finalize_allocation ();

  bool replace_regs (df_ref);
  int try_enforce_constraints (rtx_insn *, vec<std::pair<int, int>> &);
  void enforce_constraints (rtx_insn *);
  bool maybe_convert_to_strided_access (rtx_insn *);
  void apply_allocation ();

  void process_region ();
  bool is_dead_insn (rtx_insn *);
  void process_block (basic_block, bool);
  void process_blocks ();

  // ----------------------------------------------------------------------

  // The function we're operating on.
  function *m_fn;

  // Information about each pseudo register, indexed by REGNO.
  auto_vec<pseudo_reg_info> m_pseudo_regs;

  // All recorded register copies.
  auto_vec<reg_copy_info> m_pseudo_reg_copies;

  // The set of pseudos that we've decided to allocate an FPR to.
  auto_bitmap m_fpr_pseudos;

  // ----------------------------------------------------------------------

  // An obstack for allocating information that is referenced by the member
  // variables below.
  obstack m_region_obstack;
  void *m_region_alloc_start;

  // ----------------------------------------------------------------------

  // The basic block that we're currently processing.
  basic_block m_current_bb;

  // The lowest-numbered program point in the current basic block.
  unsigned int m_current_bb_point;

  // The program point that we're currently processing (described above).
  unsigned int m_current_point;

  // The set of allocnos that are currently live.
  auto_bitmap m_live_allocnos;

  // The set of FPRs that are currently live.
  unsigned int m_live_fprs;

  // ----------------------------------------------------------------------

  // A mask of the FPRs that have already been allocated.
  unsigned int m_allocated_fprs;

  // A mask of the FPRs that must be at least partially preserved by the
  // current function.
  unsigned int m_call_preserved_fprs;

  // True if we haven't yet failed to allocate the current region.
  bool m_allocation_successful;

  // A map from pseudo registers to their associated allocno groups.
  hash_map<int_hash<unsigned int, INVALID_REGNUM>,
	   allocno_group_info *> m_regno_to_group;

  // All recorded copies between allocnos and FPRs.
  auto_vec<allocno_copy_info> m_allocno_copies;

  // All allocnos, by index.
  auto_vec<allocno_info *> m_allocnos;

  // All allocnos, by increasing START_POINT.
  auto_vec<allocno_info *> m_sorted_allocnos;

  // All colors, by index.
  auto_vec<color_info *> m_colors;

  // The instruction ranges that make up the current region,
  // as half-open ranges [LAST, FIRST).
  auto_vec<std::pair<rtx_insn *, rtx_insn *>> m_insn_ranges;

  // The live ranges of each FPR, in order of increasing program point.
  auto_vec<fpr_range_info> m_fpr_ranges[32];

  // For each function call id, a list of program points at which a call
  // to such a function is made.  Each list is in order of increasing
  // program point.
  auto_vec<unsigned int> m_call_points[NUM_ABI_IDS];

  // A list of instructions that can be removed if allocation succeeds.
  auto_vec<rtx_insn *> m_dead_insns;
};

// True if PAT is something that would typically be treated as a move.
static inline bool
is_move_set (rtx pat)
{
  if (GET_CODE (pat) != SET)
    return false;

  rtx dest = SET_DEST (pat);
  if (SUBREG_P (dest))
    dest = SUBREG_REG (dest);
  if (!OBJECT_P (dest))
    return false;

  rtx src = SET_SRC (pat);
  if (SUBREG_P (src))
    src = SUBREG_REG (src);
  if (!OBJECT_P (src) && !CONSTANT_P (src))
    return false;

  return true;
}
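
// For example (illustrative): a SET whose source and destination are
// REGs, SUBREGs of REGs, or MEMs counts as a move, as does a SET of a
// REG from a constant.  A SET whose source is an arithmetic expression
// such as (plus ...) does not, since PLUS is neither an object nor a
// constant.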

// Return true if operand OP is likely to match OP_ALT after register
// allocation.
static bool
likely_operand_match_p (const operand_alternative &op_alt, rtx op)
{
  // Empty constraints match everything.
  const char *constraint = op_alt.constraint;
  if (constraint[0] == 0 || constraint[0] == ',')
    return true;

  for (;;)
    {
      char c = *constraint;
      int len = CONSTRAINT_LEN (c, constraint);
      if (c == 0 || c == ',')
	break;

      if (c == 'X')
	return true;

      auto cn = lookup_constraint (constraint);
      switch (get_constraint_type (cn))
	{
	case CT_REGISTER:
	  if (REG_P (op) || SUBREG_P (op))
	    return true;
	  break;

	case CT_MEMORY:
	case CT_SPECIAL_MEMORY:
	case CT_RELAXED_MEMORY:
	  if (MEM_P (op))
	    return true;
	  break;

	case CT_CONST_INT:
	case CT_ADDRESS:
	case CT_FIXED_FORM:
	  if (constraint_satisfied_p (op, cn))
	    return true;
	  break;
	}

      constraint += len;
    }

  if (op_alt.matches >= 0)
    {
      rtx other = recog_data.operand[op_alt.matches];
      if ((REG_P (other) || SUBREG_P (other))
	  && (REG_P (op) || SUBREG_P (op)))
	return true;
    }
  return false;
}

// Return true if the operands of the current instruction are likely to
// match OP_ALT.
static bool
likely_alternative_match_p (const operand_alternative *op_alt)
{
  for (int i = 0; i < recog_data.n_operands; ++i)
    if (!likely_operand_match_p (op_alt[i], recog_data.operand[i]))
      return false;
  return true;
}

// Return the sum of the "reject" penalties that OP_ALT applies to the
// current instruction's operands, as a measure of how disparaged the
// alternative is.
static int
count_rejects (const operand_alternative *op_alt)
{
  int reject = 0;
  for (int opno = 0; opno < recog_data.n_operands; ++opno)
    reject += op_alt[opno].reject;
  return reject;
}

// Allocate a T from the region obstack.
template<typename T, typename... Ts>
inline T *
early_ra::region_allocate (Ts... args)
{
  static_assert (std::is_trivially_destructible<T>::value,
		 "destructor won't be called");
  void *addr = obstack_alloc (&m_region_obstack, sizeof (T));
  return new (addr) T (std::forward<Ts> (args)...);
}
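
// For example (an illustrative sketch): a call such as
// region_allocate<allocno_copy_info> () would placement-new a
// trivially-destructible object on m_region_obstack; the storage is
// reclaimed wholesale by start_new_region rather than by running
// destructors.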

early_ra::early_ra (function *fn) : m_fn (fn), m_live_fprs (0)
{
  gcc_obstack_init (&m_region_obstack);
  m_region_alloc_start = obstack_alloc (&m_region_obstack, 0);
  bitmap_tree_view (m_live_allocnos);
}

early_ra::~early_ra ()
{
  obstack_free (&m_region_obstack, nullptr);
}

// Return an array that, for each allocno A in the group, contains the index
// of the allocno at the head of A's chain (that is, the one with the highest
// START_POINT).  The index is INVALID_ALLOCNO if the chain is empty.
inline array_slice<unsigned int>
early_ra::allocno_group_info::chain_heads ()
{
  auto *start = reinterpret_cast<unsigned int *> (this + 1);
  return { start, size };
}

// Return the array of allocnos in the group.
inline array_slice<early_ra::allocno_info>
early_ra::allocno_group_info::allocnos ()
{
  gcc_checking_assert (regno != INVALID_REGNUM);
  auto *chain_end = reinterpret_cast<unsigned int *> (this + 1) + size;
  auto *allocno_start = reinterpret_cast<allocno_info *> (chain_end);
  return { allocno_start, size };
}

// Return the group's color representative.
inline early_ra::allocno_group_info *
early_ra::allocno_group_info::color_rep ()
{
  gcc_checking_assert (m_color_rep->m_color_rep == m_color_rep);
  return m_color_rep;
}

// Return the group that contains the allocno.
inline early_ra::allocno_group_info *
early_ra::allocno_info::group ()
{
  auto *chain_end = reinterpret_cast<unsigned int *> (this - offset);
  return reinterpret_cast<allocno_group_info *> (chain_end - group_size) - 1;
}

// Return the allocnos in the subgroup.
inline array_slice<early_ra::allocno_info>
early_ra::allocno_subgroup::allocnos ()
{
  if (!count)
    return {};
  return { &group->allocnos ()[start], count };
}

// Return allocno I in the subgroup, with 0 being the first.
inline early_ra::allocno_info *
early_ra::allocno_subgroup::allocno (unsigned int i)
{
  return &group->allocnos ()[start + i];
}

// Return the previous (earlier) allocno in ALLOCNO's chain, or null if none.
inline early_ra::allocno_info *
early_ra::chain_prev (allocno_info *allocno)
{
  if (allocno->chain_prev != INVALID_ALLOCNO)
    return m_allocnos[allocno->chain_prev];
  return nullptr;
}

// Return the next (later) allocno in ALLOCNO's chain, or null if none.
inline early_ra::allocno_info *
early_ra::chain_next (allocno_info *allocno)
{
  if (allocno->chain_next != INVALID_ALLOCNO)
    return m_allocnos[allocno->chain_next];
  return nullptr;
}

// Dump the information in m_pseudo_regs.
void
early_ra::dump_pseudo_regs ()
{
  fprintf (dump_file, "\nPseudos:\n");
  fprintf (dump_file, " %6s %6s %6s %6s %6s %6s %8s %s\n",
	   "Id", "FPR8", "FPR16", "FPR32", "NONFPR", "Stride",
	   "FPRness", "Copies");
  pseudo_reg_info unused_reg = {};
  for (unsigned int regno = FIRST_PSEUDO_REGISTER;
       regno < m_pseudo_regs.length (); ++regno)
    {
      const auto &reg = m_pseudo_regs[regno];
      if (memcmp (&reg, &unused_reg, sizeof (reg)) == 0)
	continue;

      fprintf (dump_file, " %6d %6s %6s %6s %6s %6s %8d", regno,
	       reg.flags & NEEDS_FPR8 ? "Req"
	       : reg.flags & ALLOWS_FPR8 ? "OK" : "-",
	       reg.flags & NEEDS_FPR16 ? "Req"
	       : reg.flags & ALLOWS_FPR16 ? "OK" : "-",
	       reg.flags & NEEDS_FPR32 ? "Req"
	       : reg.flags & ALLOWS_FPR32 ? "OK" : "-",
	       reg.flags & NEEDS_NONFPR ? "Req"
	       : reg.flags & ALLOWS_NONFPR ? "OK" : "-",
	       ~reg.flags & HAS_FLEXIBLE_STRIDE ? "-"
	       : reg.flags & HAS_FIXED_STRIDE ? "Some" : "All",
	       fpr_preference (regno));
      if (reg.flags & HAS_FPR_COPY)
	fprintf (dump_file, " FPR");
      if (reg.flags & HAS_NONFPR_COPY)
	fprintf (dump_file, " Non-FPR");
      unsigned int copyi = reg.first_copy;
      while (copyi)
	{
	  const auto &copy = m_pseudo_reg_copies[copyi];
	  if (copy.regnos[0] == regno)
	    {
	      fprintf (dump_file, " r%d", copy.regnos[1]);
	      copyi = copy.next_copies[0];
	    }
	  else
	    {
	      fprintf (dump_file, " r%d", copy.regnos[0]);
	      copyi = copy.next_copies[1];
	    }
	}
      fprintf (dump_file, "\n");
    }
}

// Dump the information in m_fpr_ranges.
void
early_ra::dump_fpr_ranges ()
{
  fprintf (dump_file, "\nFPR live ranges:\n");
  for (unsigned int fpr = 0; fpr < 32; ++fpr)
    {
      auto &intervals = m_fpr_ranges[fpr];
      if (intervals.is_empty ())
	continue;

      fprintf (dump_file, " %2d", fpr);
      for (unsigned int i = 0; i < intervals.length (); ++i)
	{
	  auto &interval = intervals[i];
	  if (i && (i % 4) == 0)
	    fprintf (dump_file, "\n ");
	  fprintf (dump_file, " [ %6d %6d ]", interval.start_point,
		   interval.end_point);
	}
      fprintf (dump_file, "\n");
    }
}

// Dump the information in m_allocno_copies.
void
early_ra::dump_copies ()
{
  fprintf (dump_file, "\nCopies:\n");
  fprintf (dump_file, " %8s %3s %6s\n",
	   "Allocno", "FPR", "Weight");
  for (const auto &copy : m_allocno_copies)
    fprintf (dump_file, " %8d %3d %6d\n", copy.allocno,
	     copy.fpr, copy.weight);
}

// Dump the information in m_allocnos.
void
early_ra::dump_allocnos ()
{
  char buffer[sizeof ("r[:]") + 3 * 3 * sizeof (int) + 1];
  fprintf (dump_file, "\nAllocno groups:\n");
  fprintf (dump_file,
	   " %12s %12s %4s %6s %8s %s\n",
	   "Ids", "Regno", "Size", "Stride", "Cands", "Heads");
  for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
    {
      auto *allocno = m_allocnos[ai];
      if (allocno->offset != 0)
	continue;
      auto *group = allocno->group ();
      snprintf (buffer, sizeof (buffer), "[%d:%d]", allocno->id,
		allocno->id + group->size - 1);
      fprintf (dump_file, " %12s", buffer);
      snprintf (buffer, sizeof (buffer), "r%d[0:%d]", group->regno,
		group->size - 1);
      fprintf (dump_file, " %12s %4s %6d %08x", buffer,
	       group->fpr_size == FPR_D ? "D"
	       : group->fpr_size == FPR_Q ? "Q" : "Z",
	       group->stride,
	       group->fpr_candidates);
      for (auto head : group->chain_heads ())
	if (head == INVALID_ALLOCNO)
	  fprintf (dump_file, " -");
	else
	  fprintf (dump_file, " %d", head);
      fprintf (dump_file, "\n");
    }

  fprintf (dump_file, "\nAllocno chains:\n");
  fprintf (dump_file, " %5s %12s %12s %5s %5s %5s %5s\n",
	   "Id", "Regno", "Range ", "Src", "Dest", "Equiv", "FPR");
  for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
    {
      auto *allocno = m_allocnos[ai];
      if (allocno->chain_prev != INVALID_ALLOCNO)
	continue;
      const char *prefix = "=>";
      for (;;)
	{
	  auto *group = allocno->group ();
	  fprintf (dump_file, " %2s", prefix);
	  fprintf (dump_file, " %5d", allocno->id);
	  snprintf (buffer, sizeof (buffer), "r%d[%d]", group->regno,
		    allocno->offset);
	  fprintf (dump_file, " %12s", buffer);
	  snprintf (buffer, sizeof (buffer), "[%d,%d]",
		    allocno->start_point, allocno->end_point);
	  fprintf (dump_file, " %11s%s %5s", buffer,
		   allocno->is_earlyclobbered ? "*" : " ",
		   allocno->is_strong_copy_dest ? "Strong"
		   : allocno->is_copy_dest ? "Yes" : "-");
	  if (allocno->copy_dest == INVALID_ALLOCNO)
	    fprintf (dump_file, " %5s", "-");
	  else
	    fprintf (dump_file, " %5d", allocno->copy_dest);
	  if (allocno->equiv_allocno != INVALID_ALLOCNO)
	    fprintf (dump_file, " %5d", allocno->equiv_allocno);
	  else
	    fprintf (dump_file, " %5s", "-");
	  if (allocno->hard_regno == FIRST_PSEUDO_REGISTER)
	    fprintf (dump_file, " %5s", "-");
	  else
	    fprintf (dump_file, " %5s", reg_names[allocno->hard_regno]);
	  fprintf (dump_file, "\n");
	  if (allocno->chain_next == INVALID_ALLOCNO)
	    break;
	  allocno = m_allocnos[allocno->chain_next];
	  prefix = "";
	}
    }
}

// Dump the information in m_colors.
void
early_ra::dump_colors ()
{
  fprintf (dump_file, "\nColors:\n");
  for (unsigned int i = 0; i < m_colors.length (); ++i)
    {
      auto *color = m_colors[i];
      if (!color->group)
	continue;

      fprintf (dump_file, " color %d:\n", i);
      fprintf (dump_file, " chains:\n");
      auto heads = color->group->chain_heads ();
      for (unsigned int i = 0; i < color->group->size; ++i)
	{
	  fprintf (dump_file, " %2d:", i);
	  auto ai = heads[i];
	  while (ai != INVALID_ALLOCNO)
	    {
	      auto *allocno = m_allocnos[ai];
	      fprintf (dump_file, " r%d[%d]", allocno->group ()->regno,
		       allocno->offset);
	      ai = allocno->chain_next;
	    }
	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, " FPR candidates:");
      for (unsigned int fpr = 0; fpr < 32; ++fpr)
	fprintf (dump_file, "%s%c", fpr % 8 ? "" : " ",
		 color->group->fpr_candidates & (1U << fpr) ? 'Y' : '-');
      fprintf (dump_file, "\n");
      fprintf (dump_file, " FPR preferences:");
      for (unsigned int fpr = 0; fpr < 32; ++fpr)
	if (color->fpr_preferences[fpr])
	  fprintf (dump_file, " %d(%d)", fpr, color->fpr_preferences[fpr]);
      fprintf (dump_file, "\n");
    }
}

// Record any necessary information about a move from SRC to DEST.
void
early_ra::preprocess_move (rtx dest, rtx src)
{
  if (SUBREG_P (dest))
    dest = SUBREG_REG (dest);
  if (!REG_P (dest))
    return;

  if (SUBREG_P (src))
    src = SUBREG_REG (src);
  if (!REG_P (src))
    return;

  // Sort the registers by increasing REGNO.
  rtx regs[] = { dest, src };
  if (REGNO (dest) > REGNO (src))
    std::swap (regs[0], regs[1]);
  unsigned int regno0 = REGNO (regs[0]);
  unsigned int regno1 = REGNO (regs[1]);

  // Ignore moves between hard registers.
  if (HARD_REGISTER_NUM_P (regno1))
    return;

  // For moves between hard registers and pseudos, just record the type
  // of hard register involved.
  auto &reg1 = m_pseudo_regs[regno1];
  reg1.mode = GET_MODE (regs[1]);
  if (HARD_REGISTER_NUM_P (regno0))
    {
      reg1.flags |= (FP_REGNUM_P (regno0) ? HAS_FPR_COPY : HAS_NONFPR_COPY);
      return;
    }

  // Record a move between two pseudo registers.
  auto &reg0 = m_pseudo_regs[regno0];
  reg0.mode = GET_MODE (regs[0]);

  reg_copy_info copy;
  copy.regnos[0] = regno0;
  copy.regnos[1] = regno1;
  copy.next_copies[0] = reg0.first_copy;
  copy.next_copies[1] = reg1.first_copy;

  reg0.first_copy = reg1.first_copy = m_pseudo_reg_copies.length ();
  m_pseudo_reg_copies.safe_push (copy);
}

// Return true if INSN has a multi-vector operand and if that operand
// could be converted to strided form.
static bool
is_stride_candidate (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  auto stride_type = get_attr_stride_type (insn);
  return (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE
	  || stride_type == STRIDE_TYPE_LD1_CONSECUTIVE
	  || stride_type == STRIDE_TYPE_ST1_CONSECUTIVE);
}

// Go through the constraints of INSN, which has already been extracted,
// and record any relevant information about pseudo registers.
void
early_ra::process_pseudo_reg_constraints (rtx_insn *insn)
{
  extract_insn (insn);
  preprocess_constraints (insn);

  // Flags that describe any multi-register vector operands.
  unsigned int insn_flags = (is_stride_candidate (insn)
			     ? HAS_FLEXIBLE_STRIDE
			     : HAS_FIXED_STRIDE);

  auto alts = get_preferred_alternatives (insn);

  int operand_matches[MAX_RECOG_OPERANDS];
  unsigned int operand_flags[MAX_RECOG_OPERANDS];
  for (int i = 0; i < recog_data.n_operands; ++i)
    {
      operand_matches[i] = -1;
      operand_flags[i] = 0;
    }

  // Extract information from the constraints, considering all plausible
  // alternatives.
  for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
    {
      if (!(alts & ALTERNATIVE_BIT (altno)))
	continue;

      auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
      if (!likely_alternative_match_p (op_alt))
	continue;

      // Use SRC_OPNO's constraints to derive information about DEST_OPNO.
      auto record_operand = [&](int src_opno, int dest_opno)
	{
	  int matches = op_alt[src_opno].matches;
	  if (matches >= 0)
	    operand_matches[dest_opno] = matches;

	  auto cl = alternative_class (op_alt, src_opno);
	  if (cl != NO_REGS)
	    {
	      if (reg_class_subset_p (cl, FP_REGS))
		operand_flags[dest_opno] |= ALLOWS_FPR32;
	      if (reg_class_subset_p (cl, FP_LO_REGS))
		operand_flags[dest_opno] |= ALLOWS_FPR16;
	      if (reg_class_subset_p (cl, FP_LO8_REGS))
		operand_flags[dest_opno] |= ALLOWS_FPR8;
	      if (!reg_classes_intersect_p (cl, FP_REGS))
		operand_flags[dest_opno] |= ALLOWS_NONFPR;
	    }
	};

      for (int i = 0; i < recog_data.n_operands; ++i)
	{
	  record_operand (i, i);
	  if (recog_data.constraints[i][0] == '%')
	    {
	      record_operand (i, i + 1);
	      record_operand (i + 1, i);
	    }
	}
    }

  // Process the information we collected above.
  for (int i = 0; i < recog_data.n_operands; ++i)
    {
      rtx op = recog_data.operand[i];
      machine_mode orig_mode = GET_MODE (op);
      if (SUBREG_P (op))
	op = SUBREG_REG (op);

      // Record the accumulated information in m_pseudo_regs.
      if (REG_P (op) && !HARD_REGISTER_P (op))
	{
	  // The flags so far just describe what at least one alternative
	  // would accept.  Calculate the associated NEEDS_* information.
	  auto flags = operand_flags[i];
	  if (!(flags & ALLOWS_FPR32) && (flags & ALLOWS_NONFPR))
	    flags |= NEEDS_NONFPR;
	  else if ((flags & ALLOWS_FPR32) && !(flags & ALLOWS_NONFPR))
	    {
	      if (flags & ALLOWS_FPR8)
		flags |= NEEDS_FPR8;
	      if (flags & ALLOWS_FPR16)
		flags |= NEEDS_FPR16;
	      flags |= NEEDS_FPR32;
	    }

	  // Look for multi-register vector operands.
	  if (VECTOR_MODE_P (orig_mode)
	      && targetm.hard_regno_mode_ok (V0_REGNUM, orig_mode)
	      && hard_regno_nregs (V0_REGNUM, orig_mode) > 1)
	    flags |= insn_flags;

	  m_pseudo_regs[REGNO (op)].flags |= flags;
	  m_pseudo_regs[REGNO (op)].mode = GET_MODE (op);
	}

      // Treat matching constraints as equivalent to moves.
      if (operand_matches[i] >= 0)
	preprocess_move (recog_data.operand[operand_matches[i]], op);
    }
}

// Make one pass through the instructions, collecting information that
// will be needed later.
void
early_ra::preprocess_insns ()
{
  m_pseudo_regs.safe_grow_cleared (max_reg_num ());
  m_pseudo_reg_copies.safe_push (reg_copy_info ());
  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;

      if (GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	continue;

      rtx set = single_set (insn);
      if (set && is_move_set (set))
	preprocess_move (SET_DEST (set), SET_SRC (set));
      else
	process_pseudo_reg_constraints (insn);
    }
}

// Return a signed integer that says (roughly) how strong an affinity
// pseudo register REGNO has with FPRs.  A positive value indicates
// that we should try to allocate an FPR, a negative value indicates
// that we shouldn't, and 0 indicates neutrality.
int
early_ra::fpr_preference (unsigned int regno)
{
  auto mode = m_pseudo_regs[regno].mode;
  auto flags = m_pseudo_regs[regno].flags;
  if (mode == VOIDmode || !targetm.hard_regno_mode_ok (V0_REGNUM, mode))
    return -3;
  else if (flags & HAS_FLEXIBLE_STRIDE)
    return 3;
  else if (flags & NEEDS_FPR32)
    return 2;
  else if (!(flags & ALLOWS_FPR32))
    return -2;
  else if ((flags & HAS_FPR_COPY) && !(flags & HAS_NONFPR_COPY))
    return 1;
  else if ((flags & HAS_NONFPR_COPY) && !(flags & HAS_FPR_COPY))
    return -1;
  else
    return 0;
}
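
// For example (illustrative): a vector pseudo that some instruction
// requires to be in FPRs scores 2, while a pseudo whose mode cannot be
// placed in V0 at all scores -3; a pseudo that is only ever copied to
// or from hard FPRs, never hard non-FP registers, scores 1.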

// Propagate information about pseudo registers along copy edges,
// provided that doing so doesn't create conflicting FPR preferences.
void
early_ra::propagate_pseudo_reg_info ()
{
  struct stack_entry { unsigned int regno, copyi; };

  auto_vec<stack_entry, 32> stack;
  for (unsigned int i = FIRST_PSEUDO_REGISTER;
       i < m_pseudo_regs.length (); ++i)
    {
      auto start = m_pseudo_regs[i].first_copy;
      if (!start)
	continue;

      stack.quick_push ({ i, start });
      while (!stack.is_empty ())
	{
	  auto entry = stack.pop ();
	  auto &copy = m_pseudo_reg_copies[entry.copyi];
	  auto src_regno = entry.regno;
	  auto dest_regno = (src_regno == copy.regnos[1]
			     ? copy.regnos[0]
			     : copy.regnos[1]);
	  auto next_copyi = (src_regno == copy.regnos[1]
			     ? copy.next_copies[1]
			     : copy.next_copies[0]);
	  if (next_copyi)
	    stack.safe_push ({ src_regno, next_copyi });

	  auto &src_reg = m_pseudo_regs[src_regno];
	  auto &dest_reg = m_pseudo_regs[dest_regno];

	  if (src_reg.flags & ~dest_reg.flags & PSEUDO_COPY_FLAGS)
	    {
	      auto src_preference = fpr_preference (src_regno);
	      auto dest_preference = fpr_preference (dest_regno);
	      if ((src_preference >= 0 && dest_preference >= 0)
		  || (src_preference <= 0 && dest_preference <= 0))
		{
		  dest_reg.flags |= (src_reg.flags & PSEUDO_COPY_FLAGS);
		  stack.safe_push ({ dest_regno, dest_reg.first_copy });
		}
	    }
	}
    }
}
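
// For example (illustrative): if pseudo A has HAS_FPR_COPY and is moved
// to pseudo B, which has no flags of its own, the walk above copies
// HAS_FPR_COPY to B (both preferences are >= 0) and then continues from
// B's other copy edges.  It stops instead if B's flags already give it
// a negative FPR preference, since the preferences would then conflict.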

// Decide which pseudos should be allocated an FPR, setting m_fpr_pseudos
// accordingly.
void
early_ra::choose_fpr_pseudos ()
{
  for (unsigned int i = FIRST_PSEUDO_REGISTER;
       i < m_pseudo_regs.length (); ++i)
    if (fpr_preference (i) > 0)
      bitmap_set_bit (m_fpr_pseudos, i);
}

// Clear out information about the previous CFG region (if any)
// and set up the data for a new region.
void
early_ra::start_new_region ()
{
  obstack_free (&m_region_obstack, m_region_alloc_start);
  m_regno_to_group.empty ();
  m_allocno_copies.truncate (0);
  m_allocnos.truncate (0);
  m_sorted_allocnos.truncate (0);
  m_colors.truncate (0);
  m_insn_ranges.truncate (0);
  for (auto &fpr_ranges : m_fpr_ranges)
    fpr_ranges.truncate (0);
  for (auto &call_points : m_call_points)
    call_points.truncate (0);
  gcc_assert (bitmap_empty_p (m_live_allocnos) && m_live_fprs == 0);
  m_dead_insns.truncate (0);
  m_allocated_fprs = 0;
  m_call_preserved_fprs = 0;
  m_allocation_successful = true;
}

// Create and return an allocno group of size SIZE for register REGNO.
// REGNO can be INVALID_REGNUM if the group just exists to allow
// other groups to be chained together, and does not have any new
// allocnos of its own.
early_ra::allocno_group_info *
early_ra::create_allocno_group (unsigned int regno, unsigned int size)
{
  static_assert (alignof (unsigned int) == alignof (allocno_info),
		 "allocno_info alignment");
  unsigned int num_allocnos = (regno != INVALID_REGNUM ? size : 0);

  // Allocate an allocno_group_info, followed by an array of chain heads,
  // followed by the allocnos themselves.
  size_t alloc_size = (sizeof (allocno_group_info)
		       + size * sizeof (unsigned int)
		       + num_allocnos * sizeof (allocno_info));
  void *data = obstack_alloc (&m_region_obstack, alloc_size);

  // Initialize the group.
  auto *group = reinterpret_cast<allocno_group_info *> (data);
  memset (group, 0, sizeof (*group));
  group->m_color_rep = group;
  group->regno = regno;
  group->size = size;
  group->stride = 1;
  group->fpr_size = FPR_D;
  group->fpr_candidates = ~0U;

  // Initialize the chain heads.
  auto heads = group->chain_heads ();
  for (unsigned int i = 0; i < heads.size (); ++i)
    heads[i] = (i < num_allocnos ? m_allocnos.length () + i : INVALID_ALLOCNO);

  // Initialize the allocnos.
  if (num_allocnos > 0)
    {
      auto allocnos = group->allocnos ();
      memset (allocnos.begin (), 0, num_allocnos * sizeof (allocno_info));
      for (unsigned int i = 0; i < num_allocnos; ++i)
	{
	  auto *allocno = &allocnos[i];
	  allocno->id = m_allocnos.length ();
	  allocno->offset = i;
	  allocno->group_size = size;
	  allocno->hard_regno = FIRST_PSEUDO_REGISTER;
	  allocno->start_point = END_OF_REGION;
	  allocno->end_point = START_OF_REGION;
	  allocno->copy_dest = INVALID_ALLOCNO;
	  allocno->equiv_allocno = INVALID_ALLOCNO;
	  allocno->chain_next = INVALID_ALLOCNO;
	  allocno->chain_prev = INVALID_ALLOCNO;
	  m_allocnos.safe_push (allocno);
	}
    }
  return group;
}
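
// For example (illustrative): a pseudo that spans four FPRs gets a group
// with SIZE == 4, so the function above allocates one allocno_group_info,
// four chain heads and four allocno_info entries in a single obstack
// block, and pushes allocnos with consecutive ids onto m_allocnos.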

// If REG refers to a pseudo register that might be allocated to FPRs,
// return the associated range of allocnos, creating new ones if necessary.
// Return an empty range otherwise.
early_ra::allocno_subgroup
early_ra::get_allocno_subgroup (rtx reg)
{
  if (GET_CODE (reg) == SUBREG)
    {
      allocno_subgroup inner = get_allocno_subgroup (SUBREG_REG (reg));
      if (!inner)
	return {};

      if (!targetm.modes_tieable_p (GET_MODE (SUBREG_REG (reg)),
				    GET_MODE (reg)))
	{
	  m_allocation_successful = false;
	  return {};
	}

      subreg_info info;
      subreg_get_info (V0_REGNUM, GET_MODE (SUBREG_REG (reg)),
		       SUBREG_BYTE (reg), GET_MODE (reg), &info);
      if (!info.representable_p)
	{
	  m_allocation_successful = false;
	  return {};
	}

      inner.start += info.offset;
      inner.count = info.nregs;
      return inner;
    }

  if (!REG_P (reg) || HARD_REGISTER_P (reg))
    return {};

  unsigned int regno = REGNO (reg);
  if (fpr_preference (regno) <= 0)
    return {};

  unsigned int count = hard_regno_nregs (V0_REGNUM, GET_MODE (reg));
  bool existed;
  auto &entry = m_regno_to_group.get_or_insert (regno, &existed);
  if (!existed)
    {
      auto *group = create_allocno_group (regno, count);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  auto allocnos = group->allocnos ();
	  fprintf (dump_file, "Creating allocnos [%d:%d] for r%d\n",
		   allocnos.front ().id, allocnos.back ().id, regno);
	}

      auto reg_bits = GET_MODE_BITSIZE (GET_MODE (reg));
      auto fpr_bits = exact_div (reg_bits, count);
      auto flags = m_pseudo_regs[regno].flags;

      // Punt for now if there is a choice to be made between using an
      // FPR and a non-FPR.
      if ((flags & NEEDS_NONFPR)
	  || ((flags & ALLOWS_NONFPR)
	      && !FLOAT_MODE_P (GET_MODE (reg))
	      && !VECTOR_MODE_P (GET_MODE (reg))))
	m_allocation_successful = false;

      if (flags & ALLOWS_FPR8)
	group->fpr_candidates &= 0xff;
      else if (flags & ALLOWS_FPR16)
	group->fpr_candidates &= 0xffff;
      group->fpr_candidates &= ~0U >> (count - 1);

      group->has_flexible_stride = ((flags & HAS_FLEXIBLE_STRIDE) != 0
				    && (flags & HAS_FIXED_STRIDE) == 0);

      group->fpr_size = (maybe_gt (fpr_bits, 128) ? FPR_Z
			 : maybe_gt (fpr_bits, 64) ? FPR_Q : FPR_D);

      entry = group;
    }
  return { entry, 0, count };
}
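
// For example (illustrative): a subreg that selects one register's worth
// of a two-register pseudo yields a subgroup with COUNT == 1 and with
// START given by subreg_get_info's register offset, provided that the
// subreg is representable on FPRs.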

// Record a use of FPR REGNO at the current program point, as part of
// a backwards walk over a block.
void
early_ra::record_fpr_use (unsigned int regno)
{
  gcc_assert (IN_RANGE (regno, V0_REGNUM, V31_REGNUM));
  unsigned int offset = regno - V0_REGNUM;
  if (!(m_live_fprs & (1U << offset)))
    {
      m_fpr_ranges[offset].safe_push ({ START_OF_REGION, m_current_point,
					INVALID_ALLOCNO });
      m_live_fprs |= 1U << offset;
    }
}

// Record a definition of FPR REGNO at the current program point, as part of
// a backwards walk over a block.
void
early_ra::record_fpr_def (unsigned int regno)
{
  gcc_assert (IN_RANGE (regno, V0_REGNUM, V31_REGNUM));
  unsigned int offset = regno - V0_REGNUM;

  // The definition completes the current live range.  If the result
  // of the definition is used, the live range extends to the last use.
  // Otherwise the live range is just a momentary blip at the current point.
  auto &ranges = m_fpr_ranges[offset];
  if (m_live_fprs & (1U << offset))
    {
      ranges.last ().start_point = m_current_point;
      m_live_fprs &= ~(1U << offset);
    }
  else
    ranges.safe_push ({ m_current_point, m_current_point, INVALID_ALLOCNO });
}
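
// For example (illustrative): in a backwards walk, a use of V8 at point
// 10 opens a live range [START_OF_REGION, 10]; a definition of V8 seen
// later in the walk, say at point 14, then clips the range to [14, 10].
// A definition with no live use just records the blip [14, 14].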

// Record a use of allocno ALLOCNO at the current program point, as part
// of a backwards walk over a block.
void
early_ra::record_allocno_use (allocno_info *allocno)
{
  bitmap_set_bit (m_live_allocnos, allocno->id);
  if (allocno->end_point > m_current_point)
    {
      allocno->end_point = m_current_point;
      allocno->last_def_point = START_OF_REGION;
    }
  allocno->start_point = m_current_point;
  allocno->is_copy_dest = false;
  allocno->is_strong_copy_dest = false;
  allocno->equiv_allocno = INVALID_ALLOCNO;
}

// Record a definition of ALLOCNO at the current program point, as part
// of a backwards walk over a block.  The allocno is known to be live.
void
early_ra::record_allocno_def (allocno_info *allocno)
{
  allocno->last_def_point = m_current_point;
  allocno->start_point = m_current_point;
  allocno->num_defs = MIN (allocno->num_defs + 1, 2);
  gcc_checking_assert (!allocno->is_copy_dest
		       && !allocno->is_strong_copy_dest);
  if (!bitmap_clear_bit (m_live_allocnos, allocno->id))
    gcc_unreachable ();
}

// Return true if a move from SRC_ALLOCNO to DEST_ALLOCNO could be treated
// as an equivalence.
bool
early_ra::valid_equivalence_p (allocno_info *dest_allocno,
			       allocno_info *src_allocno)
{
  if (src_allocno->end_point > dest_allocno->end_point)
    // The src allocno dies first.
    return false;

  if (src_allocno->num_defs != 0)
    {
      if (dest_allocno->end_point < m_current_bb_point)
	// We don't currently track enough information to handle multiple
	// definitions across basic block boundaries.
	return false;

      if (src_allocno->last_def_point >= dest_allocno->end_point)
	// There is another definition during the destination's live range.
	return false;
    }
  return dest_allocno->num_defs == 1;
}

// Record any relevant allocno-related information for an actual or imagined
// copy from SRC to DEST.  FROM_MOVE_P is true if the copy was an explicit
// move instruction, false if it represents one way of satisfying the previous
// instruction's constraints.
void
early_ra::record_copy (rtx dest, rtx src, bool from_move_p)
{
  auto dest_range = get_allocno_subgroup (dest);
  auto src_range = get_allocno_subgroup (src);
  if (from_move_p
      && dest_range
      && REG_P (src)
      && FP_REGNUM_P (REGNO (src)))
    {
      // A copy from an FPR to an allocno group.
      unsigned int fpr = REGNO (src) - V0_REGNUM;
      m_allocno_copies.safe_push ({ dest_range.allocno (0)->id, fpr,
				    dest_range.count });

      // If the allocno at the other end of the chain of copies from DEST
      // has a copy to the same FPR, record that all intervening copy chains
      // could become "strong" ones.  This indicates that picking the FPR
      // avoids a copy at both ends.
      unsigned int hard_regno = REGNO (src);
      for (auto &dest_allocno : dest_range.allocnos ())
	if (dest_allocno.hard_regno == hard_regno++)
	  dest_allocno.is_strong_copy_src = true;
    }
  else if (from_move_p
	   && src_range
	   && REG_P (dest)
	   && FP_REGNUM_P (REGNO (dest)))
    {
      // A copy from an allocno group to an FPR.
      unsigned int fpr = REGNO (dest) - V0_REGNUM;
      m_allocno_copies.safe_push ({ src_range.allocno (0)->id, fpr,
				    src_range.count });
      for (auto &src_allocno : src_range.allocnos ())
	{
	  // If the copy comes from a move, see whether the destination
	  // FPR is known to be equal to the source allocno for the FPR's
	  // last live range.
	  if (from_move_p && src_allocno.num_defs == 0)
	    {
	      auto &last_range = m_fpr_ranges[fpr].last ();
	      if (last_range.end_point >= src_allocno.end_point)
		last_range.allocno = src_allocno.id;
	    }
	  src_allocno.hard_regno = V0_REGNUM + fpr;
	  fpr += 1;
	}
    }
  else if (src_range && dest_range)
    {
      // A copy between two allocno groups.  We can only have a mismatched
      // number of FPRs for imaginary, non-move copies.  In that case
      // the matching happens on the common lowparts.
      gcc_assert (!from_move_p || src_range.count == dest_range.count);
      unsigned int count = std::min (src_range.count, dest_range.count);
      if (WORDS_BIG_ENDIAN)
	{
	  src_range.start += src_range.count - count;
	  dest_range.start += dest_range.count - count;
	}
      src_range.count = count;
      dest_range.count = count;

      // Ignore (imaginary non-move) copies if the destination is still live.
      for (auto &dest_allocno : dest_range.allocnos ())
	if (bitmap_bit_p (m_live_allocnos, dest_allocno.id))
	  return;

      for (unsigned int i = 0; i < src_range.count; ++i)
	{
	  auto *dest_allocno = dest_range.allocno (i);
	  auto *src_allocno = src_range.allocno (i);
	  if (src_allocno->end_point > dest_allocno->start_point)
	    {
	      gcc_assert (src_allocno->copy_dest == INVALID_ALLOCNO
			  || src_allocno->copy_dest == dest_allocno->id);
	      src_allocno->copy_dest = dest_allocno->id;
	      src_allocno->hard_regno = dest_allocno->hard_regno;
	      dest_allocno->is_copy_dest = 1;
	    }
	  else if (from_move_p
		   && valid_equivalence_p (dest_allocno, src_allocno))
	    dest_allocno->equiv_allocno = src_allocno->id;
	}
    }
}

// Record any relevant allocno-related information about the constraints
// on INSN, which has already been extracted.
void
early_ra::record_constraints (rtx_insn *insn)
{
  preprocess_constraints (insn);

  int operand_matches[MAX_RECOG_OPERANDS];
  for (int i = 0; i < recog_data.n_operands; ++i)
    operand_matches[i] = -1;

  auto alts = get_preferred_alternatives (insn);
  bool any_ok = recog_data.n_alternatives == 0;

  // The set of output operands that are earlyclobber in at least one
  // alternative.
  operand_mask earlyclobber_operands = 0;

  // The set of output operands that are matched to inputs in at least
  // one alternative.
  operand_mask matched_operands = 0;

  // The set of output operands that are not matched to inputs in at least
  // one alternative.
  operand_mask unmatched_operands = 0;

  // The set of input operands that are matched to outputs in at least one
  // alternative, or that overlap with such an input if the output is not
  // earlyclobber.  The latter part of the condition copes with things
  // like y = x * x, where the first x is tied to the destination, and where
  // y is not earlyclobber.
  operand_mask matches_operands = 0;

  for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
    {
      if (!(alts & ALTERNATIVE_BIT (altno)))
	continue;

      auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
      if (!likely_alternative_match_p (op_alt))
	continue;

      any_ok = true;

      // Update the information for operand DEST_OPNO based on the constraint
      // information for operand SRC_OPNO.  The numbers can be different for
      // swapped commutative operands.
      auto record_operand = [&](int src_opno, int dest_opno)
	{
	  int matches = op_alt[src_opno].matches;
	  // A matched earlyclobber cannot be used if the same operand value
	  // occurs in an unmatched operand.  E.g. for y = x * x, a matched
	  // earlyclobber on the first input does not cover the second input.
	  if (matches >= 0)
	    {
	      rtx op = recog_data.operand[dest_opno];
	      operand_mask overlaps = 0;
	      for (int i = 0; i < recog_data.n_operands; ++i)
		if (i != dest_opno
		    && !recog_data.is_operator[i]
		    && recog_data.operand_type[i] != OP_OUT
		    && reg_overlap_mentioned_p (op, recog_data.operand[i]))
		  overlaps |= 1U << i;
	      if (!op_alt[matches].earlyclobber || overlaps == 0)
		{
		  operand_matches[dest_opno] = matches;
		  matches_operands |= (1U << dest_opno) | overlaps;
		}
	    }
	};

      auto reject = count_rejects (op_alt);
      for (int opno = 0; opno < recog_data.n_operands; ++opno)
	{
	  operand_mask op_mask = operand_mask (1) << opno;

	  if (recog_data.operand_type[opno] != OP_IN)
	    {
	      if (reject == 0 && op_alt[opno].matched >= 0)
		matched_operands |= op_mask;
	      else
		unmatched_operands |= op_mask;
	    }

	  if (op_alt[opno].earlyclobber)
	    earlyclobber_operands |= op_mask;

	  // Punt for now on scratches.  If we wanted to handle them,
	  // we'd need to create allocnos for them, like IRA does.
	  rtx op = recog_data.operand[opno];
	  if (GET_CODE (op) == SCRATCH
	      && reg_classes_intersect_p (op_alt[opno].cl, FP_REGS))
	    m_allocation_successful = false;

	  // Record filter information, which applies to the first register
	  // in the operand.
	  if (auto filters = alternative_register_filters (op_alt, opno))
	    if (auto range = get_allocno_subgroup (recog_data.operand[opno]))
	      for (unsigned int fpr = range.start; fpr < 32; ++fpr)
		if (!test_register_filters (filters, fpr))
		  range.group->fpr_candidates &= ~(1U << (fpr - range.start));

	  if (reject == 0)
	    {
	      // Record possible matched operands.
	      record_operand (opno, opno);
	      if (recog_data.constraints[opno][0] == '%')
		{
		  record_operand (opno, opno + 1);
		  record_operand (opno + 1, opno);
		}
	    }
	}
    }

  if (!any_ok)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " -- no match\n");
      m_allocation_successful = false;
    }

  // Record if there is an output operand that is never earlyclobber and never
  // matched to an input.  See the comment below for how this is used.
  rtx dest_op = NULL_RTX;
  for (int opno = 0; opno < recog_data.n_operands; ++opno)
    {
      auto op_mask = operand_mask (1) << opno;
      if (recog_data.operand_type[opno] == OP_OUT
	  && (earlyclobber_operands & op_mask) == 0
	  && (matched_operands & op_mask) == 0)
	{
	  dest_op = recog_data.operand[opno];
	  break;
	}
    }

  for (int opno = 0; opno < recog_data.n_operands; ++opno)
    {
      auto op_mask = operand_mask (1) << opno;
      rtx op = recog_data.operand[opno];
      int matches = operand_matches[opno];

      // Punt for now on operands that already have a fixed choice of
      // register, since we don't have IRA's ability to find an alternative.
      // It's better if earlier passes don't create this kind of situation.
      if (REG_P (op) && FP_REGNUM_P (REGNO (op)))
	m_allocation_successful = false;

      // Treat input operands as being earlyclobbered if an output is
      // sometimes earlyclobber and if the input never matches an output.
      // Do the same if there is an output that is always matched to an
      // input, and if this operand doesn't match that input.  In both
      // cases, tying the input and the output would lead to an impossible
      // combination (or at least one that is difficult to reload).
      if (recog_data.operand_type[opno] != OP_OUT
	  && ((earlyclobber_operands && matches < 0)
	      || ((matched_operands & ~unmatched_operands)
		  && !(matches_operands & op_mask))))
	for (auto &allocno : get_allocno_subgroup (op).allocnos ())
	  if (allocno.end_point + 1 == m_current_point)
	    allocno.is_earlyclobbered = true;

      // Create copies between operands that can be tied.  This (deliberately)
      // might add several copies to the same destination register; later code
      // can then choose between them based on other criteria.
      //
      // If there is an output operand that is never matched or earlyclobber,
      // and an input operand that never matches an output operand, create
      // a tentative copy between them.  This allows hard register preferences
      // to be transmitted along the copy chains.
      if (matches >= 0)
	record_copy (recog_data.operand[matches], op);
      else if (dest_op && recog_data.operand_type[opno] == OP_IN)
	record_copy (dest_op, op);
    }
}
1745
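// A minimal standalone sketch (kept out of compilation) of the
// operand-mask bookkeeping above.  The toy instruction and its two
// alternatives are invented for illustration: operand 0 is an output
// that is tied to input operand 1 in one alternative but not the other.
#if 0
#include <cstdint>
#include <cassert>

int
main ()
{
  using operand_mask = uint32_t;
  operand_mask matched_operands = 0;
  operand_mask unmatched_operands = 0;

  // Alternative 1: operand 0 is tied to operand 1.
  matched_operands |= operand_mask (1) << 0;
  // Alternative 2: operand 0 is independent of the inputs.
  unmatched_operands |= operand_mask (1) << 0;

  // Outputs that are matched in every likely alternative are those in
  // matched_operands & ~unmatched_operands; here the tie is optional,
  // so the intersection is empty.
  assert ((matched_operands & ~unmatched_operands) == 0);
  return 0;
}
#endif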
1746 // If FLAGS is DF_REF_AT_TOP, model the artificial uses and defs at the
1747 // start of the current basic block, otherwise model the artificial uses
1748 // and defs at the end of the basic block. This is done as part of a
1749 // backwards walk, so defs should be processed before uses.
1750 void
1751 early_ra::record_artificial_refs (unsigned int flags)
1752 {
1753 df_ref ref;
1754
1755 FOR_EACH_ARTIFICIAL_DEF (ref, m_current_bb->index)
1756 if ((DF_REF_FLAGS (ref) & DF_REF_AT_TOP) == flags
1757 && IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
1758 record_fpr_def (DF_REF_REGNO (ref));
1759 m_current_point += 1;
1760
1761 FOR_EACH_ARTIFICIAL_USE (ref, m_current_bb->index)
1762 if ((DF_REF_FLAGS (ref) & DF_REF_AT_TOP) == flags
1763 && IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
1764 record_fpr_use (DF_REF_REGNO (ref));
1765 m_current_point += 1;
1766 }
1767
1768 // Model the register references in INSN as part of a backwards walk.
1769 void
1770 early_ra::record_insn_refs (rtx_insn *insn)
1771 {
1772 df_ref ref;
1773
1774 // Record all definitions, excluding partial call clobbers.
1775 FOR_EACH_INSN_DEF (ref, insn)
1776 if (IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
1777 record_fpr_def (DF_REF_REGNO (ref));
1778 else
1779 {
1780 auto range = get_allocno_subgroup (DF_REF_REG (ref));
1781 for (auto &allocno : range.allocnos ())
1782 {
1783 // If the destination is unused, record a momentary blip
1784 // in its live range.
1785 if (!bitmap_bit_p (m_live_allocnos, allocno.id))
1786 record_allocno_use (&allocno);
1787 record_allocno_def (&allocno);
1788 }
1789 }
1790 m_current_point += 1;
1791
1792 // Model the call made by a call insn as a separate phase in the
1793 // evaluation of the insn. Any partial call clobbers happen at that
1794 // point, rather than in the definition or use phase of the insn.
1795 if (auto *call_insn = dyn_cast<rtx_call_insn *> (insn))
1796 {
1797 function_abi abi = insn_callee_abi (call_insn);
1798 m_call_points[abi.id ()].safe_push (m_current_point);
1799 m_current_point += 1;
1800 }
1801
1802 // Record all uses. We can ignore READ_MODIFY_WRITE uses of plain subregs,
1803 // since we track the FPR-sized parts of them individually.
1804 FOR_EACH_INSN_USE (ref, insn)
1805 if (IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
1806 record_fpr_use (DF_REF_REGNO (ref));
1807 else if (!DF_REF_FLAGS_IS_SET (ref, DF_REF_READ_WRITE)
1808 || DF_REF_FLAGS_IS_SET (ref, DF_REF_STRICT_LOW_PART)
1809 || DF_REF_FLAGS_IS_SET (ref, DF_REF_ZERO_EXTRACT))
1810 {
1811 auto range = get_allocno_subgroup (DF_REF_REG (ref));
1812 for (auto &allocno : range.allocnos ())
1813 record_allocno_use (&allocno);
1814 }
1815 m_current_point += 1;
1816 }
1817
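// A minimal standalone sketch (kept out of compilation) of the
// program-point scheme used by record_insn_refs: during the backwards
// walk, each instruction is split into a definition phase, an optional
// call phase, and a use phase, each with its own point.  The starting
// point value is invented for illustration.
#if 0
#include <cassert>

int
main ()
{
  unsigned int current_point = 2;

  // Walking backwards over one call instruction:
  unsigned int def_point = current_point++;   // definitions
  unsigned int call_point = current_point++;  // partial call clobbers
  unsigned int use_point = current_point++;   // uses

  // Inputs are read before outputs are written, so under the reverse
  // numbering (smaller points are later in the program) the definition
  // phase gets the smallest point of the three.
  assert (def_point < call_point && call_point < use_point);
  return 0;
}
#endif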
1818 // ALLOCNO->is_strong_copy_src is true. See whether ALLOCNO heads a
1819 // natural chain that has an affinity with the same hard register at
1820 // both ends.
1821 bool
1822 early_ra::consider_strong_copy_src_chain (allocno_info *allocno)
1823 {
1824 auto *src_allocno = allocno;
1825 while (src_allocno->copy_dest != INVALID_ALLOCNO)
1826 {
1827 auto *dest_allocno = m_allocnos[src_allocno->copy_dest];
1828 if (dest_allocno->start_point > src_allocno->end_point
1829 || dest_allocno->hard_regno != src_allocno->hard_regno)
1830 return false;
1831 gcc_checking_assert (dest_allocno->is_copy_dest);
1832 src_allocno = dest_allocno;
1833 }
1834
1835 while (allocno->copy_dest != INVALID_ALLOCNO)
1836 {
1837 allocno->is_strong_copy_src = 1;
1838 allocno = m_allocnos[allocno->copy_dest];
1839 allocno->is_strong_copy_dest = 1;
1840 }
1841 return true;
1842 }
1843
1844 // ALLOCNO1 and ALLOCNO2 are linked in some way, and might end up being
1845 // chained together. See whether chaining them requires the containing
1846 // groups to have the same stride, or whether it requires them to have
1847 // different strides. Return 1 if they should have the same stride,
1848 // -1 if they should have different strides, or 0 if it doesn't matter.
1849 int
1850 early_ra::strided_polarity_pref (allocno_info *allocno1,
1851 allocno_info *allocno2)
1852 {
1853 if (allocno1->offset + 1 < allocno1->group_size
1854 && allocno2->offset + 1 < allocno2->group_size)
1855 {
1856 if (is_chain_candidate (allocno1 + 1, allocno2 + 1))
1857 return 1;
1858 else
1859 return -1;
1860 }
1861
1862 if (allocno1->offset > 0 && allocno2->offset > 0)
1863 {
1864 if (is_chain_candidate (allocno1 - 1, allocno2 - 1))
1865 return 1;
1866 else
1867 return -1;
1868 }
1869
1870 return 0;
1871 }
1872
1873 // Decide which groups should be strided. Also complete "strong" copy chains.
1874 void
1875 early_ra::find_strided_accesses ()
1876 {
1877 // This function forms a graph of allocnos, linked by equivalences and
1878 // natural copy chains. It temporarily uses chain_next to record the
1879 // reverse of equivalence edges (equiv_allocno) and chain_prev to record
1880 // the reverse of copy edges (copy_dest).
1881 unsigned int allocno_info::*links[] = {
1882 &allocno_info::chain_next,
1883 &allocno_info::chain_prev,
1884 &allocno_info::copy_dest,
1885 &allocno_info::equiv_allocno
1886 };
1887
1888 // Set up the temporary reverse edges. Check for strong copy chains.
1889 for (unsigned int i = m_allocnos.length (); i-- > 0; )
1890 {
1891 auto *allocno1 = m_allocnos[i];
1892 if (allocno1->copy_dest != INVALID_ALLOCNO)
1893 m_allocnos[allocno1->copy_dest]->chain_prev = allocno1->id;
1894 if (allocno1->equiv_allocno != INVALID_ALLOCNO)
1895 m_allocnos[allocno1->equiv_allocno]->chain_next = allocno1->id;
1896
1897 if (allocno1->is_strong_copy_src
1898 && (allocno1->is_copy_dest
1899 || !consider_strong_copy_src_chain (allocno1)))
1900 allocno1->is_strong_copy_src = false;
1901 }
1902
1903 // Partition the graph into cliques based on edges that have the following
1904 // properties:
1905 //
1906 // - the edge joins two allocnos whose groups have a free choice between
1907 // consecutive and strided allocations.
1908 //
1909 // - the two groups have a relative strided polarity preference (that is
1910 // they should make the same choice between consecutive and strided,
1911 // or they should make opposite choices).
1912 //
1913 // Assign relative polarities to each group connected in this way.
1914 //
1915 // The aim is to discover natural move-free striding choices, which will
1916 // often exist in carefully written ACLE code.
1917 unsigned int num_edges = m_allocnos.length () * ARRAY_SIZE (links);
1918 auto_sbitmap visited_edges (num_edges);
1919 bitmap_clear (visited_edges);
1920
1921 auto_vec<unsigned int, 32> worklist;
1922 for (unsigned int i = 0; i < num_edges; ++i)
1923 {
1924 if (!bitmap_set_bit (visited_edges, i))
1925 continue;
1926 worklist.quick_push (i);
1927 while (!worklist.is_empty ())
1928 {
1929 auto ei = worklist.pop ();
1930 auto *allocno1 = m_allocnos[ei / ARRAY_SIZE (links)];
1931 auto ai2 = allocno1->*links[ei % ARRAY_SIZE (links)];
1932 if (ai2 == INVALID_ALLOCNO)
1933 continue;
1934
1935 auto *allocno2 = m_allocnos[ai2];
1936 auto *group1 = allocno1->group ();
1937 auto *group2 = allocno2->group ();
1938 if (!group1->has_flexible_stride || !group2->has_flexible_stride)
1939 continue;
1940
1941 int pref = strided_polarity_pref (allocno1, allocno2);
1942 if (pref == 0)
1943 continue;
1944
1945 for (auto *group : { group1, group2 })
1946 for (auto &allocno : group->allocnos ())
1947 for (unsigned int j = 0; j < ARRAY_SIZE (links); ++j)
1948 if (bitmap_set_bit (visited_edges, allocno.id * ARRAY_SIZE (links) + j))
1949 worklist.safe_push (allocno.id * ARRAY_SIZE (links) + j);
1950
1951 if (group1->strided_polarity)
1952 group2->strided_polarity = group1->strided_polarity * pref;
1953 else if (group2->strided_polarity)
1954 group1->strided_polarity = group2->strided_polarity * pref;
1955 else
1956 {
1957 group1->strided_polarity = 1;
1958 group2->strided_polarity = pref;
1959 }
1960 }
1961 }
1962
1963 // Now look for edges between allocnos in multi-register groups where:
1964 //
1965 // - the two groups have a relative strided polarity preference (as above).
1966 //
1967 // - one group (G1) has a free choice between consecutive and strided
1968 // allocations.
1969 //
1970 // - the other group (G2) must use consecutive allocations.
1971 //
1972 // Update G1's individual preference for strided or consecutive allocations
1973 // based on G2. If the previous loop chose a polarity for G1, work out
1974 // whether it is better for polarity 1 or -1 to correspond to consecutive
1975 // allocation.
1976 int consecutive_pref = 0;
1977 for (unsigned int i = m_allocnos.length (); i-- > 0; )
1978 {
1979 auto *allocno1 = m_allocnos[i];
1980 for (auto link : links)
1981 {
1982 auto ai2 = allocno1->*link;
1983 if (ai2 == INVALID_ALLOCNO)
1984 continue;
1985
1986 auto *allocno2 = m_allocnos[ai2];
1987 auto *group1 = allocno1->group ();
1988 auto *group2 = allocno2->group ();
1989 if (group1->has_flexible_stride == group2->has_flexible_stride)
1990 continue;
1991
1992 int pref = strided_polarity_pref (allocno1, allocno2);
1993 if (pref == 0)
1994 continue;
1995
1996 auto *group = (group1->has_flexible_stride ? group1 : group2);
1997 consecutive_pref += group->strided_polarity * pref;
1998 group->consecutive_pref += pref;
1999 }
2000 }
2001
2002 // If it doesn't matter whether polarity 1 or -1 corresponds to consecutive
2003 // allocation, arbitrarily pick 1.
2004 if (consecutive_pref == 0)
2005 consecutive_pref = 1;
2006
2007 // Record which multi-register groups should use strided allocations.
2008 // Clear out the temporary edges.
2009 for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
2010 {
2011 auto *allocno = m_allocnos[ai];
2012 allocno->chain_prev = INVALID_ALLOCNO;
2013 allocno->chain_next = INVALID_ALLOCNO;
2014
2015 if (allocno->offset != 0)
2016 continue;
2017
2018 auto *group = allocno->group ();
2019 if (!group->has_flexible_stride)
2020 continue;
2021
2022 bool make_strided = (group->strided_polarity
2023 ? (consecutive_pref * group->strided_polarity) < 0
2024 : group->consecutive_pref < 0);
2025 if (dump_file && (dump_flags & TDF_DETAILS))
2026 fprintf (dump_file, "Allocno [%d:%d]: strided polarity %d,"
2027 " consecutive pref %d, %s\n",
2028 allocno->id, allocno->id + group->size - 1,
2029 group->strided_polarity, group->consecutive_pref,
2030 make_strided ? "making strided" : "keeping consecutive");
2031 if (!make_strided)
2032 continue;
2033
2034 // 2-register groups have a stride of 8 FPRs and must start in
2035 // registers matching the mask 0x17. 4-register groups have a stride
2036 // of 4 FPRs and must start in registers matching the mask 0x13.
2037 group->stride = group->size == 2 ? 8 : 4;
2038 gcc_checking_assert (group->fpr_candidates
2039 == (group->size == 2 ? 0x55555555 : 0x11111111));
2040 group->fpr_candidates = (group->size == 2 ? 0xff00ff : 0xf000f);
2041 }
2042 }
2043
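// A minimal standalone sketch (kept out of compilation) of the edge
// encoding used in find_strided_accesses: edge (A, L), for allocno id A
// and link index L, is encoded as A * num_links + L and decoded with
// division and remainder.  The ids below are invented for illustration.
#if 0
#include <cassert>

int
main ()
{
  const unsigned int num_links = 4;
  unsigned int allocno_id = 7;
  unsigned int link_index = 2;

  unsigned int edge = allocno_id * num_links + link_index;
  assert (edge / num_links == allocno_id);
  assert (edge % num_links == link_index);
  return 0;
}
#endif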
2044 // Compare the allocnos at *ALLOCNO1_PTR and *ALLOCNO2_PTR and return a <=>
2045 // result that puts allocnos in order of increasing FIELD.
2046 template<unsigned int early_ra::allocno_info::*field>
2047 int
2048 early_ra::cmp_increasing (const void *allocno1_ptr, const void *allocno2_ptr)
2049 {
2050 auto *allocno1 = *(allocno_info *const *) allocno1_ptr;
2051 auto *allocno2 = *(allocno_info *const *) allocno2_ptr;
2052
2053 if (allocno1->*field != allocno2->*field)
2054 return allocno1->*field < allocno2->*field ? -1 : 1;
2055 return (allocno1->id < allocno2->id ? -1
2056 : allocno1->id == allocno2->id ? 0 : 1);
2057 }
2058
2059 // Return true if we should consider chaining ALLOCNO1 onto the head
2060 // of ALLOCNO2. This is just a local test of the two allocnos; it doesn't
2061 // guarantee that chaining them would give a self-consistent system.
2062 bool
2063 early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2)
2064 {
2065 if (allocno1->equiv_allocno != INVALID_ALLOCNO)
2066 allocno1 = m_allocnos[allocno1->equiv_allocno];
2067
2068 if (allocno2->start_point >= allocno1->end_point
2069 && allocno2->equiv_allocno != allocno1->id)
2070 return false;
2071
2072 if (allocno2->is_strong_copy_dest)
2073 {
2074 if (!allocno1->is_strong_copy_src
2075 || allocno1->copy_dest != allocno2->id)
2076 return false;
2077 }
2078 else if (allocno2->is_copy_dest)
2079 {
2080 if (allocno1->copy_dest != allocno2->id)
2081 return false;
2082 }
2083 else if (allocno1->is_earlyclobbered)
2084 {
2085 if (allocno1->end_point == allocno2->start_point + 1)
2086 return false;
2087 }
2088
2089 return true;
2090 }
2091
2092 // We're trying to chain allocno ALLOCNO1 to a later allocno.
2093 // Rate how good a choice ALLOCNO2 would be, with higher being better.
2094 int
2095 early_ra::rate_chain (allocno_info *allocno1, allocno_info *allocno2)
2096 {
2097 int score = 0;
2098 if (allocno2->is_strong_copy_dest)
2099 score += 256;
2100 else if (allocno2->is_copy_dest)
2101 score += 128;
2102
2103 // Prefer well-aligned matches.
2104 auto *group1 = allocno1->group ();
2105 auto *group2 = allocno2->group ();
2106 if (group1->stride == 1 && group2->stride == 1)
2107 {
2108 unsigned int min_size = std::min (group1->color_rep ()->size,
2109 group2->color_rep ()->size);
2110 if ((group1->color_rep_offset + allocno1->offset) % min_size
2111 == (group2->color_rep_offset + allocno2->offset) % min_size)
2112 score += min_size;
2113 else
2114 score -= min_size;
2115 }
2116 return score;
2117 }
2118
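// A minimal standalone sketch (kept out of compilation) of the
// alignment test in rate_chain: two allocnos are "well aligned" if
// their offsets from their color representatives are congruent modulo
// the smaller representative size.  The sizes and offsets below are
// invented for illustration.
#if 0
#include <algorithm>
#include <cassert>

int
main ()
{
  unsigned int size1 = 4, offset1 = 2;   // rep size and allocno offset
  unsigned int size2 = 2, offset2 = 0;
  unsigned int min_size = std::min (size1, size2);

  int score = (offset1 % min_size == offset2 % min_size
	       ? int (min_size) : -int (min_size));
  assert (score == 2);  // 2 % 2 == 0 % 2, so the match is rewarded
  return 0;
}
#endif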
2119 // Compare the chain_candidate_infos at ARG1 and ARG2 and return a <=>
2120 // result that puts candidates in order of decreasing score.
2121 int
2122 early_ra::cmp_chain_candidates (const void *arg1, const void *arg2)
2123 {
2124 auto &candidate1 = *(const chain_candidate_info *) arg1;
2125 auto &candidate2 = *(const chain_candidate_info *) arg2;
2126 if (candidate1.score != candidate2.score)
2127 return candidate1.score > candidate2.score ? -1 : 1;
2128
2129 // Prefer to increase the gap between uses of the allocated register,
2130 // to give the scheduler more freedom.
2131 auto *allocno1 = candidate1.allocno;
2132 auto *allocno2 = candidate2.allocno;
2133 if (allocno1->start_point != allocno2->start_point)
2134 return allocno1->start_point < allocno2->start_point ? -1 : 1;
2135
2136 if (allocno1 != allocno2)
2137 return allocno1->id < allocno2->id ? -1 : 1;
2138
2139 return 0;
2140 }
2141
2142 // Join the chains of allocnos that start at HEADI1 and HEADI2.
2143 // HEADI1 is either empty or a single allocno.
2144 void
2145 early_ra::chain_allocnos (unsigned int &headi1, unsigned int &headi2)
2146 {
2147 if (headi1 == INVALID_ALLOCNO)
2148 headi1 = headi2;
2149 else if (headi2 == INVALID_ALLOCNO)
2150 headi2 = headi1;
2151 else
2152 {
2153 auto *head1 = m_allocnos[headi1];
2154 auto *head2 = m_allocnos[headi2];
2155 gcc_checking_assert (head1->chain_next == INVALID_ALLOCNO
2156 && head1->chain_prev == INVALID_ALLOCNO
2157 && head2->chain_prev == INVALID_ALLOCNO);
2158
2159 if (head1->equiv_allocno != INVALID_ALLOCNO
2160 && m_allocnos[head1->equiv_allocno]->copy_dest == headi2)
2161 {
2162 head1->is_copy_dest = head2->is_copy_dest;
2163 head1->is_strong_copy_dest = head2->is_strong_copy_dest;
2164 m_allocnos[head1->equiv_allocno]->copy_dest = headi1;
2165 }
2166 head1->chain_next = headi2;
2167 head2->chain_prev = headi1;
2168
2169 headi2 = headi1;
2170 }
2171 }
2172
2173 // Set the color representative of ALLOCNO's group to REP, such that ALLOCNO
2174 // ends up being at allocno offset REP_OFFSET from the start of REP.
2175 void
2176 early_ra::set_single_color_rep (allocno_info *allocno, allocno_group_info *rep,
2177 unsigned int rep_offset)
2178 {
2179 auto *group = allocno->group ();
2180 if (group->m_color_rep == rep)
2181 return;
2182
2183 group->m_color_rep = rep;
2184 gcc_checking_assert (multiple_p (group->stride, rep->stride));
2185 unsigned int factor = group->stride / rep->stride;
2186 gcc_checking_assert (rep_offset >= allocno->offset * factor);
2187 group->color_rep_offset = rep_offset - allocno->offset * factor;
2188 rep->fpr_size = std::max (rep->fpr_size, group->fpr_size);
2189 rep->fpr_candidates &= (group->fpr_candidates
2190 >> (group->color_rep_offset * rep->stride));
2191 }
2192
2193 // REP1 and REP2 are color representatives. Change REP1's color representative
2194 // to REP2, with REP1 starting at allocno offset REP2_OFFSET into REP2.
2195 void
2196 early_ra::set_color_rep (allocno_group_info *rep1, allocno_group_info *rep2,
2197 unsigned int rep2_offset)
2198 {
2199 gcc_checking_assert (rep1 != rep2
2200 && rep2->m_color_rep == rep2
2201 && multiple_p (rep1->stride, rep2->stride));
2202
2203 auto heads1 = rep1->chain_heads ();
2204 auto heads2 = rep2->chain_heads ();
2205 for (unsigned int i1 = 0; i1 < heads1.size (); ++i1)
2206 if (heads1[i1] != INVALID_ALLOCNO)
2207 {
2208 unsigned int i2 = rep2_offset + i1 * rep1->stride / rep2->stride;
2209 if (heads2[i2] == INVALID_ALLOCNO)
2210 heads2[i2] = heads1[i1];
2211 else
2212 gcc_checking_assert (heads2[i2] == heads1[i1]);
2213 set_single_color_rep (m_allocnos[heads1[i1]], rep2, i2);
2214 }
2215 }
2216
2217 // Try to chain ALLOCNO1 to the head of the chain starting at ALLOCNO2.
2218 // Return true on success.
2219 bool
2220 early_ra::try_to_chain_allocnos (allocno_info *allocno1,
2221 allocno_info *allocno2)
2222 {
2223 auto *group1 = allocno1->group ()->color_rep ();
2224 auto *group2 = allocno2->group ()->color_rep ();
2225
2226 // Avoid trying to tie different subgroups of the same group. This can
2227 // happen if the parts of a register are defined and used piecemeal.
2228 if (group1 == group2)
2229 return false;
2230
2231 // The stride (in FPRs) between allocnos of each color representative.
2232 auto fpr_stride1 = group1->stride;
2233 auto fpr_stride2 = group2->stride;
2234
2235 // The offset (in FPRs) of each allocno group from its color representative.
2236 auto fpr_offset1 = allocno1->group ()->color_rep_offset * fpr_stride1;
2237 auto fpr_offset2 = allocno2->group ()->color_rep_offset * fpr_stride2;
2238
2239 // The offset (in FPRs) of each allocno from its color representative.
2240 fpr_offset1 += allocno1->offset * allocno1->group ()->stride;
2241 fpr_offset2 += allocno2->offset * allocno2->group ()->stride;
2242
2243 // The FPR overlap is in multiples of the larger stride.
2244 auto max_fpr_stride = std::max (fpr_stride1, fpr_stride2);
2245 auto min_fpr_offset = std::min (fpr_offset1, fpr_offset2);
2246 auto fpr_overlap_offset = ROUND_DOWN (min_fpr_offset, max_fpr_stride);
2247
2248 // The offset (in FPRs) of the start of the overlapping region from
2249 // each color representative.
2250 fpr_offset1 -= fpr_overlap_offset;
2251 fpr_offset2 -= fpr_overlap_offset;
2252
2253 // The number of FPRs in each color representative after the start
2254 // of the overlapping region.
2255 auto fpr_after1 = (group1->size - 1) * fpr_stride1 - fpr_offset1;
2256 auto fpr_after2 = (group2->size - 1) * fpr_stride2 - fpr_offset2;
2257
2258 auto min_fpr_after = std::min (fpr_after1, fpr_after2);
2259
2260 // The number of overlapping allocnos.
2261 auto allocno_overlap_size = min_fpr_after / max_fpr_stride + 1;
2262
2263 // The offset (in allocnos) of the overlapping region from the start
2264 // of each color representative.
2265 auto allocno_offset1 = fpr_offset1 / fpr_stride1;
2266 auto allocno_offset2 = fpr_offset2 / fpr_stride2;
2267
2268 // The stride (in allocnos) between overlapping allocnos.
2269 auto allocno_stride1 = max_fpr_stride / fpr_stride1;
2270 auto allocno_stride2 = max_fpr_stride / fpr_stride2;
2271
2272 // Reject combinations that are impossible to allocate.
2273 auto fprs1 = group1->fpr_candidates;
2274 auto fprs2 = group2->fpr_candidates;
2275 if (fpr_offset1 > fpr_offset2)
2276 fprs2 >>= (fpr_offset1 - fpr_offset2);
2277 else
2278 fprs1 >>= (fpr_offset2 - fpr_offset1);
2279 if ((fprs1 & fprs2) == 0)
2280 {
2281 if (dump_file && (dump_flags & TDF_DETAILS))
2282 fprintf (dump_file, " - cannot chain %d->%d, no FPRs in common"
2283 " (%08x@%d and %08x@%d)\n", allocno1->id, allocno2->id,
2284 group1->fpr_candidates, fpr_offset1,
2285 group2->fpr_candidates, fpr_offset2);
2286 return false;
2287 }
2288
2289 // Check whether the chain can be formed.
2290 auto heads1 = group1->chain_heads ();
2291 auto heads2 = group2->chain_heads ();
2292 for (unsigned int i = 0; i < allocno_overlap_size; ++i)
2293 {
2294 auto headi1 = heads1[allocno_offset1 + i * allocno_stride1];
2295 auto headi2 = heads2[allocno_offset2 + i * allocno_stride2];
2296 if (headi1 != INVALID_ALLOCNO && headi2 != INVALID_ALLOCNO)
2297 {
2298 auto *head1 = m_allocnos[headi1];
2299 auto *head2 = m_allocnos[headi2];
2300 if (head1->chain_next != INVALID_ALLOCNO)
2301 return false;
2302 if (head2->equiv_allocno != head1->id
2303 && head1->end_point <= head2->start_point)
2304 return false;
2305 }
2306 }
2307
2308 if (dump_file && (dump_flags & TDF_DETAILS))
2309 {
2310 fprintf (dump_file, " - chaining allocnos [");
2311 for (unsigned int i = 0; i < allocno_overlap_size; ++i)
2312 fprintf (dump_file, "%s%d", i ? "," : "",
2313 heads1[allocno_offset1 + i * allocno_stride1]);
2314 fprintf (dump_file, "] and [");
2315 for (unsigned int i = 0; i < allocno_overlap_size; ++i)
2316 fprintf (dump_file, "%s%d", i ? "," : "",
2317 heads2[allocno_offset2 + i * allocno_stride2]);
2318 fprintf (dump_file, "]\n");
2319 }
2320
2321 // Chain the allocnos, updating the chain heads.
2322 for (unsigned int i = 0; i < allocno_overlap_size; ++i)
2323 chain_allocnos (heads1[allocno_offset1 + i * allocno_stride1],
2324 heads2[allocno_offset2 + i * allocno_stride2]);
2325
2326 // Pick a color representative for the merged groups.
2327 allocno_group_info *new_rep;
2328 if (allocno_offset1 == 0
2329 && group1->size == allocno_overlap_size * allocno_stride1
2330 && multiple_p (fpr_stride1, fpr_stride2))
2331 {
2332 // The first group fits within the second.
2333 set_color_rep (group1, group2, allocno_offset2);
2334 new_rep = group2;
2335 }
2336 else if (allocno_offset2 == 0
2337 && group2->size == allocno_overlap_size * allocno_stride2
2338 && multiple_p (fpr_stride2, fpr_stride1))
2339 {
2340 // The second group fits within the first.
2341 set_color_rep (group2, group1, allocno_offset1);
2342 new_rep = group1;
2343 }
2344 else
2345 {
2346 // We need a new group that is big enough to span both groups.
2347 // The new group always has an FPR stride of 1.
2348 auto max_fpr_offset = std::max (fpr_offset1, fpr_offset2);
2349 auto max_fpr_after = std::max (fpr_after1, fpr_after2);
2350 auto new_size = max_fpr_offset + max_fpr_after + 1;
2351 new_rep = create_allocno_group (INVALID_REGNUM, new_size);
2352
2353 set_color_rep (group1, new_rep, max_fpr_offset - fpr_offset1);
2354 set_color_rep (group2, new_rep, max_fpr_offset - fpr_offset2);
2355 }
2356
2357 if (dump_file && (dump_flags & TDF_DETAILS))
2358 {
2359 fprintf (dump_file, " - new frontier [");
2360 auto new_heads = new_rep->chain_heads ();
2361 for (unsigned int i = 0; i < new_heads.size (); ++i)
2362 {
2363 if (i)
2364 fprintf (dump_file, ",");
2365 if (new_heads[i] == INVALID_ALLOCNO)
2366 fprintf (dump_file, "-");
2367 else
2368 fprintf (dump_file, "%d", new_heads[i]);
2369 }
2370 fprintf (dump_file, "]\n");
2371 }
2372
2373 return true;
2374 }
2375
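// A minimal standalone sketch (kept out of compilation) of the overlap
// arithmetic in try_to_chain_allocnos, for one invented case: a
// 2-allocno group with FPR stride 1 overlapping the second allocno of a
// 2-allocno group with FPR stride 8.
#if 0
#include <algorithm>
#include <cassert>

int
main ()
{
  unsigned int fpr_stride1 = 1, fpr_stride2 = 8;
  unsigned int size1 = 2, size2 = 2;
  unsigned int fpr_offset1 = 0, fpr_offset2 = 8;

  unsigned int max_fpr_stride = std::max (fpr_stride1, fpr_stride2);
  unsigned int min_fpr_offset = std::min (fpr_offset1, fpr_offset2);
  unsigned int overlap_offset
    = min_fpr_offset / max_fpr_stride * max_fpr_stride;
  fpr_offset1 -= overlap_offset;
  fpr_offset2 -= overlap_offset;

  unsigned int fpr_after1 = (size1 - 1) * fpr_stride1 - fpr_offset1;
  unsigned int fpr_after2 = (size2 - 1) * fpr_stride2 - fpr_offset2;
  unsigned int overlap_size
    = std::min (fpr_after1, fpr_after2) / max_fpr_stride + 1;

  // Exactly one allocno overlaps: allocno 0 of the first group and
  // allocno 1 of the second.
  assert (overlap_size == 1);
  assert (fpr_offset1 / fpr_stride1 == 0);
  assert (fpr_offset2 / fpr_stride2 == 1);
  return 0;
}
#endif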
2376 // Create a color_info for color representative GROUP.
2377 void
2378 early_ra::create_color (allocno_group_info *group)
2379 {
2380 auto *color = region_allocate<color_info> ();
2381 color->id = m_colors.length ();
2382 color->hard_regno = FIRST_PSEUDO_REGISTER;
2383 color->group = group;
2384
2385 gcc_checking_assert (group->m_color_rep == group);
2386 group->has_color = true;
2387 group->color = m_colors.length ();
2388
2389 m_colors.safe_push (color);
2390 }
2391
2392 // Form allocnos into chains. Create colors for each resulting clique.
2393 void
2394 early_ra::form_chains ()
2395 {
2396 if (dump_file && (dump_flags & TDF_DETAILS))
2397 fprintf (dump_file, "\nChaining allocnos:\n");
2398
2399 // Perform (modified) interval graph coloring. First sort by
2400 // increasing start point.
2401 m_sorted_allocnos.reserve (m_allocnos.length ());
2402 m_sorted_allocnos.splice (m_allocnos);
2403 m_sorted_allocnos.qsort (cmp_increasing<&allocno_info::start_point>);
2404
2405 // During this phase, color representatives are only correct for
2406 // unprocessed allocno groups (where the color representative is
2407 // the group itself) and for groups that contain a current chain head.
2408 unsigned int ti = 0;
2409 auto_vec<chain_candidate_info> candidates;
2410 for (unsigned int hi = 0; hi < m_sorted_allocnos.length (); ++hi)
2411 {
2412 auto *allocno1 = m_sorted_allocnos[hi];
2413 if (allocno1->chain_next != INVALID_ALLOCNO)
2414 continue;
2415
2416 // Record conflicts with direct uses for FPR hard registers.
2417 auto *group1 = allocno1->group ();
2418 for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
2419 if (fpr_conflicts_with_allocno_p (fpr, allocno1))
2420 group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
2421
2422 // Record conflicts due to partially call-clobbered registers.
2423 // (Full clobbers are handled by the previous loop.)
2424 for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id)
2425 if (call_in_range_p (abi_id, allocno1->start_point,
2426 allocno1->end_point))
2427 {
2428 auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
2429 group1->fpr_candidates &= ~fprs >> allocno1->offset;
2430 }
2431
2432 // Find earlier allocnos (in processing order) that could be chained
2433 // to this one.
2434 candidates.truncate (0);
2435 for (unsigned int sci = ti; sci < hi; ++sci)
2436 {
2437 auto *allocno2 = m_sorted_allocnos[sci];
2438 if (allocno2->chain_prev == INVALID_ALLOCNO)
2439 {
2440 if (!is_chain_candidate (allocno1, allocno2))
2441 continue;
2442 chain_candidate_info candidate;
2443 candidate.allocno = allocno2;
2444 candidate.score = rate_chain (allocno1, allocno2);
2445 candidates.safe_push (candidate);
2446 }
2447 else if (sci == ti)
2448 ++ti;
2449 }
2450
2451 // Sort the candidates by decreasing score.
2452 candidates.qsort (cmp_chain_candidates);
2453 if (dump_file && (dump_flags & TDF_DETAILS))
2454 {
2455 fprintf (dump_file, " Chain candidates for %d:", allocno1->id);
2456 for (auto &candidate : candidates)
2457 fprintf (dump_file, " %d(%d)", candidate.allocno->id,
2458 candidate.score);
2459 fprintf (dump_file, "\n");
2460 }
2461
2462 // Pick the first candidate that works.
2463 for (auto &candidate : candidates)
2464 if (try_to_chain_allocnos (allocno1, candidate.allocno))
2465 break;
2466 }
2467
2468 // Create color_infos for each group. Make sure that each group's
2469 // color representative is up to date.
2470 for (unsigned int hi = m_sorted_allocnos.length (); hi-- > 0; )
2471 {
2472 auto *allocno = m_sorted_allocnos[hi];
2473 auto *rep = allocno->group ()->color_rep ();
2474 if (rep->has_color)
2475 continue;
2476
2477 create_color (rep);
2478 auto heads = rep->chain_heads ();
2479 for (unsigned int i = 0; i < heads.size (); ++i)
2480 {
2481 unsigned int ai = heads[i];
2482 while (ai != INVALID_ALLOCNO)
2483 {
2484 allocno = m_allocnos[ai];
2485 set_single_color_rep (allocno, rep, i * rep->stride);
2486 ai = allocno->chain_next;
2487 }
2488 }
2489 }
2490 }
2491
2492 // Return true if the given FPR (starting at 0) conflicts with allocno
2493 // ALLOCNO.
2494 bool
2495 early_ra::fpr_conflicts_with_allocno_p (unsigned int fpr,
2496 allocno_info *allocno)
2497 {
2498 auto &ranges = m_fpr_ranges[fpr];
2499 unsigned int start_i = 0;
2500 unsigned int end_i = ranges.length ();
2501 while (start_i < end_i)
2502 {
2503 unsigned int mid_i = (start_i + end_i) / 2;
2504 auto &range = ranges[mid_i];
2505 if (allocno->end_point > range.start_point)
2506 start_i = mid_i + 1;
2507 else if (allocno->start_point < range.end_point)
2508 end_i = mid_i;
2509 else
2510 {
2511 if (range.allocno != allocno->id)
2512 return true;
2513 // The FPR is equivalent to ALLOCNO for this particular range.
2514 // See whether ALLOCNO conflicts with a neighboring range.
2515 if (mid_i > 0
2516 && ranges[mid_i - 1].start_point >= allocno->end_point)
2517 return true;
2518 if (mid_i + 1 < ranges.length ()
2519 && ranges[mid_i + 1].end_point <= allocno->start_point)
2520 return true;
2521 return false;
2522 }
2523 }
2524 return false;
2525 }
2526
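// A minimal standalone sketch (kept out of compilation) of the binary
// search above.  Under the reverse point numbering, a range runs from a
// larger start_point down to a smaller end_point, and the vector is
// sorted by increasing point numbers (latest range in program order
// first).  The ranges and query points below are invented for
// illustration.
#if 0
#include <vector>
#include <cassert>

struct range { unsigned int start_point, end_point; };

// Return true if some range in RANGES overlaps the allocno extent
// [END_POINT, START_POINT].
static bool
overlaps_p (const std::vector<range> &ranges,
	    unsigned int start_point, unsigned int end_point)
{
  unsigned int start_i = 0, end_i = ranges.size ();
  while (start_i < end_i)
    {
      unsigned int mid_i = (start_i + end_i) / 2;
      if (end_point > ranges[mid_i].start_point)
	start_i = mid_i + 1;
      else if (start_point < ranges[mid_i].end_point)
	end_i = mid_i;
      else
	return true;
    }
  return false;
}

int
main ()
{
  std::vector<range> ranges = { { 10, 8 }, { 20, 15 } };
  assert (overlaps_p (ranges, 9, 9));    // inside the first range
  assert (!overlaps_p (ranges, 13, 12)); // in the gap between them
  return 0;
}
#endif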
2527 // Return true if there is a call with ABI identifier ABI_ID in the inclusive
2528 // program point range [START_POINT, END_POINT].
2529 bool
2530 early_ra::call_in_range_p (unsigned int abi_id, unsigned int start_point,
2531 unsigned int end_point)
2532 {
2533 auto &points = m_call_points[abi_id];
2534 unsigned int start_i = 0;
2535 unsigned int end_i = points.length ();
2536 while (start_i < end_i)
2537 {
2538 unsigned int mid_i = (start_i + end_i) / 2;
2539 auto point = points[mid_i];
2540 if (end_point > point)
2541 start_i = mid_i + 1;
2542 else if (start_point < point)
2543 end_i = mid_i;
2544 else
2545 return true;
2546 }
2547 return false;
2548 }
2549
2550 // Return the set of FPRs for which a value of size SIZE will be clobbered
2551 // by a call to a function with ABI identifier ABI_ID, but would not be
2552 // for some smaller size. The set therefore excludes FPRs that are
2553 // fully-clobbered, like V0 in the base ABI.
2554 unsigned int
2555 early_ra::partial_fpr_clobbers (unsigned int abi_id, fpr_size_info size)
2556 {
2557 auto &abi = function_abis[abi_id];
2558 unsigned int clobbers = 0;
2559 machine_mode mode = (size == FPR_D ? V8QImode
2560 : size == FPR_Q ? V16QImode : VNx16QImode);
2561 for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
2562 if (!abi.clobbers_full_reg_p (regno)
2563 && abi.clobbers_reg_p (mode, regno))
2564 clobbers |= 1U << (regno - V0_REGNUM);
2565 return clobbers;
2566 }
2567
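// A minimal standalone sketch (kept out of compilation) of the kind of
// mask that partial_fpr_clobbers computes.  Under the base AAPCS64 ABI,
// v8-v15 preserve only their low 64 bits across calls, so a 128-bit
// value is partially clobbered in exactly those registers; the loop
// below builds the corresponding bitmask directly rather than querying
// a function_abi.
#if 0
#include <cassert>

int
main ()
{
  unsigned int clobbers = 0;
  for (unsigned int regno = 0; regno < 32; ++regno)
    if (regno >= 8 && regno <= 15)  // low halves preserved, rest clobbered
      clobbers |= 1U << regno;
  assert (clobbers == 0x0000ff00);
  return 0;
}
#endif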
2568 // Process copies between pseudo registers and hard registers and update
2569 // the FPR preferences for the associated colors.
2570 void
2571 early_ra::process_copies ()
2572 {
2573 for (auto &copy : m_allocno_copies)
2574 {
2575 auto *allocno = m_allocnos[copy.allocno];
2576 auto *group = allocno->group ();
2577 auto offset = group->color_rep_offset + allocno->offset;
2578 if (offset > copy.fpr)
2579 continue;
2580
2581 unsigned int fpr = copy.fpr - offset;
2582 auto *color = m_colors[group->color_rep ()->color];
2583 color->fpr_preferences[fpr] = MIN (color->fpr_preferences[fpr]
2584 + copy.weight, 127);
2585 }
2586 }
2587
2588 // Compare the colors at *COLOR1_PTR and *COLOR2_PTR and return a <=>
2589 // result that puts colors in order of decreasing size.
2590 int
2591 early_ra::cmp_decreasing_size (const void *color1_ptr, const void *color2_ptr)
2592 {
2593 auto *color1 = *(color_info *const *) color1_ptr;
2594 auto *color2 = *(color_info *const *) color2_ptr;
2595
2596 if (color1->group->size != color2->group->size)
2597 return color1->group->size > color2->group->size ? -1 : 1;
2598 return (color1->id < color2->id ? -1
2599 : color1->id == color2->id ? 0 : 1);
2600 }
2601
2602 // Allocate a register to each color. If we run out of registers,
2603 // give up on doing a full allocation of the FPR-based pseudos in the
2604 // region.
2605 void
2606 early_ra::allocate_colors ()
2607 {
2608 if (dump_file && (dump_flags & TDF_DETAILS))
2609 fprintf (dump_file, "\nAllocating registers:\n");
2610
2611 auto_vec<color_info *> sorted_colors;
2612 sorted_colors.safe_splice (m_colors);
2613 sorted_colors.qsort (cmp_decreasing_size);
2614
2615 for (unsigned int i = 0; i < 32; ++i)
2616 if (!crtl->abi->clobbers_full_reg_p (V0_REGNUM + i))
2617 m_call_preserved_fprs |= 1U << i;
2618
2619 for (auto *color : sorted_colors)
2620 {
2621 unsigned int candidates = color->group->fpr_candidates;
2622 for (unsigned int i = 0; i < color->group->size; ++i)
2623 candidates &= ~(m_allocated_fprs >> i);
2624 unsigned int best = INVALID_REGNUM;
2625 int best_weight = 0;
2626 for (unsigned int fpr = 0; fpr <= 32U - color->group->size; ++fpr)
2627 {
2628 if ((candidates & (1U << fpr)) == 0)
2629 continue;
2630 int weight = color->fpr_preferences[fpr];
2631 // Account for registers that the current function must preserve.
2632 for (unsigned int i = 0; i < color->group->size; ++i)
2633 if (m_call_preserved_fprs & (1U << (fpr + i)))
2634 weight -= 1;
2635 if (best == INVALID_REGNUM || best_weight <= weight)
2636 {
2637 best = fpr;
2638 best_weight = weight;
2639 }
2640 }
2641
2642 if (best == INVALID_REGNUM)
2643 {
2644 m_allocation_successful = false;
2645 return;
2646 }
2647
2648 color->hard_regno = best + V0_REGNUM;
2649 if (dump_file && (dump_flags & TDF_DETAILS))
2650 fprintf (dump_file, " Allocating [v%d:v%d] to color %d\n",
2651 best, best + color->group->size - 1, color->id);
2652 m_allocated_fprs |= ((1U << color->group->size) - 1) << best;
2653 }
2654 }
2655
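// A minimal standalone sketch (kept out of compilation) of the
// candidate-mask arithmetic in allocate_colors: a group of SIZE
// consecutive FPRs can start at FPR f only if bits f..f+SIZE-1 of the
// allocated mask are all clear, which the shifted-AND below tests for
// every start position at once.  The masks are invented for
// illustration.
#if 0
#include <cassert>

int
main ()
{
  unsigned int allocated = 0b0110;  // v1 and v2 are taken
  unsigned int size = 2;

  unsigned int candidates = ~0U;
  for (unsigned int i = 0; i < size; ++i)
    candidates &= ~(allocated >> i);

  // A 2-register group cannot start at v0, v1 or v2, but can at v3.
  assert ((candidates & 0b1111) == 0b1000);
  return 0;
}
#endif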
2656 // See if ALLOCNO ends a subchain of single registers that can be split
2657 // off without affecting the rest of the chain, and without introducing
2658 // any moves. Return the start of the chain if so (which might be ALLOCNO
2659 // itself), otherwise return null.
2660 early_ra::allocno_info *
2661 early_ra::find_independent_subchain (allocno_info *allocno)
2662 {
2663 // Make sure ALLOCNO ends a natural subchain.
2664 if (auto *next_allocno = chain_next (allocno))
2665 if (next_allocno->start_point + 1 >= allocno->end_point)
2666 return nullptr;
2667
2668 // Check the allocnos in the purported subchain and find the other end.
2669 for (;;)
2670 {
2671 auto *group = allocno->group ();
2672 if (group->m_color_rep == group)
2673 return nullptr;
2674 if (group->size != 1)
2675 return nullptr;
2676
2677 auto *prev_allocno = chain_prev (allocno);
2678 if (!prev_allocno || allocno->start_point + 1 < prev_allocno->end_point)
2679 return allocno;
2680
2681 allocno = prev_allocno;
2682 }
2683 }
2684
2685 // Search the colors starting at index FIRST_COLOR whose FPRs do not belong
2686 // to FPR_CONFLICTS. Return the first such color that has no group. If all
2687 // such colors have groups, instead return the color with the latest
2688 // (smallest) start point.
2689 early_ra::color_info *
2690 early_ra::find_oldest_color (unsigned int first_color,
2691 unsigned int fpr_conflicts)
2692 {
2693 color_info *best = nullptr;
2694 unsigned int best_start_point = ~0U;
2695 for (unsigned int ci = first_color; ci < m_colors.length (); ++ci)
2696 {
2697 auto *color = m_colors[ci];
2698 if (fpr_conflicts & (1U << (color->hard_regno - V0_REGNUM)))
2699 continue;
2700 if (!color->group)
2701 return color;
2702 auto chain_head = color->group->chain_heads ()[0];
2703 auto start_point = m_allocnos[chain_head]->start_point;
2704 if (!best || best_start_point > start_point)
2705 {
2706 best = color;
2707 best_start_point = start_point;
2708 }
2709 }
2710 return best;
2711 }
2712
2713 // If there are some spare FPRs that can be reused without introducing saves,
2714 // restores, or moves, use them to "broaden" the allocation, in order to give
2715 // the scheduler more freedom. This is particularly useful for forming LDPs
2716 // and STPs.
2717 void
2718 early_ra::broaden_colors ()
2719 {
2720 // Create dummy colors for every leftover FPR that can be used cheaply.
2721 unsigned int first_color = m_colors.length ();
2722 for (unsigned int fpr = 0; fpr < 32; ++fpr)
2723 if (((m_allocated_fprs | m_call_preserved_fprs) & (1U << fpr)) == 0)
2724 {
2725 auto *color = region_allocate<color_info> ();
2726 color->id = m_colors.length ();
2727 color->hard_regno = V0_REGNUM + fpr;
2728 color->group = nullptr;
2729 m_colors.safe_push (color);
2730 }
2731
2732 // Exit early if there are no spare FPRs.
2733 if (first_color == m_colors.length ())
2734 return;
2735
2736 // Go through the allocnos in order, seeing if there is a subchain of
2737 // single-FPR allocnos that can be split off from the containing clique.
2738 // Allocate such subchains to the new colors on an oldest-first basis.
2739 for (auto *allocno : m_sorted_allocnos)
2740 if (auto *start_allocno = find_independent_subchain (allocno))
2741 {
2742 unsigned int fpr_conflicts = 0;
2743 auto *member = allocno;
2744 for (;;)
2745 {
2746 fpr_conflicts |= ~member->group ()->fpr_candidates;
2747 if (member == start_allocno)
2748 break;
2749 member = m_allocnos[member->chain_prev];
2750 }
2751
2752 auto *color = find_oldest_color (first_color, fpr_conflicts);
2753 if (!color)
2754 continue;
2755
2756 if (!color->group)
2757 {
2758 auto *group = allocno->group ();
2759 color->group = group;
2760 group->color = color->id;
2761 group->chain_heads ()[0] = INVALID_ALLOCNO;
2762 }
2763 else
2764 {
2765 auto chain_head = color->group->chain_heads ()[0];
2766 auto start_point = m_allocnos[chain_head]->start_point;
2767 if (start_point >= allocno->end_point)
2768 // Allocating to COLOR isn't viable, and it was the best
2769 // option available.
2770 continue;
2771
2772 auto *next_allocno = chain_next (allocno);
2773 if (!next_allocno || next_allocno->start_point <= start_point)
2774 // The current allocation gives at least as much scheduling
2775 // freedom as COLOR would.
2776 continue;
2777 }
2778
2779 // Unlink the chain.
2780 if (auto *next_allocno = chain_next (allocno))
2781 next_allocno->chain_prev = start_allocno->chain_prev;
2782 if (auto *prev_allocno = chain_prev (start_allocno))
2783 prev_allocno->chain_next = allocno->chain_next;
2784
2785 // Make the subchain use COLOR.
2786 allocno->chain_next = color->group->chain_heads ()[0];
2787 if (dump_file && (dump_flags & TDF_DETAILS))
2788 fprintf (dump_file, "Moving to optional color %d (register %s):",
2789 color->id, reg_names[color->hard_regno]);
2790 for (;;)
2791 {
2792 auto *group = allocno->group ();
2793 if (dump_file && (dump_flags & TDF_DETAILS))
2794 fprintf (dump_file, " r%d", group->regno);
2795 group->m_color_rep = color->group;
2796 group->color_rep_offset = 0;
2797 if (allocno == start_allocno)
2798 break;
2799 allocno = m_allocnos[allocno->chain_prev];
2800 }
2801 if (dump_file && (dump_flags & TDF_DETAILS))
2802 fprintf (dump_file, "\n");
2803 color->group->chain_heads ()[0] = start_allocno->id;
2804 }
2805 }
2806
2807 // Record the final choice of hard register for each allocno.
2808 void
2809 early_ra::finalize_allocation ()
2810 {
2811 for (auto *allocno : m_allocnos)
2812 {
2813 auto *group = allocno->group ();
2814 auto *rep = group->color_rep ();
2815 auto rep_regno = m_colors[rep->color]->hard_regno;
2816 auto group_regno = rep_regno + group->color_rep_offset;
2817 allocno->hard_regno = group_regno + allocno->offset * group->stride;
2818 }
2819 }
2820
2821 // Replace any allocno references in REFS with the allocated register.
2822 bool
2823 early_ra::replace_regs (df_ref refs)
2824 {
2825 bool changed = false;
2826 for (df_ref ref = refs; ref; ref = DF_REF_NEXT_LOC (ref))
2827 {
2828 auto range = get_allocno_subgroup (DF_REF_REG (ref));
2829 if (!range)
2830 continue;
2831
2832 auto new_regno = range.allocno (0)->hard_regno;
2833 *DF_REF_LOC (ref) = gen_rtx_REG (GET_MODE (DF_REF_REG (ref)), new_regno);
2834 changed = true;
2835 }
2836 return changed;
2837 }
2838
2839 // Try to make INSN match its FPR-related constraints. If this needs
2840 // a source operand (SRC) to be copied to a destination operand (DEST)
2841 // before INSN, add the associated (DEST, SRC) pairs to MOVES.
2842 //
2843 // Return -1 on failure, otherwise return a ?/!-style reject count.
2844 // The reject count doesn't model the moves, just the internal alternative
2845 // preferences.
2846 int
2847 early_ra::try_enforce_constraints (rtx_insn *insn,
2848 vec<std::pair<int, int>> &moves)
2849 {
2850 if (!constrain_operands (0, get_preferred_alternatives (insn)))
2851 return -1;
2852
2853 // Pick the alternative with the lowest cost.
2854 int best = -1;
2855 auto alts = get_preferred_alternatives (insn);
2856 for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
2857 {
2858 if (!(alts & ALTERNATIVE_BIT (altno)))
2859 continue;
2860
2861 auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
2862 if (!likely_alternative_match_p (op_alt))
2863 continue;
2864
2865 auto_vec<std::pair<int, int>, 4> new_moves;
2866 for (int opno = 0; opno < recog_data.n_operands; ++opno)
2867 {
2868 rtx op = recog_data.operand[opno];
2869 if (REG_P (op)
2870 && FP_REGNUM_P (REGNO (op))
2871 && op_alt[opno].matched >= 0)
2872 {
2873 rtx old_src = recog_data.operand[op_alt[opno].matched];
2874 if (!operands_match_p (op, old_src))
2875 {
2876 for (int i = 0; i < recog_data.n_operands; ++i)
2877 if (i != opno)
2878 {
2879 rtx other = recog_data.operand[i];
2880 if (reg_overlap_mentioned_p (op, other))
2881 {
2882 old_src = NULL_RTX;
2883 break;
2884 }
2885 }
2886 if (!old_src)
2887 continue;
2888 new_moves.safe_push ({ opno, op_alt[opno].matched });
2889 }
2890 }
2891 }
2892 int cost = count_rejects (op_alt) + new_moves.length () * 7;
2893 if (best < 0 || cost < best)
2894 {
2895 best = cost;
2896 moves.truncate (0);
2897 moves.safe_splice (new_moves);
2898 }
2899 }
2900 return best;
2901 }
2902
2903 // Make INSN match its FPR-related constraints.
2904 void
2905 early_ra::enforce_constraints (rtx_insn *insn)
2906 {
2907 extract_insn (insn);
2908 preprocess_constraints (insn);
2909
2910 // First try with the operands they are.
2911 auto_vec<std::pair<int, int>, 4> moves;
2912 int cost = try_enforce_constraints (insn, moves);
2913
2914 // Next try taking advantage of commutativity.
2915 for (int opno = 0; opno < recog_data.n_operands - 1; ++opno)
2916 if (recog_data.constraints[opno][0] == '%')
2917 {
2918 std::swap (*recog_data.operand_loc[opno],
2919 *recog_data.operand_loc[opno + 1]);
2920 std::swap (recog_data.operand[opno],
2921 recog_data.operand[opno + 1]);
2922 auto_vec<std::pair<int, int>, 4> swapped_moves;
2923 int swapped_cost = try_enforce_constraints (insn, swapped_moves);
2924 if (swapped_cost >= 0 && (cost < 0 || swapped_cost < cost))
2925 {
2926 cost = swapped_cost;
2927 moves.truncate (0);
2928 moves.safe_splice (swapped_moves);
2929 }
2930 else
2931 {
2932 std::swap (*recog_data.operand_loc[opno],
2933 *recog_data.operand_loc[opno + 1]);
2934 std::swap (recog_data.operand[opno],
2935 recog_data.operand[opno + 1]);
2936 }
2937 }
2938
2939 // The allocation should ensure that there is at least one valid combination.
2940 // It's too late to back out now if not.
2941 gcc_assert (cost >= 0);
2942 for (int i = 0; i < recog_data.n_dups; ++i)
2943 {
2944 int dup_of = recog_data.dup_num[i];
2945 rtx new_op = *recog_data.operand_loc[dup_of];
2946 if (new_op != recog_data.operand[dup_of])
2947 *recog_data.dup_loc[i] = copy_rtx (new_op);
2948 }
2949 for (auto move : moves)
2950 {
2951 int dest_opno = move.first;
2952 int src_opno = move.second;
2953 rtx dest = recog_data.operand[dest_opno];
2954 rtx old_src = recog_data.operand[src_opno];
2955 rtx new_src = lowpart_subreg (GET_MODE (old_src), dest, GET_MODE (dest));
2956 emit_insn_before (gen_move_insn (new_src, old_src), insn);
2957 *recog_data.operand_loc[src_opno] = new_src;
2958 }
2959 }
2960
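// A minimal standalone sketch (kept out of compilation) of the
// try-swap-keep-cheaper pattern that enforce_constraints uses for
// commutative operand pairs.  The cost function is invented for
// illustration; -1 means "no valid alternative".
#if 0
#include <utility>
#include <cassert>

static int
toy_cost (int op0, int op1)
{
  // Pretend that alternatives only match when the first operand is
  // the smaller one.
  return op0 <= op1 ? op1 - op0 : -1;
}

int
main ()
{
  int op0 = 5, op1 = 3;
  int cost = toy_cost (op0, op1);

  std::swap (op0, op1);
  int swapped_cost = toy_cost (op0, op1);
  if (swapped_cost >= 0 && (cost < 0 || swapped_cost < cost))
    cost = swapped_cost;          // keep the swapped order
  else
    std::swap (op0, op1);         // restore the original order

  assert (cost == 2 && op0 == 3 && op1 == 5);
  return 0;
}
#endif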
2961 // See whether INSN is an instruction that operates on multi-register vectors,
2962 // and if we have decided to make it use strided rather than consecutive
2963 // accesses. Update the pattern and return true if so.
2964 bool
2965 early_ra::maybe_convert_to_strided_access (rtx_insn *insn)
2966 {
2967 if (!NONJUMP_INSN_P (insn) || recog_memoized (insn) < 0)
2968 return false;
2969
2970 auto stride_type = get_attr_stride_type (insn);
2971 rtx pat = PATTERN (insn);
2972 rtx op;
2973 if (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE
2974 || stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
2975 op = SET_DEST (pat);
2976 else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
2977 op = XVECEXP (SET_SRC (pat), 0, 1);
2978 else
2979 return false;
2980
2981 auto range = get_allocno_subgroup (op);
2982 if (!range || range.group->stride == 1)
2983 return false;
2984
2985 gcc_assert (range.start == 0 && range.count == range.group->size);
2986 auto elt_mode = GET_MODE_INNER (GET_MODE (op));
2987 auto single_mode = aarch64_full_sve_mode (elt_mode).require ();
2988 auto_vec<rtx, 4> regs;
2989 for (unsigned int i = 0; i < range.count; ++i)
2990 regs.quick_push (gen_rtx_REG (single_mode, range.allocno (i)->hard_regno));
2991
2992 extract_insn (insn);
2993 if (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
2994 {
2995 auto unspec = XINT (SET_SRC (pat), 1);
2996 if (range.count == 2)
2997 pat = gen_aarch64_strided2 (unspec, GET_MODE (op), regs[0], regs[1],
2998 recog_data.operand[1],
2999 recog_data.operand[2]);
3000 else
3001 pat = gen_aarch64_strided4 (unspec, GET_MODE (op),
3002 regs[0], regs[1], regs[2], regs[3],
3003 recog_data.operand[1],
3004 recog_data.operand[2]);
3005 }
3006 else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
3007 {
3008 auto unspec = XINT (SET_SRC (pat), 1);
3009 if (range.count == 2)
3010 pat = gen_aarch64_strided2 (unspec, GET_MODE (op),
3011 recog_data.operand[0],
3012 recog_data.operand[2], regs[0], regs[1]);
3013 else
3014 pat = gen_aarch64_strided4 (unspec, GET_MODE (op),
3015 recog_data.operand[0],
3016 recog_data.operand[2],
3017 regs[0], regs[1], regs[2], regs[3]);
3018 // Ensure correct sharing for the source memory.
3019 //
3020 // ??? Why doesn't the generator get this right?
3021 XVECEXP (SET_SRC (pat), 0, XVECLEN (SET_SRC (pat), 0) - 1)
3022 = *recog_data.dup_loc[0];
3023 }
3024 else if (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE)
3025 {
3026 auto bits = INTVAL (XVECEXP (SET_SRC (pat), 0, 4));
3027 if (range.count == 2)
3028 pat = gen_aarch64_sme_lut_strided2 (bits, single_mode,
3029 regs[0], regs[1],
3030 recog_data.operand[1],
3031 recog_data.operand[2]);
3032 else
3033 pat = gen_aarch64_sme_lut_strided4 (bits, single_mode,
3034 regs[0], regs[1], regs[2], regs[3],
3035 recog_data.operand[1],
3036 recog_data.operand[2]);
3037 }
3038 else
3039 gcc_unreachable ();
3040 PATTERN (insn) = pat;
3041 INSN_CODE (insn) = -1;
3042 df_insn_rescan (insn);
3043 return true;
3044 }
3045
3046 // We've successfully allocated the current region. Apply the allocation
3047 // to the instructions.
3048 void
3049 early_ra::apply_allocation ()
3050 {
3051 rtx_insn *prev;
3052 for (auto insn_range : m_insn_ranges)
3053 for (rtx_insn *insn = insn_range.first;
3054 insn != insn_range.second;
3055 insn = prev)
3056 {
3057 prev = PREV_INSN (insn);
3058 if (!INSN_P (insn))
3059 continue;
3060
3061 bool changed = maybe_convert_to_strided_access (insn);
3062 changed |= replace_regs (DF_INSN_DEFS (insn));
3063 changed |= replace_regs (DF_INSN_USES (insn));
3064 if (changed && NONDEBUG_INSN_P (insn))
3065 {
3066 if (GET_CODE (PATTERN (insn)) != USE
3067 && GET_CODE (PATTERN (insn)) != CLOBBER
3068 && !is_move_set (PATTERN (insn)))
3069 enforce_constraints (insn);
3070
3071 // A REG_EQUIV note establishes an equivalence throughout
3072 // the function, but here we're reusing hard registers for
3073 // multiple pseudo registers. We also no longer need REG_EQUIV
3074 // notes that record potential spill locations, since we've
3075 // allocated the pseudo register without spilling.
3076 rtx *ptr = &REG_NOTES (insn);
3077 while (*ptr)
3078 if (REG_NOTE_KIND (*ptr) == REG_EQUIV)
3079 *ptr = XEXP (*ptr, 1);
3080 else
3081 ptr = &XEXP (*ptr, 1);
3082 }
3083 changed |= replace_regs (DF_INSN_EQ_USES (insn));
3084 if (changed)
3085 df_insn_rescan (insn);
3086 }
3087
3088 for (auto *insn : m_dead_insns)
3089 delete_insn (insn);
3090 }
3091
3092 // Try to allocate the current region. Update the instructions if successful.
3093 void
3094 early_ra::process_region ()
3095 {
3096 for (auto *allocno : m_allocnos)
3097 allocno->chain_next = INVALID_ALLOCNO;
3098
3099 if (dump_file && (dump_flags & TDF_DETAILS))
3100 {
3101 dump_fpr_ranges ();
3102 dump_copies ();
3103 dump_allocnos ();
3104 }
3105
3106 find_strided_accesses ();
3107
3108 if (dump_file && (dump_flags & TDF_DETAILS))
3109 dump_allocnos ();
3110
3111 form_chains ();
3112
3113 if (dump_file && (dump_flags & TDF_DETAILS))
3114 dump_allocnos ();
3115
3116 process_copies ();
3117
3118 if (dump_file && (dump_flags & TDF_DETAILS))
3119 dump_colors ();
3120
3121 allocate_colors ();
3122 if (!m_allocation_successful)
3123 return;
3124
3125 broaden_colors ();
3126 finalize_allocation ();
3127
3128 if (dump_file && (dump_flags & TDF_DETAILS))
3129 {
3130 fprintf (dump_file, "\nAllocation successful\nFinal allocation:\n");
3131 dump_allocnos ();
3132 dump_colors ();
3133 }
3134
3135 apply_allocation ();
3136 }
3137
3138 // Return true if INSN would become dead if we successfully allocate the
3139 // current region.
3140 bool
3141 early_ra::is_dead_insn (rtx_insn *insn)
3142 {
3143 rtx set = single_set (insn);
3144 if (!set)
3145 return false;
3146
3147 rtx dest = SET_DEST (set);
3148 auto dest_range = get_allocno_subgroup (dest);
3149 if (!dest_range)
3150 return false;
3151
3152 for (auto &allocno : dest_range.allocnos ())
3153 if (bitmap_bit_p (m_live_allocnos, allocno.id))
3154 return false;
3155
3156 if (side_effects_p (set))
3157 return false;
3158
3159 return true;
3160 }
3161
3162 // Build up information about block BB. IS_ISOLATED is true if the
3163 // block is not part of a larger region.
3164 void
3165 early_ra::process_block (basic_block bb, bool is_isolated)
3166 {
3167 m_current_bb = bb;
3168 m_current_point += 1;
3169 m_current_bb_point = m_current_point;
3170
3171 // Process live-out FPRs.
3172 bitmap live_out = df_get_live_out (bb);
3173 for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
3174 if (bitmap_bit_p (live_out, regno))
3175 record_fpr_use (regno);
3176
3177 // Process live-out allocnos. We don't track individual FPR liveness
3178 // across block boundaries, so we have to assume that the whole pseudo
3179 // register is live.
3180 bitmap_iterator bi;
3181 unsigned int regno;
3182 EXECUTE_IF_AND_IN_BITMAP (df_get_live_out (bb), m_fpr_pseudos,
3183 FIRST_PSEUDO_REGISTER, regno, bi)
3184 {
3185 auto range = get_allocno_subgroup (regno_reg_rtx[regno]);
3186 for (auto &allocno : range.allocnos ())
3187 record_allocno_use (&allocno);
3188 }
3189
3190 m_current_point += 1;
3191
3192 record_artificial_refs (0);
3193
3194 bool is_first = true;
3195 rtx_insn *start_insn = BB_END (bb);
3196 rtx_insn *insn;
3197 FOR_BB_INSNS_REVERSE (bb, insn)
3198 {
3199 if (!NONDEBUG_INSN_P (insn))
3200 continue;
3201
3202 // CLOBBERs are used to prevent pseudos from being upwards exposed.
3203 // We can ignore them if allocation is successful.
3204 if (GET_CODE (PATTERN (insn)) == CLOBBER)
3205 {
3206 if (get_allocno_subgroup (XEXP (PATTERN (insn), 0)))
3207 m_dead_insns.safe_push (insn);
3208 continue;
3209 }
3210
3211 if (dump_file && (dump_flags & TDF_DETAILS))
3212 {
3213 if (is_first)
3214 fprintf (dump_file, "\nBlock %d:\n", bb->index);
3215 fprintf (dump_file, "%6d:", m_current_point);
3216 pretty_printer rtl_slim_pp;
3217 rtl_slim_pp.buffer->stream = dump_file;
3218 print_insn (&rtl_slim_pp, insn, 1);
3219 pp_flush (&rtl_slim_pp);
3220 fprintf (dump_file, "\n");
3221 }
3222 is_first = false;
3223
3224 if (is_dead_insn (insn))
3225 {
3226 if (dump_file && (dump_flags & TDF_DETAILS))
3227 fprintf (dump_file, "%14s -- dead\n", "");
3228 m_dead_insns.safe_push (insn);
3229 }
3230 else
3231 {
3232 record_insn_refs (insn);
3233 rtx pat = PATTERN (insn);
3234 if (is_move_set (pat))
3235 record_copy (SET_DEST (pat), SET_SRC (pat), true);
3236 else
3237 {
3238 extract_insn (insn);
3239 record_constraints (insn);
3240 }
3241 }
3242
3243 // See whether we have a complete region, with no remaining live
3244 // allocnos.
3245 if (is_isolated
3246 && bitmap_empty_p (m_live_allocnos)
3247 && m_live_fprs == 0
3248 && m_allocation_successful
3249 && !m_allocnos.is_empty ())
3250 {
3251 rtx_insn *prev_insn = PREV_INSN (insn);
3252 m_insn_ranges.safe_push ({ start_insn, prev_insn });
3253 process_region ();
3254 start_new_region ();
3255 is_first = true;
3256 start_insn = prev_insn;
3257 }
3258 }
3259 m_insn_ranges.safe_push ({ start_insn, BB_HEAD (bb) });
3260
3261 record_artificial_refs (DF_REF_AT_TOP);
3262
3263 // Process live-in FPRs.
3264 bitmap live_in = df_get_live_in (bb);
3265 for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
3266 if (bitmap_bit_p (live_in, regno)
3267 && (m_live_fprs & (1U << (regno - V0_REGNUM))))
3268 record_fpr_def (regno);
3269
3270 // Process live-in allocnos.
3271 EXECUTE_IF_AND_IN_BITMAP (live_in, m_fpr_pseudos,
3272 FIRST_PSEUDO_REGISTER, regno, bi)
3273 {
3274 auto range = get_allocno_subgroup (regno_reg_rtx[regno]);
3275 for (auto &allocno : range.allocnos ())
3276 if (bitmap_bit_p (m_live_allocnos, allocno.id))
3277 record_allocno_def (&allocno);
3278 }
3279
3280 m_current_point += 1;
3281
3282 bitmap_clear (m_live_allocnos);
3283 m_live_fprs = 0;
3284 }
3285
3286 // Divide the function into regions, such that no edges into or out
3287 // of a region have live "FPR pseudos".
3288 void
3289 early_ra::process_blocks ()
3290 {
3291 auto_sbitmap visited (last_basic_block_for_fn (m_fn));
3292 auto_sbitmap fpr_pseudos_live_out (last_basic_block_for_fn (m_fn));
3293 auto_sbitmap fpr_pseudos_live_in (last_basic_block_for_fn (m_fn));
3294
3295 bitmap_clear (visited);
3296 bitmap_clear (fpr_pseudos_live_out);
3297 bitmap_clear (fpr_pseudos_live_in);
3298
3299 // Record which blocks have live FPR pseudos on entry and exit.
3300 basic_block bb;
3301 FOR_EACH_BB_FN (bb, m_fn)
3302 {
3303 if (bitmap_intersect_p (df_get_live_out (bb), m_fpr_pseudos))
3304 bitmap_set_bit (fpr_pseudos_live_out, bb->index);
3305 if (bitmap_intersect_p (df_get_live_in (bb), m_fpr_pseudos))
3306 bitmap_set_bit (fpr_pseudos_live_in, bb->index);
3307 }
3308
3309 struct stack_node { edge_iterator ei; basic_block bb; };
3310
3311 auto_vec<stack_node, 32> stack;
3312 auto_vec<basic_block, 32> region;
3313
3314 // Go through the function in reverse postorder and process the region
3315 // containing each block.
3316 unsigned int n_blocks = df_get_n_blocks (DF_FORWARD);
3317 int *order = df_get_postorder (DF_FORWARD);
3318 for (unsigned int bbi = 0; bbi < n_blocks; ++bbi)
3319 {
3320 basic_block bb = BASIC_BLOCK_FOR_FN (m_fn, order[bbi]);
3321 if (bb->index < NUM_FIXED_BLOCKS)
3322 continue;
3323
3324 if (!bitmap_set_bit (visited, bb->index))
3325 continue;
3326
3327 // Process forward edges before backward edges (so push backward
3328 // edges first). Build the region in an approximation of reverse
3329 // program order.
3330 if (bitmap_bit_p (fpr_pseudos_live_in, bb->index))
3331 stack.quick_push ({ ei_start (bb->preds), nullptr });
3332 if (bitmap_bit_p (fpr_pseudos_live_out, bb->index))
3333 stack.quick_push ({ ei_start (bb->succs), bb });
3334 else
3335 region.safe_push (bb);
3336 while (!stack.is_empty ())
3337 {
3338 auto &node = stack.last ();
3339 if (ei_end_p (node.ei))
3340 {
3341 if (node.bb)
3342 region.safe_push (node.bb);
3343 stack.pop ();
3344 continue;
3345 }
3346 edge e = ei_edge (node.ei);
3347 if (node.bb)
3348 {
3349 // A forward edge from a node that has not yet been added
3350 // to the region.
3351 if (bitmap_bit_p (fpr_pseudos_live_in, e->dest->index)
3352 && bitmap_set_bit (visited, e->dest->index))
3353 {
3354 stack.safe_push ({ ei_start (e->dest->preds), nullptr });
3355 if (bitmap_bit_p (fpr_pseudos_live_out, e->dest->index))
3356 stack.safe_push ({ ei_start (e->dest->succs), e->dest });
3357 else
3358 region.safe_push (e->dest);
3359 }
3360 else
3361 ei_next (&node.ei);
3362 }
3363 else
3364 {
3365 // A backward edge from a node that has already been added
3366 // to the region.
3367 if (bitmap_bit_p (fpr_pseudos_live_out, e->src->index)
3368 && bitmap_set_bit (visited, e->src->index))
3369 {
3370 if (bitmap_bit_p (fpr_pseudos_live_in, e->src->index))
3371 stack.safe_push ({ ei_start (e->src->preds), nullptr });
3372 stack.safe_push ({ ei_start (e->src->succs), e->src });
3373 }
3374 else
3375 ei_next (&node.ei);
3376 }
3377 }
3378
3379 m_current_point = 2;
3380 start_new_region ();
3381
3382 if (region.is_empty ())
3383 process_block (bb, true);
3384 else
3385 {
3386 if (dump_file && (dump_flags & TDF_DETAILS))
3387 {
3388 fprintf (dump_file, "\nRegion (from %d):", bb->index);
3389 for (unsigned int j = 0; j < region.length (); ++j)
3390 fprintf (dump_file, " %d", region[j]->index);
3391 fprintf (dump_file, "\n");
3392 }
3393 for (unsigned int j = 0; j < region.length (); ++j)
3394 {
3395 basic_block bb = region[j];
3396 bool is_isolated
3397 = ((j == 0 && !bitmap_bit_p (fpr_pseudos_live_out, bb->index))
3398 || (j == region.length () - 1
3399 && !bitmap_bit_p (fpr_pseudos_live_in, bb->index)));
3400 process_block (bb, is_isolated);
3401 }
3402 }
3403 region.truncate (0);
3404
3405 if (!m_allocnos.is_empty () && m_allocation_successful)
3406 process_region ();
3407 }
3408 }
3409
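// A minimal standalone sketch (kept out of compilation) of the
// region-forming idea in process_blocks: starting from a block, follow
// edges across which FPR pseudos are live, in both directions,
// collecting every block reached.  The three-block graph below is
// invented for illustration.
#if 0
#include <vector>
#include <cassert>

int
main ()
{
  // Three blocks in a line; FPR pseudos are live only across 0->1.
  const unsigned int num_blocks = 3;
  bool fpr_live_across[num_blocks][num_blocks] = {};
  fpr_live_across[0][1] = true;

  std::vector<bool> visited (num_blocks, false);
  std::vector<unsigned int> region, worklist = { 0 };
  visited[0] = true;
  while (!worklist.empty ())
    {
      unsigned int bb = worklist.back ();
      worklist.pop_back ();
      region.push_back (bb);
      for (unsigned int other = 0; other < num_blocks; ++other)
	if ((fpr_live_across[bb][other] || fpr_live_across[other][bb])
	    && !visited[other])
	  {
	    visited[other] = true;
	    worklist.push_back (other);
	  }
    }

  // Blocks 0 and 1 form one region; block 2 is isolated.
  assert (region.size () == 2 && !visited[2]);
  return 0;
}
#endif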
3410 // Run the pass on the current function.
3411 void
3412 early_ra::execute ()
3413 {
3414 df_analyze ();
3415
3416 preprocess_insns ();
3417 propagate_pseudo_reg_info ();
3418 choose_fpr_pseudos ();
3419 if (bitmap_empty_p (m_fpr_pseudos))
3420 return;
3421
3422 if (dump_file && (dump_flags & TDF_DETAILS))
3423 dump_pseudo_regs ();
3424
3425 process_blocks ();
3426 df_verify ();
3427 }
3428
3429 class pass_early_ra : public rtl_opt_pass
3430 {
3431 public:
3432 pass_early_ra (gcc::context *ctxt)
3433 : rtl_opt_pass (pass_data_early_ra, ctxt)
3434 {}
3435
3436 // opt_pass methods:
3437 virtual bool gate (function *);
3438 virtual unsigned int execute (function *);
3439 };
3440
3441 bool
3442 pass_early_ra::gate (function *)
3443 {
3444 // Require a vector ISA to be enabled.
3445 if (!TARGET_SIMD && !TARGET_SVE)
3446 return false;
3447
3448 if (aarch64_early_ra == AARCH64_EARLY_RA_NONE)
3449 return false;
3450
3451 if (aarch64_early_ra == AARCH64_EARLY_RA_STRIDED
3452 && !TARGET_STREAMING_SME2)
3453 return false;
3454
3455 return true;
3456 }
3457
3458 unsigned int
3459 pass_early_ra::execute (function *fn)
3460 {
3461 early_ra (fn).execute ();
3462 return 0;
3463 }
3464
3465 } // end namespace
3466
3467 // Create a new instance of the pass.
3468 rtl_opt_pass *
3469 make_pass_aarch64_early_ra (gcc::context *ctxt)
3470 {
3471 return new pass_early_ra (ctxt);
3472 }