// Early register allocation pass.
// Copyright (C) 2023-2024 Free Software Foundation, Inc.
//
// This file is part of GCC.
//
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
//
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

// This pass implements a simple form of early register allocation.
// It is restricted to FP/SIMD registers, and it only allocates
// a region of FP/SIMD usage if it can do so without any spilling.
// It punts on anything too complicated, leaving it to the real
// register allocator.
//
// There are two main purposes:
//
// (1) The pass runs before scheduling.  It therefore has a chance to
//     bag a spill-free allocation, if there is one, before scheduling
//     moves things around.
//
// (2) The pass can make use of strided register operations, such as the
//     strided forms of LD1 and ST1 in SME2.
//
// The allocator works at the level of individual FPRs, rather than whole
// pseudo registers.  It is mostly intended to help optimize ACLE code.
//
// The pass is very simplistic.  There are many things that could be improved.
#define IN_TARGET_CODE 1

#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "df.h"
#include "rtl-ssa.h"
#include "tree-pass.h"
#include "target.h"
#include "expr.h"
#include "cfgrtl.h"
#include "print-rtl.h"
#include "insn-attr.h"
#include "insn-opinit.h"
#include "reload.h"

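// An iterator-like wrapper that applies ++ and -- directly to its
// underlying value.  It is instantiated below as allocno_iterator,
// which iterates over ranges of allocno IDs.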
template<typename T>
class simple_iterator : public wrapper_iterator<T>
{
public:
  using wrapper_iterator<T>::wrapper_iterator;

  simple_iterator &operator-- () { --this->m_contents; return *this; }
  simple_iterator operator-- (int) { return this->m_contents--; }
  simple_iterator &operator++ () { ++this->m_contents; return *this; }
  simple_iterator operator++ (int) { return this->m_contents++; }
};

using namespace rtl_ssa;

namespace {
const pass_data pass_data_early_ra =
{
  RTL_PASS, // type
  "early_ra", // name
  OPTGROUP_NONE, // optinfo_flags
  TV_NONE, // tv_id
  0, // properties_required
  0, // properties_provided
  0, // properties_destroyed
  0, // todo_flags_start
  TODO_df_finish, // todo_flags_finish
};

using allocno_iterator = simple_iterator<unsigned int>;

// Class that represents one run of the pass.
class early_ra
{
public:
  early_ra (function *fn);
  ~early_ra ();
  void execute ();

private:
  static_assert (MAX_RECOG_OPERANDS <= 32, "Operand mask is 32 bits");
  using operand_mask = uint32_t;

  // Points in the function are represented using "program points".
  // The program points are allocated in reverse order, with smaller
  // numbers indicating later points.  These special values indicate
  // the start and end of a region.
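  // For example, if a region contains three instructions I1, I2 and I3
  // in program order, the backwards walk sees I3 first, so I3 receives
  // a smaller program point than I2, and I2 a smaller point than I1.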
  static constexpr unsigned int START_OF_REGION = ~0U;
  static constexpr unsigned int END_OF_REGION = 0U;

  // An invalid allocno index, used to represent no allocno.
  static constexpr unsigned int INVALID_ALLOCNO = ~0U;

  // Enumerates the single FPR sizes that matter for register allocation.
  // Anything smaller than 64 bits is treated as FPR_D.
  enum fpr_size_info
  {
    FPR_D,
    FPR_Q,
    FPR_Z
  };

  // A live range for an FPR, containing program points [START_POINT,
  // END_POINT].  If ALLOCNO is not INVALID_ALLOCNO, the FPR is known
  // to be equal to ALLOCNO for the duration of the live range.
  struct fpr_range_info
  {
    unsigned int start_point;
    unsigned int end_point;
    unsigned int allocno;
  };

  // Flags used in pseudo_reg_info.
  //
  // Whether the pseudo register occurs in one instruction alternative that
  // matches (respectively) V0-V7, V0-V15, V0-V31 or a non-FP register.
  static constexpr unsigned int ALLOWS_FPR8 = 1U << 0;
  static constexpr unsigned int ALLOWS_FPR16 = 1U << 1;
  static constexpr unsigned int ALLOWS_FPR32 = 1U << 2;
  static constexpr unsigned int ALLOWS_NONFPR = 1U << 3;
  //
  // Likewise whether the register occurs in an instruction that requires
  // the associated register type.
  static constexpr unsigned int NEEDS_FPR8 = 1U << 4;
  static constexpr unsigned int NEEDS_FPR16 = 1U << 5;
  static constexpr unsigned int NEEDS_FPR32 = 1U << 6;
  static constexpr unsigned int NEEDS_NONFPR = 1U << 7;
  //
  // Whether the pseudo register is copied to or from a hard FP register.
  static constexpr unsigned int HAS_FPR_COPY = 1U << 8;
  //
  // Whether the pseudo register is copied to or from a hard non-FP register.
  static constexpr unsigned int HAS_NONFPR_COPY = 1U << 9;
  //
  // Whether the pseudo register is used as a multi-register vector operand
  // to an instruction that supports strided accesses, and whether it is used
  // as a multi-register vector operand in some other non-move instruction.
  static constexpr unsigned int HAS_FLEXIBLE_STRIDE = 1U << 10;
  static constexpr unsigned int HAS_FIXED_STRIDE = 1U << 11;

  // Flags that should be propagated across moves between pseudo registers.
  static constexpr unsigned int PSEUDO_COPY_FLAGS = ~(HAS_FLEXIBLE_STRIDE
						      | HAS_FIXED_STRIDE);

  // Information about a copy between two registers.
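  // Together with pseudo_reg_info::first_copy, these records form a
  // singly-linked list of copies for each register: first_copy points at
  // the most recently recorded copy involving the register, and index 0
  // acts as the null terminator.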
  struct reg_copy_info
  {
    // The two registers, in order.
    unsigned int regnos[2];

    // Index I gives the index of the next reg_copy_info involving REGNOS[I],
    // or 0 if none.
    unsigned int next_copies[2];
  };

  // Information about a pseudo register.
  struct pseudo_reg_info
  {
    // Flags describing how the register is used, defined above.
    unsigned int flags : 16;

    // The mode of the pseudo register, cached for convenience.
    machine_mode mode : 16;

    // The index of the first copy, or 0 if none.
    unsigned int first_copy;
  };

  // Information about a group of allocnos that have a fixed offset
  // relative to each other.  The allocnos in each group must be allocated
  // together.
  //
  // Allocnos that can share the same hard register are eventually
  // chained together.  These chains represent edges on a graph of
  // allocnos, such that two allocnos joined by an edge use the same FPR.
  // These chains are formed between individual allocnos rather than
  // whole groups, although the system is required to be self-consistent.
  // Each clique in the graph has at least one "full-width" allocno group
  // that has one allocno for every FPR that needs to be allocated to
  // the clique.
  //
  // One group of allocnos is chosen as the "color representative" of
  // each clique in the graph.  This group will be a full-width group.
  struct allocno_info;
  struct allocno_group_info
  {
    array_slice<unsigned int> chain_heads ();
    array_slice<allocno_info> allocnos ();
    allocno_group_info *color_rep ();
    allocno_info *allocno (unsigned int);

    // The color representative of the containing clique.
    allocno_group_info *m_color_rep;

    // The pseudo register associated with this group, or INVALID_REGNUM
    // if none.
    unsigned int regno;

    // The offset of the first allocno (and thus this group) from the start
    // of color_rep.
    unsigned int color_rep_offset : 8;

    // The number of allocnos in the group, and thus the number of FPRs
    // that need to be allocated.
    unsigned int size : 8;

    // The gap between FPRs in the group.  This is normally 1, but can be
    // higher if we've decided to use strided multi-register accesses.
    unsigned int stride : 4;

    // Used temporarily while deciding which allocnos should have non-unit
    // strides; see find_strided_accesses for details.
    int consecutive_pref : 4;
    int strided_polarity : 2;

    // The largest size of FPR needed by references to the allocno group.
    fpr_size_info fpr_size : 2;

    // True if all non-move accesses can be converted to strided form.
    unsigned int has_flexible_stride : 1;

    // True if we've assigned a color index to this group.
    unsigned int has_color : 1;

    // The mask of FPRs that would make valid choices for the first allocno,
    // taking the requirements of all the allocnos in the group into account.
    unsigned int fpr_candidates;

    // The index of the color that has been assigned to the containing clique.
    unsigned int color;
  };

  // Represents a single FPR-sized quantity that needs to be allocated.
  // Each allocno is identified by index (for compactness).
  //
  // Quantities that span multiple FPRs are assigned groups of consecutive
  // allocnos.  Quantities that occupy a single FPR are assigned their own
  // group.
  struct allocno_info
  {
    allocno_group_info *group ();
    bool is_shared ();
    bool is_equiv_to (unsigned int);

    // The allocno's unique identifier.
    unsigned int id;

    // The offset of this allocno into the containing group.
    unsigned int offset : 8;

    // The number of allocnos in the containing group.
    unsigned int group_size : 8;

    // If the allocno has an affinity with at least one hard register
    // (so that choosing that hard register would avoid a copy), this is
    // the number of one such hard register, otherwise it is
    // FIRST_PSEUDO_REGISTER.
    unsigned int hard_regno : 8;

    // Set to 1 if the allocno has a single definition or 2 if it has more.
    unsigned int num_defs : 2;

    // True if, at START_POINT, another allocno is copied to this one.
    // See callers of record_copy for what counts as a copy.
    unsigned int is_copy_dest : 1;

    // True if, at START_POINT, another allocno is copied to this one,
    // and if the allocnos at both ends of the copy chain have an affinity
    // with the same hard register.
    unsigned int is_strong_copy_dest : 1;

    // True if, at END_POINT, this allocno is copied to another one,
    // and both allocnos have an affinity with the same hard register.
    unsigned int is_strong_copy_src : 1;

    // True if the allocno is subject to an earlyclobber at END_POINT,
    // so that it cannot be tied to the destination of the instruction.
    unsigned int is_earlyclobbered : 1;

    // True if this allocno is known to be equivalent to related_allocno
    // for the whole of this allocno's lifetime.
    unsigned int is_equiv : 1;

    // The inclusive range of program points spanned by the allocno.
    // START_POINT >= END_POINT.
    unsigned int start_point;
    unsigned int end_point;

    // If, at END_POINT, this allocno is copied to another allocno, this
    // is the index of that allocno, otherwise it is INVALID_ALLOCNO.
    // See callers of record_copy for what counts as a copy.
    unsigned int copy_dest;

    // If this field is not INVALID_ALLOCNO, it indicates one of two things:
    //
    // - if is_equiv, this allocno is equivalent to related_allocno for
    //   the whole of this allocno's lifetime.
    //
    // - if !is_equiv, this allocno's live range is a subrange of
    //   related_allocno's and we have committed to making this allocno
    //   share whatever register related_allocno uses.
    unsigned int related_allocno;

    union
    {
      // The program point at which the allocno was last defined,
      // or START_OF_REGION if none.  This is only used temporarily
      // while recording allocnos; after that, chain_next below is
      // used instead.
      unsigned int last_def_point;

      // The next chained allocno in program order (i.e. at lower program
      // points), or INVALID_ALLOCNO if none.
      unsigned int chain_next;
    };

    union
    {
      // The program point before start_point at which the allocno was
      // last used, or END_OF_REGION if none.  This is only used temporarily
      // while recording allocnos; after that, chain_prev below is used
      // instead.
      unsigned int last_use_point;

      // The previous chained allocno in program order (i.e. at higher
      // program points), or INVALID_ALLOCNO if none.
      unsigned int chain_prev;
    };
  };

  // Information about a full allocno group or a subgroup of it.
  // The subgroup can be empty to indicate "none".
  struct allocno_subgroup
  {
    array_slice<allocno_info> allocnos ();
    allocno_info *allocno (unsigned int);

    // True if a subgroup is present.
    operator bool () const { return count; }

    // The containing group.
    allocno_group_info *group;

    // The offset of the subgroup from the start of GROUP.
    unsigned int start;

    // The number of allocnos in the subgroup.
    unsigned int count;
  };

  // Represents information about a copy between an allocno and an FPR.
  // This establishes an affinity between the allocno and the FPR.
  struct allocno_copy_info
  {
    // The allocno involved in the copy.
    unsigned int allocno;

    // The FPR involved in the copy, relative to V0_REGNUM.
    unsigned int fpr : 16;

    // A measure of how strong the affinity between the allocno and FPR is.
    unsigned int weight : 16;
  };

  // Information about a possible allocno chain.
  struct chain_candidate_info
  {
    // The candidate target allocno.
    allocno_info *allocno;

    // A rating of the candidate (higher is better).
    int score;
  };

  // Information about an allocno color.
  struct color_info
  {
    // The color's unique identifier.
    int id;

    // The allocated hard register, when known.
    unsigned int hard_regno;

    // The clique's representative group.
    allocno_group_info *group;

    // The number of FPR preferences recorded in fpr_preferences.
    unsigned int num_fpr_preferences;

    // Weights in favor of choosing each FPR as the first register for GROUP.
    int8_t fpr_preferences[32];
  };

  template<typename T, typename... Ts>
  T *region_allocate (Ts...);

  allocno_info *chain_prev (allocno_info *);
  allocno_info *chain_next (allocno_info *);

  void dump_pseudo_regs ();
  void dump_fpr_ranges ();
  void dump_copies ();
  void dump_allocnos ();
  void dump_colors ();

  iterator_range<allocno_iterator> get_group_allocnos (unsigned int);

  void preprocess_move (rtx, rtx);
  void process_pseudo_reg_constraints (rtx_insn *);
  void preprocess_insns ();

  int fpr_preference (unsigned int);
  void propagate_pseudo_reg_info ();

  void choose_fpr_pseudos ();

  void start_new_region ();

  allocno_group_info *create_allocno_group (unsigned int, unsigned int);
  allocno_subgroup get_allocno_subgroup (rtx);
  void record_fpr_use (unsigned int);
  void record_fpr_def (unsigned int);
  void record_allocno_use (allocno_info *);
  void record_allocno_def (allocno_info *);
  allocno_info *find_related_start (allocno_info *, allocno_info *, bool);
  void record_copy (rtx, rtx, bool = false);
  void record_constraints (rtx_insn *);
  void record_artificial_refs (unsigned int);
  void record_insn_refs (rtx_insn *);

  bool consider_strong_copy_src_chain (allocno_info *);
  int strided_polarity_pref (allocno_info *, allocno_info *);
  void find_strided_accesses ();

  template<unsigned int allocno_info::*field>
  static int cmp_increasing (const void *, const void *);
  bool is_chain_candidate (allocno_info *, allocno_info *);
  int rate_chain (allocno_info *, allocno_info *);
  static int cmp_chain_candidates (const void *, const void *);
  void chain_allocnos (unsigned int &, unsigned int &);
  void merge_fpr_info (allocno_group_info *, allocno_group_info *,
		       unsigned int);
  void set_single_color_rep (allocno_info *, allocno_group_info *,
			     unsigned int);
  void set_color_rep (allocno_group_info *, allocno_group_info *,
		      unsigned int);
  bool try_to_chain_allocnos (allocno_info *, allocno_info *);
  void create_color (allocno_group_info *);
  void form_chains ();

  bool fpr_conflicts_with_allocno_p (unsigned int, allocno_info *);
  bool call_in_range_p (unsigned int, unsigned int, unsigned int);
  unsigned int partial_fpr_clobbers (unsigned int, fpr_size_info);

  void process_copies ();

  static int cmp_allocation_order (const void *, const void *);
  void allocate_colors ();
  allocno_info *find_independent_subchain (allocno_info *);
  color_info *find_oldest_color (unsigned int, unsigned int);
  void broaden_colors ();
  void finalize_allocation ();

  bool replace_regs (df_ref);
  int try_enforce_constraints (rtx_insn *, vec<std::pair<int, int>> &);
  void enforce_constraints (rtx_insn *);
  bool maybe_convert_to_strided_access (rtx_insn *);
  void apply_allocation ();

  void process_region ();
  bool is_dead_insn (rtx_insn *);
  void process_block (basic_block, bool);
  void process_blocks ();

  // ----------------------------------------------------------------------

  // The function we're operating on.
  function *m_fn;

  // Information about each pseudo register, indexed by REGNO.
  auto_vec<pseudo_reg_info> m_pseudo_regs;

  // All recorded register copies.
  auto_vec<reg_copy_info> m_pseudo_reg_copies;

  // The set of pseudos that we've decided to allocate an FPR to.
  auto_bitmap m_fpr_pseudos;

  // ----------------------------------------------------------------------

  // An obstack for allocating information that is referenced by the member
  // variables below.
  obstack m_region_obstack;
  void *m_region_alloc_start;

  // ----------------------------------------------------------------------

  // The basic block that we're currently processing.
  basic_block m_current_bb;

  // The lowest-numbered program point in the current basic block.
  unsigned int m_current_bb_point;

  // The program point that we're currently processing (described above).
  unsigned int m_current_point;

  // The set of allocnos that are currently live.
  auto_bitmap m_live_allocnos;

  // The set of FPRs that are currently live.
  unsigned int m_live_fprs;

  // ----------------------------------------------------------------------

  // A mask of the FPRs that have already been allocated.
  unsigned int m_allocated_fprs;

  // A mask of the FPRs that must be at least partially preserved by the
  // current function.
  unsigned int m_call_preserved_fprs;

  // True if we haven't yet failed to allocate the current region.
  bool m_allocation_successful;

  // A map from pseudo registers to their associated allocno groups.
  hash_map<int_hash<unsigned int, INVALID_REGNUM>,
	   allocno_group_info *> m_regno_to_group;

  // All recorded copies between allocnos and FPRs.
  auto_vec<allocno_copy_info> m_allocno_copies;

  // All allocnos, by index.
  auto_vec<allocno_info *> m_allocnos;

  // All allocnos, by increasing START_POINT.
  auto_vec<allocno_info *> m_sorted_allocnos;

  // Allocnos for which is_shared is true.
  auto_vec<allocno_info *> m_shared_allocnos;

  // All colors, by index.
  auto_vec<color_info *> m_colors;

  // The instruction ranges that make up the current region,
  // as half-open ranges [LAST, FIRST).
  auto_vec<std::pair<rtx_insn *, rtx_insn *>> m_insn_ranges;

  // The live ranges of each FPR, in order of increasing program point.
  auto_vec<fpr_range_info> m_fpr_ranges[32];

  // For each function call id, a list of program points at which a call
  // to such a function is made.  Each list is in order of increasing
  // program point.
  auto_vec<unsigned int> m_call_points[NUM_ABI_IDS];

  // A list of instructions that can be removed if allocation succeeds.
  auto_vec<rtx_insn *> m_dead_insns;
};

// True if PAT is something that would typically be treated as a move.
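// For example, (set (reg:DI x) (reg:DI y)) and (set (reg:DI x)
// (const_int 1)) count as moves, whereas (set (reg:DI x) (plus:DI ...))
// does not, since a PLUS is neither an object nor a constant.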
static inline bool
is_move_set (rtx pat)
{
  if (GET_CODE (pat) != SET)
    return false;

  rtx dest = SET_DEST (pat);
  if (SUBREG_P (dest))
    dest = SUBREG_REG (dest);
  if (!OBJECT_P (dest))
    return false;

  rtx src = SET_SRC (pat);
  if (SUBREG_P (src))
    src = SUBREG_REG (src);
  if (!OBJECT_P (src) && !CONSTANT_P (src))
    return false;

  return true;
}

// Return true if operand OP is likely to match OP_ALT after register
// allocation.
static bool
likely_operand_match_p (const operand_alternative &op_alt, rtx op)
{
  // Empty constraints match everything.
  const char *constraint = op_alt.constraint;
  if (constraint[0] == 0 || constraint[0] == ',')
    return true;

  for (;;)
    {
      char c = *constraint;
      int len = CONSTRAINT_LEN (c, constraint);
      if (c == 0 || c == ',')
	break;

      if (c == 'X')
	return true;

      auto cn = lookup_constraint (constraint);
      switch (get_constraint_type (cn))
	{
	case CT_REGISTER:
	  if (REG_P (op) || SUBREG_P (op))
	    return true;
	  break;

	case CT_MEMORY:
	case CT_SPECIAL_MEMORY:
	case CT_RELAXED_MEMORY:
	  if (MEM_P (op))
	    return true;
	  break;

	case CT_CONST_INT:
	case CT_ADDRESS:
	case CT_FIXED_FORM:
	  if (constraint_satisfied_p (op, cn))
	    return true;
	  break;
	}

      constraint += len;
    }

  if (op_alt.matches >= 0)
    {
      rtx other = recog_data.operand[op_alt.matches];
      if ((REG_P (other) || SUBREG_P (other))
	  && (REG_P (op) || SUBREG_P (op)))
	return true;
    }
  return false;
}

// Return true if the operands of the current instruction are likely to
// match OP_ALT.
static bool
likely_alternative_match_p (const operand_alternative *op_alt)
{
  for (int i = 0; i < recog_data.n_operands; ++i)
    if (!likely_operand_match_p (op_alt[i], recog_data.operand[i]))
      return false;
  return true;
}

// Return the sum of how disparaged OP_ALT is.
static int
count_rejects (const operand_alternative *op_alt)
{
  int reject = 0;
  for (int opno = 0; opno < recog_data.n_operands; ++opno)
    reject += op_alt[opno].reject;
  return reject;
}

// Allocate a T from the region obstack.
template<typename T, typename... Ts>
inline T *
early_ra::region_allocate (Ts... args)
{
  static_assert (std::is_trivially_destructible<T>::value,
		 "destructor won't be called");
  void *addr = obstack_alloc (&m_region_obstack, sizeof (T));
  return new (addr) T (std::forward<Ts> (args)...);
}

early_ra::early_ra (function *fn) : m_fn (fn), m_live_fprs (0)
{
  gcc_obstack_init (&m_region_obstack);
  m_region_alloc_start = obstack_alloc (&m_region_obstack, 0);
  bitmap_tree_view (m_live_allocnos);
}

early_ra::~early_ra ()
{
  obstack_free (&m_region_obstack, nullptr);
}

// Return an array that, for each allocno A in the group, contains the index
// of the allocno at the head of A's chain (that is, the one with the highest
// START_POINT).  The index is INVALID_ALLOCNO if the chain is empty.
inline array_slice<unsigned int>
early_ra::allocno_group_info::chain_heads ()
{
  auto *start = reinterpret_cast<unsigned int *> (this + 1);
  return { start, size };
}

// Return the array of allocnos in the group.
inline array_slice<early_ra::allocno_info>
early_ra::allocno_group_info::allocnos ()
{
  gcc_checking_assert (regno != INVALID_REGNUM);
  auto *chain_end = reinterpret_cast<unsigned int *> (this + 1) + size;
  auto *allocno_start = reinterpret_cast<allocno_info *> (chain_end);
  return { allocno_start, size };
}
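
// Both accessors above rely on the layout produced by create_allocno_group,
// which places the chain-head array and the allocno array directly after
// the group header in memory:
//
//   [ allocno_group_info | chain_heads[size] | allocnos[size] ]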

// Return the group's color representative.
inline early_ra::allocno_group_info *
early_ra::allocno_group_info::color_rep ()
{
  gcc_checking_assert (m_color_rep->m_color_rep == m_color_rep);
  return m_color_rep;
}

// Return the group that contains the allocno.
inline early_ra::allocno_group_info *
early_ra::allocno_info::group ()
{
  auto *chain_end = reinterpret_cast<unsigned int *> (this - offset);
  return reinterpret_cast<allocno_group_info *> (chain_end - group_size) - 1;
}

// Return true if this allocno's live range is a subrange of related_allocno's
// and if we have committed to making this allocno share whatever register
// related_allocno uses.
inline bool
early_ra::allocno_info::is_shared ()
{
  return related_allocno != INVALID_ALLOCNO && !is_equiv;
}

// Return true if this allocno is known to be equivalent to ALLOCNO.
inline bool
early_ra::allocno_info::is_equiv_to (unsigned int allocno)
{
  return is_equiv && related_allocno == allocno;
}

// Return the allocnos in the subgroup.
inline array_slice<early_ra::allocno_info>
early_ra::allocno_subgroup::allocnos ()
{
  if (!count)
    return {};
  return { &group->allocnos ()[start], count };
}

// Return allocno I in the subgroup, with 0 being the first.
inline early_ra::allocno_info *
early_ra::allocno_subgroup::allocno (unsigned int i)
{
  return &group->allocnos ()[start + i];
}

// Return the previous (earlier) allocno in ALLOCNO's chain, or null if none.
inline early_ra::allocno_info *
early_ra::chain_prev (allocno_info *allocno)
{
  if (allocno->chain_prev != INVALID_ALLOCNO)
    return m_allocnos[allocno->chain_prev];
  return nullptr;
}

// Return the next (later) allocno in ALLOCNO's chain, or null if none.
inline early_ra::allocno_info *
early_ra::chain_next (allocno_info *allocno)
{
  if (allocno->chain_next != INVALID_ALLOCNO)
    return m_allocnos[allocno->chain_next];
  return nullptr;
}

// Dump the information in m_pseudo_regs.
void
early_ra::dump_pseudo_regs ()
{
  fprintf (dump_file, "\nPseudos:\n");
  fprintf (dump_file, "  %6s %6s %6s %6s %6s %6s %8s %s\n",
	   "Id", "FPR8", "FPR16", "FPR32", "NONFPR", "Stride",
	   "FPRness", "Copies");
  pseudo_reg_info unused_reg = {};
  for (unsigned int regno = FIRST_PSEUDO_REGISTER;
       regno < m_pseudo_regs.length (); ++regno)
    {
      const auto &reg = m_pseudo_regs[regno];
      if (memcmp (&reg, &unused_reg, sizeof (reg)) == 0)
	continue;

      fprintf (dump_file, "  %6d %6s %6s %6s %6s %6s %8d", regno,
	       reg.flags & NEEDS_FPR8 ? "Req"
	       : reg.flags & ALLOWS_FPR8 ? "OK" : "-",
	       reg.flags & NEEDS_FPR16 ? "Req"
	       : reg.flags & ALLOWS_FPR16 ? "OK" : "-",
	       reg.flags & NEEDS_FPR32 ? "Req"
	       : reg.flags & ALLOWS_FPR32 ? "OK" : "-",
	       reg.flags & NEEDS_NONFPR ? "Req"
	       : reg.flags & ALLOWS_NONFPR ? "OK" : "-",
	       ~reg.flags & HAS_FLEXIBLE_STRIDE ? "-"
	       : reg.flags & HAS_FIXED_STRIDE ? "Some" : "All",
	       fpr_preference (regno));
      if (reg.flags & HAS_FPR_COPY)
	fprintf (dump_file, " FPR");
      if (reg.flags & HAS_NONFPR_COPY)
	fprintf (dump_file, " Non-FPR");
      unsigned int copyi = reg.first_copy;
      while (copyi)
	{
	  const auto &copy = m_pseudo_reg_copies[copyi];
	  if (copy.regnos[0] == regno)
	    {
	      fprintf (dump_file, " r%d", copy.regnos[1]);
	      copyi = copy.next_copies[0];
	    }
	  else
	    {
	      fprintf (dump_file, " r%d", copy.regnos[0]);
	      copyi = copy.next_copies[1];
	    }
	}
      fprintf (dump_file, "\n");
    }
}

// Dump the information in m_fpr_ranges.
void
early_ra::dump_fpr_ranges ()
{
  fprintf (dump_file, "\nFPR live ranges:\n");
  for (unsigned int fpr = 0; fpr < 32; ++fpr)
    {
      auto &intervals = m_fpr_ranges[fpr];
      if (intervals.is_empty ())
	continue;

      fprintf (dump_file, "  %2d", fpr);
      for (unsigned int i = 0; i < intervals.length (); ++i)
	{
	  auto &interval = intervals[i];
	  if (i && (i % 4) == 0)
	    fprintf (dump_file, "\n    ");
	  fprintf (dump_file, " [ %6d %6d ]", interval.start_point,
		   interval.end_point);
	}
      fprintf (dump_file, "\n");
    }
}

// Dump the information in m_allocno_copies.
void
early_ra::dump_copies ()
{
  fprintf (dump_file, "\nCopies:\n");
  fprintf (dump_file, "  %8s %3s %6s\n",
	   "Allocno", "FPR", "Weight");
  for (const auto &copy : m_allocno_copies)
    fprintf (dump_file, "  %8d %3d %6d\n", copy.allocno,
	     copy.fpr, copy.weight);
}

// Dump the information in m_allocnos.
void
early_ra::dump_allocnos ()
{
  char buffer[sizeof ("r[:]") + 3 * 3 * sizeof (int) + 1];
  fprintf (dump_file, "\nAllocno groups:\n");
  fprintf (dump_file,
	   "  %12s %12s %4s %6s %8s %s\n",
	   "Ids", "Regno", "Size", "Stride", "Cands", "Heads");
  for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
    {
      auto *allocno = m_allocnos[ai];
      if (allocno->offset != 0)
	continue;
      auto *group = allocno->group ();
      snprintf (buffer, sizeof (buffer), "[%d:%d]", allocno->id,
		allocno->id + group->size - 1);
      fprintf (dump_file, "  %12s", buffer);
      snprintf (buffer, sizeof (buffer), "r%d[0:%d]", group->regno,
		group->size - 1);
      fprintf (dump_file, " %12s %4s %6d %08x", buffer,
	       group->fpr_size == FPR_D ? "D"
	       : group->fpr_size == FPR_Q ? "Q" : "Z",
	       group->stride,
	       group->fpr_candidates);
      for (auto head : group->chain_heads ())
	if (head == INVALID_ALLOCNO)
	  fprintf (dump_file, " -");
	else
	  fprintf (dump_file, " %d", head);
      fprintf (dump_file, "\n");
    }

  fprintf (dump_file, "\nAllocno chains:\n");
  fprintf (dump_file, "  %5s %12s %12s %6s %5s %5s %6s %5s\n",
	   "Id", "Regno", "Range ", "Src", "Dest", "Equiv", "Shared", "FPR");
  for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
    {
      auto *allocno = m_allocnos[ai];
      if (allocno->chain_prev != INVALID_ALLOCNO)
	continue;
      const char *prefix = "=>";
      for (;;)
	{
	  auto *group = allocno->group ();
	  fprintf (dump_file, "  %2s", prefix);
	  fprintf (dump_file, " %5d", allocno->id);
	  snprintf (buffer, sizeof (buffer), "r%d[%d]", group->regno,
		    allocno->offset);
	  fprintf (dump_file, " %12s", buffer);
	  snprintf (buffer, sizeof (buffer), "[%d,%d]",
		    allocno->start_point, allocno->end_point);
	  fprintf (dump_file, " %11s%s %6s", buffer,
		   allocno->is_earlyclobbered ? "*" : " ",
		   allocno->is_strong_copy_dest ? "Strong"
		   : allocno->is_copy_dest ? "Yes" : "-");
	  if (allocno->copy_dest == INVALID_ALLOCNO)
	    fprintf (dump_file, " %5s", "-");
	  else
	    fprintf (dump_file, " %5d", allocno->copy_dest);
	  if (allocno->is_equiv)
	    fprintf (dump_file, " %5d", allocno->related_allocno);
	  else
	    fprintf (dump_file, " %5s", "-");
	  if (allocno->is_shared ())
	    fprintf (dump_file, " %6d", allocno->related_allocno);
	  else
	    fprintf (dump_file, " %6s", "-");
	  if (allocno->hard_regno == FIRST_PSEUDO_REGISTER)
	    fprintf (dump_file, " %5s", "-");
	  else
	    fprintf (dump_file, " %5s", reg_names[allocno->hard_regno]);
	  fprintf (dump_file, "\n");
	  if (allocno->chain_next == INVALID_ALLOCNO)
	    break;
	  allocno = m_allocnos[allocno->chain_next];
	  prefix = "";
	}
    }
}

// Dump the information in m_colors.
void
early_ra::dump_colors ()
{
  fprintf (dump_file, "\nColors:\n");
  for (unsigned int i = 0; i < m_colors.length (); ++i)
    {
      auto *color = m_colors[i];
      if (!color->group)
	continue;

      fprintf (dump_file, "  color %d:\n", i);
      fprintf (dump_file, "    chains:\n");
      auto heads = color->group->chain_heads ();
      for (unsigned int i = 0; i < color->group->size; ++i)
	{
	  fprintf (dump_file, "      %2d:", i);
	  auto ai = heads[i];
	  while (ai != INVALID_ALLOCNO)
	    {
	      auto *allocno = m_allocnos[ai];
	      fprintf (dump_file, " r%d[%d]", allocno->group ()->regno,
		       allocno->offset);
	      ai = allocno->chain_next;
	    }
	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "    FPR candidates:");
      for (unsigned int fpr = 0; fpr < 32; ++fpr)
	fprintf (dump_file, "%s%c", fpr % 8 ? "" : " ",
		 color->group->fpr_candidates & (1U << fpr) ? 'Y' : '-');
      fprintf (dump_file, "\n");
      fprintf (dump_file, "    FPR preferences:");
      for (unsigned int fpr = 0; fpr < 32; ++fpr)
	if (color->fpr_preferences[fpr])
	  fprintf (dump_file, " %d(%d)", fpr, color->fpr_preferences[fpr]);
      fprintf (dump_file, "\n");
    }
}

// Record any necessary information about a move from SRC to DEST.
void
early_ra::preprocess_move (rtx dest, rtx src)
{
  if (SUBREG_P (dest))
    dest = SUBREG_REG (dest);
  if (!REG_P (dest))
    return;

  if (SUBREG_P (src))
    src = SUBREG_REG (src);
  if (!REG_P (src))
    return;

  // Sort the registers by increasing REGNO.
  rtx regs[] = { dest, src };
  if (REGNO (dest) > REGNO (src))
    std::swap (regs[0], regs[1]);
  unsigned int regno0 = REGNO (regs[0]);
  unsigned int regno1 = REGNO (regs[1]);

  // Ignore moves between hard registers.
  if (HARD_REGISTER_NUM_P (regno1))
    return;

  // For moves between hard registers and pseudos, just record the type
  // of hard register involved.
  auto &reg1 = m_pseudo_regs[regno1];
  reg1.mode = GET_MODE (regs[1]);
  if (HARD_REGISTER_NUM_P (regno0))
    {
      reg1.flags |= (FP_REGNUM_P (regno0) ? HAS_FPR_COPY : HAS_NONFPR_COPY);
      return;
    }

  // Record a move between two pseudo registers.
  auto &reg0 = m_pseudo_regs[regno0];
  reg0.mode = GET_MODE (regs[0]);

  reg_copy_info copy;
  copy.regnos[0] = regno0;
  copy.regnos[1] = regno1;
  copy.next_copies[0] = reg0.first_copy;
  copy.next_copies[1] = reg1.first_copy;

  reg0.first_copy = reg1.first_copy = m_pseudo_reg_copies.length ();
  m_pseudo_reg_copies.safe_push (copy);
}

// Return true if INSN has a multi-vector operand and if that operand
// could be converted to strided form.
static bool
is_stride_candidate (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  auto stride_type = get_attr_stride_type (insn);
  return (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE
	  || stride_type == STRIDE_TYPE_LD1_CONSECUTIVE
	  || stride_type == STRIDE_TYPE_ST1_CONSECUTIVE);
}

// Go through the constraints of INSN, which has already been extracted,
// and record any relevant information about pseudo registers.
void
early_ra::process_pseudo_reg_constraints (rtx_insn *insn)
{
  extract_insn (insn);
  preprocess_constraints (insn);

  // Flags that describe any multi-register vector operands.
  unsigned int insn_flags = (is_stride_candidate (insn)
			     ? HAS_FLEXIBLE_STRIDE
			     : HAS_FIXED_STRIDE);

  auto alts = get_preferred_alternatives (insn);

  int operand_matches[MAX_RECOG_OPERANDS];
  unsigned int operand_flags[MAX_RECOG_OPERANDS];
  for (int i = 0; i < recog_data.n_operands; ++i)
    {
      operand_matches[i] = -1;
      operand_flags[i] = 0;
    }

  // Extract information from the constraints, considering all plausible
  // alternatives.
  for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
    {
      if (!(alts & ALTERNATIVE_BIT (altno)))
	continue;

      auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
      if (!likely_alternative_match_p (op_alt))
	continue;

      // Use SRC_OPNO's constraints to derive information about DEST_OPNO.
      auto record_operand = [&](int src_opno, int dest_opno)
	{
	  int matches = op_alt[src_opno].matches;
	  if (matches >= 0)
	    operand_matches[dest_opno] = matches;

	  auto cl = alternative_class (op_alt, src_opno);
	  if (cl != NO_REGS)
	    {
	      if (reg_class_subset_p (cl, FP_REGS))
		operand_flags[dest_opno] |= ALLOWS_FPR32;
	      if (reg_class_subset_p (cl, FP_LO_REGS))
		operand_flags[dest_opno] |= ALLOWS_FPR16;
	      if (reg_class_subset_p (cl, FP_LO8_REGS))
		operand_flags[dest_opno] |= ALLOWS_FPR8;
	      if (!reg_classes_intersect_p (cl, FP_REGS))
		operand_flags[dest_opno] |= ALLOWS_NONFPR;
	    }
	};

      for (int i = 0; i < recog_data.n_operands; ++i)
	{
	  record_operand (i, i);
	  if (recog_data.constraints[i][0] == '%')
	    {
	      record_operand (i, i + 1);
	      record_operand (i + 1, i);
	    }
	}
    }

  // Process the information we collected above.
  for (int i = 0; i < recog_data.n_operands; ++i)
    {
      rtx op = recog_data.operand[i];
      machine_mode orig_mode = GET_MODE (op);
      if (SUBREG_P (op))
	op = SUBREG_REG (op);

      // Record the accumulated information in m_pseudo_regs.
      if (REG_P (op) && !HARD_REGISTER_P (op))
	{
	  // The flags so far just describe what at least one alternative
	  // would accept.  Calculate the associated NEEDS_* information.
	  auto flags = operand_flags[i];
	  if (!(flags & ALLOWS_FPR32) && (flags & ALLOWS_NONFPR))
	    flags |= NEEDS_NONFPR;
	  else if ((flags & ALLOWS_FPR32) && !(flags & ALLOWS_NONFPR))
	    {
	      if (flags & ALLOWS_FPR8)
		flags |= NEEDS_FPR8;
	      if (flags & ALLOWS_FPR16)
		flags |= NEEDS_FPR16;
	      flags |= NEEDS_FPR32;
	    }

	  // Look for multi-register vector operands.
	  if (VECTOR_MODE_P (orig_mode)
	      && targetm.hard_regno_mode_ok (V0_REGNUM, orig_mode)
	      && hard_regno_nregs (V0_REGNUM, orig_mode) > 1)
	    flags |= insn_flags;

	  m_pseudo_regs[REGNO (op)].flags |= flags;
	  m_pseudo_regs[REGNO (op)].mode = GET_MODE (op);
	}

      // Treat matching constraints as equivalent to moves.
      if (operand_matches[i] >= 0)
	preprocess_move (recog_data.operand[operand_matches[i]], op);
    }
}

// Make one pass through the instructions, collecting information that
// will be needed later.
void
early_ra::preprocess_insns ()
{
  m_pseudo_regs.safe_grow_cleared (max_reg_num ());
  m_pseudo_reg_copies.safe_push (reg_copy_info ());
  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;

      if (GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	continue;

      rtx set = single_set (insn);
      if (set && is_move_set (set))
	preprocess_move (SET_DEST (set), SET_SRC (set));
      else
	process_pseudo_reg_constraints (insn);
    }
}

// Return a signed integer that says (roughly) how strong an affinity
// pseudo register REGNO has with FPRs.  A positive value indicates
// that we should try to allocate an FPR, a negative value indicates
// that we shouldn't, and 0 indicates neutrality.
int
early_ra::fpr_preference (unsigned int regno)
{
  auto mode = m_pseudo_regs[regno].mode;
  auto flags = m_pseudo_regs[regno].flags;
  if (mode == VOIDmode || !targetm.hard_regno_mode_ok (V0_REGNUM, mode))
    return -3;
  else if (flags & HAS_FLEXIBLE_STRIDE)
    return 3;
  else if (flags & NEEDS_FPR32)
    return 2;
  else if (!(flags & ALLOWS_FPR32) && (flags & ALLOWS_NONFPR))
    return -2;
  else if ((flags & HAS_FPR_COPY) && !(flags & HAS_NONFPR_COPY))
    return 1;
  else if ((flags & HAS_NONFPR_COPY) && !(flags & HAS_FPR_COPY))
    return -1;
  else
    return 0;
}

// Propagate information about pseudo registers along copy edges,
// while doing so doesn't create conflicting FPR preferences.
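// For example, if r100 is known to need an FPR and is copied to r101,
// which is so far neutral, then r101 inherits r100's PSEUDO_COPY_FLAGS.
// If r101 instead leaned towards a non-FPR, nothing would be propagated,
// since that would create conflicting preferences.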
void
early_ra::propagate_pseudo_reg_info ()
{
  struct stack_entry { unsigned int regno, copyi; };

  auto_vec<stack_entry, 32> stack;
  for (unsigned int i = FIRST_PSEUDO_REGISTER;
       i < m_pseudo_regs.length (); ++i)
    {
      auto start = m_pseudo_regs[i].first_copy;
      if (!start)
	continue;

      stack.quick_push ({ i, start });
      while (!stack.is_empty ())
	{
	  auto entry = stack.pop ();
	  auto &copy = m_pseudo_reg_copies[entry.copyi];
	  auto src_regno = entry.regno;
	  auto dest_regno = (src_regno == copy.regnos[1]
			     ? copy.regnos[0]
			     : copy.regnos[1]);
	  auto next_copyi = (src_regno == copy.regnos[1]
			     ? copy.next_copies[1]
			     : copy.next_copies[0]);
	  if (next_copyi)
	    stack.safe_push ({ src_regno, next_copyi });

	  auto &src_reg = m_pseudo_regs[src_regno];
	  auto &dest_reg = m_pseudo_regs[dest_regno];

	  if (src_reg.flags & ~dest_reg.flags & PSEUDO_COPY_FLAGS)
	    {
	      auto src_preference = fpr_preference (src_regno);
	      auto dest_preference = fpr_preference (dest_regno);
	      if ((src_preference >= 0 && dest_preference >= 0)
		  || (src_preference <= 0 && dest_preference <= 0))
		{
		  dest_reg.flags |= (src_reg.flags & PSEUDO_COPY_FLAGS);
		  stack.safe_push ({ dest_regno, dest_reg.first_copy });
		}
	    }
	}
    }
}

// Decide which pseudos should be allocated an FPR, setting m_fpr_pseudos
// accordingly.
void
early_ra::choose_fpr_pseudos ()
{
  for (unsigned int i = FIRST_PSEUDO_REGISTER;
       i < m_pseudo_regs.length (); ++i)
    if (fpr_preference (i) > 0)
      bitmap_set_bit (m_fpr_pseudos, i);
}

// Clear out information about the previous CFG region (if any)
// and set up the data for a new region.
void
early_ra::start_new_region ()
{
  obstack_free (&m_region_obstack, m_region_alloc_start);
  m_regno_to_group.empty ();
  m_allocno_copies.truncate (0);
  m_allocnos.truncate (0);
  m_sorted_allocnos.truncate (0);
  m_shared_allocnos.truncate (0);
  m_colors.truncate (0);
  m_insn_ranges.truncate (0);
  for (auto &fpr_ranges : m_fpr_ranges)
    fpr_ranges.truncate (0);
  for (auto &call_points : m_call_points)
    call_points.truncate (0);
  gcc_assert (bitmap_empty_p (m_live_allocnos) && m_live_fprs == 0);
  m_dead_insns.truncate (0);
  m_allocated_fprs = 0;
  m_call_preserved_fprs = 0;
  m_allocation_successful = true;
}

// Create and return an allocno group of size SIZE for register REGNO.
// REGNO can be INVALID_REGNUM if the group just exists to allow
// other groups to be chained together, and does not have any new
// allocnos of its own.
early_ra::allocno_group_info *
early_ra::create_allocno_group (unsigned int regno, unsigned int size)
{
  static_assert (alignof (unsigned int) == alignof (allocno_info),
		 "allocno_info alignment");
  unsigned int num_allocnos = (regno != INVALID_REGNUM ? size : 0);

  // Allocate an allocno_group_info, followed by an array of chain heads,
  // followed by the allocnos themselves.
  size_t alloc_size = (sizeof (allocno_group_info)
		       + size * sizeof (unsigned int)
		       + num_allocnos * sizeof (allocno_info));
  void *data = obstack_alloc (&m_region_obstack, alloc_size);

  // Initialize the group.
  auto *group = reinterpret_cast<allocno_group_info *> (data);
  memset (group, 0, sizeof (*group));
  group->m_color_rep = group;
  group->regno = regno;
  group->size = size;
  group->stride = 1;
  group->fpr_size = FPR_D;
  group->fpr_candidates = ~0U;

  // Initialize the chain heads.
  auto heads = group->chain_heads ();
  for (unsigned int i = 0; i < heads.size (); ++i)
    heads[i] = (i < num_allocnos ? m_allocnos.length () + i : INVALID_ALLOCNO);

  // Initialize the allocnos.
  if (num_allocnos > 0)
    {
      auto allocnos = group->allocnos ();
      memset (allocnos.begin (), 0, num_allocnos * sizeof (allocno_info));
      for (unsigned int i = 0; i < num_allocnos; ++i)
	{
	  auto *allocno = &allocnos[i];
	  allocno->id = m_allocnos.length ();
	  allocno->offset = i;
	  allocno->group_size = size;
	  allocno->hard_regno = FIRST_PSEUDO_REGISTER;
	  allocno->start_point = END_OF_REGION;
	  allocno->end_point = START_OF_REGION;
	  allocno->copy_dest = INVALID_ALLOCNO;
	  allocno->related_allocno = INVALID_ALLOCNO;
	  allocno->chain_next = INVALID_ALLOCNO;
	  allocno->chain_prev = INVALID_ALLOCNO;
	  m_allocnos.safe_push (allocno);
	}
    }
  return group;
}

// If REG refers to a pseudo register that might be allocated to FPRs,
// return the associated range of allocnos, creating new ones if necessary.
// Return an empty range otherwise.
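// For example, if REG is a subreg of a four-FPR pseudo and
// subreg_get_info reports offset 2 and nregs 2 for it, the returned
// subgroup covers just the pseudo's third and fourth allocnos.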
early_ra::allocno_subgroup
early_ra::get_allocno_subgroup (rtx reg)
{
  if (GET_CODE (reg) == SUBREG)
    {
      allocno_subgroup inner = get_allocno_subgroup (SUBREG_REG (reg));
      if (!inner)
	return {};

      if (!targetm.modes_tieable_p (GET_MODE (SUBREG_REG (reg)),
				    GET_MODE (reg)))
	{
	  m_allocation_successful = false;
	  return {};
	}

      subreg_info info;
      subreg_get_info (V0_REGNUM, GET_MODE (SUBREG_REG (reg)),
		       SUBREG_BYTE (reg), GET_MODE (reg), &info);
      if (!info.representable_p)
	{
	  m_allocation_successful = false;
	  return {};
	}

      inner.start += info.offset;
      inner.count = info.nregs;
      return inner;
    }

  if (!REG_P (reg) || HARD_REGISTER_P (reg))
    return {};

  unsigned int regno = REGNO (reg);
  if (fpr_preference (regno) <= 0)
    return {};

  unsigned int count = hard_regno_nregs (V0_REGNUM, GET_MODE (reg));
  bool existed;
  auto &entry = m_regno_to_group.get_or_insert (regno, &existed);
  if (!existed)
    {
      auto *group = create_allocno_group (regno, count);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  auto allocnos = group->allocnos ();
	  fprintf (dump_file, "Creating allocnos [%d:%d] for r%d\n",
		   allocnos.front ().id, allocnos.back ().id, regno);
	}

      auto reg_bits = GET_MODE_BITSIZE (GET_MODE (reg));
      auto fpr_bits = exact_div (reg_bits, count);
      auto flags = m_pseudo_regs[regno].flags;

      // Punt for now if there is a choice to be made between using an
      // FPR and a non-FPR.
      if ((flags & NEEDS_NONFPR)
	  || ((flags & ALLOWS_NONFPR)
	      && !FLOAT_MODE_P (GET_MODE (reg))
	      && !VECTOR_MODE_P (GET_MODE (reg))))
	m_allocation_successful = false;

      if (flags & ALLOWS_FPR8)
	group->fpr_candidates &= 0xff;
      else if (flags & ALLOWS_FPR16)
	group->fpr_candidates &= 0xffff;
      group->fpr_candidates &= ~0U >> (count - 1);

      group->has_flexible_stride = ((flags & HAS_FLEXIBLE_STRIDE) != 0
				    && (flags & HAS_FIXED_STRIDE) == 0);

      group->fpr_size = (maybe_gt (fpr_bits, 128) ? FPR_Z
			 : maybe_gt (fpr_bits, 64) ? FPR_Q : FPR_D);

      entry = group;
    }
  return { entry, 0, count };
}

// Record a use of FPR REGNO at the current program point, as part of
// a backwards walk over a block.
void
early_ra::record_fpr_use (unsigned int regno)
{
  gcc_assert (IN_RANGE (regno, V0_REGNUM, V31_REGNUM));
  unsigned int offset = regno - V0_REGNUM;
  if (!(m_live_fprs & (1U << offset)))
    {
      m_fpr_ranges[offset].safe_push ({ START_OF_REGION, m_current_point,
					INVALID_ALLOCNO });
      m_live_fprs |= 1U << offset;
    }
}

// Record a definition of FPR REGNO at the current program point, as part of
// a backwards walk over a block.
void
early_ra::record_fpr_def (unsigned int regno)
{
  gcc_assert (IN_RANGE (regno, V0_REGNUM, V31_REGNUM));
  unsigned int offset = regno - V0_REGNUM;

  // The definition completes the current live range.  If the result
  // of the definition is used, the live range extends to the last use.
  // Otherwise the live range is just a momentary blip at the current point.
  auto &ranges = m_fpr_ranges[offset];
  if (m_live_fprs & (1U << offset))
    {
      ranges.last ().start_point = m_current_point;
      m_live_fprs &= ~(1U << offset);
    }
  else
    ranges.safe_push ({ m_current_point, m_current_point, INVALID_ALLOCNO });
}

// Record a use of allocno ALLOCNO at the current program point, as part
// of a backwards walk over a block.
void
early_ra::record_allocno_use (allocno_info *allocno)
{
  if (allocno->start_point == m_current_point)
    return;

  gcc_checking_assert (!allocno->is_shared ());
  bitmap_set_bit (m_live_allocnos, allocno->id);
  if (allocno->end_point > m_current_point)
    {
      allocno->end_point = m_current_point;
      allocno->last_def_point = START_OF_REGION;
      allocno->last_use_point = END_OF_REGION;
    }
  else
    allocno->last_use_point = allocno->start_point;
  allocno->start_point = m_current_point;
  allocno->is_copy_dest = false;
  allocno->is_strong_copy_src = false;
  allocno->related_allocno = INVALID_ALLOCNO;
  allocno->is_equiv = false;
}

// Record a definition of allocno ALLOCNO at the current program point,
// as part of a backwards walk over a block.  The allocno is known
// to be live.
void
early_ra::record_allocno_def (allocno_info *allocno)
{
  gcc_checking_assert (!allocno->is_shared ());
  allocno->last_use_point = allocno->start_point;
  allocno->last_def_point = m_current_point;
  allocno->start_point = m_current_point;
  allocno->num_defs = MIN (allocno->num_defs + 1, 2);
  gcc_checking_assert (!allocno->is_copy_dest
		       && !allocno->is_strong_copy_src);
  if (!bitmap_clear_bit (m_live_allocnos, allocno->id))
    gcc_unreachable ();
}

// SRC_ALLOCNO is copied or tied to DEST_ALLOCNO; IS_EQUIV is true if the
// two allocnos are known to be equal.  See whether we can mark a chain of
// allocnos ending at DEST_ALLOCNO as related to SRC_ALLOCNO.  Return the
// start of the chain if so, otherwise return null.
//
// If IS_EQUIV, a chain that contains just DEST_ALLOCNO should be treated
// as an equivalence.  Otherwise the chain should be shared with SRC_ALLOCNO.
//
// Sharing chains are a rather hacky workaround for the fact that we
// don't collect segmented live ranges, and that in the end we want to do
// simple interval graph coloring.
early_ra::allocno_info *
early_ra::find_related_start (allocno_info *dest_allocno,
			      allocno_info *src_allocno, bool is_equiv)
{
  allocno_info *res = nullptr;
  for (;;)
    {
      if (src_allocno->end_point > dest_allocno->end_point)
	// The src allocno dies first.
	return res;

      if (src_allocno->num_defs != 0)
	{
	  if (dest_allocno->end_point < m_current_bb_point)
	    // We don't currently track enough information to handle multiple
	    // definitions across basic block boundaries.
	    return res;

	  if (src_allocno->last_def_point >= dest_allocno->end_point)
	    // There is another definition during the destination's live range.
	    return res;
	}
      if (is_equiv)
	{
	  if (dest_allocno->num_defs == 1)
	    // dest_allocno is equivalent to src_allocno for dest_allocno's
	    // entire live range.  Fall back to that if we can't establish
	    // a sharing chain.
	    res = dest_allocno;
	}
      else
	{
	  if (src_allocno->last_use_point >= dest_allocno->end_point)
	    // src_allocno is live during dest_allocno's live range,
	    // and the two allocnos do not necessarily have the same value.
	    return res;
	}

      if (dest_allocno->group_size != 1
	  || DF_REG_DEF_COUNT (dest_allocno->group ()->regno) != 1)
	// Currently only single allocnos that are defined once can
	// share registers with non-equivalent allocnos.  This could be
	// relaxed, but at the time of writing, aggregates are not valid
	// SSA names and so generally only use a single pseudo throughout
	// their lifetime.
	return res;

      if (dest_allocno->copy_dest == src_allocno->id)
	// We've found a complete and valid sharing chain.
	return dest_allocno;

      if (dest_allocno->copy_dest == INVALID_ALLOCNO)
	return res;

      auto *next_allocno = m_allocnos[dest_allocno->copy_dest];
      if (!is_chain_candidate (dest_allocno, next_allocno))
	return res;

      dest_allocno = next_allocno;
      is_equiv = false;
    }
}

// Record any relevant allocno-related information for an actual or imagined
// copy from SRC to DEST.  FROM_MOVE_P is true if the copy was an explicit
// move instruction, false if it represents one way of satisfying the previous
// instruction's constraints.
void
early_ra::record_copy (rtx dest, rtx src, bool from_move_p)
{
  auto dest_range = get_allocno_subgroup (dest);
  auto src_range = get_allocno_subgroup (src);
  if (from_move_p
      && dest_range
      && REG_P (src)
      && FP_REGNUM_P (REGNO (src)))
    {
      // A copy from an FPR to an allocno group.
      unsigned int fpr = REGNO (src) - V0_REGNUM;
      m_allocno_copies.safe_push ({ dest_range.allocno (0)->id, fpr,
				    dest_range.count });

      // If the allocno at the other end of the chain of copies from DEST
      // has a copy to the same FPR, record that all intervening copy chains
      // could become "strong" ones.  This indicates that picking the FPR
      // avoids a copy at both ends.
      unsigned int hard_regno = REGNO (src);
      for (auto &dest_allocno : dest_range.allocnos ())
	if (dest_allocno.hard_regno == hard_regno++)
	  dest_allocno.is_strong_copy_src = true;
    }
  else if (from_move_p
	   && src_range
	   && REG_P (dest)
	   && FP_REGNUM_P (REGNO (dest)))
    {
      // A copy from an allocno group to an FPR.
      unsigned int fpr = REGNO (dest) - V0_REGNUM;
      m_allocno_copies.safe_push ({ src_range.allocno (0)->id, fpr,
				    src_range.count });
      for (auto &src_allocno : src_range.allocnos ())
	{
	  // If the copy comes from a move, see whether the destination
	  // FPR is known to be equal to the source allocno for the FPR's
	  // last live range.
	  if (from_move_p && src_allocno.num_defs == 0)
	    {
	      auto &last_range = m_fpr_ranges[fpr].last ();
	      if (last_range.end_point >= src_allocno.end_point)
		last_range.allocno = src_allocno.id;
	    }
	  src_allocno.hard_regno = V0_REGNUM + fpr;
	  fpr += 1;
	}
    }
  else if (src_range && dest_range)
    {
      // A copy between two allocno groups.  We can only have a mismatched
      // number of FPRs for imaginary, non-move copies.  In that case
      // the matching happens on the common lowparts.
      gcc_assert (!from_move_p || src_range.count == dest_range.count);
      unsigned int count = std::min (src_range.count, dest_range.count);
      if (WORDS_BIG_ENDIAN)
	{
	  src_range.start += src_range.count - count;
	  dest_range.start += dest_range.count - count;
	}
      src_range.count = count;
      dest_range.count = count;

      // Ignore (imaginary non-move) copies if the destination is still live.
      for (auto &dest_allocno : dest_range.allocnos ())
	if (bitmap_bit_p (m_live_allocnos, dest_allocno.id))
	  return;

      for (unsigned int i = 0; i < src_range.count; ++i)
	{
	  auto *dest_allocno = dest_range.allocno (i);
	  auto *src_allocno = src_range.allocno (i);
	  if (src_allocno->end_point > dest_allocno->start_point)
	    {
	      gcc_assert (src_allocno->copy_dest == INVALID_ALLOCNO
			  || src_allocno->copy_dest == dest_allocno->id);
	      src_allocno->copy_dest = dest_allocno->id;
	      src_allocno->hard_regno = dest_allocno->hard_regno;
	      dest_allocno->is_copy_dest = 1;
	    }
	  else if (auto *start_allocno = find_related_start (dest_allocno,
							     src_allocno,
							     from_move_p))
	    {
	      auto *next_allocno = dest_allocno;
	      for (;;)
		{
		  next_allocno->related_allocno = src_allocno->id;
		  next_allocno->is_equiv = (start_allocno == dest_allocno
					    && from_move_p);
		  if (next_allocno == start_allocno)
		    break;
		  next_allocno = m_allocnos[next_allocno->copy_dest];
		}
	    }
	}
    }
}
1689
1690 // Record any relevant allocno-related information about the constraints
1691 // on INSN, which has already been extracted.
1692 void
1693 early_ra::record_constraints (rtx_insn *insn)
1694 {
1695 preprocess_constraints (insn);
1696
1697 int operand_matches[MAX_RECOG_OPERANDS];
1698 for (int i = 0; i < recog_data.n_operands; ++i)
1699 operand_matches[i] = -1;
1700
1701 auto alts = get_preferred_alternatives (insn);
1702 bool any_ok = recog_data.n_alternatives == 0;
1703
1704 // The set of output operands that are earlyclobber in at least one
1705 // alternative.
1706 operand_mask earlyclobber_operands = 0;
1707
1708 // The set of output operands that are matched to inputs in at least
1709 // one alternative.
1710 operand_mask matched_operands = 0;
1711
1712 // The set of output operands that are not matched to inputs in at least
1713 // one alternative.
1714 operand_mask unmatched_operands = 0;
1715
1716 // The set of input operands that are matched to outputs in at least one
1717 // alternative, or that overlap with such an input if the output is not
1718 // earlyclobber. The latter part of the condition copes with things
1719 // like y = x * x, where the first x is tied to the destination, and where
1720 // y is not earlyclobber.
1721 operand_mask matches_operands = 0;
1722
1723 for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
1724 {
1725 if (!(alts & ALTERNATIVE_BIT (altno)))
1726 continue;
1727
1728 auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
1729 if (!likely_alternative_match_p (op_alt))
1730 continue;
1731
1732 any_ok = true;
1733
1734 // Update the information for operand DEST_OPNO based on the constraint
1735 // information for operand SRC_OPNO. The numbers can be different for
1736 // swapped commutative operands.
1737 auto record_operand = [&](int src_opno, int dest_opno)
1738 {
1739 int matches = op_alt[src_opno].matches;
1740 // A matched earlyclobber cannot be used if the same operand value
1741 // occurs in an unmatched operand. E.g. for y = x * x, a matched
1742 // earlyclobber on the first input does not cover the second input.
1743 if (matches >= 0)
1744 {
1745 rtx op = recog_data.operand[dest_opno];
1746 operand_mask overlaps = 0;
1747 for (int i = 0; i < recog_data.n_operands; ++i)
1748 if (i != dest_opno
1749 && !recog_data.is_operator[i]
1750 && recog_data.operand_type[i] != OP_OUT
1751 && reg_overlap_mentioned_p (op, recog_data.operand[i]))
1752 overlaps |= 1U << i;
1753 if (!op_alt[matches].earlyclobber || overlaps == 0)
1754 {
1755 operand_matches[dest_opno] = matches;
1756 matches_operands |= (1U << dest_opno) | overlaps;
1757 }
1758 }
1759 };
1760
1761 auto reject = count_rejects (op_alt);
1762 for (int opno = 0; opno < recog_data.n_operands; ++opno)
1763 {
1764 operand_mask op_mask = operand_mask (1) << opno;
1765
1766 if (recog_data.operand_type[opno] != OP_IN)
1767 {
1768 if (reject == 0 && op_alt[opno].matched >= 0)
1769 matched_operands |= op_mask;
1770 else
1771 unmatched_operands |= op_mask;
1772 }
1773
1774 if (op_alt[opno].earlyclobber)
1775 earlyclobber_operands |= op_mask;
1776
1777 // Punt for now on scratches. If we wanted to handle them,
1778 // we'd need to create allocnos for them, like IRA does.
1779 rtx op = recog_data.operand[opno];
1780 if (GET_CODE (op) == SCRATCH
1781 && reg_classes_intersect_p (op_alt[opno].cl, FP_REGS))
1782 m_allocation_successful = false;
1783
1784 // Record filter information, which applies to the first register
1785 // in the operand.
1786 if (auto filters = alternative_register_filters (op_alt, opno))
1787 if (auto range = get_allocno_subgroup (recog_data.operand[opno]))
1788 for (unsigned int fpr = range.start; fpr < 32; ++fpr)
1789 if (!test_register_filters (filters, fpr))
1790 range.group->fpr_candidates &= ~(1U << (fpr - range.start));
1791
1792 if (reject == 0)
1793 {
1794 // Record possible matched operands.
1795 record_operand (opno, opno);
1796 if (recog_data.constraints[opno][0] == '%')
1797 {
1798 record_operand (opno, opno + 1);
1799 record_operand (opno + 1, opno);
1800 }
1801 }
1802 }
1803 }
1804
1805 if (!any_ok)
1806 {
1807 if (dump_file && (dump_flags & TDF_DETAILS))
1808 fprintf (dump_file, " -- no match\n");
1809 m_allocation_successful = false;
1810 }
1811
1812 // Record if there is an output operand that is never earlyclobber and never
1813 // matched to an input. See the comment below for how this is used.
1814 rtx dest_op = NULL_RTX;
1815 for (int opno = 0; opno < recog_data.n_operands; ++opno)
1816 {
1817 auto op_mask = operand_mask (1) << opno;
1818 if (recog_data.operand_type[opno] == OP_OUT
1819 && (earlyclobber_operands & op_mask) == 0
1820 && (matched_operands & op_mask) == 0)
1821 {
1822 dest_op = recog_data.operand[opno];
1823 break;
1824 }
1825 }
1826
1827 for (int opno = 0; opno < recog_data.n_operands; ++opno)
1828 {
1829 auto op_mask = operand_mask (1) << opno;
1830 rtx op = recog_data.operand[opno];
1831 int matches = operand_matches[opno];
1832
1833 // Punt for now on operands that already have a fixed choice of
1834 // register, since we don't have IRA's ability to find an alternative.
1835 // It's better if earlier passes don't create this kind of situation.
1836 if (REG_P (op) && FP_REGNUM_P (REGNO (op)))
1837 m_allocation_successful = false;
1838
1839 // Treat input operands as being earlyclobbered if an output is
1840 // sometimes earlyclobber and if the input never matches an output.
1841 // Do the same if there is an output that is always matched to an
1842 // input, and if this operand doesn't match that input. In both
1843 // cases, tying the input and the output would lead to an impossible
1844 // combination (or at least one that is difficult to reload).
1845 if (recog_data.operand_type[opno] != OP_OUT
1846 && ((earlyclobber_operands && matches < 0)
1847 || ((matched_operands & ~unmatched_operands)
1848 && !(matches_operands & op_mask))))
1849 for (auto &allocno : get_allocno_subgroup (op).allocnos ())
1850 if (allocno.end_point + 1 == m_current_point)
1851 allocno.is_earlyclobbered = true;
1852
1853 // Create copies between operands that can be tied. This (deliberately)
1854 // might add several copies to the same destination register; later code
1855 // can then choose between them based on other criteria.
1856 //
1857 // If there is an output operand that is never matched or earlyclobber,
1858 // and an input operand that never matches an output operand, create
1859 // a tentative copy between them. This allows hard register preferences
1860 // to be transmitted along the copy chains.
1861 if (matches >= 0)
1862 record_copy (recog_data.operand[matches], op);
1863 else if (dest_op && recog_data.operand_type[opno] == OP_IN)
1864 record_copy (dest_op, op);
1865 }
1866 }
1867
1868 // If FLAGS is DF_REF_AT_TOP, model the artificial uses and defs at the
1869 // start of the current basic block, otherwise model the artificial uses
1870 // and defs at the end of the basic block. This is done as part of a
1871 // backwards walk, so defs should be processed before uses.
1872 void
1873 early_ra::record_artificial_refs (unsigned int flags)
1874 {
1875 df_ref ref;
1876
1877 FOR_EACH_ARTIFICIAL_DEF (ref, m_current_bb->index)
1878 if ((DF_REF_FLAGS (ref) & DF_REF_AT_TOP) == flags
1879 && IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
1880 record_fpr_def (DF_REF_REGNO (ref));
1881 m_current_point += 1;
1882
1883 FOR_EACH_ARTIFICIAL_USE (ref, m_current_bb->index)
1884 if ((DF_REF_FLAGS (ref) & DF_REF_AT_TOP) == flags
1885 && IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
1886 record_fpr_use (DF_REF_REGNO (ref));
1887 m_current_point += 1;
1888 }
1889
1890 // Model the register references in INSN as part of a backwards walk.
1891 void
1892 early_ra::record_insn_refs (rtx_insn *insn)
1893 {
1894 df_ref ref;
1895
1896 // Record all definitions, excluding partial call clobbers.
1897 FOR_EACH_INSN_DEF (ref, insn)
1898 if (IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
1899 record_fpr_def (DF_REF_REGNO (ref));
1900 else
1901 {
1902 auto range = get_allocno_subgroup (DF_REF_REG (ref));
1903 for (auto &allocno : range.allocnos ())
1904 {
1905 // If the destination is unused, record a momentary blip
1906 // in its live range.
1907 if (!bitmap_bit_p (m_live_allocnos, allocno.id))
1908 record_allocno_use (&allocno);
1909 record_allocno_def (&allocno);
1910 }
1911 }
1912 m_current_point += 1;
1913
1914 // Model the call made by a call insn as a separate phase in the
1915 // evaluation of the insn. Any partial call clobbers happen at that
1916 // point, rather than in the definition or use phase of the insn.
1917 if (auto *call_insn = dyn_cast<rtx_call_insn *> (insn))
1918 {
1919 function_abi abi = insn_callee_abi (call_insn);
1920 m_call_points[abi.id ()].safe_push (m_current_point);
1921 m_current_point += 1;
1922 }
1923
1924 // Record all uses. We can ignore READ_MODIFY_WRITE uses of plain subregs,
1925 // since we track the FPR-sized parts of them individually.
1926 FOR_EACH_INSN_USE (ref, insn)
1927 if (IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
1928 record_fpr_use (DF_REF_REGNO (ref));
1929 else if (!DF_REF_FLAGS_IS_SET (ref, DF_REF_READ_WRITE)
1930 || DF_REF_FLAGS_IS_SET (ref, DF_REF_STRICT_LOW_PART)
1931 || DF_REF_FLAGS_IS_SET (ref, DF_REF_ZERO_EXTRACT))
1932 {
1933 auto range = get_allocno_subgroup (DF_REF_REG (ref));
1934 for (auto &allocno : range.allocnos ())
1935 record_allocno_use (&allocno);
1936 }
1937 m_current_point += 1;
1938 }
1939
1940 // ALLOCNO->is_strong_copy_src is true. See whether ALLOCNO heads a
1941 // natural chain that has an affinity with the same hard register at
1942 // both ends.
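//
// For instance (a hypothetical chain): if v4 is copied to pseudo P,
// P's value flows through a chain of copies to pseudo Q, and Q is
// finally copied back to v4, then allocating v4 to the whole chain
// removes the moves at both ends.  In that case every link in the
// chain is marked as a strong copy below.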
1943 bool
1944 early_ra::consider_strong_copy_src_chain (allocno_info *allocno)
1945 {
1946 auto *src_allocno = allocno;
1947 while (src_allocno->copy_dest != INVALID_ALLOCNO)
1948 {
1949 auto *dest_allocno = m_allocnos[src_allocno->copy_dest];
1950 if (dest_allocno->start_point > src_allocno->end_point
1951 || dest_allocno->hard_regno != src_allocno->hard_regno)
1952 return false;
1953 gcc_checking_assert (dest_allocno->is_copy_dest);
1954 src_allocno = dest_allocno;
1955 }
1956
1957 while (allocno->copy_dest != INVALID_ALLOCNO)
1958 {
1959 allocno->is_strong_copy_src = 1;
1960 allocno = m_allocnos[allocno->copy_dest];
1961 allocno->is_strong_copy_dest = 1;
1962 }
1963 return true;
1964 }
1965
1966 // ALLOCNO1 and ALLOCNO2 are linked in some way, and might end up being
1967 // chained together. See whether chaining them requires the containing
1968 // groups to have the same stride, or whether it requires them to have
1969 // different strides. Return 1 if they should have the same stride,
1970 // -1 if they should have different strides, or 0 if it doesn't matter.
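//
// A rough example: if ALLOCNO1 and ALLOCNO2 are the first allocnos of
// their groups, the first test below asks whether the groups' second
// allocnos could be chained as well.  If they can, making the same
// consecutive/strided choice on both sides keeps those neighbours
// lined up too (return 1); if not, opposite choices are preferred
// (return -1).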
1971 int
1972 early_ra::strided_polarity_pref (allocno_info *allocno1,
1973 allocno_info *allocno2)
1974 {
1975 if (allocno1->offset + 1 < allocno1->group_size
1976 && allocno2->offset + 1 < allocno2->group_size)
1977 {
1978 if (is_chain_candidate (allocno1 + 1, allocno2 + 1))
1979 return 1;
1980 else
1981 return -1;
1982 }
1983
1984 if (allocno1->offset > 0 && allocno2->offset > 0)
1985 {
1986 if (is_chain_candidate (allocno1 - 1, allocno2 - 1))
1987 return 1;
1988 else
1989 return -1;
1990 }
1991
1992 return 0;
1993 }
1994
1995 // Decide which groups should be strided. Also complete "strong" copy chains.
1996 void
1997 early_ra::find_strided_accesses ()
1998 {
1999 // This function forms a graph of allocnos, linked by equivalences and
2000 // natural copy chains. It temporarily uses chain_next to record the
2001 // reverse of equivalence edges (related_allocno) and chain_prev to record
2002 // the reverse of copy edges (copy_dest).
2003 unsigned int allocno_info::*links[] = {
2004 &allocno_info::chain_next,
2005 &allocno_info::chain_prev,
2006 &allocno_info::copy_dest,
2007 &allocno_info::related_allocno
2008 };
2009
2010 // Set up the temporary reverse edges. Check for strong copy chains.
2011 for (unsigned int i = m_allocnos.length (); i-- > 0; )
2012 {
2013 auto *allocno1 = m_allocnos[i];
2014 if (allocno1->copy_dest != INVALID_ALLOCNO)
2015 m_allocnos[allocno1->copy_dest]->chain_prev = allocno1->id;
2016 if (allocno1->related_allocno != INVALID_ALLOCNO)
2017 m_allocnos[allocno1->related_allocno]->chain_next = allocno1->id;
2018
2019 if (allocno1->is_strong_copy_src
2020 && !allocno1->is_copy_dest
2021 && !consider_strong_copy_src_chain (allocno1))
2022 allocno1->is_strong_copy_src = false;
2023 }
2024
2025 // Partition the graph into cliques based on edges that have the following
2026 // properties:
2027 //
2028 // - the edge joins two allocnos whose groups have a free choice between
2029 // consecutive and strided allocations.
2030 //
2031 // - the two groups have a relative strided polarity preference (that is
2032 // they should make the same choice between consecutive and strided,
2033 // or they should make opposite choices).
2034 //
2035 // Assign relative polarities to each group connected in this way.
2036 //
2037 // The aim is to discover natural move-free striding choices, which will
2038 // often exist in carefully written ACLE code.
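//
// Each allocno contributes ARRAY_SIZE (links) == 4 edges, so edge
// index EI maps to allocno EI / 4 and link EI % 4.  For example,
// edge 13 is allocno 3's chain_prev link.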
2039 unsigned int num_edges = m_allocnos.length () * ARRAY_SIZE (links);
2040 auto_sbitmap visited_edges (num_edges);
2041 bitmap_clear (visited_edges);
2042
2043 auto_vec<unsigned int, 32> worklist;
2044 for (unsigned int i = 0; i < num_edges; ++i)
2045 {
2046 if (!bitmap_set_bit (visited_edges, i))
2047 continue;
2048 worklist.quick_push (i);
2049 while (!worklist.is_empty ())
2050 {
2051 auto ei = worklist.pop ();
2052 auto *allocno1 = m_allocnos[ei / ARRAY_SIZE (links)];
2053 auto ai2 = allocno1->*links[ei % ARRAY_SIZE (links)];
2054 if (ai2 == INVALID_ALLOCNO)
2055 continue;
2056
2057 auto *allocno2 = m_allocnos[ai2];
2058 auto *group1 = allocno1->group ();
2059 auto *group2 = allocno2->group ();
2060 if (!group1->has_flexible_stride || !group2->has_flexible_stride)
2061 continue;
2062
2063 int pref = strided_polarity_pref (allocno1, allocno2);
2064 if (pref == 0)
2065 continue;
2066
2067 for (auto *group : { group1, group2 })
2068 for (auto &allocno : group->allocnos ())
2069 for (unsigned int j = 0; j < ARRAY_SIZE (links); ++j)
2070 if (bitmap_set_bit (visited_edges, allocno.id * 4 + j))
2071 worklist.safe_push (allocno.id * 4 + j);
2072
2073 if (group1->strided_polarity)
2074 group2->strided_polarity = group1->strided_polarity * pref;
2075 else if (group2->strided_polarity)
2076 group1->strided_polarity = group2->strided_polarity * pref;
2077 else
2078 {
2079 group1->strided_polarity = 1;
2080 group2->strided_polarity = pref;
2081 }
2082 }
2083 }
2084
2085 // Now look for edges between allocnos in multi-register groups where:
2086 //
2087 // - the two groups have a relative strided polarity preference (as above).
2088 //
2089 // - one group (G1) has a free choice between consecutive and strided
2090 // allocations.
2091 //
2092 // - the other group (G2) must use consecutive allocations.
2093 //
2094 // Update G1's individual preference for strided or consecutive allocations
2095 // based on G2. If the previous loop chose a polarity for G1, work out
2096 // whether it is better for polarity 1 or -1 to correspond to consecutive
2097 // allocation.
2098 int consecutive_pref = 0;
2099 for (unsigned int i = m_allocnos.length (); i-- > 0; )
2100 {
2101 auto *allocno1 = m_allocnos[i];
2102 for (auto link : links)
2103 {
2104 auto ai2 = allocno1->*link;
2105 if (ai2 == INVALID_ALLOCNO)
2106 continue;
2107
2108 auto *allocno2 = m_allocnos[ai2];
2109 auto *group1 = allocno1->group ();
2110 auto *group2 = allocno2->group ();
2111 if (group1->has_flexible_stride == group2->has_flexible_stride)
2112 continue;
2113
2114 int pref = strided_polarity_pref (allocno1, allocno2);
2115 if (pref == 0)
2116 continue;
2117
2118 auto *group = (group1->has_flexible_stride ? group1 : group2);
2119 consecutive_pref += group->strided_polarity * pref;
2120 group->consecutive_pref += pref;
2121 }
2122 }
2123
2124 // If it doesn't matter whether polarity 1 or -1 corresponds to consecutive
2125 // allocation, arbitrarily pick 1.
2126 if (consecutive_pref == 0)
2127 consecutive_pref = 1;
2128
2129 // Record which multi-register groups should use strided allocations.
2130 // Clear out the temporary edges.
2131 for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
2132 {
2133 auto *allocno = m_allocnos[ai];
2134 allocno->chain_prev = INVALID_ALLOCNO;
2135 allocno->chain_next = INVALID_ALLOCNO;
2136
2137 if (allocno->offset != 0)
2138 continue;
2139
2140 auto *group = allocno->group ();
2141 if (!group->has_flexible_stride)
2142 continue;
2143
2144 bool make_strided = (group->strided_polarity
2145 ? (consecutive_pref * group->strided_polarity) < 0
2146 : group->consecutive_pref < 0);
2147 if (dump_file && (dump_flags & TDF_DETAILS))
2148 fprintf (dump_file, "Allocno [%d:%d]: strided polarity %d,"
2149 " consecutive pref %d, %s\n",
2150 allocno->id, allocno->id + group->size - 1,
2151 group->strided_polarity, group->consecutive_pref,
2152 make_strided ? "making strided" : "keeping consecutive");
2153 if (!make_strided)
2154 continue;
2155
2156 // 2-register groups have a stride of 8 FPRs and must start in
2157 // registers matching the mask 0x17. 4-register groups have a stride
2158 // of 4 FPRs and must start in registers matching the mask 0x13.
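//
// For example, mask 0x17 = 0b10111 accepts start registers
// { v0-v7, v16-v23 }, giving the start-candidate bitmask 0xff00ff
// assigned below; mask 0x13 = 0b10011 accepts { v0-v3, v16-v19 },
// giving 0xf000f.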
2159 group->stride = group->size == 2 ? 8 : 4;
2160 gcc_checking_assert (group->fpr_candidates
2161 == (group->size == 2 ? 0x55555555 : 0x11111111));
2162 group->fpr_candidates = (group->size == 2 ? 0xff00ff : 0xf000f);
2163 }
2164 }
2165
2166 // Compare the allocnos at *ALLOCNO1_PTR and *ALLOCNO2_PTR and return a <=>
2167 // result that puts allocnos in order of increasing FIELD.
2168 template<unsigned int early_ra::allocno_info::*field>
2169 int
2170 early_ra::cmp_increasing (const void *allocno1_ptr, const void *allocno2_ptr)
2171 {
2172 auto *allocno1 = *(allocno_info *const *) allocno1_ptr;
2173 auto *allocno2 = *(allocno_info *const *) allocno2_ptr;
2174
2175 if (allocno1->*field != allocno2->*field)
2176 return allocno1->*field < allocno2->*field ? -1 : 1;
2177 return (allocno1->id < allocno2->id ? -1
2178 : allocno1->id == allocno2->id ? 0 : 1);
2179 }
2180
2181 // Return true if we should consider chaining ALLOCNO1 onto the head
2182 // of ALLOCNO2. This is just a local test of the two allocnos; it doesn't
2183 // guarantee that chaining them would give a self-consistent system.
2184 bool
2185 early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2)
2186 {
2187 if (allocno2->is_shared ())
2188 return false;
2189
2190 if (allocno1->is_equiv)
2191 allocno1 = m_allocnos[allocno1->related_allocno];
2192
2193 if (allocno2->start_point >= allocno1->end_point
2194 && !allocno2->is_equiv_to (allocno1->id))
2195 return false;
2196
2197 if (allocno2->is_strong_copy_dest)
2198 {
2199 if (!allocno1->is_strong_copy_src
2200 || allocno1->copy_dest != allocno2->id)
2201 return false;
2202 }
2203 else if (allocno2->is_copy_dest)
2204 {
2205 if (allocno1->copy_dest != allocno2->id)
2206 return false;
2207 }
2208 else if (allocno1->is_earlyclobbered)
2209 {
2210 if (allocno1->end_point == allocno2->start_point + 1)
2211 return false;
2212 }
2213
2214 return true;
2215 }
2216
2217 // We're trying to chain allocno ALLOCNO1 to a later allocno.
2218 // Rate how good a choice ALLOCNO2 would be, with higher being better.
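//
// For example, given the weights below: a strong copy destination
// whose group is well-aligned with ALLOCNO1's (both color reps having
// minimum size 4) scores 256 + 4, whereas a plain copy destination
// with mismatched alignment scores 128 - 4.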
2219 int
2220 early_ra::rate_chain (allocno_info *allocno1, allocno_info *allocno2)
2221 {
2222 int score = 0;
2223 if (allocno2->is_strong_copy_dest)
2224 score += 256;
2225 else if (allocno2->is_copy_dest)
2226 score += 128;
2227
2228 // Prefer well-aligned matches.
2229 auto *group1 = allocno1->group ();
2230 auto *group2 = allocno2->group ();
2231 if (group1->stride == 1 && group2->stride == 1)
2232 {
2233 unsigned int min_size = std::min (group1->color_rep ()->size,
2234 group2->color_rep ()->size);
2235 if ((group1->color_rep_offset + allocno1->offset) % min_size
2236 == (group2->color_rep_offset + allocno2->offset) % min_size)
2237 score += min_size;
2238 else
2239 score -= min_size;
2240 }
2241 return score;
2242 }
2243
2244 // Sort the chain_candidate_infos at ARG1 and ARG2 in order of decreasing
2245 // score.
2246 int
2247 early_ra::cmp_chain_candidates (const void *arg1, const void *arg2)
2248 {
2249 auto &candidate1 = *(const chain_candidate_info *) arg1;
2250 auto &candidate2 = *(const chain_candidate_info *) arg2;
2251 if (candidate1.score != candidate2.score)
2252 return candidate1.score > candidate2.score ? -1 : 1;
2253
2254 // Prefer to increase the gap between uses of the allocated register,
2255 // to give the scheduler more freedom.
2256 auto *allocno1 = candidate1.allocno;
2257 auto *allocno2 = candidate2.allocno;
2258 if (allocno1->start_point != allocno2->start_point)
2259 return allocno1->start_point < allocno2->start_point ? -1 : 1;
2260
2261 if (allocno1 != allocno2)
2262 return allocno1->id < allocno2->id ? -1 : 1;
2263
2264 return 0;
2265 }
2266
2267 // Join the chains of allocnos that start at HEADI1 and HEADI2.
2268 // HEADI1 is either empty or a single allocno.
2269 void
2270 early_ra::chain_allocnos (unsigned int &headi1, unsigned int &headi2)
2271 {
2272 if (headi1 == INVALID_ALLOCNO)
2273 headi1 = headi2;
2274 else if (headi2 == INVALID_ALLOCNO)
2275 headi2 = headi1;
2276 else
2277 {
2278 auto *head1 = m_allocnos[headi1];
2279 auto *head2 = m_allocnos[headi2];
2280 gcc_checking_assert (head1->chain_next == INVALID_ALLOCNO
2281 && head1->chain_prev == INVALID_ALLOCNO
2282 && head2->chain_prev == INVALID_ALLOCNO);
2283
2284 if (head1->is_equiv
2285 && m_allocnos[head1->related_allocno]->copy_dest == headi2)
2286 {
2287 head1->is_copy_dest = head2->is_copy_dest;
2288 head1->is_strong_copy_dest = head2->is_strong_copy_dest;
2289 m_allocnos[head1->related_allocno]->copy_dest = headi1;
2290 }
2291 head1->chain_next = headi2;
2292 head2->chain_prev = headi1;
2293
2294 headi2 = headi1;
2295 }
2296 }
2297
2298 // Add GROUP2's FPR information to GROUP1's, given that GROUP2 starts
2299 // OFFSET allocnos into GROUP1.
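//
// A worked example with hypothetical values: if GROUP2's candidate
// start registers are { v0, v4 } (mask 0x11) and GROUP2 starts two
// allocnos into GROUP1, whose stride is 1, then GROUP1's candidates
// are ANDed with 0x11 >> 2 == 0x4; GROUP1 may then only start at v2,
// which places GROUP2 at v4.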
2300 void
2301 early_ra::merge_fpr_info (allocno_group_info *group1,
2302 allocno_group_info *group2,
2303 unsigned int offset)
2304 {
2305 group1->fpr_size = std::max (group1->fpr_size, group2->fpr_size);
2306 group1->fpr_candidates &= (group2->fpr_candidates
2307 >> (offset * group1->stride));
2308 }
2309
2310 // Set the color representative of ALLOCNO's group to REP, such that ALLOCNO
2311 // ends up being at allocno offset REP_OFFSET from the start of REP.
2312 void
2313 early_ra::set_single_color_rep (allocno_info *allocno, allocno_group_info *rep,
2314 unsigned int rep_offset)
2315 {
2316 auto *group = allocno->group ();
2317 if (group->m_color_rep == rep)
2318 return;
2319
2320 group->m_color_rep = rep;
2321 gcc_checking_assert (multiple_p (group->stride, rep->stride));
2322 unsigned int factor = group->stride / rep->stride;
2323 gcc_checking_assert (rep_offset >= allocno->offset * factor);
2324 group->color_rep_offset = rep_offset - allocno->offset * factor;
2325 merge_fpr_info (rep, group, group->color_rep_offset);
2326 }
2327
2328 // REP1 and REP2 are color representatives. Change REP1's color representative
2329 // to REP2, with REP1 starting at allocno offset REP2_OFFSET into REP2.
2330 void
2331 early_ra::set_color_rep (allocno_group_info *rep1, allocno_group_info *rep2,
2332 unsigned int rep2_offset)
2333 {
2334 gcc_checking_assert (rep1 != rep2
2335 && rep2->m_color_rep == rep2
2336 && multiple_p (rep1->stride, rep2->stride));
2337
2338 auto heads1 = rep1->chain_heads ();
2339 auto heads2 = rep2->chain_heads ();
2340 for (unsigned int i1 = 0; i1 < heads1.size (); ++i1)
2341 if (heads1[i1] != INVALID_ALLOCNO)
2342 {
2343 unsigned int i2 = rep2_offset + i1 * rep1->stride / rep2->stride;
2344 if (heads2[i2] == INVALID_ALLOCNO)
2345 heads2[i2] = heads1[i1];
2346 else
2347 gcc_checking_assert (heads2[i2] == heads1[i1]);
2348 set_single_color_rep (m_allocnos[heads1[i1]], rep2, i2);
2349 }
2350 }
2351
2352 // Try to chain ALLOCNO1 to the head of the chain starting at ALLOCNO2.
2353 // Return true on success.
2354 bool
2355 early_ra::try_to_chain_allocnos (allocno_info *allocno1,
2356 allocno_info *allocno2)
2357 {
2358 auto *group1 = allocno1->group ()->color_rep ();
2359 auto *group2 = allocno2->group ()->color_rep ();
2360
2361 // Avoid trying to tie different subgroups of the same group. This can
2362 // happen if the parts of a register are defined and used piecemeal.
2363 if (group1 == group2)
2364 return false;
2365
2366 // The stride (in FPRs) between allocnos of each color representative.
2367 auto fpr_stride1 = group1->stride;
2368 auto fpr_stride2 = group2->stride;
2369
2370 // The offset (in FPRs) of each allocno group from its color representative.
2371 auto fpr_offset1 = allocno1->group ()->color_rep_offset * fpr_stride1;
2372 auto fpr_offset2 = allocno2->group ()->color_rep_offset * fpr_stride2;
2373
2374 // The offset (in FPRs) of each allocno from its color representative.
2375 fpr_offset1 += allocno1->offset * allocno1->group ()->stride;
2376 fpr_offset2 += allocno2->offset * allocno2->group ()->stride;
2377
2378 // The FPR overlap is in multiples of the larger stride.
2379 auto max_fpr_stride = std::max (fpr_stride1, fpr_stride2);
2380 auto min_fpr_offset = std::min (fpr_offset1, fpr_offset2);
2381 auto fpr_overlap_offset = ROUND_DOWN (min_fpr_offset, max_fpr_stride);
2382
2383 // The offset (in FPRs) of the start of the overlapping region from
2384 // each color representative.
2385 fpr_offset1 -= fpr_overlap_offset;
2386 fpr_offset2 -= fpr_overlap_offset;
2387
2388 // The number of FPRs in each color representative after the start
2389 // of the overlapping region.
2390 auto fpr_after1 = (group1->size - 1) * fpr_stride1 - fpr_offset1;
2391 auto fpr_after2 = (group2->size - 1) * fpr_stride2 - fpr_offset2;
2392
2393 auto min_fpr_after = std::min (fpr_after1, fpr_after2);
2394
2395 // The number of overlapping allocnos.
2396 auto allocno_overlap_size = min_fpr_after / max_fpr_stride + 1;
2397
2398 // The offset (in allocnos) of the overlapping region from the start
2399 // of each color representative.
2400 auto allocno_offset1 = fpr_offset1 / fpr_stride1;
2401 auto allocno_offset2 = fpr_offset2 / fpr_stride2;
2402
2403 // The stride (in allocnos) between overlapping allocnos.
2404 auto allocno_stride1 = max_fpr_stride / fpr_stride1;
2405 auto allocno_stride2 = max_fpr_stride / fpr_stride2;
2406
2407 // Reject combinations that are impossible to allocate.
2408 auto fprs1 = group1->fpr_candidates;
2409 auto fprs2 = group2->fpr_candidates;
2410 if (fpr_offset1 > fpr_offset2)
2411 fprs2 >>= (fpr_offset1 - fpr_offset2);
2412 else
2413 fprs1 >>= (fpr_offset2 - fpr_offset1);
2414 if ((fprs1 & fprs2) == 0)
2415 {
2416 if (dump_file && (dump_flags & TDF_DETAILS))
2417 fprintf (dump_file, " - cannot chain %d->%d, no FPRs in common"
2418 " (%08x@%d and %08x@%d)\n", allocno1->id, allocno2->id,
2419 group1->fpr_candidates, fpr_offset1,
2420 group2->fpr_candidates, fpr_offset2);
2421 return false;
2422 }
2423
2424 // Check whether the chain can be formed.
2425 auto heads1 = group1->chain_heads ();
2426 auto heads2 = group2->chain_heads ();
2427 for (unsigned int i = 0; i < allocno_overlap_size; ++i)
2428 {
2429 auto headi1 = heads1[allocno_offset1 + i * allocno_stride1];
2430 auto headi2 = heads2[allocno_offset2 + i * allocno_stride2];
2431 if (headi1 != INVALID_ALLOCNO && headi2 != INVALID_ALLOCNO)
2432 {
2433 auto *head1 = m_allocnos[headi1];
2434 auto *head2 = m_allocnos[headi2];
2435 if (head1->chain_next != INVALID_ALLOCNO)
2436 return false;
2437 if (!head2->is_equiv_to (head1->id)
2438 && head1->end_point <= head2->start_point)
2439 return false;
2440 }
2441 }
2442
2443 if (dump_file && (dump_flags & TDF_DETAILS))
2444 {
2445 fprintf (dump_file, " - chaining allocnos [");
2446 for (unsigned int i = 0; i < allocno_overlap_size; ++i)
2447 fprintf (dump_file, "%s%d", i ? "," : "",
2448 heads1[allocno_offset1 + i * allocno_stride1]);
2449 fprintf (dump_file, "] and [");
2450 for (unsigned int i = 0; i < allocno_overlap_size; ++i)
2451 fprintf (dump_file, "%s%d", i ? "," : "",
2452 heads2[allocno_offset2 + i * allocno_stride2]);
2453 fprintf (dump_file, "]\n");
2454 }
2455
2456 // Chain the allocnos, updating the chain heads.
2457 for (unsigned int i = 0; i < allocno_overlap_size; ++i)
2458 chain_allocnos (heads1[allocno_offset1 + i * allocno_stride1],
2459 heads2[allocno_offset2 + i * allocno_stride2]);
2460
2461 // Pick a color representative for the merged groups.
2462 allocno_group_info *new_rep;
2463 if (allocno_offset1 == 0
2464 && group1->size == allocno_overlap_size * allocno_stride1
2465 && multiple_p (fpr_stride1, fpr_stride2))
2466 {
2467 // The first group fits within the second.
2468 set_color_rep (group1, group2, allocno_offset2);
2469 new_rep = group2;
2470 }
2471 else if (allocno_offset2 == 0
2472 && group2->size == allocno_overlap_size * allocno_stride2
2473 && multiple_p (fpr_stride2, fpr_stride1))
2474 {
2475 // The second group fits within the first.
2476 set_color_rep (group2, group1, allocno_offset1);
2477 new_rep = group1;
2478 }
2479 else
2480 {
2481 // We need a new group that is big enough to span both groups.
2482 // The new group always has an FPR stride of 1.
2483 auto max_fpr_offset = std::max (fpr_offset1, fpr_offset2);
2484 auto max_fpr_after = std::max (fpr_after1, fpr_after2);
2485 auto new_size = max_fpr_offset + max_fpr_after + 1;
2486 new_rep = create_allocno_group (INVALID_REGNUM, new_size);
2487
2488 set_color_rep (group1, new_rep, max_fpr_offset - fpr_offset1);
2489 set_color_rep (group2, new_rep, max_fpr_offset - fpr_offset2);
2490 }
2491
2492 if (dump_file && (dump_flags & TDF_DETAILS))
2493 {
2494 fprintf (dump_file, " - new frontier [");
2495 auto new_heads = new_rep->chain_heads ();
2496 for (unsigned int i = 0; i < new_heads.size (); ++i)
2497 {
2498 if (i)
2499 fprintf (dump_file, ",");
2500 if (new_heads[i] == INVALID_ALLOCNO)
2501 fprintf (dump_file, "-");
2502 else
2503 fprintf (dump_file, "%d", new_heads[i]);
2504 }
2505 fprintf (dump_file, "]\n");
2506 }
2507
2508 return true;
2509 }
2510
2511 // Create a color_info for color representative GROUP.
2512 void
2513 early_ra::create_color (allocno_group_info *group)
2514 {
2515 auto *color = region_allocate<color_info> ();
2516 color->id = m_colors.length ();
2517 color->hard_regno = FIRST_PSEUDO_REGISTER;
2518 color->group = group;
2519
2520 gcc_checking_assert (group->m_color_rep == group);
2521 group->has_color = true;
2522 group->color = m_colors.length ();
2523
2524 m_colors.safe_push (color);
2525 }
2526
2527 // Form allocnos into chains. Create colors for each resulting clique.
2528 void
2529 early_ra::form_chains ()
2530 {
2531 if (dump_file && (dump_flags & TDF_DETAILS))
2532 fprintf (dump_file, "\nChaining allocnos:\n");
2533
2534 // Perform (modified) interval graph coloring. First sort by
2535 // increasing start point.
2536 m_sorted_allocnos.reserve (m_allocnos.length ());
2537 m_sorted_allocnos.splice (m_allocnos);
2538 m_sorted_allocnos.qsort (cmp_increasing<&allocno_info::start_point>);
2539
2540 // During this phase, color representatives are only correct for
2541 // unprocessed allocno groups (where the color representative is
2542 // the group itself) and for groups that contain a current chain head.
2543 unsigned int ti = 0;
2544 auto_vec<chain_candidate_info> candidates;
2545 for (unsigned int hi = 0; hi < m_sorted_allocnos.length (); ++hi)
2546 {
2547 auto *allocno1 = m_sorted_allocnos[hi];
2548 if (allocno1->chain_next != INVALID_ALLOCNO)
2549 continue;
2550
2551 // Record conflicts with direct uses of FPR hard registers.
2552 auto *group1 = allocno1->group ();
2553 for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
2554 if (fpr_conflicts_with_allocno_p (fpr, allocno1))
2555 group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
2556
2557 // Record conflicts due to partially call-clobbered registers.
2558 // (Full clobbers are handled by the previous loop.)
2559 for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id)
2560 if (call_in_range_p (abi_id, allocno1->start_point,
2561 allocno1->end_point))
2562 {
2563 auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
2564 group1->fpr_candidates &= ~fprs >> allocno1->offset;
2565 }
2566
2567 if (allocno1->is_shared ())
2568 {
2569 if (dump_file && (dump_flags & TDF_DETAILS))
2570 fprintf (dump_file, " Allocno %d shares the same hard register"
2571 " as allocno %d\n", allocno1->id,
2572 allocno1->related_allocno);
2573 auto *allocno2 = m_allocnos[allocno1->related_allocno];
2574 merge_fpr_info (allocno2->group (), group1, allocno2->offset);
2575 m_shared_allocnos.safe_push (allocno1);
2576 continue;
2577 }
2578
2579 // Find earlier allocnos (in processing order) that could be chained
2580 // to this one.
2581 candidates.truncate (0);
2582 for (unsigned int sci = ti; sci < hi; ++sci)
2583 {
2584 auto *allocno2 = m_sorted_allocnos[sci];
2585 if (allocno2->chain_prev == INVALID_ALLOCNO)
2586 {
2587 if (!is_chain_candidate (allocno1, allocno2))
2588 continue;
2589 chain_candidate_info candidate;
2590 candidate.allocno = allocno2;
2591 candidate.score = rate_chain (allocno1, allocno2);
2592 candidates.safe_push (candidate);
2593 }
2594 else if (sci == ti)
2595 ++ti;
2596 }
2597
2598 // Sort the candidates by decreasing score.
2599 candidates.qsort (cmp_chain_candidates);
2600 if (dump_file && (dump_flags & TDF_DETAILS))
2601 {
2602 fprintf (dump_file, " Chain candidates for %d:", allocno1->id);
2603 for (auto &candidate : candidates)
2604 fprintf (dump_file, " %d(%d)", candidate.allocno->id,
2605 candidate.score);
2606 fprintf (dump_file, "\n");
2607 }
2608
2609 // Pick the first candidate that works.
2610 for (auto &candidate : candidates)
2611 if (try_to_chain_allocnos (allocno1, candidate.allocno))
2612 break;
2613 }
2614
2615 // Create color_infos for each group. Make sure that each group's
2616 // color representative is up to date.
2617 for (unsigned int hi = m_sorted_allocnos.length (); hi-- > 0; )
2618 {
2619 auto *allocno = m_sorted_allocnos[hi];
2620 if (allocno->is_shared ())
2621 continue;
2622
2623 auto *rep = allocno->group ()->color_rep ();
2624 if (rep->has_color)
2625 continue;
2626
2627 create_color (rep);
2628 auto heads = rep->chain_heads ();
2629 for (unsigned int i = 0; i < heads.size (); ++i)
2630 {
2631 unsigned int ai = heads[i];
2632 while (ai != INVALID_ALLOCNO)
2633 {
2634 allocno = m_allocnos[ai];
2635 set_single_color_rep (allocno, rep, i * rep->stride);
2636 ai = allocno->chain_next;
2637 }
2638 }
2639 }
2640 }
2641
2642 // Return true if the given FPR (starting at 0) conflicts with allocno
2643 // ALLOCNO.
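//
// Note the equivalence case handled below: a range whose allocno
// field names ALLOCNO itself was recorded as an equivalence (see
// record_copy), so it only counts as a conflict if one of the
// neighbouring ranges also overlaps ALLOCNO.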
2644 bool
2645 early_ra::fpr_conflicts_with_allocno_p (unsigned int fpr,
2646 allocno_info *allocno)
2647 {
2648 auto &ranges = m_fpr_ranges[fpr];
2649 unsigned int start_i = 0;
2650 unsigned int end_i = ranges.length ();
2651 while (start_i < end_i)
2652 {
2653 unsigned int mid_i = (start_i + end_i) / 2;
2654 auto &range = ranges[mid_i];
2655 if (allocno->end_point > range.start_point)
2656 start_i = mid_i + 1;
2657 else if (allocno->start_point < range.end_point)
2658 end_i = mid_i;
2659 else
2660 {
2661 if (range.allocno != allocno->id)
2662 return true;
2663 // The FPR is equivalent to ALLOCNO for this particular range.
2664 // See whether ALLOCNO conflicts with a neighboring range.
2665 if (mid_i > 0
2666 && ranges[mid_i - 1].start_point >= allocno->end_point)
2667 return true;
2668 if (mid_i + 1 < ranges.length ()
2669 && ranges[mid_i + 1].end_point <= allocno->start_point)
2670 return true;
2671 return false;
2672 }
2673 }
2674 return false;
2675 }
2676
2677 // Return true if there is a call with ABI identifier ABI_ID in the inclusive
2678 // program point range [START_POINT, END_POINT].
2679 bool
2680 early_ra::call_in_range_p (unsigned int abi_id, unsigned int start_point,
2681 unsigned int end_point)
2682 {
2683 auto &points = m_call_points[abi_id];
2684 unsigned int start_i = 0;
2685 unsigned int end_i = points.length ();
2686 while (start_i < end_i)
2687 {
2688 unsigned int mid_i = (start_i + end_i) / 2;
2689 auto point = points[mid_i];
2690 if (end_point > point)
2691 start_i = mid_i + 1;
2692 else if (start_point < point)
2693 end_i = mid_i;
2694 else
2695 return true;
2696 }
2697 return false;
2698 }
2699
2700 // Return the set of FPRs for which a value of size SIZE will be clobbered
2701 // by a call to a function with ABI identifier ABI_ID, but would not be
2702 // for some smaller size. The set therefore excludes FPRs that are
2703 // fully-clobbered, like V0 in the base ABI.
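//
// For example, under the base PCS only the low 64 bits of v8-v15 are
// preserved across calls, so for a 128-bit value (FPR_Q) the result
// has bits 8-15 set (0xff00); fully-clobbered registers such as
// v0-v7 are excluded, as noted above.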
2704 unsigned int
2705 early_ra::partial_fpr_clobbers (unsigned int abi_id, fpr_size_info size)
2706 {
2707 auto &abi = function_abis[abi_id];
2708 unsigned int clobbers = 0;
2709 machine_mode mode = (size == FPR_D ? V8QImode
2710 : size == FPR_Q ? V16QImode : VNx16QImode);
2711 for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
2712 if (!abi.clobbers_full_reg_p (regno)
2713 && abi.clobbers_reg_p (mode, regno))
2714 clobbers |= 1U << (regno - V0_REGNUM);
2715 return clobbers;
2716 }
2717
2718 // Process copies between pseudo registers and hard registers and update
2719 // the FPR preferences for the associated colors.
2720 void
2721 early_ra::process_copies ()
2722 {
2723 for (auto &copy : m_allocno_copies)
2724 {
2725 auto *allocno = m_allocnos[copy.allocno];
2726 auto *group = allocno->group ();
2727 auto offset = group->color_rep_offset + allocno->offset;
2728 if (offset > copy.fpr)
2729 continue;
2730
2731 unsigned int fpr = copy.fpr - offset;
2732 auto *color = m_colors[group->color_rep ()->color];
2733 color->fpr_preferences[fpr] = MIN (color->fpr_preferences[fpr]
2734 + copy.weight, 127);
2735 color->num_fpr_preferences += copy.weight;
2736 }
2737 }
2738
2739 // Compare the colors at *COLOR1_PTR and *COLOR2_PTR and return a <=>
2740 // result that puts colors in allocation order.
2741 int
2742 early_ra::cmp_allocation_order (const void *color1_ptr, const void *color2_ptr)
2743 {
2744 auto *color1 = *(color_info *const *) color1_ptr;
2745 auto *color2 = *(color_info *const *) color2_ptr;
2746
2747 // Allocate bigger groups before smaller groups.
2748 if (color1->group->size != color2->group->size)
2749 return color1->group->size > color2->group->size ? -1 : 1;
2750
2751 // Allocate groups with stronger FPR preferences before groups with weaker
2752 // FPR preferences.
2753 if (color1->num_fpr_preferences != color2->num_fpr_preferences)
2754 return color1->num_fpr_preferences > color2->num_fpr_preferences ? -1 : 1;
2755
2756 return (color1->id < color2->id ? -1
2757 : color1->id == color2->id ? 0 : 1);
2758 }
2759
2760 // Allocate a register to each color. If we run out of registers,
2761 // give up on doing a full allocation of the FPR-based pseudos in the
2762 // region.
2763 void
2764 early_ra::allocate_colors ()
2765 {
2766 if (dump_file && (dump_flags & TDF_DETAILS))
2767 fprintf (dump_file, "\nAllocating registers:\n");
2768
2769 auto_vec<color_info *> sorted_colors;
2770 sorted_colors.safe_splice (m_colors);
2771 sorted_colors.qsort (cmp_allocation_order);
2772
2773 for (unsigned int i = 0; i < 32; ++i)
2774 if (!crtl->abi->clobbers_full_reg_p (V0_REGNUM + i))
2775 m_call_preserved_fprs |= 1U << i;
2776
2777 for (auto *color : sorted_colors)
2778 {
2779 unsigned int candidates = color->group->fpr_candidates;
2780 for (unsigned int i = 0; i < color->group->size; ++i)
2781 candidates &= ~(m_allocated_fprs >> i);
2782 unsigned int best = INVALID_REGNUM;
2783 int best_weight = 0;
2784 for (unsigned int fpr = 0; fpr <= 32U - color->group->size; ++fpr)
2785 {
2786 if ((candidates & (1U << fpr)) == 0)
2787 continue;
2788 int weight = color->fpr_preferences[fpr];
2789 // Account for registers that the current function must preserve.
2790 for (unsigned int i = 0; i < color->group->size; ++i)
2791 if (m_call_preserved_fprs & (1U << (fpr + i)))
2792 weight -= 1;
2793 if (best == INVALID_REGNUM || best_weight <= weight)
2794 {
2795 best = fpr;
2796 best_weight = weight;
2797 }
2798 }
2799
2800 if (best == INVALID_REGNUM)
2801 {
2802 m_allocation_successful = false;
2803 return;
2804 }
2805
2806 color->hard_regno = best + V0_REGNUM;
2807 if (dump_file && (dump_flags & TDF_DETAILS))
2808 fprintf (dump_file, " Allocating [v%d:v%d] to color %d\n",
2809 best, best + color->group->size - 1, color->id);
2810 m_allocated_fprs |= ((1U << color->group->size) - 1) << best;
2811 }
2812 }
2813
2814 // See if ALLOCNO ends a subchain of single registers that can be split
2815 // off without affecting the rest of the chain, and without introducing
2816 // any moves. Return the start of the chain if so (which might be ALLOCNO
2817 // itself), otherwise return null.
2818 early_ra::allocno_info *
2819 early_ra::find_independent_subchain (allocno_info *allocno)
2820 {
2821 // Make sure ALLOCNO ends a natural subchain.
2822 if (auto *next_allocno = chain_next (allocno))
2823 if (next_allocno->start_point + 1 >= allocno->end_point)
2824 return nullptr;
2825
2826 // Check the allocnos in the purported subchain and find the other end.
2827 for (;;)
2828 {
2829 auto *group = allocno->group ();
2830 if (group->m_color_rep == group)
2831 return nullptr;
2832 if (group->size != 1)
2833 return nullptr;
2834
2835 auto *prev_allocno = chain_prev (allocno);
2836 if (!prev_allocno || allocno->start_point + 1 < prev_allocno->end_point)
2837 return allocno;
2838
2839 allocno = prev_allocno;
2840 }
2841 }
2842
2843 // Search the colors starting at index FIRST_COLOR whose FPRs do not belong
2844 // to FPR_CONFLICTS. Return the first such color that has no group. If all
2845 // such colors have groups, instead return the color with the latest
2846 // (smallest) start point.
2847 early_ra::color_info *
2848 early_ra::find_oldest_color (unsigned int first_color,
2849 unsigned int fpr_conflicts)
2850 {
2851 color_info *best = nullptr;
2852 unsigned int best_start_point = ~0U;
2853 for (unsigned int ci = first_color; ci < m_colors.length (); ++ci)
2854 {
2855 auto *color = m_colors[ci];
2856 if (fpr_conflicts & (1U << (color->hard_regno - V0_REGNUM)))
2857 continue;
2858 if (!color->group)
2859 return color;
2860 auto chain_head = color->group->chain_heads ()[0];
2861 auto start_point = m_allocnos[chain_head]->start_point;
2862 if (!best || best_start_point > start_point)
2863 {
2864 best = color;
2865 best_start_point = start_point;
2866 }
2867 }
2868 return best;
2869 }
2870
2871 // If there are some spare FPRs that can be reused without introducing saves,
2872 // restores, or moves, use them to "broaden" the allocation, in order to give
2873 // the scheduler more freedom. This is particularly useful for forming LDPs
2874 // and STPs.
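//
// A sketch of the effect: if two short, independent single-register
// chains were both given v0, accesses to them must stay serialized;
// if some register (v5, say) is free for the whole region at no
// save/restore cost, moving one chain there gives the scheduler room
// to reorder and pair neighbouring loads and stores.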
2875 void
2876 early_ra::broaden_colors ()
2877 {
2878 // Create dummy colors for every leftover FPR that can be used cheaply.
2879 unsigned int first_color = m_colors.length ();
2880 for (unsigned int fpr = 0; fpr < 32; ++fpr)
2881 if (((m_allocated_fprs | m_call_preserved_fprs) & (1U << fpr)) == 0)
2882 {
2883 auto *color = region_allocate<color_info> ();
2884 color->id = m_colors.length ();
2885 color->hard_regno = V0_REGNUM + fpr;
2886 color->group = nullptr;
2887 m_colors.safe_push (color);
2888 }
2889
2890 // Exit early if there are no spare FPRs.
2891 if (first_color == m_colors.length ())
2892 return;
2893
2894 // Go through the allocnos in order, seeing if there is a subchain of
2895 // single-FPR allocnos that can be split off from the containing clique.
2896 // Allocate such subchains to the new colors on an oldest-first basis.
2897 for (auto *allocno : m_sorted_allocnos)
2898 if (auto *start_allocno = find_independent_subchain (allocno))
2899 {
2900 unsigned int fpr_conflicts = 0;
2901 auto *member = allocno;
2902 for (;;)
2903 {
2904 fpr_conflicts |= ~member->group ()->fpr_candidates;
2905 if (member == start_allocno)
2906 break;
2907 member = m_allocnos[member->chain_prev];
2908 }
2909
2910 auto *color = find_oldest_color (first_color, fpr_conflicts);
2911 if (!color)
2912 continue;
2913
2914 if (!color->group)
2915 {
2916 auto *group = allocno->group ();
2917 color->group = group;
2918 group->color = color->id;
2919 group->chain_heads ()[0] = INVALID_ALLOCNO;
2920 }
2921 else
2922 {
2923 auto chain_head = color->group->chain_heads ()[0];
2924 auto start_point = m_allocnos[chain_head]->start_point;
2925 if (start_point >= allocno->end_point)
2926 // Allocating to COLOR isn't viable, and it was the best
2927 // option available.
2928 continue;
2929
2930 auto *next_allocno = chain_next (allocno);
2931 if (!next_allocno || next_allocno->start_point <= start_point)
2932 // The current allocation gives at least as much scheduling
2933 // freedom as COLOR would.
2934 continue;
2935 }
2936
2937 // Unlink the chain.
2938 if (auto *next_allocno = chain_next (allocno))
2939 next_allocno->chain_prev = start_allocno->chain_prev;
2940 if (auto *prev_allocno = chain_prev (start_allocno))
2941 prev_allocno->chain_next = allocno->chain_next;
2942
2943 // Make the subchain use COLOR.
2944 allocno->chain_next = color->group->chain_heads ()[0];
2945 if (dump_file && (dump_flags & TDF_DETAILS))
2946 fprintf (dump_file, "Moving to optional color %d (register %s):",
2947 color->id, reg_names[color->hard_regno]);
2948 for (;;)
2949 {
2950 auto *group = allocno->group ();
2951 if (dump_file && (dump_flags & TDF_DETAILS))
2952 fprintf (dump_file, " r%d", group->regno);
2953 group->m_color_rep = color->group;
2954 group->color_rep_offset = 0;
2955 if (allocno == start_allocno)
2956 break;
2957 allocno = m_allocnos[allocno->chain_prev];
2958 }
2959 if (dump_file && (dump_flags & TDF_DETAILS))
2960 fprintf (dump_file, "\n");
2961 color->group->chain_heads ()[0] = start_allocno->id;
2962 }
2963 }
2964
2965 // Record the final choice of hard register for each allocno.
2966 void
2967 early_ra::finalize_allocation ()
2968 {
2969 for (auto *allocno : m_allocnos)
2970 {
2971 if (allocno->is_shared ())
2972 continue;
2973 auto *group = allocno->group ();
2974 auto *rep = group->color_rep ();
2975 auto rep_regno = m_colors[rep->color]->hard_regno;
2976 auto group_regno = rep_regno + group->color_rep_offset;
2977 allocno->hard_regno = group_regno + allocno->offset * group->stride;
2978 }
2979 for (auto *allocno : m_shared_allocnos)
2980 allocno->hard_regno = m_allocnos[allocno->related_allocno]->hard_regno;
2981 }
2982
2983 // Replace any allocno references in REFS with the allocated register.
2984 bool
2985 early_ra::replace_regs (df_ref refs)
2986 {
2987 bool changed = false;
2988 for (df_ref ref = refs; ref; ref = DF_REF_NEXT_LOC (ref))
2989 {
2990 auto range = get_allocno_subgroup (DF_REF_REG (ref));
2991 if (!range)
2992 continue;
2993
2994 auto new_regno = range.allocno (0)->hard_regno;
2995 *DF_REF_LOC (ref) = gen_rtx_REG (GET_MODE (DF_REF_REG (ref)), new_regno);
2996 changed = true;
2997 }
2998 return changed;
2999 }
3000
3001 // Try to make INSN match its FPR-related constraints. If this needs
3002 // a source operand (SRC) to be copied to a destination operand (DEST)
3003 // before INSN, add the associated (DEST, SRC) pairs to MOVES.
3004 //
3005 // Return -1 on failure, otherwise return a ?/!-style reject count.
3006 // The reject count doesn't model the moves, just the internal alternative
3007 // preferences.
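//
// As a hedged example of the weighting used below, assuming recog's
// usual reject value of 6 per '?': an alternative that needs one copy
// costs 7, so it loses to a copy-free alternative marked "?" (cost 6)
// but beats one marked "??" (cost 12).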
3008 int
3009 early_ra::try_enforce_constraints (rtx_insn *insn,
3010 vec<std::pair<int, int>> &moves)
3011 {
3012 if (!constrain_operands (0, get_preferred_alternatives (insn)))
3013 return -1;
3014
3015 // Pick the alternative with the lowest cost.
3016 int best = -1;
3017 auto alts = get_preferred_alternatives (insn);
3018 for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
3019 {
3020 if (!(alts & ALTERNATIVE_BIT (altno)))
3021 continue;
3022
3023 auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
3024 if (!likely_alternative_match_p (op_alt))
3025 continue;
3026
3027 auto_vec<std::pair<int, int>, 4> new_moves;
3028 for (int opno = 0; opno < recog_data.n_operands; ++opno)
3029 {
3030 rtx op = recog_data.operand[opno];
3031 if (REG_P (op)
3032 && FP_REGNUM_P (REGNO (op))
3033 && op_alt[opno].matched >= 0)
3034 {
3035 rtx old_src = recog_data.operand[op_alt[opno].matched];
3036 if (!operands_match_p (op, old_src))
3037 {
3038 for (int i = 0; i < recog_data.n_operands; ++i)
3039 if (i != opno)
3040 {
3041 rtx other = recog_data.operand[i];
3042 if (reg_overlap_mentioned_p (op, other))
3043 {
3044 old_src = NULL_RTX;
3045 break;
3046 }
3047 }
3048 if (!old_src)
3049 continue;
3050 new_moves.safe_push ({ opno, op_alt[opno].matched });
3051 }
3052 }
3053 }
3054 int cost = count_rejects (op_alt) + new_moves.length () * 7;
3055 if (best < 0 || cost < best)
3056 {
3057 best = cost;
3058 moves.truncate (0);
3059 moves.safe_splice (new_moves);
3060 }
3061 }
3062 return best;
3063 }
3064
3065 // Make INSN match its FPR-related constraints.
3066 void
3067 early_ra::enforce_constraints (rtx_insn *insn)
3068 {
3069 extract_insn (insn);
3070 preprocess_constraints (insn);
3071
3072 // First try with the operands as they are.
3073 auto_vec<std::pair<int, int>, 4> moves;
3074 int cost = try_enforce_constraints (insn, moves);
3075
3076 // Next try taking advantage of commutativity.
3077 for (int opno = 0; opno < recog_data.n_operands - 1; ++opno)
3078 if (recog_data.constraints[opno][0] == '%')
3079 {
3080 std::swap (*recog_data.operand_loc[opno],
3081 *recog_data.operand_loc[opno + 1]);
3082 std::swap (recog_data.operand[opno],
3083 recog_data.operand[opno + 1]);
3084 auto_vec<std::pair<int, int>, 4> swapped_moves;
3085 int swapped_cost = try_enforce_constraints (insn, swapped_moves);
3086 if (swapped_cost >= 0 && (cost < 0 || swapped_cost < cost))
3087 {
3088 cost = swapped_cost;
3089 moves.truncate (0);
3090 moves.safe_splice (swapped_moves);
3091 }
3092 else
3093 {
3094 std::swap (*recog_data.operand_loc[opno],
3095 *recog_data.operand_loc[opno + 1]);
3096 std::swap (recog_data.operand[opno],
3097 recog_data.operand[opno + 1]);
3098 }
3099 }
3100
3101 // The allocation should ensure that there is at least one valid combination.
3102 // It's too late to back out now if not.
3103 gcc_assert (cost >= 0);
3104 for (int i = 0; i < recog_data.n_dups; ++i)
3105 {
3106 int dup_of = recog_data.dup_num[i];
3107 rtx new_op = *recog_data.operand_loc[dup_of];
3108 if (new_op != recog_data.operand[dup_of])
3109 *recog_data.dup_loc[i] = copy_rtx (new_op);
3110 }
3111 for (auto move : moves)
3112 {
3113 int dest_opno = move.first;
3114 int src_opno = move.second;
3115 rtx dest = recog_data.operand[dest_opno];
3116 rtx old_src = recog_data.operand[src_opno];
3117 rtx new_src = lowpart_subreg (GET_MODE (old_src), dest, GET_MODE (dest));
3118 emit_insn_before (gen_move_insn (new_src, old_src), insn);
3119 *recog_data.operand_loc[src_opno] = new_src;
3120 }
3121 }
3122
3123 // See whether INSN is an instruction that operates on multi-register vectors,
3124 // and if we have decided to make it use strided rather than consecutive
3125 // accesses. Update the pattern and return true if so.
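//
// For example (illustrative SME2 assembly): a consecutive load such as
//   ld1w { z0.s - z3.s }, pn8/z, [x0]
// can be rewritten as the strided form
//   ld1w { z0.s, z4.s, z8.s, z12.s }, pn8/z, [x0]
// if the destination group was given a stride of 4 FPRs.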
3126 bool
3127 early_ra::maybe_convert_to_strided_access (rtx_insn *insn)
3128 {
3129 if (!NONJUMP_INSN_P (insn) || recog_memoized (insn) < 0)
3130 return false;
3131
3132 auto stride_type = get_attr_stride_type (insn);
3133 rtx pat = PATTERN (insn);
3134 rtx op;
3135 if (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE
3136 || stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
3137 op = SET_DEST (pat);
3138 else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
3139 op = XVECEXP (SET_SRC (pat), 0, 1);
3140 else
3141 return false;
3142
3143 auto range = get_allocno_subgroup (op);
3144 if (!range || range.group->stride == 1)
3145 return false;
3146
3147 gcc_assert (range.start == 0 && range.count == range.group->size);
3148 auto elt_mode = GET_MODE_INNER (GET_MODE (op));
3149 auto single_mode = aarch64_full_sve_mode (elt_mode).require ();
3150 auto_vec<rtx, 4> regs;
3151 for (unsigned int i = 0; i < range.count; ++i)
3152 regs.quick_push (gen_rtx_REG (single_mode, range.allocno (i)->hard_regno));
3153
3154 extract_insn (insn);
3155 if (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
3156 {
3157 auto unspec = XINT (SET_SRC (pat), 1);
3158 if (range.count == 2)
3159 pat = gen_aarch64_strided2 (unspec, GET_MODE (op), regs[0], regs[1],
3160 recog_data.operand[1],
3161 recog_data.operand[2]);
3162 else
3163 pat = gen_aarch64_strided4 (unspec, GET_MODE (op),
3164 regs[0], regs[1], regs[2], regs[3],
3165 recog_data.operand[1],
3166 recog_data.operand[2]);
3167 }
3168 else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
3169 {
3170 auto unspec = XINT (SET_SRC (pat), 1);
3171 if (range.count == 2)
3172 pat = gen_aarch64_strided2 (unspec, GET_MODE (op),
3173 recog_data.operand[0],
3174 recog_data.operand[2], regs[0], regs[1]);
3175 else
3176 pat = gen_aarch64_strided4 (unspec, GET_MODE (op),
3177 recog_data.operand[0],
3178 recog_data.operand[2],
3179 regs[0], regs[1], regs[2], regs[3]);
3180 // Ensure correct sharing for the source memory.
3181 //
3182 // ??? Why doesn't the generator get this right?
3183 XVECEXP (SET_SRC (pat), 0, XVECLEN (SET_SRC (pat), 0) - 1)
3184 = *recog_data.dup_loc[0];
3185 }
3186 else if (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE)
3187 {
3188 auto bits = INTVAL (XVECEXP (SET_SRC (pat), 0, 4));
3189 if (range.count == 2)
3190 pat = gen_aarch64_sme_lut_strided2 (bits, single_mode,
3191 regs[0], regs[1],
3192 recog_data.operand[1],
3193 recog_data.operand[2]);
3194 else
3195 pat = gen_aarch64_sme_lut_strided4 (bits, single_mode,
3196 regs[0], regs[1], regs[2], regs[3],
3197 recog_data.operand[1],
3198 recog_data.operand[2]);
3199 }
3200 else
3201 gcc_unreachable ();
3202 PATTERN (insn) = pat;
3203 INSN_CODE (insn) = -1;
3204 df_insn_rescan (insn);
3205 return true;
3206 }
3207
3208 // We've successfully allocated the current region. Apply the allocation
3209 // to the instructions.
3210 void
3211 early_ra::apply_allocation ()
3212 {
3213 for (auto *insn : m_dead_insns)
3214 set_insn_deleted (insn);
3215
3216 rtx_insn *prev;
3217 for (auto insn_range : m_insn_ranges)
3218 for (rtx_insn *insn = insn_range.first;
3219 insn != insn_range.second;
3220 insn = prev)
3221 {
3222 prev = PREV_INSN (insn);
3223 if (!INSN_P (insn))
3224 continue;
3225
3226 bool changed = maybe_convert_to_strided_access (insn);
3227 changed |= replace_regs (DF_INSN_DEFS (insn));
3228 changed |= replace_regs (DF_INSN_USES (insn));
3229 if (changed && NONDEBUG_INSN_P (insn))
3230 {
3231 if (GET_CODE (PATTERN (insn)) != USE
3232 && GET_CODE (PATTERN (insn)) != CLOBBER
3233 && !is_move_set (PATTERN (insn)))
3234 enforce_constraints (insn);
3235
3236 // A REG_EQUIV note establishes an equivalence throughout
3237 // the function, but here we're reusing hard registers for
3238 // multiple pseudo registers. We also no longer need REG_EQUIV
3239 // notes that record potential spill locations, since we've
3240 // allocated the pseudo register without spilling.
3241 rtx *ptr = &REG_NOTES (insn);
3242 while (*ptr)
3243 if (REG_NOTE_KIND (*ptr) == REG_EQUIV)
3244 *ptr = XEXP (*ptr, 1);
3245 else
3246 ptr = &XEXP (*ptr, 1);
3247 }
3248 changed |= replace_regs (DF_INSN_EQ_USES (insn));
3249 if (changed)
3250 df_insn_rescan (insn);
3251 }
3252
3253 for (auto *insn : m_dead_insns)
3254 delete_insn (insn);
3255 }
3256
3257 // Try to allocate the current region. Update the instructions if successful.
3258 void
3259 early_ra::process_region ()
3260 {
3261 for (auto *allocno : m_allocnos)
3262 {
3263 allocno->chain_next = INVALID_ALLOCNO;
3264 allocno->chain_prev = INVALID_ALLOCNO;
3265 }
3266
3267 if (dump_file && (dump_flags & TDF_DETAILS))
3268 {
3269 dump_fpr_ranges ();
3270 dump_copies ();
3271 dump_allocnos ();
3272 }
3273
3274 find_strided_accesses ();
3275
3276 if (dump_file && (dump_flags & TDF_DETAILS))
3277 dump_allocnos ();
3278
3279 form_chains ();
3280
3281 if (dump_file && (dump_flags & TDF_DETAILS))
3282 dump_allocnos ();
3283
3284 process_copies ();
3285
3286 if (dump_file && (dump_flags & TDF_DETAILS))
3287 dump_colors ();
3288
3289 allocate_colors ();
3290 if (!m_allocation_successful)
3291 return;
3292
3293 broaden_colors ();
3294 finalize_allocation ();
3295
3296 if (dump_file && (dump_flags & TDF_DETAILS))
3297 {
3298 fprintf (dump_file, "\nAllocation successful\nFinal allocation:\n");
3299 dump_allocnos ();
3300 dump_colors ();
3301 }
3302
3303 apply_allocation ();
3304 }

// Return true if INSN would become dead if we successfully allocate the
// current region.
bool
early_ra::is_dead_insn (rtx_insn *insn)
{
  rtx set = single_set (insn);
  if (!set)
    return false;

  rtx dest = SET_DEST (set);
  auto dest_range = get_allocno_subgroup (dest);
  if (!dest_range)
    return false;

  for (auto &allocno : dest_range.allocnos ())
    if (bitmap_bit_p (m_live_allocnos, allocno.id))
      return false;

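  // The destination itself is dead, but the instruction might still be
  // needed for other side effects.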
  if (side_effects_p (set))
    return false;

  return true;
}

// Build up information about block BB.  IS_ISOLATED is true if the
// block is not part of a larger region.
void
early_ra::process_block (basic_block bb, bool is_isolated)
{
  m_current_bb = bb;
  m_current_point += 1;
  m_current_bb_point = m_current_point;

  // Process live-out FPRs.
  bitmap live_out = df_get_live_out (bb);
  for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
    if (bitmap_bit_p (live_out, regno))
      record_fpr_use (regno);

  // Process live-out allocnos.  We don't track individual FPR liveness
  // across block boundaries, so we have to assume that the whole pseudo
  // register is live.
  bitmap_iterator bi;
  unsigned int regno;
  EXECUTE_IF_AND_IN_BITMAP (df_get_live_out (bb), m_fpr_pseudos,
                            FIRST_PSEUDO_REGISTER, regno, bi)
    {
      auto range = get_allocno_subgroup (regno_reg_rtx[regno]);
      for (auto &allocno : range.allocnos ())
        record_allocno_use (&allocno);
    }

  m_current_point += 1;

  record_artificial_refs (0);

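  // Walk the instructions in reverse order, so that each use of a value
  // is seen before its definition.  This matches the reverse allocation
  // of program points.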
  bool is_first = true;
  rtx_insn *start_insn = BB_END (bb);
  rtx_insn *insn;
  FOR_BB_INSNS_REVERSE (bb, insn)
    {
      if (!NONDEBUG_INSN_P (insn))
        continue;

      // CLOBBERs are used to prevent pseudos from being upwards exposed.
      // We can ignore them if allocation is successful.
      if (GET_CODE (PATTERN (insn)) == CLOBBER)
        {
          if (get_allocno_subgroup (XEXP (PATTERN (insn), 0)))
            m_dead_insns.safe_push (insn);
          continue;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (is_first)
            fprintf (dump_file, "\nBlock %d:\n", bb->index);
          fprintf (dump_file, "%6d:", m_current_point);
          pretty_printer rtl_slim_pp;
          rtl_slim_pp.buffer->stream = dump_file;
          print_insn (&rtl_slim_pp, insn, 1);
          pp_flush (&rtl_slim_pp);
          fprintf (dump_file, "\n");
        }
      is_first = false;

      if (is_dead_insn (insn))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "%14s -- dead\n", "");
          m_dead_insns.safe_push (insn);
        }
      else
        {
          record_insn_refs (insn);
          rtx pat = PATTERN (insn);
          if (is_move_set (pat))
            record_copy (SET_DEST (pat), SET_SRC (pat), true);
          else
            {
              extract_insn (insn);
              record_constraints (insn);
            }
        }

      // See whether we have a complete region, with no remaining live
      // allocnos.
      if (is_isolated
          && bitmap_empty_p (m_live_allocnos)
          && m_live_fprs == 0
          && m_allocation_successful
          && !m_allocnos.is_empty ())
        {
          rtx_insn *prev_insn = PREV_INSN (insn);
          m_insn_ranges.safe_push ({ start_insn, prev_insn });
          process_region ();
          start_new_region ();
          is_first = true;
          start_insn = prev_insn;
        }
    }
  m_insn_ranges.safe_push ({ start_insn, BB_HEAD (bb) });

  record_artificial_refs (DF_REF_AT_TOP);

  // Process live-in FPRs.
  bitmap live_in = df_get_live_in (bb);
  for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
    if (bitmap_bit_p (live_in, regno)
        && (m_live_fprs & (1U << (regno - V0_REGNUM))))
      record_fpr_def (regno);

  // Process live-in allocnos.
  EXECUTE_IF_AND_IN_BITMAP (live_in, m_fpr_pseudos,
                            FIRST_PSEUDO_REGISTER, regno, bi)
    {
      auto range = get_allocno_subgroup (regno_reg_rtx[regno]);
      for (auto &allocno : range.allocnos ())
        if (bitmap_bit_p (m_live_allocnos, allocno.id))
          record_allocno_def (&allocno);
    }

  m_current_point += 1;

  bitmap_clear (m_live_allocnos);
  m_live_fprs = 0;
}

// Divide the function into regions, such that no edges into or out of a
// region have live "FPR pseudos".
void
early_ra::process_blocks ()
{
  auto_sbitmap visited (last_basic_block_for_fn (m_fn));
  auto_sbitmap fpr_pseudos_live_out (last_basic_block_for_fn (m_fn));
  auto_sbitmap fpr_pseudos_live_in (last_basic_block_for_fn (m_fn));

  bitmap_clear (visited);
  bitmap_clear (fpr_pseudos_live_out);
  bitmap_clear (fpr_pseudos_live_in);

  // Record which blocks have live FPR pseudos on entry and exit.
  basic_block bb;
  FOR_EACH_BB_FN (bb, m_fn)
    {
      if (bitmap_intersect_p (df_get_live_out (bb), m_fpr_pseudos))
        bitmap_set_bit (fpr_pseudos_live_out, bb->index);
      if (bitmap_intersect_p (df_get_live_in (bb), m_fpr_pseudos))
        bitmap_set_bit (fpr_pseudos_live_in, bb->index);
    }

  struct stack_node { edge_iterator ei; basic_block bb; };
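  // A node with a nonnull BB iterates over BB's successors and adds BB
  // to the region once the iteration is complete.  A node with a null BB
  // iterates over the predecessors of a block that has already been
  // handled.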

  auto_vec<stack_node, 32> stack;
  auto_vec<basic_block, 32> region;

  // Go through the function in reverse postorder and process the region
  // containing each block.
  unsigned int n_blocks = df_get_n_blocks (DF_FORWARD);
  int *order = df_get_postorder (DF_FORWARD);
  for (unsigned int bbi = 0; bbi < n_blocks; ++bbi)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (m_fn, order[bbi]);
      if (bb->index < NUM_FIXED_BLOCKS)
        continue;

      if (!bitmap_set_bit (visited, bb->index))
        continue;

      // Process forward edges before backward edges (so push backward
      // edges first).  Build the region in an approximation of reverse
      // program order.
      if (bitmap_bit_p (fpr_pseudos_live_in, bb->index))
        stack.quick_push ({ ei_start (bb->preds), nullptr });
      if (bitmap_bit_p (fpr_pseudos_live_out, bb->index))
        stack.quick_push ({ ei_start (bb->succs), bb });
      else
        region.safe_push (bb);
      while (!stack.is_empty ())
        {
          auto &node = stack.last ();
          if (ei_end_p (node.ei))
            {
              if (node.bb)
                region.safe_push (node.bb);
              stack.pop ();
              continue;
            }
          edge e = ei_edge (node.ei);
          if (node.bb)
            {
              // A forward edge from a node that has not yet been added
              // to the region.
              if (bitmap_bit_p (fpr_pseudos_live_in, e->dest->index)
                  && bitmap_set_bit (visited, e->dest->index))
                {
                  stack.safe_push ({ ei_start (e->dest->preds), nullptr });
                  if (bitmap_bit_p (fpr_pseudos_live_out, e->dest->index))
                    stack.safe_push ({ ei_start (e->dest->succs), e->dest });
                  else
                    region.safe_push (e->dest);
                }
              else
                ei_next (&node.ei);
            }
          else
            {
              // A backward edge from a node that has already been added
              // to the region.
              if (bitmap_bit_p (fpr_pseudos_live_out, e->src->index)
                  && bitmap_set_bit (visited, e->src->index))
                {
                  if (bitmap_bit_p (fpr_pseudos_live_in, e->src->index))
                    stack.safe_push ({ ei_start (e->src->preds), nullptr });
                  stack.safe_push ({ ei_start (e->src->succs), e->src });
                }
              else
                ei_next (&node.ei);
            }
        }

      m_current_point = 2;
      start_new_region ();

      if (region.is_empty ())
        process_block (bb, true);
      else
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "\nRegion (from %d):", bb->index);
              for (unsigned int j = 0; j < region.length (); ++j)
                fprintf (dump_file, " %d", region[j]->index);
              fprintf (dump_file, "\n");
            }
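          // REGION lists the blocks in an approximation of reverse
          // program order, so only the blocks at its two ends can be
          // isolated, and then only if no FPR pseudos are live across
          // the boundary at that end.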
          for (unsigned int j = 0; j < region.length (); ++j)
            {
              basic_block bb = region[j];
              bool is_isolated
                = ((j == 0
                    && !bitmap_bit_p (fpr_pseudos_live_out, bb->index))
                   || (j == region.length () - 1
                       && !bitmap_bit_p (fpr_pseudos_live_in, bb->index)));
              process_block (bb, is_isolated);
            }
        }
      region.truncate (0);

      if (!m_allocnos.is_empty () && m_allocation_successful)
        process_region ();
    }
}

// Run the pass on the current function.
void
early_ra::execute ()
{
  df_analyze ();

  preprocess_insns ();
  propagate_pseudo_reg_info ();
  choose_fpr_pseudos ();
  if (bitmap_empty_p (m_fpr_pseudos))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_pseudo_regs ();

  process_blocks ();
  df_verify ();
}

class pass_early_ra : public rtl_opt_pass
{
public:
  pass_early_ra (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_early_ra, ctxt)
  {}

  // opt_pass methods:
  virtual bool gate (function *);
  virtual unsigned int execute (function *);
};

bool
pass_early_ra::gate (function *)
{
  // Require a vector ISA to be enabled.
  if (!TARGET_SIMD && !TARGET_SVE)
    return false;

  if (aarch64_early_ra == AARCH64_EARLY_RA_NONE)
    return false;

  if (aarch64_early_ra == AARCH64_EARLY_RA_STRIDED
      && !TARGET_STREAMING_SME2)
    return false;

  return true;
}

unsigned int
pass_early_ra::execute (function *fn)
{
  early_ra (fn).execute ();
  return 0;
}

} // end namespace

// Create a new instance of the pass.
rtl_opt_pass *
make_pass_aarch64_early_ra (gcc::context *ctxt)
{
  return new pass_early_ra (ctxt);
}