/* Tag Collision Avoidance pass for Falkor.
   Copyright (C) 2018-2024 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_LIST
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "tree-pass.h"
#include "aarch64-protos.h"
#include "hash-map.h"
#include "cfgloop.h"
#include "cfgrtl.h"
#include "rtl-iter.h"
#include "df.h"
#include "memmodel.h"
#include "optabs.h"
#include "regs.h"
#include "recog.h"
#include "function-abi.h"
#include "regrename.h"
#include "print-rtl.h"

/* The Falkor hardware prefetching system uses the encoding of the registers
   and offsets of loads to decide which of the multiple hardware prefetchers
   to assign the load to.  This has the positive effect of accelerating
   prefetches when all related loads with uniform strides are assigned to the
   same prefetcher unit.  The downside is that because of the way the
   assignment works, multiple unrelated loads may end up on the same prefetch
   unit, thus causing the unit to bounce between different sets of addresses
   and never train correctly.  The point of this pass is to avoid such
   collisions so that unrelated loads are spread out to different prefetchers.
   It also makes a rudimentary attempt to ensure that related loads with the
   same tags don't get moved out unnecessarily.

   Perhaps a future enhancement would be to make a more concerted attempt to
   get related loads under the same tag.  See the memcpy/memset implementation
   for Falkor in glibc to understand the kind of impact this can have on
   Falkor.

   The assignment of loads is based on a tag that is computed from the
   encoding of the first destination register (the only destination in case of
   LDR), the base register and the offset (either the register or the
   immediate value, as encoded in the instruction).  This is what the 14-bit
   tag looks like:

	|<- 6 bits ->|<- 4b ->|<- 4b ->|
	--------------------------------
	|   OFFSET   |  SRC   |  DST   |
	--------------------------------

   For all cases, the SRC and DST are the 4 LSB of the encoding of the
   register in the instruction.  Offset computation is more involved and is as
   follows:

   - For register offset addressing: 4 LSB of the offset register with the MSB
     of the 6 bits set to 1.

   - For immediate offset: 4 LSB of the encoded immediate offset.  The
     encoding depends on the width of the load and is expressed as multiples
     of the width.

   - For loads with update: 4 LSB of the offset.  The encoding here is the
     exact number by which the base is offset and incremented.

   Based on the above it is clear that registers 0 and 16 will result in
   collisions, as will 1 and 17, and so on.  This pass detects such collisions
   within a def/use chain of the source register in a loop and tries to
   resolve the collision by renaming one of the destination registers.  */
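
/* A worked example, following the arithmetic in tag_insn_info::tag below
   (the instruction and values are illustrative, not taken from real output):

     ldr x2, [x5, #256]	; DImode load, no writeback

   DST = 2 (4 LSB of x2) and SRC = 5 (4 LSB of x5).  The immediate is scaled
   to 256 / 8 = 32 (multiples of the 8-byte width) and, in the non-writeback
   case, shifted right by 2, giving an offset field of 8.  The tag is
   therefore

     (8 << 8) | (5 << 4) | 2 == 0x852

   "ldr x18, [x5, #256]" yields the same tag because 18 and 2 share their 4
   LSB, so the two loads would collide.  */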

/* Get the destination part of the tag.  */
#define TAG_GET_DEST(__tag) ((__tag) & 0xf)

/* Get the tag with the destination part updated.  */
#define TAG_UPDATE_DEST(__tag, __dest) (((__tag) & ~0xf) | ((__dest) & 0xf))
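
/* For instance, TAG_UPDATE_DEST (0x2345, 0x17) yields 0x2347: only the 4 LSB
   of the new destination replace the old ones (illustrative values).  */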

/* Strides beyond this size are considered too long for the hardware
   prefetcher to train on.  */
#define MAX_PREFETCH_STRIDE 2048

/* The instruction information structure.  This is used to cache information
   about the INSN that we derive when traversing through all of the insns in
   loops.  */
class tag_insn_info
{
public:
  rtx_insn *insn;
  rtx dest;
  rtx base;
  rtx offset;
  bool writeback;
  bool ldp;

  tag_insn_info (rtx_insn *i, rtx d, rtx b, rtx o, bool w, bool p)
    : insn (i), dest (d), base (b), offset (o), writeback (w), ldp (p)
  {}

  /* Compute the tag based on BASE, DEST and OFFSET of the load.  */
  unsigned tag ()
    {
      unsigned int_offset = 0;
      rtx offset = this->offset;
      unsigned dest = REGNO (this->dest);
      unsigned base = REGNO (this->base);
      machine_mode dest_mode = GET_MODE (this->dest);

      /* Falkor does not support SVE; GET_LOAD_INFO ensures that the
	 destination mode is constant here.  */
      unsigned dest_mode_size = GET_MODE_SIZE (dest_mode).to_constant ();

      /* For loads wider than 16 bytes, the DEST part of the tag is 0.  */
      if ((dest_mode_size << this->ldp) > 16)
	dest = 0;

      if (offset && REG_P (offset))
	int_offset = (1 << 5) | REGNO (offset);
      else if (offset && CONST_INT_P (offset))
	{
	  int_offset = INTVAL (offset);
	  int_offset /= dest_mode_size;
	  if (!this->writeback)
	    int_offset >>= 2;
	}
      return ((dest & 0xf)
	      | ((base & 0xf) << 4)
	      | ((int_offset & 0x3f) << 8));
    }
};

/* Hash map to traverse and process instructions with colliding tags.  */
typedef hash_map <rtx, auto_vec <tag_insn_info *> > tag_map_t;

/* Vector of instructions with colliding tags.  */
typedef auto_vec <tag_insn_info *> insn_info_list_t;

/* Pair of instruction information and unavailable register set to pass to
   CHECK_COLLIDING_TAGS.  */
typedef std::pair <tag_insn_info *, HARD_REG_SET *> arg_pair_t;


/* Callback to free all tag_insn_info objects.  */
bool
free_insn_info (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
		void *arg ATTRIBUTE_UNUSED)
{
  while (v->length () > 0)
    delete v->pop ();

  return true;
}


/* Add all aliases of the register to the unavailable register set.  REG is
   the smallest register number among the aliases, so passing it covers all of
   them.  UNAVAILABLE is the hard register set to add the ignored register
   numbers to and MODE is the mode in which the registers would have been
   used.  */
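/* For instance, with REG 3 this marks registers 3, 19, 35 and 51 as
   unavailable, since all of them share the same 4 LSB in their encodings.  */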
static void
ignore_all_aliases (HARD_REG_SET *unavailable, machine_mode mode, unsigned reg)
{
  add_to_hard_reg_set (unavailable, mode, reg);
  add_to_hard_reg_set (unavailable, mode, reg + 16);
  add_to_hard_reg_set (unavailable, mode, reg + 32);
  add_to_hard_reg_set (unavailable, mode, reg + 48);
}


/* Callback to check which destination registers are unavailable to us for
   renaming because of the base and offset colliding.  This is a callback that
   gets called for every name-value pair (T, V) in the TAG_MAP.  The ARG is an
   std::pair of the tag_insn_info of the original insn and the hard register
   set UNAVAILABLE that is used to record hard register numbers that cannot be
   used for the renaming.  This always returns true since we want to traverse
   through the entire TAG_MAP.  */
bool
check_colliding_tags (const rtx &t, const insn_info_list_t &v, arg_pair_t *arg)
{
  HARD_REG_SET *unavailable = arg->second;
  unsigned orig_tag = arg->first->tag ();
  unsigned tag = INTVAL (t);
  machine_mode mode = GET_MODE (arg->first->dest);

  /* Can't collide with emptiness.  */
  if (v.length () == 0)
    return true;

  /* Drop all aliased destination registers that result in the same
     tag.  It is not necessary to drop all of them but we do anyway
     because it is quicker than checking ranges.  */
  if (TAG_UPDATE_DEST (tag, 0) == TAG_UPDATE_DEST (orig_tag, 0))
    ignore_all_aliases (unavailable, mode, TAG_GET_DEST (tag));

  return true;
}


/* Initialize and build a set of hard register numbers UNAVAILABLE to avoid
   for renaming.  INSN_INFO is the original insn, TAG_MAP is the map of the
   list of insns indexed by their tags, HEAD is the def/use chain head of the
   destination register of the original insn.  The routine returns the
   superunion of the register classes that may be used during the renaming.  */
static enum reg_class
init_unavailable (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head,
		  HARD_REG_SET *unavailable)
{
  unsigned dest = head->regno;
  enum reg_class super_class = NO_REGS;
  machine_mode mode = GET_MODE (insn_info->dest);

  CLEAR_HARD_REG_SET (*unavailable);

  for (struct du_chain *tmp = head->first; tmp; tmp = tmp->next_use)
    {
      if (DEBUG_INSN_P (tmp->insn))
	continue;

      *unavailable |= ~reg_class_contents[tmp->cl];
      super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
    }

  for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (fixed_regs[i] || global_regs[i])
      add_to_hard_reg_set (unavailable, mode, i);

  arg_pair_t arg = arg_pair_t (insn_info, unavailable);

  /* Exclude all registers that would lead to collisions with other loads.  */
  tag_map.traverse <arg_pair_t *, check_colliding_tags> (&arg);

  /* Finally, also ignore all aliases of the current reg.  */
  ignore_all_aliases (unavailable, mode, dest & 0xf);

  return super_class;
}


/* Find a suitable and available register and rename the chain of occurrences
   of the register defined in the def/use chain headed by HEAD in which INSN
   exists.  INSN_INFO and TAG_MAP are used to determine which registers are
   unavailable because they would collide after the rename.  The routine
   returns the register number in case of a successful rename or -1 to
   indicate failure.  */
static int
rename_chain (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head)
{
  unsigned dest_regno = head->regno;

  if (head->cannot_rename || head->renamed)
    return -1;

  HARD_REG_SET unavailable;

  enum reg_class super_class = init_unavailable (insn_info, tag_map, head,
						 &unavailable);

  unsigned new_regno = find_rename_reg (head, super_class, &unavailable,
					dest_regno, false);

  /* Attempt to rename as long as regrename doesn't just throw the same
     register at us.  */
  if (new_regno != dest_regno && regrename_do_replace (head, new_regno))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "\tInsn %d: Renamed %d to %d\n",
		 INSN_UID (insn_info->insn), dest_regno, new_regno);

      return new_regno;
    }

  return -1;
}


/* Return true if REGNO is not safe to rename.  */
static bool
unsafe_rename_p (unsigned regno)
{
  /* Avoid renaming registers used for argument passing and the return value.
     In the future we could be a little less conservative and walk through the
     basic blocks to see if there are any call or syscall sites.  */
  if (regno <= R8_REGNUM
      || (regno >= V0_REGNUM && regno < V8_REGNUM))
    return true;

  /* Don't attempt to rename registers that may have specific meanings.  */
  switch (regno)
    {
    case LR_REGNUM:
    case HARD_FRAME_POINTER_REGNUM:
    case FRAME_POINTER_REGNUM:
    case STACK_POINTER_REGNUM:
      return true;
    }

  return false;
}


/* Go through the def/use chains for the register and find the chain for this
   insn to rename.  The function returns the hard register number in case of a
   successful rename and -1 otherwise.  */
static int
rename_dest (tag_insn_info *insn_info, tag_map_t &tag_map)
{
  struct du_chain *chain = NULL;
  du_head_p head = NULL;
  int i;

  unsigned dest_regno = REGNO (insn_info->dest);

  if (unsafe_rename_p (dest_regno))
    return -1;

  /* Search the chain where this instruction is (one of) the root.  */
  rtx_insn *insn = insn_info->insn;
  operand_rr_info *dest_op_info = insn_rr[INSN_UID (insn)].op_info;

  for (i = 0; i < dest_op_info->n_chains; i++)
    {
      /* The register tracked by this chain does not match the
	 destination register of insn.  */
      if (dest_op_info->heads[i]->regno != dest_regno)
	continue;

      head = dest_op_info->heads[i];
      /* The chain was merged in another, find the new head.  */
      if (!head->first)
	head = regrename_chain_from_id (head->id);

      for (chain = head->first; chain; chain = chain->next_use)
	/* Found the insn in the chain, so try renaming the register in this
	   chain.  */
	if (chain->insn == insn)
	  return rename_chain (insn_info, tag_map, head);
    }

  return -1;
}


/* Flag to track if the map has changed.  */
static bool map_changed = false;

/* The actual reallocation logic.  For each vector of collisions V, try to
   resolve the collision by attempting to rename the destination register of
   all but one of the loads.  This is a callback that is invoked for each
   name-value pair (T, V) in TAG_MAP.  The function returns true as long as
   the map remains unchanged; after a successful rename it returns false to
   halt the traversal, since the rename has modified the map.  */
bool
avoid_collisions_1 (const rtx &t, insn_info_list_t *v, tag_map_t *tag_map)
{
  /* We need at least two loads to cause a tag collision, return unchanged.  */
  if (v->length () < 2)
    return true;

  tag_insn_info *vec_start = v->pop ();
  tag_insn_info *insn_info = vec_start;

  /* Try to rename at least one register to reduce the collision.  If we
     iterate all the way through, we end up dropping one of the loads from the
     list.  This is fine because we want at most one element to ensure that a
     subsequent rename attempt does not end up worsening the collision.  */
  do
    {
      int new_regno;

      if ((new_regno = rename_dest (insn_info, *tag_map)) != -1)
	{
	  rtx new_tag = GEN_INT (TAG_UPDATE_DEST (INTVAL (t), new_regno));

	  tag_map->get_or_insert (new_tag).safe_push (insn_info);
	  df_set_regs_ever_live (new_regno, true);
	  map_changed = true;
	  return false;
	}

      v->safe_insert (0, insn_info);
      insn_info = v->pop ();
    }
  while (insn_info != vec_start);

  if (dump_file)
    fprintf (dump_file, "\t>> Failed to rename destination in insn %d\n\t>>",
	     INSN_UID (insn_info->insn));

  /* Drop the last element and move on to the next tag.  */
  delete insn_info;
  return true;
}


/* For each set of collisions, attempt to rename the registers or insert a
   move to avoid the collision.  We repeatedly traverse through TAG_MAP using
   AVOID_COLLISIONS_1 trying to rename registers to avoid collisions until a
   full traversal results in no change in the map.  */
static void
avoid_collisions (tag_map_t &tag_map)
{
  do
    {
      map_changed = false;
      tag_map.traverse <tag_map_t *, avoid_collisions_1> (&tag_map);
    }
  while (map_changed);
}
\f


/* Find the use-def chain in which INSN exists and then see if there is a
   definition inside the loop and outside it.  We use this as a simple
   approximation to determine whether the base register is an IV.  The basic
   idea is to find INSN in the use-def chains for its base register and find
   all definitions that reach it.  Of all these definitions, there should be
   at least one definition that is a simple addition of a constant value,
   either as a binary operation or a pre or post update.

   The function returns true if the base register is estimated to be an IV.  */
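/* For example (illustrative RTL, not from a real dump), a reaching
   definition such as

     (set (reg:DI x1) (plus:DI (reg:DI x1) (const_int 64)))

   inside the loop marks x1 as a likely IV with a constant 64-byte stride,
   whereas a definition that loads x1 from memory does not.  */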
static bool
iv_p (rtx_insn *insn, rtx reg, struct loop *loop)
{
  df_ref ause;
  unsigned regno = REGNO (reg);

  /* Ignore loads from the stack.  */
  if (regno == SP_REGNUM)
    return false;

  for (ause = DF_REG_USE_CHAIN (regno); ause; ause = DF_REF_NEXT_REG (ause))
    {
      if (!DF_REF_INSN_INFO (ause)
	  || !NONDEBUG_INSN_P (DF_REF_INSN (ause)))
	continue;

      if (insn != DF_REF_INSN (ause))
	continue;

      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      df_ref def_rec;

      FOR_EACH_INSN_INFO_DEF (def_rec, insn_info)
	{
	  rtx_insn *insn = DF_REF_INSN (def_rec);
	  basic_block bb = BLOCK_FOR_INSN (insn);

	  if (dominated_by_p (CDI_DOMINATORS, bb, loop->header)
	      && bb->loop_father == loop)
	    {
	      if (recog_memoized (insn) < 0)
		continue;

	      rtx pat = PATTERN (insn);

	      /* Prefetch or clobber; unlikely to be a constant stride.  The
		 Falkor software prefetcher tuning is pretty conservative, so
		 its presence indicates that the access pattern is probably
		 strided but most likely with an unknown stride size or a
		 stride size that is quite large.  */
	      if (GET_CODE (pat) != SET)
		continue;

	      rtx x = SET_SRC (pat);
	      if (GET_CODE (x) == ZERO_EXTRACT
		  || GET_CODE (x) == ZERO_EXTEND
		  || GET_CODE (x) == SIGN_EXTEND)
		x = XEXP (x, 0);

	      /* Loading the value from memory; unlikely to be a constant
		 stride.  */
	      if (MEM_P (x))
		continue;

	      /* An increment or decrement by a constant MODE_SIZE amount or
		 the result of a binary expression is likely to be an IV.  */
	      if (GET_CODE (x) == POST_INC
		  || GET_CODE (x) == POST_DEC
		  || GET_CODE (x) == PRE_INC
		  || GET_CODE (x) == PRE_DEC)
		return true;
	      else if (BINARY_P (x)
		       && (CONST_INT_P (XEXP (x, 0))
			   || CONST_INT_P (XEXP (x, 1))))
		{
		  rtx stride = (CONST_INT_P (XEXP (x, 0))
				? XEXP (x, 0) : XEXP (x, 1));

		  /* Don't bother with very long strides because the
		     prefetcher is unable to train on them anyway.  */
		  if (INTVAL (stride) < MAX_PREFETCH_STRIDE)
		    return true;
		}
	    }
	}
      return false;
    }
  return false;
}


/* Return true if SRC is a strided load in the LOOP, false otherwise.
   If it is a strided load, set the BASE and OFFSET.  Also, if this is
   a pre/post increment load, set PRE_POST to true.  */
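/* For example (illustrative), a post-incremented load pair such as
   "ldp x0, x1, [x2], #16" accesses DImode elements, so the single-element
   offset of 8 is doubled to 16 below to match the amount by which the base
   register actually advances.  */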
static bool
valid_src_p (rtx src, rtx_insn *insn, struct loop *loop, bool *pre_post,
	     rtx *base, rtx *offset, bool load_pair)
{
  subrtx_var_iterator::array_type array;
  rtx x = NULL_RTX;

  FOR_EACH_SUBRTX_VAR (iter, array, src, NONCONST)
    if (MEM_P (*iter))
      {
	x = *iter;
	break;
      }

  if (!x)
    return false;

  struct aarch64_address_info addr;
  machine_mode mode = GET_MODE (x);

  if (!aarch64_classify_address (&addr, XEXP (x, 0), mode, true))
    return false;

  if (addr.type != ADDRESS_REG_IMM
      && addr.type != ADDRESS_REG_WB
      && addr.type != ADDRESS_REG_REG
      && addr.type != ADDRESS_REG_UXTW
      && addr.type != ADDRESS_REG_SXTW)
    return false;

  unsigned regno = REGNO (addr.base);
  if (global_regs[regno] || fixed_regs[regno])
    return false;

  if (addr.type == ADDRESS_REG_WB)
    {
      unsigned code = GET_CODE (XEXP (x, 0));

      *pre_post = true;
      *base = addr.base;

      if (code == PRE_MODIFY || code == POST_MODIFY)
	*offset = addr.offset;
      else
	{
	  /* Writeback is only supported for fixed-width modes.  */
	  unsigned int_offset = GET_MODE_SIZE (mode).to_constant ();

	  /* For post-incremented load pairs we would increment the base twice
	     over, so make that adjustment.  */
	  if (load_pair && (code == POST_INC || code == POST_DEC))
	    int_offset *= 2;

	  *offset = GEN_INT (int_offset);
	}
      return true;
    }
  else if (addr.type == ADDRESS_REG_IMM || addr.type == ADDRESS_REG_REG)
    {
      /* Check if the load is strided.  */
      if (!iv_p (insn, addr.base, loop))
	return false;

      *base = addr.base;
      *offset = addr.offset;
      return true;
    }

  return false;
}


/* Return true if INSN is a strided load in LOOP.  If it is a strided load,
   set the DEST, BASE and OFFSET.  Also, if this is a pre/post increment load,
   set PRE_POST to true.

   The routine does checks on the destination of the insn and depends on
   VALID_SRC_P to check the source and fill in the BASE and OFFSET.  */
static bool
get_load_info (rtx_insn *insn, struct loop *loop, rtx *dest, rtx *base,
	       rtx *offset, bool *pre_post, bool *ldp)
{
  if (!INSN_P (insn) || recog_memoized (insn) < 0)
    return false;

  rtx pat = PATTERN (insn);
  unsigned code = GET_CODE (pat);
  bool load_pair = (code == PARALLEL);

  /* For a load pair we need only the first base and destination
     registers.  We however need to ensure that our pre/post increment
     offset is doubled; we do that in VALID_SRC_P.  */
  if (load_pair)
    {
      pat = XVECEXP (pat, 0, 0);
      code = GET_CODE (pat);
    }

  if (code != SET)
    return false;

  rtx dest_rtx = SET_DEST (pat);

  if (!REG_P (dest_rtx))
    return false;

  unsigned regno = REGNO (dest_rtx);
  machine_mode mode = GET_MODE (dest_rtx);
  machine_mode inner_mode = GET_MODE_INNER (mode);

  /* Falkor does not support SVE vectors.  */
  if (!GET_MODE_SIZE (mode).is_constant ())
    return false;

  /* Ignore vector struct or lane loads.  */
  if (GET_MODE_SIZE (mode).to_constant ()
      != GET_MODE_SIZE (inner_mode).to_constant ())
    return false;

  /* The largest width we want to bother with is a load of a pair of
     quad-words.  */
  if ((GET_MODE_SIZE (mode).to_constant () << load_pair)
      > GET_MODE_SIZE (OImode))
    return false;

  /* Ignore loads into the stack pointer because it is unlikely to be a
     stream.  */
  if (regno == SP_REGNUM)
    return false;

  if (valid_src_p (SET_SRC (pat), insn, loop, pre_post, base, offset,
		   load_pair))
    {
      *dest = dest_rtx;
      *ldp = load_pair;

      return true;
    }

  return false;
}


/* Return whether INSN and CAND are in the same def/use chain.  */
static bool
in_same_chain (rtx_insn *insn, rtx_insn *cand, unsigned regno)
{
  struct du_chain *chain = NULL;
  du_head_p head = NULL;
  int i;

  /* Search the chain where this instruction is (one of) the root.  */
  operand_rr_info *op_info = insn_rr[INSN_UID (insn)].op_info;

  for (i = 0; i < op_info->n_chains; i++)
    {
      /* The register tracked by this chain does not match the
	 dest register of insn.  */
      if (op_info->heads[i]->regno != regno)
	continue;

      head = op_info->heads[i];
      /* The chain was merged in another, find the new head.  */
      if (!head->first)
	head = regrename_chain_from_id (head->id);

      bool found_insn = false, found_cand = false;

      for (chain = head->first; chain; chain = chain->next_use)
	{
	  rtx *loc = &SET_DEST (PATTERN (chain->insn));

	  if (chain->loc != loc)
	    continue;

	  if (chain->insn == insn)
	    found_insn = true;

	  if (chain->insn == cand)
	    found_cand = true;

	  if (found_insn && found_cand)
	    return true;
	}
    }

  return false;
}


/* Callback function to traverse the tag map and drop loads that have the same
   destination and are in the same chain of occurrence.  The routine always
   returns true to allow traversal through all of TAG_MAP.  */
bool
single_dest_per_chain (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
		       void *arg ATTRIBUTE_UNUSED)
{
  for (int i = v->length () - 1; i >= 1; i--)
    {
      tag_insn_info *insn_info = (*v)[i];

      for (int j = v->length () - 2; j >= 0; j--)
	{
	  /* Filter out destinations in the same chain.  */
	  if (in_same_chain (insn_info->insn, (*v)[j]->insn,
			     REGNO (insn_info->dest)))
	    {
	      v->ordered_remove (j);
	      i = v->length ();
	      break;
	    }
	}
    }

  return true;
}


/* Callback invoked for each name-value pair (T, INSN_INFO) to dump the insn
   list INSN_INFO for tag T.  */
bool
dump_insn_list (const rtx &t, const insn_info_list_t &insn_info,
		void *unused ATTRIBUTE_UNUSED)
{
  gcc_assert (dump_file);
  fprintf (dump_file, "Tag 0x" HOST_WIDE_INT_PRINT_HEX_PURE " ::\n",
	   INTVAL (t));

  for (unsigned i = 0; i < insn_info.length (); i++)
    dump_insn_slim (dump_file, insn_info[i]->insn);

  fprintf (dump_file, "\n");

  return true;
}


/* Record all loads in LOOP into TAG_MAP indexed by the Falkor hardware
   prefetcher memory tags.  */
static void
record_loads (tag_map_t &tag_map, struct loop *loop)
{
  rtx_insn *insn;
  basic_block *body, bb;

  body = get_loop_body (loop);

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      FOR_BB_INSNS (bb, insn)
	{
	  rtx base = NULL_RTX;
	  rtx dest = NULL_RTX;
	  rtx offset = NULL_RTX;
	  bool writeback = false;
	  bool ldp = false;

	  if (!INSN_P (insn) || DEBUG_INSN_P (insn))
	    continue;

	  if (get_load_info (insn, loop, &dest, &base, &offset, &writeback,
			     &ldp))
	    {
	      tag_insn_info *i = new tag_insn_info (insn, dest, base, offset,
						    writeback, ldp);
	      rtx tag = GEN_INT (i->tag ());
	      tag_map.get_or_insert (tag).safe_push (i);
	    }
	}
    }

  if (dump_file)
    {
      fprintf (dump_file, "Loop %d: Tag map generated.\n", loop->num);
      tag_map.traverse <void *, dump_insn_list> (NULL);
    }

  /* Try to reduce the dataset before launching into the rename attempt.  Drop
     destinations in the same collision chain that appear in the same def/use
     chain, all as defs.  These chains will move together in a rename so
     there's no point in keeping both in there.  */
  tag_map.traverse <void *, single_dest_per_chain> (NULL);
}


/* Tag collision avoidance pass for Falkor.  The pass runs in two phases for
   each loop; the first phase collects all loads that we consider as
   interesting for renaming into a tag-indexed map of lists.  The second phase
   renames the destination register of the loads in an attempt to spread out
   the loads into different tags.  */
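/* For instance (assuming the Falkor tunings set
   AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS, which the gate below checks), the pass
   can be exercised and its per-loop tag maps dumped with something like:

     gcc -O2 -mcpu=falkor -fdump-rtl-tag_collision_avoidance test.c  */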
void
execute_tag_collision_avoidance ()
{
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_compute_regs_ever_live (true);
  df_note_add_problem ();
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  regrename_init (true);
  regrename_analyze (NULL);

  compute_bb_for_insn ();
  calculate_dominance_info (CDI_DOMINATORS);
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      tag_map_t tag_map (512);

      record_loads (tag_map, loop);
      avoid_collisions (tag_map);
      if (dump_file)
	{
	  fprintf (dump_file, "Loop %d: Completed rename.\n", loop->num);
	  tag_map.traverse <void *, dump_insn_list> (NULL);
	}
      tag_map.traverse <void *, free_insn_info> (NULL);
    }

  loop_optimizer_finalize ();
  free_dominance_info (CDI_DOMINATORS);
  regrename_finish ();
}


const pass_data pass_data_tag_collision_avoidance =
{
  RTL_PASS, /* type */
  "tag_collision_avoidance", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};


class pass_tag_collision_avoidance : public rtl_opt_pass
{
public:
  pass_tag_collision_avoidance (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_tag_collision_avoidance, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return ((aarch64_tune_params.extra_tuning_flags
	       & AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS)
	      && optimize >= 2);
    }

  virtual unsigned int execute (function *)
    {
      execute_tag_collision_avoidance ();
      return 0;
    }

}; // class pass_tag_collision_avoidance


/* Create a new pass instance.  */
rtl_opt_pass *
make_pass_tag_collision_avoidance (gcc::context *ctxt)
{
  return new pass_tag_collision_avoidance (ctxt);
}