/* Tag Collision Avoidance pass for Falkor.
   Copyright (C) 2018-2020 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_LIST
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "tree-pass.h"
#include "aarch64-protos.h"
#include "hash-map.h"
#include "cfgloop.h"
#include "cfgrtl.h"
#include "rtl-iter.h"
#include "df.h"
#include "memmodel.h"
#include "optabs.h"
#include "regs.h"
#include "recog.h"
#include "function-abi.h"
#include "regrename.h"
#include "print-rtl.h"

/* The Falkor hardware prefetching system uses the encoding of the registers
   and offsets of loads to decide which of the multiple hardware prefetchers to
   assign the load to.  This has the positive effect of accelerating prefetches
   when all related loads with uniform strides are assigned to the same
   prefetcher unit.  The downside is that, because of the way the assignment
   works, multiple unrelated loads may end up on the same prefetch unit, thus
   causing the unit to bounce between different sets of addresses and never
   train correctly.  The point of this pass is to avoid such collisions so that
   unrelated loads are spread out to different prefetchers.  It also makes a
   rudimentary attempt to ensure that related loads with the same tags don't
   get moved out unnecessarily.

   Perhaps a future enhancement would be to make a more concerted attempt to
   get related loads under the same tag.  See the memcpy/memset implementation
   for falkor in glibc to understand the kind of impact this can have on
   falkor.

   The assignment of loads is based on a tag that is computed from the encoding
   of the first destination register (the only destination in case of LDR), the
   base register and the offset (either the register or the immediate value, as
   encoded in the instruction).  This is what the 14-bit tag looks like:

     |<- 6 bits ->|<- 4b ->|<- 4b ->|
     --------------------------------
     |   OFFSET   |  SRC   |  DST   |
     --------------------------------

   In all cases, SRC and DST are the 4 LSBs of the encoding of the register in
   the instruction.  Offset computation is more involved and is as follows:

   - For register offset addressing: the 4 LSBs of the offset register, with
     the MSB of the 6 bits set to 1.

   - For immediate offset: the 4 LSBs of the encoded immediate offset.  The
     encoding depends on the width of the load and is expressed as multiples
     of the width.

   - For loads with update: the 4 LSBs of the offset.  The encoding here is
     the exact number by which the base is offset and incremented.

   Based on the above it is clear that registers 0 and 16 will result in
   collisions, as will 1 and 17, and so on.  This pass detects such collisions
   within a def/use chain of the source register in a loop and tries to
   resolve the collision by renaming one of the destination registers.  */

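/* As an illustrative example (assuming the usual aarch64 hard register
   numbering, where x0-x30 are hard registers 0-30), consider

     ldr x2, [x1, #256]

   Following the computation in tag_insn_info::tag below: DST = 2, SRC = 1,
   and the immediate is scaled by the 8-byte access width (256 / 8 = 32) and,
   for non-writeback forms, shifted right by two, giving OFFSET = 8 and hence
   the tag 0x812.  Since only the low 4 bits of the destination encoding
   participate, ldr x18, [x1, #256] produces the same tag (18 & 0xf == 2) and
   therefore collides with the load above.  */
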
/* Get the destination part of the tag.  */
#define TAG_GET_DEST(__tag) ((__tag) & 0xf)

/* Get the tag with the destination part updated.  */
#define TAG_UPDATE_DEST(__tag, __dest) (((__tag) & ~0xf) | (__dest & 0xf))

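/* For example, TAG_GET_DEST (0x812) is 0x2, and TAG_UPDATE_DEST (0x812, 18)
   is again 0x812 because only the low 4 bits of the new destination
   (18 & 0xf == 2) are encoded.  */
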
#define MAX_PREFETCH_STRIDE 2048

/* The instruction information structure.  This is used to cache information
   about the INSN that we derive when traversing through all of the insns in
   loops.  */
class tag_insn_info
{
public:
  rtx_insn *insn;
  rtx dest;
  rtx base;
  rtx offset;
  bool writeback;
  bool ldp;

  tag_insn_info (rtx_insn *i, rtx d, rtx b, rtx o, bool w, bool p)
    : insn (i), dest (d), base (b), offset (o), writeback (w), ldp (p)
  {}

  /* Compute the tag based on BASE, DEST and OFFSET of the load.  */
  unsigned tag ()
    {
      unsigned int_offset = 0;
      rtx offset = this->offset;
      unsigned dest = REGNO (this->dest);
      unsigned base = REGNO (this->base);
      machine_mode dest_mode = GET_MODE (this->dest);

      /* Falkor does not support SVE; GET_LOAD_INFO ensures that the
	 destination mode size is constant here.  */
      unsigned dest_mode_size = GET_MODE_SIZE (dest_mode).to_constant ();

      /* For loads larger than 16 bytes, the DEST part of the tag is 0.  */
      if ((dest_mode_size << this->ldp) > 16)
	dest = 0;

      if (offset && REG_P (offset))
	int_offset = (1 << 5) | REGNO (offset);
      else if (offset && CONST_INT_P (offset))
	{
	  int_offset = INTVAL (offset);
	  int_offset /= dest_mode_size;
	  if (!this->writeback)
	    int_offset >>= 2;
	}
      return ((dest & 0xf)
	      | ((base & 0xf) << 4)
	      | ((int_offset & 0x3f) << 8));
    }
};

/* Hash map to traverse and process instructions with colliding tags.  */
typedef hash_map <rtx, auto_vec <tag_insn_info *> > tag_map_t;

/* Vector of instructions with colliding tags.  */
typedef auto_vec <tag_insn_info *> insn_info_list_t;

/* Pair of instruction information and unavailable register set to pass to
   CHECK_COLLIDING_TAGS.  */
typedef std::pair <tag_insn_info *, HARD_REG_SET *> arg_pair_t;


/* Callback to free all tag_insn_info objects.  */
bool
free_insn_info (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
		void *arg ATTRIBUTE_UNUSED)
{
  while (v->length () > 0)
    delete v->pop ();

  return true;
}


/* Add all aliases of the register to the unavailable register set.  REG must
   be the smallest register number of the alias set, i.e. the low 4 bits of
   the encoding; its aliases are then REG + 16, REG + 32 and REG + 48.
   UNAVAILABLE is the hard register set to add the ignored register numbers to
   and MODE is the mode in which the registers would have been used.  */
static void
ignore_all_aliases (HARD_REG_SET *unavailable, machine_mode mode, unsigned reg)
{
  add_to_hard_reg_set (unavailable, mode, reg);
  add_to_hard_reg_set (unavailable, mode, reg + 16);
  add_to_hard_reg_set (unavailable, mode, reg + 32);
  add_to_hard_reg_set (unavailable, mode, reg + 48);
}

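/* For example, with the aarch64 hard register numbering (x0-x30 are hard
   registers 0-30 and v0-v31 start at 32), a call with REG == 2 marks x2 (2),
   x18 (18), v2 (34) and v18 (50) as unavailable, since all four share the
   low 4 bits of their encodings.  */
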

/* Callback to check which destination registers are unavailable to us for
   renaming because of the base and offset colliding.  This is a callback that
   gets called for every name-value pair (T, V) in the TAG_MAP.  The ARG is an
   std::pair of the tag_insn_info of the original insn and the hard register
   set UNAVAILABLE that is used to record hard register numbers that cannot be
   used for the renaming.  This always returns true since we want to traverse
   through the entire TAG_MAP.  */
bool
check_colliding_tags (const rtx &t, const insn_info_list_t &v, arg_pair_t *arg)
{
  HARD_REG_SET *unavailable = arg->second;
  unsigned orig_tag = arg->first->tag ();
  unsigned tag = INTVAL (t);
  machine_mode mode = GET_MODE (arg->first->dest);

  /* Can't collide with emptiness.  */
  if (v.length () == 0)
    return true;

  /* Drop all aliased destination registers that result in the same tag.  It
     is not necessary to drop all of them, but we do anyway because it is
     quicker than checking ranges.  */
  if (TAG_UPDATE_DEST (tag, 0) == TAG_UPDATE_DEST (orig_tag, 0))
    ignore_all_aliases (unavailable, mode, TAG_GET_DEST (tag));

  return true;
}


/* Initialize and build a set of hard register numbers UNAVAILABLE to avoid
   for renaming.  INSN_INFO is the original insn, TAG_MAP is the map of the
   list of insns indexed by their tags, HEAD is the def/use chain head of the
   destination register of the original insn.  The routine returns the super
   class of register classes that may be used during the renaming.  */
static enum reg_class
init_unavailable (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head,
		  HARD_REG_SET *unavailable)
{
  unsigned dest = head->regno;
  enum reg_class super_class = NO_REGS;
  machine_mode mode = GET_MODE (insn_info->dest);

  CLEAR_HARD_REG_SET (*unavailable);

  for (struct du_chain *tmp = head->first; tmp; tmp = tmp->next_use)
    {
      if (DEBUG_INSN_P (tmp->insn))
	continue;

      *unavailable |= ~reg_class_contents[tmp->cl];
      super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
    }

  for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (fixed_regs[i] || global_regs[i])
      add_to_hard_reg_set (unavailable, mode, i);

  arg_pair_t arg = arg_pair_t (insn_info, unavailable);

  /* Exclude all registers that would lead to collisions with other loads.  */
  tag_map.traverse <arg_pair_t *, check_colliding_tags> (&arg);

  /* Finally, also ignore all aliases of the current reg.  */
  ignore_all_aliases (unavailable, mode, dest & 0xf);

  return super_class;
}


/* Find a suitable and available register and rename the chain of occurrences
   of the register defined in the def/use chain headed by HEAD in which INSN
   exists.  INSN_INFO and TAG_MAP are used to determine which registers are
   unavailable due to a potential collision caused by the rename.  The routine
   returns the register number in case of a successful rename or -1 to
   indicate failure.  */
static int
rename_chain (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head)
{
  unsigned dest_regno = head->regno;

  if (head->cannot_rename || head->renamed)
    return -1;

  HARD_REG_SET unavailable;

  enum reg_class super_class = init_unavailable (insn_info, tag_map, head,
						 &unavailable);

  unsigned new_regno = find_rename_reg (head, super_class, &unavailable,
					dest_regno, false);

  /* Attempt to rename as long as regrename doesn't just throw the same
     register at us.  */
  if (new_regno != dest_regno && regrename_do_replace (head, new_regno))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "\tInsn %d: Renamed %d to %d\n",
		 INSN_UID (insn_info->insn), dest_regno, new_regno);

      return new_regno;
    }

  return -1;
}


/* Return true if REGNO is not safe to rename.  */
static bool
unsafe_rename_p (unsigned regno)
{
  /* Avoid renaming registers used for argument passing and return value.  In
     future we could be a little less conservative and walk through the basic
     blocks to see if there are any call or syscall sites.  */
  if (regno <= R8_REGNUM
      || (regno >= V0_REGNUM && regno < V8_REGNUM))
    return true;

  /* Don't attempt to rename registers that may have specific meanings.  */
  switch (regno)
    {
    case LR_REGNUM:
    case HARD_FRAME_POINTER_REGNUM:
    case FRAME_POINTER_REGNUM:
    case STACK_POINTER_REGNUM:
      return true;
    }

  return false;
}


/* Go through the def/use chains for the register and find the chain for this
   insn to rename.  The function returns the hard register number in case of a
   successful rename and -1 otherwise.  */
static int
rename_dest (tag_insn_info *insn_info, tag_map_t &tag_map)
{
  struct du_chain *chain = NULL;
  du_head_p head = NULL;
  int i;

  unsigned dest_regno = REGNO (insn_info->dest);

  if (unsafe_rename_p (dest_regno))
    return -1;

  /* Search the chains where this instruction is (one of) the roots.  */
  rtx_insn *insn = insn_info->insn;
  operand_rr_info *dest_op_info = insn_rr[INSN_UID (insn)].op_info;

  for (i = 0; i < dest_op_info->n_chains; i++)
    {
      /* The register tracked by this chain does not match the destination
	 register of insn.  */
      if (dest_op_info->heads[i]->regno != dest_regno)
	continue;

      head = dest_op_info->heads[i];
      /* The chain was merged into another; find the new head.  */
      if (!head->first)
	head = regrename_chain_from_id (head->id);

      for (chain = head->first; chain; chain = chain->next_use)
	/* Found the insn in the chain, so try renaming the register in this
	   chain.  */
	if (chain->insn == insn)
	  return rename_chain (insn_info, tag_map, head);
    }

  return -1;
}


/* Flag to track if the map has changed.  */
static bool map_changed = false;

/* The actual reallocation logic.  For each vector of collisions V, try to
   resolve the collision by attempting to rename the destination register of
   all but one of the loads.  This is a callback that is invoked for each
   name-value pair (T, V) in TAG_MAP.  The function returns true when it
   leaves the map unchanged, and false, halting the traversal, when it has
   modified the map.  */
bool
avoid_collisions_1 (const rtx &t, insn_info_list_t *v, tag_map_t *tag_map)
{
  /* We need at least two loads to cause a tag collision; if there are fewer,
     return with the map unchanged.  */
  if (v->length () < 2)
    return true;

  tag_insn_info *vec_start = v->pop ();
  tag_insn_info *insn_info = vec_start;

  /* Try to rename at least one register to reduce the collision.  If we
     iterate all the way through without success, we end up dropping one of
     the loads from the list.  This is fine because at most one element is
     dropped per tag, which ensures that a subsequent rename attempt does not
     end up worsening the collision.  */
  do
    {
      int new_regno;

      if ((new_regno = rename_dest (insn_info, *tag_map)) != -1)
	{
	  rtx new_tag = GEN_INT (TAG_UPDATE_DEST (INTVAL (t), new_regno));

	  tag_map->get_or_insert (new_tag).safe_push (insn_info);
	  df_set_regs_ever_live (new_regno, true);
	  map_changed = true;
	  return false;
	}

      v->safe_insert (0, insn_info);
      insn_info = v->pop ();
    }
  while (insn_info != vec_start);

  if (dump_file)
    fprintf (dump_file, "\t>> Failed to rename destination in insn %d\n\t>>",
	     INSN_UID (insn_info->insn));

  /* Drop the last element and move on to the next tag.  */
  delete insn_info;
  return true;
}

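/* To illustrate the rotation in AVOID_COLLISIONS_1: for a vector V = [A, B,
   C] (with C on top), VEC_START is C.  C is tried first; on failure it is
   reinserted at the bottom and B is tried, then A.  If no rename succeeds,
   the loop terminates when C is popped again, and C alone is dropped from
   the list.  */
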

/* For each set of collisions, attempt to rename the destination registers of
   the loads to avoid the collision.  We repeatedly traverse through TAG_MAP
   using AVOID_COLLISIONS_1, trying to rename registers to avoid collisions,
   until a full traversal results in no change in the map.  */
static void
avoid_collisions (tag_map_t &tag_map)
{
  do
    {
      map_changed = false;
      tag_map.traverse <tag_map_t *, avoid_collisions_1> (&tag_map);
    }
  while (map_changed);
}
\f

/* Find the use-def chain in which INSN exists and then see if there is a
   definition inside the loop and outside it.  We use this as a simple
   approximation to determine whether the base register is an IV.  The basic
   idea is to find INSN in the use-def chains for its base register and find
   all definitions that reach it.  Of all these definitions, there should be
   at least one definition that is a simple addition of a constant value,
   either as a binary operation or a pre or post update.

   The function returns true if the base register is estimated to be an
   IV.  */
static bool
iv_p (rtx_insn *insn, rtx reg, struct loop *loop)
{
  df_ref ause;
  unsigned regno = REGNO (reg);

  /* Ignore loads from the stack.  */
  if (regno == SP_REGNUM)
    return false;

  for (ause = DF_REG_USE_CHAIN (regno); ause; ause = DF_REF_NEXT_REG (ause))
    {
      if (!DF_REF_INSN_INFO (ause)
	  || !NONDEBUG_INSN_P (DF_REF_INSN (ause)))
	continue;

      if (insn != DF_REF_INSN (ause))
	continue;

      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      df_ref def_rec;

      FOR_EACH_INSN_INFO_DEF (def_rec, insn_info)
	{
	  rtx_insn *insn = DF_REF_INSN (def_rec);
	  basic_block bb = BLOCK_FOR_INSN (insn);

	  if (dominated_by_p (CDI_DOMINATORS, bb, loop->header)
	      && bb->loop_father == loop)
	    {
	      if (recog_memoized (insn) < 0)
		continue;

	      rtx pat = PATTERN (insn);

	      /* Prefetch or clobber; unlikely to be a constant stride.  The
		 falkor software prefetcher tuning is pretty conservative, so
		 its presence indicates that the access pattern is probably
		 strided but most likely with an unknown stride size or a
		 stride size that is quite large.  */
	      if (GET_CODE (pat) != SET)
		continue;

	      rtx x = SET_SRC (pat);
	      if (GET_CODE (x) == ZERO_EXTRACT
		  || GET_CODE (x) == ZERO_EXTEND
		  || GET_CODE (x) == SIGN_EXTEND)
		x = XEXP (x, 0);

	      /* Loading the value from memory; unlikely to be a constant
		 stride.  */
	      if (MEM_P (x))
		continue;

	      /* An increment or decrement by a constant MODE_SIZE amount or
		 the result of a binary expression is likely to be an IV.  */
	      if (GET_CODE (x) == POST_INC
		  || GET_CODE (x) == POST_DEC
		  || GET_CODE (x) == PRE_INC
		  || GET_CODE (x) == PRE_DEC)
		return true;
	      else if (BINARY_P (x)
		       && (CONST_INT_P (XEXP (x, 0))
			   || CONST_INT_P (XEXP (x, 1))))
		{
		  rtx stride = (CONST_INT_P (XEXP (x, 0))
				? XEXP (x, 0) : XEXP (x, 1));

		  /* Don't bother with very long strides because the
		     prefetcher is unable to train on them anyway.  */
		  if (INTVAL (stride) < MAX_PREFETCH_STRIDE)
		    return true;
		}
	    }
	}
      return false;
    }
  return false;
}

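/* For example, a base register x1 whose in-loop definition is a constant
   step such as

     add x1, x1, #64

   (a binary PLUS with a constant operand smaller than MAX_PREFETCH_STRIDE)
   is treated as an induction variable by IV_P above.  */
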

/* Return true if SRC is a strided load in the LOOP, false otherwise.
   If it is a strided load, set the BASE and OFFSET.  Also, if this is
   a pre/post increment load, set PRE_POST to true.  */
static bool
valid_src_p (rtx src, rtx_insn *insn, struct loop *loop, bool *pre_post,
	     rtx *base, rtx *offset, bool load_pair)
{
  subrtx_var_iterator::array_type array;
  rtx x = NULL_RTX;

  FOR_EACH_SUBRTX_VAR (iter, array, src, NONCONST)
    if (MEM_P (*iter))
      {
	x = *iter;
	break;
      }

  if (!x)
    return false;

  struct aarch64_address_info addr;
  machine_mode mode = GET_MODE (x);

  if (!aarch64_classify_address (&addr, XEXP (x, 0), mode, true))
    return false;

  unsigned regno = REGNO (addr.base);
  if (global_regs[regno] || fixed_regs[regno])
    return false;

  if (addr.type == ADDRESS_REG_WB)
    {
      unsigned code = GET_CODE (XEXP (x, 0));

      *pre_post = true;
      *base = addr.base;

      if (code == PRE_MODIFY || code == POST_MODIFY)
	*offset = addr.offset;
      else
	{
	  /* Writeback is only supported for fixed-width modes.  */
	  unsigned int_offset = GET_MODE_SIZE (mode).to_constant ();

	  /* For post-incremented load pairs we would increment the base twice
	     over, so make that adjustment.  */
	  if (load_pair && (code == POST_INC || code == POST_DEC))
	    int_offset *= 2;

	  *offset = GEN_INT (int_offset);
	}
      return true;
    }
  else if (addr.type == ADDRESS_REG_IMM || addr.type == ADDRESS_REG_REG)
    {
      /* Check if the load is strided.  */
      if (!iv_p (insn, addr.base, loop))
	return false;

      *base = addr.base;
      *offset = addr.offset;
      return true;
    }

  return false;
}

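/* For reference, the address forms handled above map to AArch64 assembly as
   follows: [x1, #16] is ADDRESS_REG_IMM and [x1, x2] is ADDRESS_REG_REG
   (both additionally require the base to pass IV_P), while writeback forms
   such as [x1], #16 and [x1, #16]! are ADDRESS_REG_WB and are accepted
   directly, with PRE_POST set.  */
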

/* Return true if INSN is a strided load in LOOP.  If it is a strided load, set
   the DEST, BASE and OFFSET.  Also, if this is a pre/post increment load, set
   PRE_POST to true.

   The routine does checks on the destination of the insn and depends on
   VALID_SRC_P to check the source and fill in the BASE and OFFSET.  */
static bool
get_load_info (rtx_insn *insn, struct loop *loop, rtx *dest, rtx *base,
	       rtx *offset, bool *pre_post, bool *ldp)
{
  if (!INSN_P (insn) || recog_memoized (insn) < 0)
    return false;

  rtx pat = PATTERN (insn);
  unsigned code = GET_CODE (pat);
  bool load_pair = (code == PARALLEL);

  /* For a load pair we need only the first base and destination
     registers.  We however need to ensure that our pre/post increment
     offset is doubled; we do that in VALID_SRC_P.  */
  if (load_pair)
    {
      pat = XVECEXP (pat, 0, 0);
      code = GET_CODE (pat);
    }

  if (code != SET)
    return false;

  rtx dest_rtx = SET_DEST (pat);

  if (!REG_P (dest_rtx))
    return false;

  unsigned regno = REGNO (dest_rtx);
  machine_mode mode = GET_MODE (dest_rtx);
  machine_mode inner_mode = GET_MODE_INNER (mode);

  /* Falkor does not support SVE vectors.  */
  if (!GET_MODE_SIZE (mode).is_constant ())
    return false;

  /* Ignore vector struct or lane loads.  */
  if (GET_MODE_SIZE (mode).to_constant ()
      != GET_MODE_SIZE (inner_mode).to_constant ())
    return false;

  /* The largest width we want to bother with is a load of a pair of
     quad-words.  */
  if ((GET_MODE_SIZE (mode).to_constant () << load_pair)
      > GET_MODE_SIZE (OImode))
    return false;

  /* Ignore loads into the stack pointer because it is unlikely to be a
     stream.  */
  if (regno == SP_REGNUM)
    return false;

  if (valid_src_p (SET_SRC (pat), insn, loop, pre_post, base, offset,
		   load_pair))
    {
      *dest = dest_rtx;
      *ldp = load_pair;

      return true;
    }

  return false;
}

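/* As an illustrative sketch, assuming a post-incremented load pair whose RTL
   is a PARALLEL with (set (reg x2) (mem (post_inc (reg x4)))) as its first
   element, e.g.

     ldp x2, x3, [x4], #16

   GET_LOAD_INFO would record DEST = x2, BASE = x4, set LDP and PRE_POST, and
   (via VALID_SRC_P) record the offset as 16, i.e. twice the 8-byte access
   width of a single destination register.  */
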

/* Return whether INSN and CAND are in the same def/use chain.  */
static bool
in_same_chain (rtx_insn *insn, rtx_insn *cand, unsigned regno)
{
  struct du_chain *chain = NULL;
  du_head_p head = NULL;
  int i;

  /* Search the chains where this instruction is (one of) the roots.  */
  operand_rr_info *op_info = insn_rr[INSN_UID (insn)].op_info;

  for (i = 0; i < op_info->n_chains; i++)
    {
      /* The register tracked by this chain does not match the dest register
	 of insn.  */
      if (op_info->heads[i]->regno != regno)
	continue;

      head = op_info->heads[i];
      /* The chain was merged into another; find the new head.  */
      if (!head->first)
	head = regrename_chain_from_id (head->id);

      bool found_insn = false, found_cand = false;

      for (chain = head->first; chain; chain = chain->next_use)
	{
	  rtx *loc = &SET_DEST (PATTERN (chain->insn));

	  if (chain->loc != loc)
	    continue;

	  if (chain->insn == insn)
	    found_insn = true;

	  if (chain->insn == cand)
	    found_cand = true;

	  if (found_insn && found_cand)
	    return true;
	}
    }

  return false;
}


/* Callback function to traverse the tag map and drop loads that have the same
   destination and are in the same def/use chain.  The routine always returns
   true to allow traversal through all of TAG_MAP.  */
bool
single_dest_per_chain (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
		       void *arg ATTRIBUTE_UNUSED)
{
  for (int i = v->length () - 1; i >= 1; i--)
    {
      tag_insn_info *insn_info = (*v)[i];

      for (int j = v->length () - 2; j >= 0; j--)
	{
	  /* Filter out destinations in the same chain.  */
	  if (in_same_chain (insn_info->insn, (*v)[j]->insn,
			     REGNO (insn_info->dest)))
	    {
	      v->ordered_remove (j);
	      i = v->length ();
	      break;
	    }
	}
    }

  return true;
}


/* Callback invoked for each name-value pair (T, INSN_INFO) to dump the insn
   list INSN_INFO for tag T.  */
bool
dump_insn_list (const rtx &t, const insn_info_list_t &insn_info,
		void *unused ATTRIBUTE_UNUSED)
{
  gcc_assert (dump_file);
  fprintf (dump_file, "Tag 0x%lx ::\n", INTVAL (t));

  for (unsigned i = 0; i < insn_info.length (); i++)
    dump_insn_slim (dump_file, insn_info[i]->insn);

  fprintf (dump_file, "\n");

  return true;
}


/* Record all loads in LOOP into TAG_MAP indexed by the falkor hardware
   prefetcher memory tags.  */
static void
record_loads (tag_map_t &tag_map, struct loop *loop)
{
  rtx_insn *insn;
  basic_block *body, bb;

  body = get_loop_body (loop);

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      FOR_BB_INSNS (bb, insn)
	{
	  rtx base = NULL_RTX;
	  rtx dest = NULL_RTX;
	  rtx offset = NULL_RTX;
	  bool writeback = false;
	  bool ldp = false;

	  if (!INSN_P (insn) || DEBUG_INSN_P (insn))
	    continue;

	  if (get_load_info (insn, loop, &dest, &base, &offset, &writeback,
			     &ldp))
	    {
	      tag_insn_info *info = new tag_insn_info (insn, dest, base,
						       offset, writeback, ldp);
	      rtx tag = GEN_INT (info->tag ());
	      tag_map.get_or_insert (tag).safe_push (info);
	    }
	}
    }

  if (dump_file)
    {
      fprintf (dump_file, "Loop %d: Tag map generated.\n", loop->num);
      tag_map.traverse <void *, dump_insn_list> (NULL);
    }

  /* Try to reduce the dataset before launching into the rename attempt.  Drop
     destinations in the same collision chain that appear in the same def/use
     chain, all as defs.  These chains will move together in a rename, so
     there is no point in keeping more than one of them.  */
  tag_map.traverse <void *, single_dest_per_chain> (NULL);
}


/* Tag collision avoidance pass for Falkor.  The pass runs in two phases for
   each loop; the first phase collects all loads that we consider as
   interesting for renaming into a tag-indexed map of lists.  The second phase
   renames the destination register of the loads in an attempt to spread out
   the loads into different tags.  */
void
execute_tag_collision_avoidance ()
{
  struct loop *loop;

  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_compute_regs_ever_live (true);
  df_note_add_problem ();
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  regrename_init (true);
  regrename_analyze (NULL);

  compute_bb_for_insn ();
  calculate_dominance_info (CDI_DOMINATORS);
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      tag_map_t tag_map (512);

      record_loads (tag_map, loop);
      avoid_collisions (tag_map);
      if (dump_file)
	{
	  fprintf (dump_file, "Loop %d: Completed rename.\n", loop->num);
	  tag_map.traverse <void *, dump_insn_list> (NULL);
	}
      tag_map.traverse <void *, free_insn_info> (NULL);
    }

  loop_optimizer_finalize ();
  free_dominance_info (CDI_DOMINATORS);
  regrename_finish ();
}


const pass_data pass_data_tag_collision_avoidance =
{
  RTL_PASS, /* type */
  "tag_collision_avoidance", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};


class pass_tag_collision_avoidance : public rtl_opt_pass
{
public:
  pass_tag_collision_avoidance (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_tag_collision_avoidance, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return ((aarch64_tune_params.extra_tuning_flags
	       & AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS)
	      && optimize >= 2);
    }

  virtual unsigned int execute (function *)
    {
      execute_tag_collision_avoidance ();
      return 0;
    }

}; // class pass_tag_collision_avoidance


/* Create a new pass instance.  */
rtl_opt_pass *
make_pass_tag_collision_avoidance (gcc::context *ctxt)
{
  return new pass_tag_collision_avoidance (ctxt);
}