/* Tag Collision Avoidance pass for Falkor.
   Copyright (C) 2018-2024 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_LIST
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "tree-pass.h"
#include "aarch64-protos.h"
#include "hash-map.h"
#include "cfgloop.h"
#include "cfgrtl.h"
#include "rtl-iter.h"
#include "df.h"
#include "memmodel.h"
#include "optabs.h"
#include "regs.h"
#include "recog.h"
#include "function-abi.h"
#include "regrename.h"
#include "print-rtl.h"

/* The Falkor hardware prefetching system uses the encoding of the registers
   and offsets of loads to decide which of the multiple hardware prefetchers
   to assign the load to.  This has the positive effect of accelerating
   prefetches when all related loads with uniform strides are assigned to the
   same prefetcher unit.  The downside is that because of the way the
   assignment works, multiple unrelated loads may end up on the same prefetch
   unit, thus causing the unit to bounce between different sets of addresses
   and never train correctly.  The point of this pass is to avoid such
   collisions so that unrelated loads are spread out to different prefetchers.
   It also makes a rudimentary attempt to ensure that related loads with the
   same tags don't get moved out unnecessarily.

   Perhaps a future enhancement would be to make a more concerted attempt to
   get related loads under the same tag.  See the memcpy/memset implementation
   for Falkor in glibc to understand the kind of impact this can have on
   Falkor.

   The assignment of loads is based on a tag that is computed from the
   encoding of the first destination register (the only destination in case of
   LDR), the base register and the offset (either the register or the
   immediate value, as encoded in the instruction).  This is what the 14-bit
   tag looks like:

	|<- 6 bits ->|<- 4b ->|<- 4b ->|
	--------------------------------
	|   OFFSET   |  SRC   |  DST   |
	--------------------------------

   For all cases, the SRC and DST are the 4 LSB of the encoding of the
   register in the instruction.  Offset computation is more involved and is as
   follows:

   - For register offset addressing: 4 LSB of the offset register with the MSB
     of the 6 bits set to 1.

   - For immediate offset: 4 LSB of the encoded immediate offset.  The
     encoding depends on the width of the load and is expressed as multiples
     of the width.

   - For loads with update: 4 LSB of the offset.  The encoding here is the
     exact number by which the base is offset and incremented.

   Based on the above it is clear that registers 0 and 16 will result in
   collisions, as will 1 and 17, and so on.  This pass detects such collisions
   within a def/use chain of the source register in a loop and tries to
   resolve the collision by renaming one of the destination registers.  */
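
/* A worked example, following the arithmetic in tag_insn_info::tag below
   (the instruction and values are illustrative, not taken from real output):

     ldr x2, [x5, #256]	; DImode load, no writeback

   DST = 2 (4 LSB of x2) and SRC = 5 (4 LSB of x5).  The immediate is scaled
   to 256 / 8 = 32 (multiples of the 8-byte width) and, in the non-writeback
   case, shifted right by 2, giving an offset field of 8.  The tag is
   therefore

     (8 << 8) | (5 << 4) | 2 == 0x852

   "ldr x18, [x5, #256]" yields the same tag because 18 and 2 share their 4
   LSB, so the two loads would collide.  */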

/* Get the destination part of the tag.  */
#define TAG_GET_DEST(__tag) ((__tag) & 0xf)

/* Get the tag with the destination part updated.  */
#define TAG_UPDATE_DEST(__tag, __dest) (((__tag) & ~0xf) | ((__dest) & 0xf))
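
/* For instance, TAG_UPDATE_DEST (0x2345, 0x17) yields 0x2347: only the 4 LSB
   of the new destination replace the old ones (illustrative values).  */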

/* Strides beyond this size are considered too long for the hardware
   prefetcher to train on.  */
#define MAX_PREFETCH_STRIDE 2048

/* The instruction information structure.  This is used to cache information
   about the INSN that we derive when traversing through all of the insns in
   loops.  */
class tag_insn_info
{
public:
  rtx_insn *insn;
  rtx dest;
  rtx base;
  rtx offset;
  bool writeback;
  bool ldp;

  tag_insn_info (rtx_insn *i, rtx d, rtx b, rtx o, bool w, bool p)
    : insn (i), dest (d), base (b), offset (o), writeback (w), ldp (p)
  {}

  /* Compute the tag based on BASE, DEST and OFFSET of the load.  */
  unsigned tag ()
    {
      unsigned int_offset = 0;
      rtx offset = this->offset;
      unsigned dest = REGNO (this->dest);
      unsigned base = REGNO (this->base);
      machine_mode dest_mode = GET_MODE (this->dest);

      /* Falkor does not support SVE; GET_LOAD_INFO ensures that the
	 destination mode is constant here.  */
      unsigned dest_mode_size = GET_MODE_SIZE (dest_mode).to_constant ();

      /* For loads wider than 16 bytes, the DEST part of the tag is 0.  */
      if ((dest_mode_size << this->ldp) > 16)
	dest = 0;

      if (offset && REG_P (offset))
	int_offset = (1 << 5) | REGNO (offset);
      else if (offset && CONST_INT_P (offset))
	{
	  int_offset = INTVAL (offset);
	  int_offset /= dest_mode_size;
	  if (!this->writeback)
	    int_offset >>= 2;
	}
      return ((dest & 0xf)
	      | ((base & 0xf) << 4)
	      | ((int_offset & 0x3f) << 8));
    }
};

/* Hash map to traverse and process instructions with colliding tags.  */
typedef hash_map <rtx, auto_vec <tag_insn_info *> > tag_map_t;

/* Vector of instructions with colliding tags.  */
typedef auto_vec <tag_insn_info *> insn_info_list_t;

/* Pair of instruction information and unavailable register set to pass to
   CHECK_COLLIDING_TAGS.  */
typedef std::pair <tag_insn_info *, HARD_REG_SET *> arg_pair_t;


/* Callback to free all tag_insn_info objects.  */
bool
free_insn_info (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
		void *arg ATTRIBUTE_UNUSED)
{
  while (v->length () > 0)
    delete v->pop ();

  return true;
}


/* Add all aliases of the register to the unavailable register set.  REG is
   the smallest register number among the aliases, so passing it covers all of
   them.  UNAVAILABLE is the hard register set to add the ignored register
   numbers to and MODE is the mode in which the registers would have been
   used.  */
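/* For instance, with REG 3 this marks registers 3, 19, 35 and 51 as
   unavailable, since all of them share the same 4 LSB in their encodings.  */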
static void
ignore_all_aliases (HARD_REG_SET *unavailable, machine_mode mode, unsigned reg)
{
  add_to_hard_reg_set (unavailable, mode, reg);
  add_to_hard_reg_set (unavailable, mode, reg + 16);
  add_to_hard_reg_set (unavailable, mode, reg + 32);
  add_to_hard_reg_set (unavailable, mode, reg + 48);
}


/* Callback to check which destination registers are unavailable to us for
   renaming because of the base and offset colliding.  This is a callback that
   gets called for every name-value pair (T, V) in the TAG_MAP.  The ARG is an
   std::pair of the tag_insn_info of the original insn and the hard register
   set UNAVAILABLE that is used to record hard register numbers that cannot be
   used for the renaming.  This always returns true since we want to traverse
   through the entire TAG_MAP.  */
bool
check_colliding_tags (const rtx &t, const insn_info_list_t &v, arg_pair_t *arg)
{
  HARD_REG_SET *unavailable = arg->second;
  unsigned orig_tag = arg->first->tag ();
  unsigned tag = INTVAL (t);
  machine_mode mode = GET_MODE (arg->first->dest);

  /* Can't collide with emptiness.  */
  if (v.length () == 0)
    return true;

  /* Drop all aliased destination registers that result in the same
     tag.  It is not necessary to drop all of them but we do anyway
     because it is quicker than checking ranges.  */
  if (TAG_UPDATE_DEST (tag, 0) == TAG_UPDATE_DEST (orig_tag, 0))
    ignore_all_aliases (unavailable, mode, TAG_GET_DEST (tag));

  return true;
}


/* Initialize and build a set of hard register numbers UNAVAILABLE to avoid
   for renaming.  INSN_INFO is the original insn, TAG_MAP is the map of the
   list of insns indexed by their tags, HEAD is the def/use chain head of the
   destination register of the original insn.  The routine returns the
   superunion of the register classes that may be used during the renaming.  */
static enum reg_class
init_unavailable (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head,
		  HARD_REG_SET *unavailable)
{
  unsigned dest = head->regno;
  enum reg_class super_class = NO_REGS;
  machine_mode mode = GET_MODE (insn_info->dest);

  CLEAR_HARD_REG_SET (*unavailable);

  for (struct du_chain *tmp = head->first; tmp; tmp = tmp->next_use)
    {
      if (DEBUG_INSN_P (tmp->insn))
	continue;

      *unavailable |= ~reg_class_contents[tmp->cl];
      super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
    }

  for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (fixed_regs[i] || global_regs[i])
      add_to_hard_reg_set (unavailable, mode, i);

  arg_pair_t arg = arg_pair_t (insn_info, unavailable);

  /* Exclude all registers that would lead to collisions with other loads.  */
  tag_map.traverse <arg_pair_t *, check_colliding_tags> (&arg);

  /* Finally, also ignore all aliases of the current reg.  */
  ignore_all_aliases (unavailable, mode, dest & 0xf);

  return super_class;
}


/* Find a suitable and available register and rename the chain of occurrences
   of the register defined in the def/use chain headed by HEAD in which INSN
   exists.  INSN_INFO and TAG_MAP are used to determine which registers are
   unavailable because they would collide after the rename.  The routine
   returns the register number in case of a successful rename or -1 to
   indicate failure.  */
static int
rename_chain (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head)
{
  unsigned dest_regno = head->regno;

  if (head->cannot_rename || head->renamed)
    return -1;

  HARD_REG_SET unavailable;

  enum reg_class super_class = init_unavailable (insn_info, tag_map, head,
						 &unavailable);

  unsigned new_regno = find_rename_reg (head, super_class, &unavailable,
					dest_regno, false);

  /* Attempt to rename as long as regrename doesn't just throw the same
     register at us.  */
  if (new_regno != dest_regno && regrename_do_replace (head, new_regno))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "\tInsn %d: Renamed %d to %d\n",
		 INSN_UID (insn_info->insn), dest_regno, new_regno);

      return new_regno;
    }

  return -1;
}


/* Return true if REGNO is not safe to rename.  */
static bool
unsafe_rename_p (unsigned regno)
{
  /* Avoid renaming registers used for argument passing and the return value.
     In the future we could be a little less conservative and walk through the
     basic blocks to see if there are any call or syscall sites.  */
  if (regno <= R8_REGNUM
      || (regno >= V0_REGNUM && regno < V8_REGNUM))
    return true;

  /* Don't attempt to rename registers that may have specific meanings.  */
  switch (regno)
    {
    case LR_REGNUM:
    case HARD_FRAME_POINTER_REGNUM:
    case FRAME_POINTER_REGNUM:
    case STACK_POINTER_REGNUM:
      return true;
    }

  return false;
}


/* Go through the def/use chains for the register and find the chain for this
   insn to rename.  The function returns the hard register number in case of a
   successful rename and -1 otherwise.  */
static int
rename_dest (tag_insn_info *insn_info, tag_map_t &tag_map)
{
  struct du_chain *chain = NULL;
  du_head_p head = NULL;
  int i;

  unsigned dest_regno = REGNO (insn_info->dest);

  if (unsafe_rename_p (dest_regno))
    return -1;

  /* Search the chain where this instruction is (one of) the root.  */
  rtx_insn *insn = insn_info->insn;
  operand_rr_info *dest_op_info = insn_rr[INSN_UID (insn)].op_info;

  for (i = 0; i < dest_op_info->n_chains; i++)
    {
      /* The register tracked by this chain does not match the
	 destination register of insn.  */
      if (dest_op_info->heads[i]->regno != dest_regno)
	continue;

      head = dest_op_info->heads[i];
      /* The chain was merged in another, find the new head.  */
      if (!head->first)
	head = regrename_chain_from_id (head->id);

      for (chain = head->first; chain; chain = chain->next_use)
	/* Found the insn in the chain, so try renaming the register in this
	   chain.  */
	if (chain->insn == insn)
	  return rename_chain (insn_info, tag_map, head);
    }

  return -1;
}


/* Flag to track if the map has changed.  */
static bool map_changed = false;

/* The actual reallocation logic.  For each vector of collisions V, try to
   resolve the collision by attempting to rename the destination register of
   all but one of the loads.  This is a callback that is invoked for each
   name-value pair (T, V) in TAG_MAP.  The function returns true as long as
   the map remains unchanged; after a successful rename it returns false to
   halt the traversal, since the rename has modified the map.  */
bool
avoid_collisions_1 (const rtx &t, insn_info_list_t *v, tag_map_t *tag_map)
{
  /* We need at least two loads to cause a tag collision, return unchanged.  */
  if (v->length () < 2)
    return true;

  tag_insn_info *vec_start = v->pop ();
  tag_insn_info *insn_info = vec_start;

  /* Try to rename at least one register to reduce the collision.  If we
     iterate all the way through, we end up dropping one of the loads from the
     list.  This is fine because we want at most one element to ensure that a
     subsequent rename attempt does not end up worsening the collision.  */
  do
    {
      int new_regno;

      if ((new_regno = rename_dest (insn_info, *tag_map)) != -1)
	{
	  rtx new_tag = GEN_INT (TAG_UPDATE_DEST (INTVAL (t), new_regno));

	  tag_map->get_or_insert (new_tag).safe_push (insn_info);
	  df_set_regs_ever_live (new_regno, true);
	  map_changed = true;
	  return false;
	}

      v->safe_insert (0, insn_info);
      insn_info = v->pop ();
    }
  while (insn_info != vec_start);

  if (dump_file)
    fprintf (dump_file, "\t>> Failed to rename destination in insn %d\n\t>>",
	     INSN_UID (insn_info->insn));

  /* Drop the last element and move on to the next tag.  */
  delete insn_info;
  return true;
}


/* For each set of collisions, attempt to rename the registers or insert a
   move to avoid the collision.  We repeatedly traverse through TAG_MAP using
   AVOID_COLLISIONS_1 trying to rename registers to avoid collisions until a
   full traversal results in no change in the map.  */
static void
avoid_collisions (tag_map_t &tag_map)
{
  do
    {
      map_changed = false;
      tag_map.traverse <tag_map_t *, avoid_collisions_1> (&tag_map);
    }
  while (map_changed);
}
\f


/* Find the use-def chain in which INSN exists and then see if there is a
   definition inside the loop and outside it.  We use this as a simple
   approximation to determine whether the base register is an IV.  The basic
   idea is to find INSN in the use-def chains for its base register and find
   all definitions that reach it.  Of all these definitions, there should be
   at least one definition that is a simple addition of a constant value,
   either as a binary operation or a pre or post update.

   The function returns true if the base register is estimated to be an IV.  */
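/* For example (illustrative RTL, not from a real dump), a reaching
   definition such as

     (set (reg:DI x1) (plus:DI (reg:DI x1) (const_int 64)))

   inside the loop marks x1 as a likely IV with a constant 64-byte stride,
   whereas a definition that loads x1 from memory does not.  */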
static bool
iv_p (rtx_insn *insn, rtx reg, struct loop *loop)
{
  df_ref ause;
  unsigned regno = REGNO (reg);

  /* Ignore loads from the stack.  */
  if (regno == SP_REGNUM)
    return false;

  for (ause = DF_REG_USE_CHAIN (regno); ause; ause = DF_REF_NEXT_REG (ause))
    {
      if (!DF_REF_INSN_INFO (ause)
	  || !NONDEBUG_INSN_P (DF_REF_INSN (ause)))
	continue;

      if (insn != DF_REF_INSN (ause))
	continue;

      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      df_ref def_rec;

      FOR_EACH_INSN_INFO_DEF (def_rec, insn_info)
	{
	  rtx_insn *insn = DF_REF_INSN (def_rec);
	  basic_block bb = BLOCK_FOR_INSN (insn);

	  if (dominated_by_p (CDI_DOMINATORS, bb, loop->header)
	      && bb->loop_father == loop)
	    {
	      if (recog_memoized (insn) < 0)
		continue;

	      rtx pat = PATTERN (insn);

	      /* Prefetch or clobber; unlikely to be a constant stride.  The
		 Falkor software prefetcher tuning is pretty conservative, so
		 its presence indicates that the access pattern is probably
		 strided but most likely with an unknown stride size or a
		 stride size that is quite large.  */
	      if (GET_CODE (pat) != SET)
		continue;

	      rtx x = SET_SRC (pat);
	      if (GET_CODE (x) == ZERO_EXTRACT
		  || GET_CODE (x) == ZERO_EXTEND
		  || GET_CODE (x) == SIGN_EXTEND)
		x = XEXP (x, 0);

	      /* Loading the value from memory; unlikely to be a constant
		 stride.  */
	      if (MEM_P (x))
		continue;

	      /* An increment or decrement by a constant MODE_SIZE amount or
		 the result of a binary expression is likely to be an IV.  */
	      if (GET_CODE (x) == POST_INC
		  || GET_CODE (x) == POST_DEC
		  || GET_CODE (x) == PRE_INC
		  || GET_CODE (x) == PRE_DEC)
		return true;
	      else if (BINARY_P (x)
		       && (CONST_INT_P (XEXP (x, 0))
			   || CONST_INT_P (XEXP (x, 1))))
		{
		  rtx stride = (CONST_INT_P (XEXP (x, 0))
				? XEXP (x, 0) : XEXP (x, 1));

		  /* Don't bother with very long strides because the
		     prefetcher is unable to train on them anyway.  */
		  if (INTVAL (stride) < MAX_PREFETCH_STRIDE)
		    return true;
		}
	    }
	}
      return false;
    }
  return false;
}


/* Return true if SRC is a strided load in the LOOP, false otherwise.
   If it is a strided load, set the BASE and OFFSET.  Also, if this is
   a pre/post increment load, set PRE_POST to true.  */
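/* For example (illustrative), a post-incremented load pair such as
   "ldp x0, x1, [x2], #16" accesses DImode elements, so the single-element
   offset of 8 is doubled to 16 below to match the amount by which the base
   register actually advances.  */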
static bool
valid_src_p (rtx src, rtx_insn *insn, struct loop *loop, bool *pre_post,
	     rtx *base, rtx *offset, bool load_pair)
{
  subrtx_var_iterator::array_type array;
  rtx x = NULL_RTX;

  FOR_EACH_SUBRTX_VAR (iter, array, src, NONCONST)
    if (MEM_P (*iter))
      {
	x = *iter;
	break;
      }

  if (!x)
    return false;

  struct aarch64_address_info addr;
  machine_mode mode = GET_MODE (x);

  if (!aarch64_classify_address (&addr, XEXP (x, 0), mode, true))
    return false;

  if (addr.type != ADDRESS_REG_IMM
      && addr.type != ADDRESS_REG_WB
      && addr.type != ADDRESS_REG_REG
      && addr.type != ADDRESS_REG_UXTW
      && addr.type != ADDRESS_REG_SXTW)
    return false;

  unsigned regno = REGNO (addr.base);
  if (global_regs[regno] || fixed_regs[regno])
    return false;

  if (addr.type == ADDRESS_REG_WB)
    {
      unsigned code = GET_CODE (XEXP (x, 0));

      *pre_post = true;
      *base = addr.base;

      if (code == PRE_MODIFY || code == POST_MODIFY)
	*offset = addr.offset;
      else
	{
	  /* Writeback is only supported for fixed-width modes.  */
	  unsigned int_offset = GET_MODE_SIZE (mode).to_constant ();

	  /* For post-incremented load pairs we would increment the base twice
	     over, so make that adjustment.  */
	  if (load_pair && (code == POST_INC || code == POST_DEC))
	    int_offset *= 2;

	  *offset = GEN_INT (int_offset);
	}
      return true;
    }
  else if (addr.type == ADDRESS_REG_IMM || addr.type == ADDRESS_REG_REG)
    {
      /* Check if the load is strided.  */
      if (!iv_p (insn, addr.base, loop))
	return false;

      *base = addr.base;
      *offset = addr.offset;
      return true;
    }

  return false;
}


/* Return true if INSN is a strided load in LOOP.  If it is a strided load,
   set the DEST, BASE and OFFSET.  Also, if this is a pre/post increment load,
   set PRE_POST to true.

   The routine does checks on the destination of the insn and depends on
   VALID_SRC_P to check the source and fill in the BASE and OFFSET.  */
static bool
get_load_info (rtx_insn *insn, struct loop *loop, rtx *dest, rtx *base,
	       rtx *offset, bool *pre_post, bool *ldp)
{
  if (!INSN_P (insn) || recog_memoized (insn) < 0)
    return false;

  rtx pat = PATTERN (insn);
  unsigned code = GET_CODE (pat);
  bool load_pair = (code == PARALLEL);

  /* For a load pair we need only the first base and destination
     registers.  We however need to ensure that our pre/post increment
     offset is doubled; we do that in VALID_SRC_P.  */
  if (load_pair)
    {
      pat = XVECEXP (pat, 0, 0);
      code = GET_CODE (pat);
    }

  if (code != SET)
    return false;

  rtx dest_rtx = SET_DEST (pat);

  if (!REG_P (dest_rtx))
    return false;

  unsigned regno = REGNO (dest_rtx);
  machine_mode mode = GET_MODE (dest_rtx);
  machine_mode inner_mode = GET_MODE_INNER (mode);

  /* Falkor does not support SVE vectors.  */
  if (!GET_MODE_SIZE (mode).is_constant ())
    return false;

  /* Ignore vector struct or lane loads.  */
  if (GET_MODE_SIZE (mode).to_constant ()
      != GET_MODE_SIZE (inner_mode).to_constant ())
    return false;

  /* The largest width we want to bother with is a load of a pair of
     quad-words.  */
  if ((GET_MODE_SIZE (mode).to_constant () << load_pair)
      > GET_MODE_SIZE (OImode))
    return false;

  /* Ignore loads into the stack pointer because it is unlikely to be a
     stream.  */
  if (regno == SP_REGNUM)
    return false;

  if (valid_src_p (SET_SRC (pat), insn, loop, pre_post, base, offset,
		   load_pair))
    {
      *dest = dest_rtx;
      *ldp = load_pair;

      return true;
    }

  return false;
}


/* Return whether INSN and CAND are in the same def/use chain.  */
static bool
in_same_chain (rtx_insn *insn, rtx_insn *cand, unsigned regno)
{
  struct du_chain *chain = NULL;
  du_head_p head = NULL;
  int i;

  /* Search the chain where this instruction is (one of) the root.  */
  operand_rr_info *op_info = insn_rr[INSN_UID (insn)].op_info;

  for (i = 0; i < op_info->n_chains; i++)
    {
      /* The register tracked by this chain does not match the
	 dest register of insn.  */
      if (op_info->heads[i]->regno != regno)
	continue;

      head = op_info->heads[i];
      /* The chain was merged in another, find the new head.  */
      if (!head->first)
	head = regrename_chain_from_id (head->id);

      bool found_insn = false, found_cand = false;

      for (chain = head->first; chain; chain = chain->next_use)
	{
	  rtx *loc = &SET_DEST (PATTERN (chain->insn));

	  if (chain->loc != loc)
	    continue;

	  if (chain->insn == insn)
	    found_insn = true;

	  if (chain->insn == cand)
	    found_cand = true;

	  if (found_insn && found_cand)
	    return true;
	}
    }

  return false;
}


/* Callback function to traverse the tag map and drop loads that have the same
   destination and are in the same chain of occurrence.  The routine always
   returns true to allow traversal through all of TAG_MAP.  */
bool
single_dest_per_chain (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
		       void *arg ATTRIBUTE_UNUSED)
{
  for (int i = v->length () - 1; i >= 1; i--)
    {
      tag_insn_info *insn_info = (*v)[i];

      for (int j = v->length () - 2; j >= 0; j--)
	{
	  /* Filter out destinations in the same chain.  */
	  if (in_same_chain (insn_info->insn, (*v)[j]->insn,
			     REGNO (insn_info->dest)))
	    {
	      v->ordered_remove (j);
	      i = v->length ();
	      break;
	    }
	}
    }

  return true;
}


/* Callback invoked for each name-value pair (T, INSN_INFO) to dump the insn
   list INSN_INFO for tag T.  */
bool
dump_insn_list (const rtx &t, const insn_info_list_t &insn_info,
		void *unused ATTRIBUTE_UNUSED)
{
  gcc_assert (dump_file);
  fprintf (dump_file, "Tag 0x" HOST_WIDE_INT_PRINT_HEX_PURE " ::\n",
	   INTVAL (t));

  for (unsigned i = 0; i < insn_info.length (); i++)
    dump_insn_slim (dump_file, insn_info[i]->insn);

  fprintf (dump_file, "\n");

  return true;
}


/* Record all loads in LOOP into TAG_MAP indexed by the Falkor hardware
   prefetcher memory tags.  */
static void
record_loads (tag_map_t &tag_map, struct loop *loop)
{
  rtx_insn *insn;
  basic_block *body, bb;

  body = get_loop_body (loop);

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      FOR_BB_INSNS (bb, insn)
	{
	  rtx base = NULL_RTX;
	  rtx dest = NULL_RTX;
	  rtx offset = NULL_RTX;
	  bool writeback = false;
	  bool ldp = false;

	  if (!INSN_P (insn) || DEBUG_INSN_P (insn))
	    continue;

	  if (get_load_info (insn, loop, &dest, &base, &offset, &writeback,
			     &ldp))
	    {
	      tag_insn_info *i = new tag_insn_info (insn, dest, base, offset,
						    writeback, ldp);
	      rtx tag = GEN_INT (i->tag ());
	      tag_map.get_or_insert (tag).safe_push (i);
	    }
	}
    }

  if (dump_file)
    {
      fprintf (dump_file, "Loop %d: Tag map generated.\n", loop->num);
      tag_map.traverse <void *, dump_insn_list> (NULL);
    }

  /* Try to reduce the dataset before launching into the rename attempt.  Drop
     destinations in the same collision chain that appear in the same def/use
     chain, all as defs.  These chains will move together in a rename so
     there's no point in keeping both in there.  */
  tag_map.traverse <void *, single_dest_per_chain> (NULL);
}


/* Tag collision avoidance pass for Falkor.  The pass runs in two phases for
   each loop; the first phase collects all loads that we consider as
   interesting for renaming into a tag-indexed map of lists.  The second phase
   renames the destination register of the loads in an attempt to spread out
   the loads into different tags.  */
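/* For instance (assuming the Falkor tunings set
   AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS, which the gate below checks), the pass
   can be exercised and its per-loop tag maps dumped with something like:

     gcc -O2 -mcpu=falkor -fdump-rtl-tag_collision_avoidance test.c  */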
void
execute_tag_collision_avoidance ()
{
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_compute_regs_ever_live (true);
  df_note_add_problem ();
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  regrename_init (true);
  regrename_analyze (NULL);

  compute_bb_for_insn ();
  calculate_dominance_info (CDI_DOMINATORS);
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      tag_map_t tag_map (512);

      record_loads (tag_map, loop);
      avoid_collisions (tag_map);
      if (dump_file)
	{
	  fprintf (dump_file, "Loop %d: Completed rename.\n", loop->num);
	  tag_map.traverse <void *, dump_insn_list> (NULL);
	}
      tag_map.traverse <void *, free_insn_info> (NULL);
    }

  loop_optimizer_finalize ();
  free_dominance_info (CDI_DOMINATORS);
  regrename_finish ();
}


const pass_data pass_data_tag_collision_avoidance =
{
  RTL_PASS, /* type */
  "tag_collision_avoidance", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};


class pass_tag_collision_avoidance : public rtl_opt_pass
{
public:
  pass_tag_collision_avoidance (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_tag_collision_avoidance, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return ((aarch64_tune_params.extra_tuning_flags
	       & AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS)
	      && optimize >= 2);
    }

  virtual unsigned int execute (function *)
    {
      execute_tag_collision_avoidance ();
      return 0;
    }

}; // class pass_tag_collision_avoidance


/* Create a new pass instance.  */
rtl_opt_pass *
make_pass_tag_collision_avoidance (gcc::context *ctxt)
{
  return new pass_tag_collision_avoidance (ctxt);
}