/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2013 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "target-def.h"
#include "targhooks.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "config/arm/aarch-cost-tables.h"
/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
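
/* For example, POINTER_BYTES is 8 under the default LP64 ABI and 4 under
   ILP32, where pointers are 32 bits wide.  */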
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};
struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};
struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};
/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif
static bool aarch64_lra_p (void);
static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
						     const_tree,
						     enum machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
					      HOST_WIDE_INT, HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
						 const unsigned char *sel);
/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;
/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif
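
/* Illustrative expansion: with designated initializers available,
   NAMED_PARAM (GP2GP, 1) becomes ".GP2GP = (1)"; otherwise it degrades to
   the positional "(1)", so the initializers below must be kept in
   field-declaration order.  */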
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  /* We currently do not provide direct support for TFmode Q->Q move.
     Therefore we need to raise the cost above 2 in order to have
     reload handle the situation.  */
  NAMED_PARAM (FP2FP, 4)
};
/* Generic costs for vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost generic_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 1),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 1),
  NAMED_PARAM (vec_to_scalar_cost, 1),
  NAMED_PARAM (scalar_to_vec_cost, 1),
  NAMED_PARAM (vec_align_load_cost, 1),
  NAMED_PARAM (vec_unalign_load_cost, 1),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 3),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &generic_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4)
};
/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8,
   &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Target specification.  These are populated as commandline arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;
#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};
/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};
/* Used to track the size of an address when generating a pre/post
   increment address.  */
static enum machine_mode aarch64_memory_reference_mode;
/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;
/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS 5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
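
/* A "bitmask immediate" is the class of values the logical instructions
   (AND/ORR/EOR) can encode directly: a replicated, rotated run of
   contiguous set bits.  For example, 0x00ff00ff00ff00ff (a repeated 16-bit
   element pattern) is encodable, while an arbitrary value such as
   0x12345678 is not; 5334 is the number of distinct valid 64-bit values of
   this form.  */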
/* Did we set flag_omit_frame_pointer just so
   aarch64_frame_pointer_required would be called?  */
static bool faked_omit_frame_pointer;
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
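
/* The encoding pairs each condition with its inverse in bit 0, so XOR-ing
   with 1 flips the test: AARCH64_EQ (0) <-> AARCH64_NE (1),
   AARCH64_GE (10) <-> AARCH64_LT (11), and so on.  */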
/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}
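
/* Under this mapping x0-x30 get DWARF numbers 0-30, sp gets 31, and
   v0-v31 get 64-95; e.g. v3 maps to AARCH64_DWARF_V0 + 3 = 67.  */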
/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}
/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (enum machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}
/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
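
/* For example, an array of three V4SImode vectors is given CImode (a
   48-byte mode), which keeps the operands of the LD3/ST3 structure
   load/store patterns together as a single value.  */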
/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
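
/* For example, a 32-byte OImode value needs 2 registers in the FP/SIMD
   file (UNITS_PER_VREG == 16) but 4 registers in the general file
   (UNITS_PER_WORD == 8).  */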
/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}
/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
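
/* A worked example, in DImode: mult_imm == 8 and extract_imm == 35
   satisfy the test, since 35 = 32 + 3 describes a 32-bit value with a
   left shift of 3, and 8 == 1 << 3 is the matching multiplier.  */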
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}
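
/* Typical use (illustrative): wrap the result in a comparison against
   zero, e.g. gen_rtx_NE (VOIDmode, cc_reg, const0_rtx), to feed a
   conditional-branch or conditional-set pattern.  */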
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add   dest, x1, :lo_12:foo

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
				     bl   __tls_get_addr
				     nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp
   mrs  tp, tpidr_el0
   add  dest, dest, tp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm
   add  t0, #:tprel_lo12_nc:imm
*/
static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode.  */
	rtx tmp_reg = dest;
	enum machine_mode mode = GET_MODE (dest);

	gcc_assert (mode == Pmode || mode == ptr_mode);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (Pmode, dest, imm));
      return;

    case SYMBOL_SMALL_GOT:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */
	rtx tmp_reg = dest;
	enum machine_mode mode = GET_MODE (dest);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
	    else
	      emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
	  }

	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx insns;
	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

	start_sequence ();
	emit_call_insn (gen_tlsgd_small (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
	rtx tp;

	emit_insn (gen_tlsdesc_small (imm));
	tp = aarch64_load_tp (NULL);
	emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
	rtx tmp_reg = gen_reg_rtx (Pmode);
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsie_small (tmp_reg, imm));
	emit_insn (gen_rtx_SET (Pmode, dest,
				gen_rtx_PLUS (Pmode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TPREL:
      {
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsle_small (dest, tp, imm));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    default:
      gcc_unreachable ();
    }
}
/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx low_dst;

  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);
  int src_regno = REGNO (src);
  int dst_regno = REGNO (dst);

  gcc_assert (dst_mode == TImode || dst_mode == TFmode);

  if (REG_P (dst) && REG_P (src))
    {
      gcc_assert (src_mode == TImode || src_mode == TFmode);

      /* Handle r -> w, w -> r.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  switch (src_mode)
	    {
	    case TImode:
	      emit_insn
		(gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
	      emit_insn
		(gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
	      return;
	    case TFmode:
	      emit_insn
		(gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
	      emit_insn
		(gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
	      return;
	    default:
	      gcc_unreachable ();
	    }
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  switch (src_mode)
	    {
	    case TImode:
	      emit_insn
		(gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
	      emit_insn
		(gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
	      return;
	    case TFmode:
	      emit_insn
		(gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
	      emit_insn
		(gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
	      return;
	    default:
	      gcc_unreachable ();
	    }
	}
      /* Fall through to r -> r cases.  */
    }

  switch (dst_mode)
    {
    case TImode:
      low_dst = gen_lowpart (word_mode, dst);
      if (REG_P (low_dst)
	  && reg_overlap_mentioned_p (low_dst, src))
	{
	  aarch64_emit_move (gen_highpart (word_mode, dst),
			     gen_highpart_mode (word_mode, TImode, src));
	  aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
	}
      else
	{
	  aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
	  aarch64_emit_move (gen_highpart (word_mode, dst),
			     gen_highpart_mode (word_mode, TImode, src));
	}
      return;
    case TFmode:
      emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
		      gen_rtx_REG (DFmode, src_regno));
      emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
		      gen_rtx_REG (DFmode, src_regno + 1));
      return;
    default:
      gcc_unreachable ();
    }
}

static bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}
/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  enum machine_mode src_mode = GET_MODE (src1);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
	{
	case V8QImode:
	  gen = gen_aarch64_simd_combinev8qi;
	  break;
	case V4HImode:
	  gen = gen_aarch64_simd_combinev4hi;
	  break;
	case V2SImode:
	  gen = gen_aarch64_simd_combinev2si;
	  break;
	case V2SFmode:
	  gen = gen_aarch64_simd_combinev2sf;
	  break;
	case DImode:
	  gen = gen_aarch64_simd_combinedi;
	  break;
	case DFmode:
	  gen = gen_aarch64_simd_combinedf;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src1, src2));
      return;
    }
}
/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
      return;
    }
}
static rtx
aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}
static rtx
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg,
		    HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
	 might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}
void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  enum machine_mode mode = GET_MODE (dest);
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match;

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
	 before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
      switch (sty)
	{
	case SYMBOL_FORCE_TO_MEM:
	  if (offset != const0_rtx
	      && targetm.cannot_force_const_mem (mode, imm))
	    {
	      gcc_assert(can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  mem = force_const_mem (ptr_mode, imm);
	  gcc_assert (mem);
	  if (mode != ptr_mode)
	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	  return;

	case SYMBOL_SMALL_TLSGD:
	case SYMBOL_SMALL_TLSDESC:
	case SYMBOL_SMALL_GOTTPREL:
	case SYMBOL_SMALL_GOT:
	case SYMBOL_TINY_GOT:
	  if (offset != const0_rtx)
	    {
	      gcc_assert(can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  /* FALLTHRU */

	case SYMBOL_SMALL_TPREL:
	case SYMBOL_SMALL_ABSOLUTE:
	case SYMBOL_TINY_ABSOLUTE:
	  aarch64_load_symref_appropriately (dest, imm, sty);
	  return;

	default:
	  gcc_unreachable ();
	}
    }

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      return;
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      else
	{
	  rtx mem = force_const_mem (mode, imm);
	  gcc_assert (mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	}

      return;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
	 in two; so don't mess around looking for sequences that don't buy
	 us anything.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			      GEN_INT (INTVAL (imm) & 0xffff)));
      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
				 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
      return;
    }

  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == 0)
	zero_match++;
      else if ((val & mask) == mask)
	one_match++;
    }

  if (one_match == 2)
    {
      mask = 0xffff;
      for (i = 0; i < 64; i += 16, mask <<= 16)
	{
	  if ((val & mask) != mask)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					 GEN_INT ((val >> i) & 0xffff)));
	      return;
	    }
	}
      gcc_unreachable ();
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (val & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val + comp) & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val + comp) & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val - comp) | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val - comp) | ~mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (val | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val | ~mask))));
	  return;
	}
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (aarch64_bitmasks[i])));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - aarch64_bitmasks[i])));
	  return;
	}

      for (j = 0; j < 64; j += 16, mask <<= 16)
	{
	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (aarch64_bitmasks[i])));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (j),
					 GEN_INT ((val >> j) & 0xffff)));
	      return;
	    }
	}
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[i])));
		emit_insn (gen_iordi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[j])));
		return;
	      }
	}
      else if ((val & aarch64_bitmasks[i]) == val)
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[j])));
		emit_insn (gen_anddi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[i])));
		return;
	      }
	}
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
	{
	  if (first)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (val & mask)));
	      first = false;
	    }
	  else
	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
				       GEN_INT ((val >> i) & 0xffff)));
	}
    }
}
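
/* Illustration of the closing fall-back path (a hypothetical constant,
   assuming no shorter sequence was found above): 0x1234000056780000 has
   two zero 16-bit chunks, so it is built as
       movz x0, #0x5678, lsl #16
       movk x0, #0x1234, lsl #48  */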
static bool
aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Indirect calls are not currently supported.  */
  if (decl == NULL)
    return false;

  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
  if (aarch64_decl_is_long_call_p (decl))
    return false;

  return true;
}
1164 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED
,
1165 enum machine_mode mode
,
1167 bool named ATTRIBUTE_UNUSED
)
1170 enum machine_mode dummymode
;
1173 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1174 size
= (mode
== BLKmode
&& type
)
1175 ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1179 /* Arrays always passed by reference. */
1180 if (TREE_CODE (type
) == ARRAY_TYPE
)
1182 /* Other aggregates based on their size. */
1183 if (AGGREGATE_TYPE_P (type
))
1184 size
= int_size_in_bytes (type
);
1187 /* Variable sized arguments are always returned by reference. */
1191 /* Can this be a candidate to be passed in fp/simd register(s)? */
1192 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
1197 /* Arguments which are variable sized or larger than 2 registers are
1198 passed by reference unless they are a homogenous floating point
1200 return size
> 2 * UNITS_PER_WORD
;
/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  enum machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
					       &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}
/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp;
  int count;
  enum machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
	{
	  gcc_assert (count == 1 && mode == ag_mode);
	  return gen_rtx_REG (mode, V0_REGNUM);
	}
      else
	{
	  int i;
	  rtx par;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
	  for (i = 0; i < count; i++)
	    {
	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  return par;
	}
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}
/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return !TARGET_GENERAL_REGS_ONLY;

  return false;
}
/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
					       type,
					       &ag_mode,
					       &count,
					       NULL))
    return false;

  /* Types larger than 2 registers returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}
static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
			       const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
						  type,
						  &pcum->aapcs_vfp_rmode,
						  nregs,
						  NULL);
}
/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
	{
	  if (TYPE_MODE (type) == mode)
	    alignment = TYPE_ALIGN (type);
	  else
	    alignment = GET_MODE_ALIGNMENT (mode);
	}
      else
	alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}
/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		    const_tree type,
		    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
						 mode,
						 type,
						 &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (nvrn + nregs <= NUM_FP_ARG_REGS)
	{
	  pcum->aapcs_nextnvrn = nvrn + nregs;
	  if (!aarch64_composite_type_p (type, mode))
	    {
	      gcc_assert (nregs == 1);
	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
	    }
	  else
	    {
	      rtx par;
	      int i;

	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	      for (i = 0; i < nregs; i++)
		{
		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
					 V0_REGNUM + nvrn + i);
		  tmp = gen_rtx_EXPR_LIST
		    (VOIDmode, tmp,
		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
		  XVECEXP (par, 0, i) = tmp;
		}
	      pcum->aapcs_reg = par;
	    }
	  return;
	}
      else
	{
	  /* C.3 NSRN is set to 8.  */
	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
	  goto on_stack;
	}
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
	   + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* C6 - C9.  though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
	 rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
	{
	  ++ncrn;
	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
	}
      /* NREGS can be 0 when e.g. an empty structure is to be passed.
	 A reg is still generated for it, but the caller should be smart
	 enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
	{
	  pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
	}
      else
	{
	  rtx par;
	  int i;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	  for (i = 0; i < nregs; i++)
	    {
	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * UNITS_PER_WORD));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  pcum->aapcs_reg = par;
	}

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  /* C.11  */
  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument (we can re-use NREGS) and align the total size if
     necessary.  */
on_stack:
  pcum->aapcs_stack_words = nregs;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
					       16 / UNITS_PER_WORD) + 1;
  return;
}
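
/* Example of rule C.8 (illustrative): after one int argument (in w0,
   NGRN == 1), a 16-byte aggregate with 16-byte alignment skips x1 and is
   passed in the even-numbered pair x2/x3.  */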
/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}
void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
			      const_tree fntype ATTRIBUTE_UNUSED,
			      rtx libname ATTRIBUTE_UNUSED,
			      const_tree fndecl ATTRIBUTE_UNUSED,
			      unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  return;
}
static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
			      enum machine_mode mode,
			      const_tree type,
			      bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
		  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}
bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
	  || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}
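
/* That is, x0-x7 and v0-v7: under AAPCS64 both NUM_ARG_REGS and
   NUM_FP_ARG_REGS are 8.  */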
/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;
  return alignment;
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

bool
aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return true;

  /* Otherwise, integral, floating-point and pointer types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
	 || POINTER_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return false;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return true;
}
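
/* For example, on a big-endian target a 3-byte structure passed on the
   stack is padded upward: its bytes occupy the lowest addresses of the
   slot and the padding sits above them.  */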
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
			bool first ATTRIBUTE_UNUSED)
{
  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
			    : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
	return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}
static enum machine_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}
static bool
aarch64_frame_pointer_required (void)
{
  /* If the function contains dynamic stack allocations, we need to
     use the frame pointer to access the static parts of the frame.  */
  if (cfun->calls_alloca)
    return true;

  /* We may have turned flag_omit_frame_pointer on in order to have this
     function called; if we did, we also set the 'faked_omit_frame_pointer' flag
     and we'll check it here.
     If we really did set flag_omit_frame_pointer normally, then we return false
     (no frame pointer required) in all cases.  */

  if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
    return false;
  else if (flag_omit_leaf_frame_pointer)
    return !crtl->is_leaf;
  return true;
}
/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
static void
aarch64_layout_frame (void)
{
  HOST_WIDE_INT offset = 0;
  int regno;

  if (reload_completed && cfun->machine->frame.laid_out)
    return;

  cfun->machine->frame.fp_lr_offset = 0;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
    }

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
    }

  if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
    }

  cfun->machine->frame.padding0 =
    (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
  offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;
  cfun->machine->frame.laid_out = true;
}
/* Make the last instruction frame-related and note that it performs
   the operation described by FRAME_PATTERN.  */

static void
aarch64_set_frame_expr (rtx frame_pattern)
{
  rtx insn;

  insn = get_last_insn ();
  RTX_FRAME_RELATED_P (insn) = 1;
  RTX_FRAME_RELATED_P (frame_pattern) = 1;
  REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				      frame_pattern,
				      REG_NOTES (insn));
}
static bool
aarch64_register_saved_on_entry (int regno)
{
  return cfun->machine->frame.reg_offset[regno] != -1;
}
static void
aarch64_save_or_restore_fprs (int start_offset, int increment,
			      bool restore, rtx base_rtx)
{
  unsigned regno;
  unsigned regno2;
  rtx insn;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
	{
	  rtx mem;
	  mem = gen_mem_ref (DFmode,
			     plus_constant (Pmode,
					    base_rtx,
					    start_offset));

	  for (regno2 = regno + 1;
	       regno2 <= V31_REGNUM
		 && !aarch64_register_saved_on_entry (regno2);
	       regno2++)
	    {
	      /* Empty loop.  */
	    }
	  if (regno2 <= V31_REGNUM &&
	      aarch64_register_saved_on_entry (regno2))
	    {
	      rtx mem2;
	      /* Next highest register to be saved.  */
	      mem2 = gen_mem_ref (DFmode,
				  plus_constant
				  (Pmode,
				   base_rtx,
				   start_offset + increment));
	      if (restore == false)
		{
		  insn = emit_insn
		    ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
					mem2, gen_rtx_REG (DFmode, regno2)));
		}
	      else
		{
		  insn = emit_insn
		    ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
				       gen_rtx_REG (DFmode, regno2), mem2));

		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DFmode, regno));
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DFmode, regno2));
		}

	      /* The first part of a frame-related parallel insn
		 is always assumed to be relevant to the frame
		 calculations; subsequent parts are only
		 frame-related if explicitly marked.  */
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	      regno = regno2;
	      start_offset += increment * 2;
	    }
	  else
	    {
	      if (restore == false)
		insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
	      else
		{
		  insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		}
	      start_offset += increment;
	    }
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Offset from the stack pointer of where the saves and
   restores have to happen.  */
static void
aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
					       bool restore)
{
  rtx insn;
  rtx base_rtx = stack_pointer_rtx;
  HOST_WIDE_INT start_offset = offset;
  HOST_WIDE_INT increment = UNITS_PER_WORD;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
  unsigned limit = (frame_pointer_needed)? R28_REGNUM : R30_REGNUM;
  unsigned regno;
  unsigned regno2;

  for (regno = R0_REGNUM; regno <= limit; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
	{
	  rtx mem;
	  mem = gen_mem_ref (Pmode,
			     plus_constant (Pmode,
					    base_rtx,
					    start_offset));

	  for (regno2 = regno + 1;
	       regno2 <= limit
		 && !aarch64_register_saved_on_entry (regno2);
	       regno2++)
	    {
	      /* Empty loop.  */
	    }
	  if (regno2 <= limit &&
	      aarch64_register_saved_on_entry (regno2))
	    {
	      rtx mem2;
	      /* Next highest register to be saved.  */
	      mem2 = gen_mem_ref (Pmode,
				  plus_constant
				  (Pmode,
				   base_rtx,
				   start_offset + increment));
	      if (restore == false)
		{
		  insn = emit_insn
		    ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
					mem2, gen_rtx_REG (DImode, regno2)));
		}
	      else
		{
		  insn = emit_insn
		    ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
				       gen_rtx_REG (DImode, regno2), mem2));

		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno2));
		}

	      /* The first part of a frame-related parallel insn
		 is always assumed to be relevant to the frame
		 calculations; subsequent parts are only
		 frame-related if explicitly marked.  */
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
					    1)) = 1;
	      regno = regno2;
	      start_offset += increment * 2;
	    }
	  else
	    {
	      if (restore == false)
		insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
	      else
		{
		  insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		}
	      start_offset += increment;
	    }
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
}
/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+ <-- arg_pointer_rtx
	|                               |
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+
	|                               |
	|  local variables              |
	|                               |
	+-------------------------------+ <-- frame_pointer_rtx
	|                               |
	|  callee-saved registers       |
	|                               |
	+-------------------------------+
	|  LR'                          |
	+-------------------------------+
	|  FP'                          |
      P +-------------------------------+ <-- hard_frame_pointer_rtx
	|  dynamic allocation           |
	+-------------------------------+
	|                               |
	|  outgoing stack arguments     |
	|                               |
	+-------------------------------+ <-- stack_pointer_rtx

   Dynamic stack allocations such as alloca insert data at point P.
   They decrease stack_pointer_rtx but leave frame_pointer_rtx and
   hard_frame_pointer_rtx unchanged.  */
/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */

void
aarch64_expand_prologue (void)
{
  /* sub sp, sp, #<frame_size>
     stp {fp, lr}, [sp, #<frame_size> - 16]
     add fp, sp, #<frame_size> - hardfp_offset
     stp {cs_reg}, [fp, #-16] etc.

     sub sp, sp, <final_adjustment_if_any>
  */
  HOST_WIDE_INT original_frame_size;	/* local variables + vararg save */
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;		/* FP offset from SP */
  rtx insn;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
	      && (cfun->stdarg || !cfun->machine->saved_varargs_size));
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
					  STACK_BOUNDARY / BITS_PER_UNIT);

  if (flag_stack_usage_info)
    current_function_static_stack_size = frame_size;

  fp_offset = (offset
	       - original_frame_size
	       - cfun->machine->frame.saved_regs_size);

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      /* When the frame has a large size, an initial decrease is done on
	 the stack pointer to jump over the callee-allocated save area for
	 register varargs, the local variable area and/or the callee-saved
	 register area.  This will allow the pre-index write-back
	 store pair instructions to be used for setting up the stack frame
	 efficiently.  */
      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;

      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (-frame_size));
	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, stack_pointer_rtx,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  -frame_size)));
	}
      else if (frame_size > 0)
	{
	  if ((frame_size & 0xfff) != frame_size)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT (-(frame_size
					    & ~(HOST_WIDE_INT)0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  if ((frame_size & 0xfff) != 0)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT (-(frame_size
					    & (HOST_WIDE_INT)0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
    }
  else
    frame_size = -1;

  if (offset > 0)
    {
      /* Save the frame pointer and lr if the frame pointer is needed
	 first.  Make the frame pointer point to the location of the
	 old frame pointer on the stack.  */
      if (frame_pointer_needed)
	{
	  rtx mem_fp, mem_lr;

	  if (fp_offset)
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (-offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      aarch64_set_frame_expr (gen_rtx_SET
				      (Pmode, stack_pointer_rtx,
				       gen_rtx_MINUS (Pmode,
						      stack_pointer_rtx,
						      GEN_INT (offset))));
	      mem_fp = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset));
	      mem_lr = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset
						     + UNITS_PER_WORD));
	      insn = emit_insn (gen_store_pairdi (mem_fp,
						  hard_frame_pointer_rtx,
						  mem_lr,
						  gen_rtx_REG (DImode,
							       LR_REGNUM)));
	    }
	  else
	    {
	      insn = emit_insn (gen_storewb_pairdi_di
				(stack_pointer_rtx, stack_pointer_rtx,
				 hard_frame_pointer_rtx,
				 gen_rtx_REG (DImode, LR_REGNUM),
				 GEN_INT (-offset),
				 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
	    }

	  /* The first part of a frame-related parallel insn is always
	     assumed to be relevant to the frame calculations;
	     subsequent parts are only frame-related if explicitly
	     marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Set up frame pointer to point to the location of the
	     previous frame pointer on the stack.  */
	  insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
					   stack_pointer_rtx,
					   GEN_INT (fp_offset)));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, hard_frame_pointer_rtx,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  fp_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
					   hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					   GEN_INT (-offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      aarch64_save_or_restore_callee_save_registers
	(fp_offset + cfun->machine->frame.hardfp_offset, 0);
    }

  /* when offset >= 512,
     sub sp, sp, #<outgoing_args_size> */
  if (frame_size > -1)
    {
      if (crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (- crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Generate the epilogue instructions for returning from a function.  */
void
aarch64_expand_epilogue (bool for_sibcall)
{
  HOST_WIDE_INT original_frame_size, frame_size, offset;
  HOST_WIDE_INT fp_offset;
  rtx insn;
  rtx cfa_reg;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
					  STACK_BOUNDARY / BITS_PER_UNIT);

  fp_offset = (offset
	       - original_frame_size
	       - cfun->machine->frame.saved_regs_size);

  cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;
      if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  else
    frame_size = -1;

  /* If there were outgoing arguments or we've done dynamic stack
     allocation, then restore the stack pointer from the frame
     pointer.  This is at most one insn and more efficient than using
     GCC's internal mechanism.  */
  if (frame_pointer_needed
      && (crtl->outgoing_args_size || cfun->calls_alloca))
    {
      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
				       hard_frame_pointer_rtx,
				       GEN_INT (- fp_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;
      /* As SP is set to (FP - fp_offset), according to the rules in
	 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
	 from the value of SP from now on.  */
      cfa_reg = stack_pointer_rtx;
    }

  aarch64_save_or_restore_callee_save_registers
    (fp_offset + cfun->machine->frame.hardfp_offset, 1);

  /* Restore the frame pointer and lr if the frame pointer is needed.  */
  if (offset > 0)
    {
      if (frame_pointer_needed)
	{
	  rtx mem_fp, mem_lr;

	  if (fp_offset)
	    {
	      mem_fp = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset));
	      mem_lr = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset
						     + UNITS_PER_WORD));
	      insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
						 mem_fp,
						 gen_rtx_REG (DImode,
							      LR_REGNUM),
						 mem_lr));
	    }
	  else
	    {
	      insn = emit_insn (gen_loadwb_pairdi_di
				(stack_pointer_rtx,
				 stack_pointer_rtx,
				 hard_frame_pointer_rtx,
				 gen_rtx_REG (DImode, LR_REGNUM),
				 GEN_INT (offset),
				 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
	      add_reg_note (insn, REG_CFA_ADJUST_CFA,
			    (gen_rtx_SET (Pmode, stack_pointer_rtx,
					  plus_constant (Pmode, cfa_reg,
							 offset))));
	    }

	  /* The first part of a frame-related parallel insn
	     is always assumed to be relevant to the frame
	     calculations; subsequent parts are only
	     frame-related if explicitly marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
	  add_reg_note (insn, REG_CFA_RESTORE,
			gen_rtx_REG (DImode, LR_REGNUM));

	  if (fp_offset)
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
      else
	{
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					   GEN_INT (offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Stack adjustment for exception handler.  */
  if (crtl->calls_eh_return)
    {
      /* We need to unwind the stack by the offset computed by
	 EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
	 based on SP.  Ideally we would update the SP and define the
	 CFA along the lines of:

	 SP = SP + EH_RETURN_STACKADJ_RTX
	 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)

	 However the dwarf emitter only understands a constant
	 register offset.

	 The solution chosen here is to use the otherwise unused IP0
	 as a temporary register to hold the current SP value.  The
	 CFA is described using IP0 then SP is modified.  */

      rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);

      insn = emit_move_insn (ip0, stack_pointer_rtx);
      add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
      RTX_FRAME_RELATED_P (insn) = 1;

      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));

      /* Ensure the assignment to IP0 does not get optimized away.  */
      emit_use (ip0);
    }

  if (frame_size > -1)
    {
      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (frame_size));
	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, stack_pointer_rtx,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  frame_size)));
	}
      else if (frame_size > 0)
	{
	  if ((frame_size & 0xfff) != 0)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT ((frame_size
					   & (HOST_WIDE_INT) 0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  if ((frame_size & 0xfff) != frame_size)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT ((frame_size
					   & ~ (HOST_WIDE_INT) 0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}

      aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  offset)));
    }

  emit_use (gen_rtx_REG (DImode, LR_REGNUM));
  if (!for_sibcall)
    emit_jump_insn (ret_rtx);
}
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
rtx
aarch64_final_eh_return_addr (void)
{
  HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
                + crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
                                          STACK_BOUNDARY / BITS_PER_UNIT);
  fp_offset = (offset
               - original_frame_size
               - cfun->machine->frame.saved_regs_size);

  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
    return gen_rtx_REG (DImode, LR_REGNUM);

  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
     result in a store to save LR introduced by builtin_eh_return () being
     incorrectly deleted because the alias is not detected.
     So in the calculation of the address to copy the exception unwinding
     return address to, we note 2 cases.
     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
     we return a SP-relative location since all the addresses are SP-relative
     in this case.  This prevents the store from being optimized away.
     If the fp_offset is not 0, then the addresses will be FP-relative and
     therefore we return a FP-relative location.  */

  if (frame_pointer_needed)
    {
      if (fp_offset)
        return gen_frame_mem (DImode,
                              plus_constant (Pmode,
                                             hard_frame_pointer_rtx,
                                             UNITS_PER_WORD));
      else
        return gen_frame_mem (DImode,
                              plus_constant (Pmode,
                                             stack_pointer_rtx,
                                             UNITS_PER_WORD));
    }

  /* If FP is not needed, we calculate the location of LR, which would be
     at the top of the saved registers block.  */

  return gen_frame_mem (DImode,
                        plus_constant (Pmode,
                                       stack_pointer_rtx,
                                       fp_offset
                                       + cfun->machine->frame.saved_regs_size
                                       - 2 * UNITS_PER_WORD));
}
/* Output code to build up a constant in a register.  */
static void
aarch64_build_constant (int regnum, HOST_WIDE_INT val)
{
  if (aarch64_bitmask_imm (val, DImode))
    emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
  else
    {
      int i;
      int ncount = 0;
      int zcount = 0;
      HOST_WIDE_INT valp = val >> 16;
      HOST_WIDE_INT valm;
      HOST_WIDE_INT tval;

      for (i = 16; i < 64; i += 16)
        {
          valm = (valp & 0xffff);

          if (valm != 0)
            ++ zcount;

          if (valm != 0xffff)
            ++ ncount;

          valp >>= 16;
        }

      /* zcount contains the number of additional MOVK instructions
         required if the constant is built up with an initial MOVZ
         instruction, while ncount is the number of MOVK instructions
         required if starting with a MOVN instruction.  Choose the
         sequence that yields the fewest instructions, preferring MOVZ
         when the two counts are equal.  */
      if (ncount < zcount)
        {
          emit_move_insn (gen_rtx_REG (Pmode, regnum),
                          GEN_INT ((~val) & 0xffff));
          tval = 0xffff;
        }
      else
        {
          emit_move_insn (gen_rtx_REG (Pmode, regnum),
                          GEN_INT (val & 0xffff));
          tval = 0;
        }

      val >>= 16;

      for (i = 16; i < 64; i += 16)
        {
          if ((val & 0xffff) != tval)
            emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
                                       GEN_INT (i), GEN_INT (val & 0xffff)));
          val >>= 16;
        }
    }
}
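
/* Worked example (illustrative): for val == 0xffffffffffff1234 every
   16-bit chunk above the bottom one is 0xffff, so ncount == 0 and
   zcount == 3.  The code above therefore starts from the complemented
   low chunk, emitting the single instruction
       movn  xN, #0xedcb        // (~val) & 0xffff
   with no trailing MOVKs, whereas a MOVZ-based sequence would need
   three MOVKs for the 0xffff chunks.  */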
static void
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
{
  HOST_WIDE_INT mdelta = delta;
  rtx this_rtx = gen_rtx_REG (Pmode, regnum);
  rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);

  if (mdelta < 0)
    mdelta = -mdelta;

  if (mdelta >= 4096 * 4096)
    {
      aarch64_build_constant (scratchreg, delta);
      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
    }
  else if (mdelta > 0)
    {
      if (mdelta >= 4096)
        {
          emit_insn (gen_rtx_SET (Pmode, scratch_rtx,
                                  GEN_INT (mdelta / 4096)));
          rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
          if (delta < 0)
            emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                    gen_rtx_MINUS (Pmode, this_rtx, shift)));
          else
            emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                    gen_rtx_PLUS (Pmode, this_rtx, shift)));
        }
      if (mdelta % 4096 != 0)
        {
          scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
          emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                  gen_rtx_PLUS (Pmode, this_rtx,
                                                scratch_rtx)));
        }
    }
}
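
/* Worked example (illustrative): for delta == 0x5678, mdelta / 4096 == 5
   and mdelta % 4096 == 0x678, so the code above emits roughly
       mov  xS, #5
       add  xR, xR, xS, lsl #12
       add  xR, xR, #0x678
   splitting the addend into a shifted quotient and a low 12-bit part.  */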
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT delta,
                         HOST_WIDE_INT vcall_offset,
                         tree function)
{
  /* The this pointer is always in x0.  Note that this differs from
     Arm where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
  int this_regno = R0_REGNUM;
  rtx this_rtx, temp0, temp1, addr, insn, funexp;

  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  if (vcall_offset == 0)
    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
  else
    {
      gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);

      this_rtx = gen_rtx_REG (Pmode, this_regno);
      temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
      temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);

      addr = this_rtx;
      if (delta != 0)
        {
          if (delta >= -256 && delta < 256)
            addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
                                       plus_constant (Pmode, this_rtx,
                                                      delta));
          else
            aarch64_add_constant (this_regno, IP1_REGNUM, delta);
        }

      if (Pmode == ptr_mode)
        aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
      else
        aarch64_emit_move (temp0,
                           gen_rtx_ZERO_EXTEND (Pmode,
                                                gen_rtx_MEM (ptr_mode,
                                                             addr)));

      if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
        addr = plus_constant (Pmode, temp0, vcall_offset);
      else
        {
          aarch64_build_constant (IP1_REGNUM, vcall_offset);
          addr = gen_rtx_PLUS (Pmode, temp0, temp1);
        }

      if (Pmode == ptr_mode)
        aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
      else
        aarch64_emit_move (temp1,
                           gen_rtx_SIGN_EXTEND (Pmode,
                                                gen_rtx_MEM (ptr_mode,
                                                             addr)));

      emit_insn (gen_add2_insn (this_rtx, temp1));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending to be a post-reload pass.  */
  reload_completed = 0;
}
static int
aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}

static bool
aarch64_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
}
static int
aarch64_bitmasks_cmp (const void *i1, const void *i2)
{
  const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
  const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;

  if (*imm1 < *imm2)
    return -1;
  if (*imm1 > *imm2)
    return +1;
  return 0;
}
static void
aarch64_build_bitmask_table (void)
{
  unsigned HOST_WIDE_INT mask, imm;
  unsigned int log_e, e, s, r;
  unsigned int nimms = 0;

  for (log_e = 1; log_e <= 6; log_e++)
    {
      e = 1 << log_e;
      if (e == 64)
        mask = ~(HOST_WIDE_INT) 0;
      else
        mask = ((HOST_WIDE_INT) 1 << e) - 1;
      for (s = 1; s < e; s++)
        {
          for (r = 0; r < e; r++)
            {
              /* Set s consecutive bits to 1 (s < 64).  */
              imm = ((unsigned HOST_WIDE_INT) 1 << s) - 1;
              /* Rotate right by r.  */
              if (r != 0)
                imm = ((imm >> r) | (imm << (e - r))) & mask;
              /* Replicate the constant depending on SIMD size
                 (the cases deliberately fall through).  */
              switch (log_e)
                {
                case 1: imm |= (imm <<  2);
                case 2: imm |= (imm <<  4);
                case 3: imm |= (imm <<  8);
                case 4: imm |= (imm << 16);
                case 5: imm |= (imm << 32);
                case 6:
                  break;
                default:
                  gcc_unreachable ();
                }
              gcc_assert (nimms < AARCH64_NUM_BITMASKS);
              aarch64_bitmasks[nimms++] = imm;
            }
        }
    }

  gcc_assert (nimms == AARCH64_NUM_BITMASKS);
  qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
         aarch64_bitmasks_cmp);
}
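
/* Illustrative note: each (log_e, s, r) triple yields one table entry;
   element size e contributes e * (e - 1) patterns, for a total of
   2 + 12 + 56 + 240 + 992 + 4032 == 5334 entries, which is what
   AARCH64_NUM_BITMASKS is expected to equal.  For example e == 16,
   s == 8, r == 0 produces 0x00ff00ff00ff00ff after replication.  */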
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
}

/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
        return 1;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}
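
/* For instance, 0xffff0000 satisfies the second disjunct above
   ((val & (0xffff << 16)) == val) and can be loaded with a single
       movz  wN, #0xffff, lsl #16
   while wider values such as 0xffff000000000000 match the
   48-bit-shifted case checked for modes larger than 4 bytes.  */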
/* Return true if val is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) < 8)
    {
      /* Replicate bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
      val |= val << 32;
    }
  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
                  sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
}

/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return 1;
  return aarch64_bitmask_imm (val, mode);
}
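
/* Example: aarch64_move_imm returns true for 0xffffffffffff0fff in
   DImode via the complemented-MOVZ (i.e. MOVN) path, since ~val is
   0xf000, and for 0x00ff00ff00ff00ff via the bitmask-immediate
   binary search.  */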
static bool
aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
    {
      if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
          != SYMBOL_FORCE_TO_MEM)
        return true;
      else
        /* Avoid generating a 64-bit relocation in ILP32; leave
           to aarch64_expand_mov_immediate to handle it properly.  */
        return mode != ptr_mode;
    }

  return aarch64_tls_referenced_p (x);
}
/* Return true if register REGNO is a valid index register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_index_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }
  return GP_REGNUM_P (regno);
}

/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_base_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }

  /* The fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  return (GP_REGNUM_P (regno)
          || regno == SP_REGNUM
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}

/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_base_register_rtx_p (rtx x, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
}
/* Return true if address offset is a valid index.  If it is, fill in INFO
   appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_classify_index (struct aarch64_address_info *info, rtx x,
                        enum machine_mode mode, bool strict_p)
{
  enum aarch64_address_type type;
  rtx index;
  int shift;

  /* (reg:P) */
  if ((REG_P (x) || GET_CODE (x) == SUBREG)
      && GET_MODE (x) == Pmode)
    {
      type = ADDRESS_REG_REG;
      index = x;
      shift = 0;
    }
  /* (sign_extend:DI (reg:SI)) */
  else if ((GET_CODE (x) == SIGN_EXTEND
            || GET_CODE (x) == ZERO_EXTEND)
           && GET_MODE (x) == DImode
           && GET_MODE (XEXP (x, 0)) == SImode)
    {
      type = (GET_CODE (x) == SIGN_EXTEND)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (x, 0);
      shift = 0;
    }
  /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
  else if (GET_CODE (x) == MULT
           && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
               || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
           && GET_MODE (XEXP (x, 0)) == DImode
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
           && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
               || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
           && GET_MODE (XEXP (x, 0)) == DImode
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (x, 1));
    }
  /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
            || GET_CODE (x) == ZERO_EXTRACT)
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == MULT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
          || INTVAL (XEXP (x, 2)) != 0)
        return false;
    }
  /* (and:DI (mult:DI (reg:DI) (const_int scale))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == MULT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT) 0xffffffff << shift)
        return false;
    }
  /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
            || GET_CODE (x) == ZERO_EXTRACT)
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == ASHIFT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
          || INTVAL (XEXP (x, 2)) != 0)
        return false;
    }
  /* (and:DI (ashift:DI (reg:DI) (const_int shift))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == ASHIFT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT) 0xffffffff << shift)
        return false;
    }
  /* (mult:P (reg:P) (const_int scale)) */
  else if (GET_CODE (x) == MULT
           && GET_MODE (x) == Pmode
           && GET_MODE (XEXP (x, 0)) == Pmode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:P (reg:P) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
           && GET_MODE (x) == Pmode
           && GET_MODE (XEXP (x, 0)) == Pmode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = INTVAL (XEXP (x, 1));
    }
  else
    return false;

  if (GET_CODE (index) == SUBREG)
    index = SUBREG_REG (index);

  if ((shift == 0 ||
       (shift > 0 && shift <= 3
        && (1 << shift) == GET_MODE_SIZE (mode)))
      && REG_P (index)
      && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
    {
      info->type = type;
      info->offset = index;
      info->shift = shift;
      return true;
    }

  return false;
}
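
/* Illustrative examples of index forms accepted above (assuming a
   4-byte access, so the shift must be 0 or 2):
     (reg:DI x1)                                       -> [x0, x1]
     (mult:DI (sign_extend:DI (reg:SI w1)) (const_int 4))
                                                       -> [x0, w1, sxtw #2]
     (ashift:DI (reg:DI x1) (const_int 2))             -> [x0, x1, lsl #2]  */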
static inline bool
offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
          && offset < 64 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}

static inline bool
offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static inline bool
offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
          && offset < 4096 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}
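
/* For a DImode (8-byte) access the three predicates above accept:
     7-bit signed scaled:    -512 .. 504 in steps of 8 (LDP/STP range)
     9-bit signed unscaled:  -256 .. 255              (LDUR/STUR range)
     12-bit unsigned scaled: 0 .. 32760 in steps of 8 (LDR/STR range)  */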
/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  OUTER_CODE is PARALLEL for a load/store pair.  */

static bool
aarch64_classify_address (struct aarch64_address_info *info,
                          rtx x, enum machine_mode mode,
                          RTX_CODE outer_code, bool strict_p)
{
  enum rtx_code code = GET_CODE (x);
  rtx op0, op1;
  bool allow_reg_index_p =
    outer_code != PARALLEL && GET_MODE_SIZE (mode) != 16;

  /* Don't support anything other than POST_INC or REG addressing for
     AdvSIMD.  */
  if (aarch64_vector_mode_p (mode)
      && (code != POST_INC && code != REG))
    return false;

  switch (code)
    {
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG_IMM;
      info->base = x;
      info->offset = const0_rtx;
      return aarch64_base_register_rtx_p (x, strict_p);

    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      if (GET_MODE_SIZE (mode) != 0
          && CONST_INT_P (op1)
          && aarch64_base_register_rtx_p (op0, strict_p))
        {
          HOST_WIDE_INT offset = INTVAL (op1);

          info->type = ADDRESS_REG_IMM;
          info->base = op0;
          info->offset = op1;

          /* TImode and TFmode values are allowed in both pairs of X
             registers and individual Q registers.  The available
             address modes are:
             X,X: 7-bit signed scaled offset
             Q:   9-bit signed offset
             We conservatively require an offset representable in
             either mode.  */
          if (mode == TImode || mode == TFmode)
            return (offset_7bit_signed_scaled_p (mode, offset)
                    && offset_9bit_signed_unscaled_p (mode, offset));

          if (outer_code == PARALLEL)
            return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
                    && offset_7bit_signed_scaled_p (mode, offset));
          else
            return (offset_9bit_signed_unscaled_p (mode, offset)
                    || offset_12bit_unsigned_scaled_p (mode, offset));
        }

      if (allow_reg_index_p)
        {
          /* Look for base + (scaled/extended) index register.  */
          if (aarch64_base_register_rtx_p (op0, strict_p)
              && aarch64_classify_index (info, op1, mode, strict_p))
            {
              info->base = op0;
              return true;
            }
          if (aarch64_base_register_rtx_p (op1, strict_p)
              && aarch64_classify_index (info, op0, mode, strict_p))
            {
              info->base = op1;
              return true;
            }
        }

      return false;

    case POST_INC:
    case POST_DEC:
    case PRE_INC:
    case PRE_DEC:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      info->offset = NULL_RTX;
      return aarch64_base_register_rtx_p (info->base, strict_p);

    case POST_MODIFY:
    case PRE_MODIFY:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      if (GET_CODE (XEXP (x, 1)) == PLUS
          && CONST_INT_P (XEXP (XEXP (x, 1), 1))
          && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
          && aarch64_base_register_rtx_p (info->base, strict_p))
        {
          HOST_WIDE_INT offset;
          info->offset = XEXP (XEXP (x, 1), 1);
          offset = INTVAL (info->offset);

          /* TImode and TFmode values are allowed in both pairs of X
             registers and individual Q registers.  The available
             address modes are:
             X,X: 7-bit signed scaled offset
             Q:   9-bit signed offset
             We conservatively require an offset representable in
             either mode.  */
          if (mode == TImode || mode == TFmode)
            return (offset_7bit_signed_scaled_p (mode, offset)
                    && offset_9bit_signed_unscaled_p (mode, offset));

          if (outer_code == PARALLEL)
            return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
                    && offset_7bit_signed_scaled_p (mode, offset));
          else
            return offset_9bit_signed_unscaled_p (mode, offset);
        }
      return false;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      /* load literal: pc-relative constant pool entry.  Only supported
         for SI mode or larger.  */
      info->type = ADDRESS_SYMBOLIC;
      if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
        {
          rtx sym, addend;

          split_const (x, &sym, &addend);
          return (GET_CODE (sym) == LABEL_REF
                  || (GET_CODE (sym) == SYMBOL_REF
                      && CONSTANT_POOL_ADDRESS_P (sym)));
        }
      return false;

    case LO_SUM:
      info->type = ADDRESS_LO_SUM;
      info->base = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      if (allow_reg_index_p
          && aarch64_base_register_rtx_p (info->base, strict_p))
        {
          rtx sym, offs;

          split_const (info->offset, &sym, &offs);
          if (GET_CODE (sym) == SYMBOL_REF
              && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
                  == SYMBOL_SMALL_ABSOLUTE))
            {
              /* The symbol and offset must be aligned to the access
                 size.  */
              unsigned int align;
              unsigned int ref_size;

              if (CONSTANT_POOL_ADDRESS_P (sym))
                align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
              else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
                {
                  tree exp = SYMBOL_REF_DECL (sym);
                  align = TYPE_ALIGN (TREE_TYPE (exp));
                  align = CONSTANT_ALIGNMENT (exp, align);
                }
              else if (SYMBOL_REF_DECL (sym))
                align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
              else
                align = BITS_PER_UNIT;

              ref_size = GET_MODE_SIZE (mode);
              if (ref_size == 0)
                ref_size = GET_MODE_SIZE (DImode);

              return ((INTVAL (offs) & (ref_size - 1)) == 0
                      && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
            }
        }
      return false;

    default:
      return false;
    }
}
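
/* Example: for a DImode load-pair (OUTER_CODE == PARALLEL) the address
   (plus (reg:DI x0) (const_int 504)) classifies as ADDRESS_REG_IMM,
   while an offset of 512 is rejected because it falls outside the
   7-bit signed scaled range checked above.  */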
bool
aarch64_symbolic_address_p (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
}

/* Classify the base of symbolic expression X, given that X appears in
   context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbolic_expression (rtx x,
                                      enum aarch64_symbol_context context)
{
  rtx offset;

  split_const (x, &x, &offset);
  return aarch64_classify_symbol (x, context);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  */
static bool
aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x,
                                   bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
   pair operation.  */
bool
aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
                              RTX_CODE outer_code, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
}
/* Return TRUE if rtx X is immediate constant 0.0  */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
  REAL_VALUE_TYPE r;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return !HONOR_SIGNED_ZEROS (GET_MODE (x));
  return REAL_VALUES_EQUAL (r, dconst0);
}
/* Return the fixed registers used for condition codes.  */

static bool
aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}
enum machine_mode
aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (code)
        {
        case EQ:
        case NE:
        case UNORDERED:
        case ORDERED:
        case UNLT:
        case UNLE:
        case UNGT:
        case UNGE:
        case UNEQ:
        case LTGT:
          return CCFPmode;

        case LT:
        case LE:
        case GT:
        case GE:
          return CCFPEmode;

        default:
          gcc_unreachable ();
        }
    }

  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && y == const0_rtx
      && (code == EQ || code == NE || code == LT || code == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
          || GET_CODE (x) == NEG))
    return CC_NZmode;

  /* A compare with a shifted or negated operand.  Because of
     canonicalization, the comparison will have to be swapped when we
     emit the assembly code.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == LSHIFTRT
          || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND
          || GET_CODE (x) == NEG))
    return CC_SWPmode;

  /* A compare of a mode narrower than SI mode against zero can be done
     by extending the value in the comparison.  */
  if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
      && y == const0_rtx)
    /* Only use sign-extension if we really need it.  */
    return ((code == GT || code == GE || code == LE || code == LT)
            ? CC_SESWPmode : CC_ZESWPmode);

  /* For everything else, return CCmode.  */
  return CCmode;
}
int
aarch64_get_condition_code (rtx x)
{
  enum machine_mode mode = GET_MODE (XEXP (x, 0));
  enum rtx_code comp_code = GET_CODE (x);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));

  switch (mode)
    {
    case CCFPmode:
    case CCFPEmode:
      switch (comp_code)
        {
        case GE: return AARCH64_GE;
        case GT: return AARCH64_GT;
        case LE: return AARCH64_LS;
        case LT: return AARCH64_MI;
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case ORDERED: return AARCH64_VC;
        case UNORDERED: return AARCH64_VS;
        case UNLT: return AARCH64_LT;
        case UNLE: return AARCH64_LE;
        case UNGT: return AARCH64_HI;
        case UNGE: return AARCH64_PL;
        default: gcc_unreachable ();
        }
      break;

    case CCmode:
      switch (comp_code)
        {
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case GE: return AARCH64_GE;
        case GT: return AARCH64_GT;
        case LE: return AARCH64_LE;
        case LT: return AARCH64_LT;
        case GEU: return AARCH64_CS;
        case GTU: return AARCH64_HI;
        case LEU: return AARCH64_LS;
        case LTU: return AARCH64_CC;
        default: gcc_unreachable ();
        }
      break;

    case CC_SWPmode:
    case CC_ZESWPmode:
    case CC_SESWPmode:
      switch (comp_code)
        {
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case GE: return AARCH64_LE;
        case GT: return AARCH64_LT;
        case LE: return AARCH64_GE;
        case LT: return AARCH64_GT;
        case GEU: return AARCH64_LS;
        case GTU: return AARCH64_CC;
        case LEU: return AARCH64_CS;
        case LTU: return AARCH64_HI;
        default: gcc_unreachable ();
        }
      break;

    case CC_NZmode:
      switch (comp_code)
        {
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case GE: return AARCH64_PL;
        case LT: return AARCH64_MI;
        default: gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
      break;
    }
}
static unsigned
bit_count (unsigned HOST_WIDE_INT value)
{
  unsigned count = 0;

  while (value)
    {
      count++;
      value &= value - 1;
    }

  return count;
}
void
aarch64_print_operand (FILE *f, rtx x, char code)
{
  switch (code)
    {
    case 'c':
      /* An integer or symbol address without a preceding # sign.  */
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
          break;

        case SYMBOL_REF:
          output_addr_const (f, x);
          break;

        case CONST:
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
            {
              output_addr_const (f, x);
              break;
            }
          /* Fall through.  */

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      break;

    case 'e':
      /* Print the sign/zero-extend size as a character 8->b, 16->h,
         32->w.  */
      {
        int n;

        if (GET_CODE (x) != CONST_INT
            || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        switch (n)
          {
          case 3:
            fputc ('b', f);
            break;
          case 4:
            fputc ('h', f);
            break;
          case 5:
            fputc ('w', f);
            break;
          default:
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }
      }
      break;

    case 'p':
      {
        int n;

        /* Print N such that 2^N == X.  */
        if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        asm_fprintf (f, "%d", n);
      }
      break;

    case 'P':
      /* Print the number of non-zero bits in X (a const_int).  */
      if (GET_CODE (x) != CONST_INT)
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      asm_fprintf (f, "%u", bit_count (INTVAL (x)));
      break;

    case 'H':
      /* Print the higher numbered register of a pair (TImode) of regs.  */
      if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
      break;

    case 'm':
      /* Print a condition (eq, ne, etc).  */

      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
        return;

      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
      break;

    case 'M':
      /* Print the inverse of a condition (eq <-> ne, etc).  */

      /* CONST_TRUE_RTX means never -- that's the default.  */
      if (x == const_true_rtx)
        {
          fputs ("nv", f);
          return;
        }

      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
                                     (aarch64_get_condition_code (x))], f);
      break;

    case 'b':
    case 'h':
    case 's':
    case 'd':
    case 'q':
      /* Print a scalar FP/SIMD register name.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
        {
          output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
      break;

    case 'S':
    case 'T':
    case 'U':
    case 'V':
      /* Print the first FP/SIMD register name in a list.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
        {
          output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
      break;

    case 'X':
      /* Print bottom 16 bits of integer constant in hex.  */
      if (GET_CODE (x) != CONST_INT)
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
      break;

    case 'w':
    case 'x':
      /* Print a general register name or the zero register (32-bit or
         64-bit).  */
      if (x == const0_rtx
          || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
        {
          asm_fprintf (f, "%czr", code);
          break;
        }

      if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
        {
          asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
          break;
        }

      if (REG_P (x) && REGNO (x) == SP_REGNUM)
        {
          asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
          break;
        }

      /* Fall through */

    case 0:
      /* Print a normal operand, if it's a general register, then we
         assume DImode.  */
      if (x == NULL)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (f, "%s", reg_names [REGNO (x)]);
          break;

        case MEM:
          aarch64_memory_reference_mode = GET_MODE (x);
          output_address (XEXP (x, 0));
          break;

        case LABEL_REF:
        case SYMBOL_REF:
          output_addr_const (asm_out_file, x);
          break;

        case CONST_INT:
          asm_fprintf (f, "%wd", INTVAL (x));
          break;

        case CONST_VECTOR:
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
            {
              gcc_assert (aarch64_const_vec_all_same_int_p (x,
                                                            HOST_WIDE_INT_MIN,
                                                            HOST_WIDE_INT_MAX));
              asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
            }
          else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
            fputc ('0', f);
          else
            gcc_unreachable ();
          break;

        case CONST_DOUBLE:
          /* CONST_DOUBLE can represent a double-width integer.
             In this case, the mode of x is VOIDmode.  */
          if (GET_MODE (x) == VOIDmode)
            ; /* Do nothing.  */
          else if (aarch64_float_const_zero_rtx_p (x))
            {
              fputc ('0', f);
              break;
            }
          else if (aarch64_float_const_representable_p (x))
            {
#define buf_size 20
              char float_buf[buf_size] = {'\0'};
              REAL_VALUE_TYPE r;
              REAL_VALUE_FROM_CONST_DOUBLE (r, x);
              real_to_decimal_for_mode (float_buf, &r,
                                        buf_size, buf_size,
                                        1, GET_MODE (x));
              asm_fprintf (asm_out_file, "%s", float_buf);
              break;
#undef buf_size
            }
          output_operand_lossage ("invalid constant");
          return;

        default:
          output_operand_lossage ("invalid operand");
          return;
        }
      break;

    case 'A':
      if (GET_CODE (x) == HIGH)
        x = XEXP (x, 0);

      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
        {
        case SYMBOL_SMALL_GOT:
          asm_fprintf (asm_out_file, ":got:");
          break;

        case SYMBOL_SMALL_TLSGD:
          asm_fprintf (asm_out_file, ":tlsgd:");
          break;

        case SYMBOL_SMALL_TLSDESC:
          asm_fprintf (asm_out_file, ":tlsdesc:");
          break;

        case SYMBOL_SMALL_GOTTPREL:
          asm_fprintf (asm_out_file, ":gottprel:");
          break;

        case SYMBOL_SMALL_TPREL:
          asm_fprintf (asm_out_file, ":tprel:");
          break;

        case SYMBOL_TINY_GOT:
          gcc_unreachable ();
          break;

        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;

    case 'L':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
        {
        case SYMBOL_SMALL_GOT:
          asm_fprintf (asm_out_file, ":lo12:");
          break;

        case SYMBOL_SMALL_TLSGD:
          asm_fprintf (asm_out_file, ":tlsgd_lo12:");
          break;

        case SYMBOL_SMALL_TLSDESC:
          asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
          break;

        case SYMBOL_SMALL_GOTTPREL:
          asm_fprintf (asm_out_file, ":gottprel_lo12:");
          break;

        case SYMBOL_SMALL_TPREL:
          asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
          break;

        case SYMBOL_TINY_GOT:
          asm_fprintf (asm_out_file, ":got:");
          break;

        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;

    case 'G':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
        {
        case SYMBOL_SMALL_TPREL:
          asm_fprintf (asm_out_file, ":tprel_hi12:");
          break;

        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;

    default:
      output_operand_lossage ("invalid operand prefix '%%%c'", code);
      return;
    }
}
void
aarch64_print_operand_address (FILE *f, rtx x)
{
  struct aarch64_address_info addr;

  if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
                                MEM, true))
    switch (addr.type)
      {
      case ADDRESS_REG_IMM:
        if (addr.offset == const0_rtx)
          asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
        else
          asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
                       INTVAL (addr.offset));
        return;

      case ADDRESS_REG_REG:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
                       reg_names [REGNO (addr.offset)]);
        else
          asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
                       reg_names [REGNO (addr.offset)], addr.shift);
        return;

      case ADDRESS_REG_UXTW:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM);
        else
          asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM, addr.shift);
        return;

      case ADDRESS_REG_SXTW:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM);
        else
          asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM, addr.shift);
        return;

      case ADDRESS_REG_WB:
        switch (GET_CODE (x))
          {
          case PRE_INC:
            asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case POST_INC:
            asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case PRE_DEC:
            asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case POST_DEC:
            asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case PRE_MODIFY:
            asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
                         INTVAL (addr.offset));
            return;
          case POST_MODIFY:
            asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
                         INTVAL (addr.offset));
            return;
          default:
            break;
          }
        break;

      case ADDRESS_LO_SUM:
        asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
        output_addr_const (f, addr.offset);
        asm_fprintf (f, "]");
        return;

      case ADDRESS_SYMBOLIC:
        break;
      }

  output_addr_const (f, x);
}
bool
aarch64_label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return true;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
     referencing instruction, but they are constant offsets, not
     symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return false;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
              return true;
        }
      else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
        return true;
    }

  return false;
}
/* Implement REGNO_REG_CLASS.  */

enum reg_class
aarch64_regno_regclass (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return CORE_REGS;

  if (regno == SP_REGNUM)
    return STACK_REG;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return POINTER_REGS;

  if (FP_REGNUM_P (regno))
    return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;

  return NO_REGS;
}
/* Try a machine-dependent way of reloading an illegitimate address
   operand.  If we find one, push the reload and return the new rtx.  */

rtx
aarch64_legitimize_reload_address (rtx *x_p,
                                   enum machine_mode mode,
                                   int opnum, int type,
                                   int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  /* Do not allow mem (plus (reg, const)) if vector mode.  */
  if (aarch64_vector_mode_p (mode)
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    {
      rtx orig_rtx = x;

      x = copy_rtx (x);
      push_reload (orig_rtx, NULL_RTX, x_p, NULL,
                   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && CONST_INT_P (XEXP (x, 1)))
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
                   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  /* We wish to handle large displacements off a base register by splitting
     the addend across an add and the mem insn.  This can cut the number of
     extra insns needed from 3 to 1.  It is only useful for load/store of a
     single register with 12 bit offset field.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && HARD_REGISTER_P (XEXP (x, 0))
      && mode != TImode
      && mode != TFmode
      && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = val & 0xfff;
      HOST_WIDE_INT high = val - low;
      HOST_WIDE_INT offs;
      rtx cst;
      enum machine_mode xmode = GET_MODE (x);

      /* In ILP32, xmode can be either DImode or SImode.  */
      gcc_assert (xmode == DImode || xmode == SImode);

      /* Reload non-zero BLKmode offsets.  This is because we cannot
         ascertain BLKmode alignment.  */
      if (GET_MODE_SIZE (mode) == 0)
        return NULL_RTX;

      offs = low % GET_MODE_SIZE (mode);

      /* Align misaligned offset by adjusting high part to compensate.  */
      if (offs != 0)
        {
          if (aarch64_uimm12_shift (high + offs))
            {
              /* Align down.  */
              low = low - offs;
              high = high + offs;
            }
          else
            {
              /* Align up.  */
              offs = GET_MODE_SIZE (mode) - offs;
              low = low + offs;
              high = high + (low & 0x1000) - offs;
              low &= 0xfff;
            }
        }

      /* Check for overflow.  */
      if (high + low != val)
        return NULL_RTX;

      cst = GEN_INT (high);
      if (!aarch64_uimm12_shift (high))
        cst = force_const_mem (xmode, cst);

      /* Reload high part into base reg, leaving the low part
         in the mem instruction.
         Note that replacing this gen_rtx_PLUS with plus_constant is
         wrong in this case because we rely on the
         (plus (plus reg c1) c2) structure being preserved so that
         XEXP (*p, 0) in push_reload below uses the correct term.  */
      x = gen_rtx_PLUS (xmode,
                        gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
                        GEN_INT (low));

      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
                   BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  return NULL_RTX;
}
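
/* Worked example (illustrative): for an address (plus (reg:DI x1)
   (const_int 0x3458)) with a DImode access, val == 0x3458 splits into
   low == 0x458 and high == 0x3000; low is already a multiple of 8, so
   no realignment is needed, and since 0x3000 passes
   aarch64_uimm12_shift the reload ends up as
       add  xT, x1, #0x3000
       ldr  x0, [xT, #0x458]
   with only the high part reloaded into the base register.  */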
static reg_class_t
aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
                          reg_class_t rclass,
                          enum machine_mode mode,
                          secondary_reload_info *sri)
{
  /* Without the TARGET_SIMD instructions we cannot move a Q register
     to a Q register directly.  We need a scratch.  */
  if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
      && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
      && reg_class_subset_p (rclass, FP_REGS))
    {
      if (mode == TFmode)
        sri->icode = CODE_FOR_aarch64_reload_movtf;
      else if (mode == TImode)
        sri->icode = CODE_FOR_aarch64_reload_movti;
      return NO_REGS;
    }

  /* A TFmode or TImode memory access should be handled via an FP_REGS
     because AArch64 has richer addressing modes for LDR/STR instructions
     than LDP/STP instructions.  */
  if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
      && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
    return FP_REGS;

  if (rclass == FP_REGS && (mode == TImode || mode == TFmode)
      && CONSTANT_P (x))
    return CORE_REGS;

  return NO_REGS;
}
static bool
aarch64_can_eliminate (const int from, const int to)
{
  /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
     HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */

  if (frame_pointer_needed)
    {
      if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
        return true;
      if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
        return false;
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
          && !cfun->calls_alloca)
        return true;
      if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
        return true;
      return false;
    }
  else
    {
      /* If we decided that we didn't need a leaf frame pointer but then used
         LR in the function, then we'll want a frame pointer after all, so
         prevent this elimination to ensure a frame pointer is used.

         NOTE: the original value of flag_omit_frame_pointer gets trashed
         IFF flag_omit_leaf_frame_pointer is true, so we check the value
         of faked_omit_frame_pointer here (which is true when we always
         wish to keep non-leaf frame pointers but only wish to keep leaf
         frame pointers when LR is clobbered).  */
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
          && df_regs_ever_live_p (LR_REGNUM)
          && faked_omit_frame_pointer)
        return false;
    }

  return true;
}
HOST_WIDE_INT
aarch64_initial_elimination_offset (unsigned from, unsigned to)
{
  HOST_WIDE_INT frame_size;
  HOST_WIDE_INT offset;

  aarch64_layout_frame ();
  frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
                + crtl->outgoing_args_size
                + cfun->machine->saved_varargs_size);

  frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
  offset = frame_size;

  if (to == HARD_FRAME_POINTER_REGNUM)
    {
      if (from == ARG_POINTER_REGNUM)
        return offset - crtl->outgoing_args_size;

      if (from == FRAME_POINTER_REGNUM)
        return cfun->machine->frame.saved_regs_size + get_frame_size ();
    }

  if (to == STACK_POINTER_REGNUM)
    {
      if (from == FRAME_POINTER_REGNUM)
        {
          HOST_WIDE_INT elim = crtl->outgoing_args_size
                               + cfun->machine->frame.saved_regs_size
                               + get_frame_size ()
                               - cfun->machine->frame.fp_lr_offset;
          elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
          return elim;
        }
    }

  return offset;
}
/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */

rtx
aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
static void
aarch64_asm_trampoline_template (FILE *f)
{
  if (TARGET_ILP32)
    {
      asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
      asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
    }
  else
    {
      asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
      asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
    }
  asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
  assemble_aligned_integer (4, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
}
static void
aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;
  const int tramp_code_sz = 16;

  /* Don't need to copy the trailing D-words, we fill those in below.  */
  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  if (GET_MODE (fnaddr) != ptr_mode)
    fnaddr = convert_memory_address (ptr_mode, fnaddr);
  emit_move_insn (mem, fnaddr);

  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
  emit_move_insn (mem, chain_value);

  /* XXX We should really define a "clear_cache" pattern and use
     gen_clear_cache().  */
  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
                     LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
                     plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
                     ptr_mode);
}
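
/* The resulting trampoline layout is therefore (illustrative):
     bytes 0..15:  the code emitted by aarch64_asm_trampoline_template
     next word:    the target function address (at tramp_code_sz)
     next word:    the static chain value
   with __clear_cache invoked over the whole block afterwards.  */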
static unsigned char
aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
{
  switch (regclass)
    {
    case CORE_REGS:
    case POINTER_REGS:
    case GENERAL_REGS:
    case ALL_REGS:
    case FP_REGS:
    case FP_LO_REGS:
      return
        aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
                                       (GET_MODE_SIZE (mode) + 7) / 8;
    case STACK_REG:
      return 1;

    case NO_REGS:
      return 0;

    default:
      break;
    }
  gcc_unreachable ();
}
static reg_class_t
aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
{
  if (regclass == POINTER_REGS)
    return GENERAL_REGS;

  if (regclass == STACK_REG)
    {
      if (REG_P (x)
          && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
        return regclass;

      return NO_REGS;
    }

  /* If it's an integer immediate that MOVI can't handle, then
     FP_REGS is not an option, so we return NO_REGS instead.  */
  if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
      && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
    return NO_REGS;

  /* Register elimination can result in a request for
     SP+constant->FP_REGS.  We cannot support such operations, which
     use SP as source and an FP_REG as destination, so reject them
     here.  */
  if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
    {
      rtx lhs = XEXP (x, 0);

      /* Look through a possible SUBREG introduced by ILP32.  */
      if (GET_CODE (lhs) == SUBREG)
        lhs = SUBREG_REG (lhs);

      gcc_assert (REG_P (lhs));
      gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
                                      POINTER_REGS));
      return NO_REGS;
    }

  return regclass;
}
static void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}
static void
aarch64_elf_asm_constructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_ctor_section_asm_out_constructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];

      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}

static void
aarch64_elf_asm_destructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_dtor_section_asm_out_destructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];

      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}
const char *
aarch64_output_casesi (rtx *operands)
{
  char buf[100];
  char label[100];
  rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
  int index;
  static const char *const patterns[4][2] =
  {
    {
      "ldrb\t%w3, [%0,%w1,uxtw]",
      "add\t%3, %4, %w3, sxtb #2"
    },
    {
      "ldrh\t%w3, [%0,%w1,uxtw #1]",
      "add\t%3, %4, %w3, sxth #2"
    },
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    },
    /* We assume that DImode is only generated when not optimizing and
       that we don't really need 64-bit address offsets.  That would
       imply an object file with 8GB of code in a single function!  */
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    }
  };

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));

  gcc_assert (index >= 0 && index <= 3);

  /* Need to implement table size reduction, by changing the code below.  */
  output_asm_insn (patterns[index][0], operands);
  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
  snprintf (buf, sizeof (buf),
            "adr\t%%4, %s", targetm.strip_name_encoding (label));
  output_asm_insn (buf, operands);
  output_asm_insn (patterns[index][1], operands);
  output_asm_insn ("br\t%3", operands);
  assemble_label (asm_out_file, label);
  return "";
}
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */

int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;

      for (size = 8; size <= 32; size *= 2)
        {
          HOST_WIDE_INT bits = ((HOST_WIDE_INT) 1U << size) - 1;
          if (mask == bits << shift)
            return size;
        }
    }
  return 0;
}
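
/* Example: aarch64_uxt_size (1, 0x1fe) returns 8, because
   0x1fe == 0xff << 1, i.e. a byte-sized value scaled by 2, which the
   extended-register add/sub forms express as "uxtb #1".  */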
static bool
aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                                   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
                            rtx x ATTRIBUTE_UNUSED,
                            unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}
/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}

/* Helper function for rtx cost calculation.  Strip a shift or extend
   expression from X.  Returns the inner operand if successful, or the
   original expression on failure.  We deal with a number of possible
   canonicalization variations here.  */
static rtx
aarch64_strip_shift_or_extend (rtx x)
{
  rtx op = x;

  /* Zero and sign extraction of a widened value.  */
  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
      && XEXP (op, 2) == const0_rtx
      && aarch64_is_extend_from_extract (GET_MODE (op),
                                         XEXP (XEXP (op, 0), 1),
                                         XEXP (op, 1)))
    return XEXP (XEXP (op, 0), 0);

  /* It can also be represented (for zero-extend) as an AND with an
     immediate.  */
  if (GET_CODE (op) == AND
      && GET_CODE (XEXP (op, 0)) == MULT
      && CONST_INT_P (XEXP (XEXP (op, 0), 1))
      && CONST_INT_P (XEXP (op, 1))
      && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
                           INTVAL (XEXP (op, 1))) != 0)
    return XEXP (XEXP (op, 0), 0);

  /* Now handle extended register, as this may also have an optional
     left shift by 1..4.  */
  if (GET_CODE (op) == ASHIFT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
    op = XEXP (op, 0);

  if (GET_CODE (op) == ZERO_EXTEND
      || GET_CODE (op) == SIGN_EXTEND)
    op = XEXP (op, 0);

  if (op != x)
    return op;

  return aarch64_strip_shift (x);
}
/* Calculate the cost of calculating X, storing it in *COST.  Result
   is true if the total cost of the operation has now been calculated.  */
static bool
aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
                   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
{
  rtx op0, op1;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;

  switch (code)
    {
    case SET:
      op0 = SET_DEST (x);
      op1 = SET_SRC (x);

      switch (GET_CODE (op0))
        {
        case MEM:
          if (speed)
            *cost += extra_cost->ldst.store;

          if (op1 != const0_rtx)
            *cost += rtx_cost (op1, SET, 1, speed);
          return true;

        case SUBREG:
          if (! REG_P (SUBREG_REG (op0)))
            *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
          /* Fall through.  */
        case REG:
          /* Cost is just the cost of the RHS of the set.  */
          *cost += rtx_cost (op1, SET, 1, true);
          return true;

        case ZERO_EXTRACT:  /* Bit-field insertion.  */
        case SIGN_EXTRACT:
          /* Strip any redundant widening of the RHS to meet the width of
             the target.  */
          if (GET_CODE (op1) == SUBREG)
            op1 = SUBREG_REG (op1);
          if ((GET_CODE (op1) == ZERO_EXTEND
               || GET_CODE (op1) == SIGN_EXTEND)
              && GET_CODE (XEXP (op0, 1)) == CONST_INT
              && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
                  >= INTVAL (XEXP (op0, 1))))
            op1 = XEXP (op1, 0);
          *cost += rtx_cost (op1, SET, 1, speed);
          return true;

        default:
          break;
        }
      return false;

    case MEM:
      if (speed)
        *cost += extra_cost->ldst.load;
      return true;

    case NEG:
      op0 = CONST0_RTX (GET_MODE (x));
      op1 = XEXP (x, 0);
      goto cost_minus;

    case COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (op1 == const0_rtx
          && GET_CODE (op0) == AND)
        {
          x = op0;
          goto cost_logic;
        }

      /* Comparisons can work if the order is swapped.
         Canonicalization puts the more complex operation first, but
         we want it in op1.  */
      if (! (REG_P (op0)
             || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
        {
          op0 = XEXP (x, 1);
          op1 = XEXP (x, 0);
        }
      goto cost_minus;

    case MINUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

    cost_minus:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
          || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
              && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
        {
          if (op0 != const0_rtx)
            *cost += rtx_cost (op0, MINUS, 0, speed);

          if (CONST_INT_P (op1))
            {
              if (!aarch64_uimm12_shift (INTVAL (op1)))
                *cost += rtx_cost (op1, MINUS, 1, speed);
            }
          else
            {
              op1 = aarch64_strip_shift_or_extend (op1);
              *cost += rtx_cost (op1, MINUS, 1, speed);
            }
          return true;
        }
      return false;

    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
          if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
            {
              *cost += rtx_cost (op0, PLUS, 0, speed);
            }
          else
            {
              rtx new_op0 = aarch64_strip_shift_or_extend (op0);

              if (new_op0 == op0
                  && GET_CODE (op0) == MULT)
                {
                  if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
                       && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
                      || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
                          && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
                    {
                      *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
                                          speed)
                                + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
                                            speed)
                                + rtx_cost (op1, PLUS, 1, speed));
                      if (speed)
                        *cost +=
                          extra_cost->mult[GET_MODE (x) == DImode].extend_add;
                      return true;
                    }

                  *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
                            + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
                            + rtx_cost (op1, PLUS, 1, speed));

                  if (speed)
                    *cost += extra_cost->mult[GET_MODE (x) == DImode].add;

                  return true;
                }

              *cost += (rtx_cost (new_op0, PLUS, 0, speed)
                        + rtx_cost (op1, PLUS, 1, speed));
            }
          return true;
        }
      return false;

    case IOR:
    case XOR:
    case AND:
    cost_logic:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
          if (CONST_INT_P (op1)
              && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
            {
              *cost += rtx_cost (op0, AND, 0, speed);
            }
          else
            {
              if (GET_CODE (op0) == NOT)
                op0 = XEXP (op0, 0);
              op0 = aarch64_strip_shift (op0);
              *cost += (rtx_cost (op0, AND, 0, speed)
                        + rtx_cost (op1, AND, 1, speed));
            }
          return true;
        }
      return false;

    case ZERO_EXTEND:
      if ((GET_MODE (x) == DImode
           && GET_MODE (XEXP (x, 0)) == SImode)
          || GET_CODE (XEXP (x, 0)) == MEM)
        {
          *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
          return true;
        }
      return false;

    case SIGN_EXTEND:
      if (GET_CODE (XEXP (x, 0)) == MEM)
        {
          *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
          return true;
        }
      return false;

    case ROTATE:
      if (!CONST_INT_P (XEXP (x, 1)))
        *cost += COSTS_N_INSNS (2);
      /* Fall through.  */
    case ROTATERT:
    case LSHIFTRT:
    case ASHIFT:
    case ASHIFTRT:
      /* Shifting by a register often takes an extra cycle.  */
      if (speed && !CONST_INT_P (XEXP (x, 1)))
        *cost += extra_cost->alu.arith_shift_reg;

      *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
      return true;

    case HIGH:
      if (!CONSTANT_P (XEXP (x, 0)))
        *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
      return true;

    case LO_SUM:
      if (!CONSTANT_P (XEXP (x, 1)))
        *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
      *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
      return true;

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
      return true;

    case MULT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      *cost = COSTS_N_INSNS (1);
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
          if (CONST_INT_P (op1)
              && exact_log2 (INTVAL (op1)) > 0)
            {
              *cost += rtx_cost (op0, ASHIFT, 0, speed);
              return true;
            }

          if ((GET_CODE (op0) == ZERO_EXTEND
               && GET_CODE (op1) == ZERO_EXTEND)
              || (GET_CODE (op0) == SIGN_EXTEND
                  && GET_CODE (op1) == SIGN_EXTEND))
            {
              *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
                        + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
              if (speed)
                *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
              return true;
            }

          if (speed)
            *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
        }
      else if (speed)
        {
          if (GET_MODE (x) == DFmode)
            *cost += extra_cost->fp[1].mult;
          else if (GET_MODE (x) == SFmode)
            *cost += extra_cost->fp[0].mult;
        }

      return false; /* All arguments need to be in registers.  */

    case MOD:
    case UMOD:
      *cost = COSTS_N_INSNS (2);
      if (speed)
        {
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
                      + extra_cost->mult[GET_MODE (x) == DImode].idiv);
          else if (GET_MODE (x) == DFmode)
            *cost += (extra_cost->fp[1].mult
                      + extra_cost->fp[1].div);
          else if (GET_MODE (x) == SFmode)
            *cost += (extra_cost->fp[0].mult
                      + extra_cost->fp[0].div);
        }
      return false; /* All arguments need to be in registers.  */

    case DIV:
    case UDIV:
      *cost = COSTS_N_INSNS (1);
      if (speed)
        {
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
          else if (GET_MODE (x) == DFmode)
            *cost += extra_cost->fp[1].div;
          else if (GET_MODE (x) == SFmode)
            *cost += extra_cost->fp[0].div;
        }
      return false; /* All arguments need to be in registers.  */

    default:
      break;
    }
  return false;
}
static int
aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
		      enum machine_mode mode ATTRIBUTE_UNUSED,
		      addr_space_t as ATTRIBUTE_UNUSED,
		      bool speed ATTRIBUTE_UNUSED)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;

  if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
    return addr_cost->pre_modify;

  if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
    return addr_cost->post_modify;

  if (c == PLUS)
    {
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	return addr_cost->imm_offset;
      else if (GET_CODE (XEXP (x, 0)) == MULT
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	       || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	return addr_cost->register_extend;

      return addr_cost->register_offset;
    }
  else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return addr_cost->imm_offset;

  return 0;
}
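
/* Illustrative mapping (not from the per-CPU cost tables, which supply
   the actual numbers): an access such as "ldr x0, [x1, #16]" falls in the
   imm_offset bucket above, "ldr x0, [x1, x2, lsl #3]" in register_offset,
   "ldr w0, [x1, w2, sxtw #2]" in register_extend, and a writeback form
   like "ldr x0, [x1, #8]!" in pre_modify.  */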
static int
aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			    reg_class_t from, reg_class_t to)
{
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params->regmove_cost;

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  /* When AdvSIMD instructions are disabled it is not possible to move
     a 128-bit value directly between Q registers.  This is handled in
     secondary reload.  A general register is used as a scratch to move
     the upper DI value and the lower DI value is moved directly,
     hence the cost is the sum of three moves.  */

  if (! TARGET_SIMD
      && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
    return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

  return regmove_cost->FP2FP;
}

static int
aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			  reg_class_t rclass ATTRIBUTE_UNUSED,
			  bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params->memmov_cost;
}
/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				    tree vectype,
				    int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return aarch64_tune_params->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return aarch64_tune_params->vec_costs->scalar_load_cost;

    case scalar_store:
      return aarch64_tune_params->vec_costs->scalar_store_cost;

    case vector_stmt:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vector_load:
      return aarch64_tune_params->vec_costs->vec_align_load_cost;

    case vector_store:
      return aarch64_tune_params->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return aarch64_tune_params->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return aarch64_tune_params->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return aarch64_tune_params->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return aarch64_tune_params->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return aarch64_tune_params->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
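
/* For example (illustrative): constructing a V4SI vector element by
   element is costed as TYPE_VECTOR_SUBPARTS / 2 + 1 = 4 / 2 + 1 = 3 by
   the vec_construct case above, independently of the tuning tables.  */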
/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		       struct _stmt_vec_info *stmt_info, int misalign,
		       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
	    aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 a function (linear for now) of the loop nest level.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	{
	  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
	  unsigned nest_level = loop_depth (loop);

	  count *= nest_level;
	}

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
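
/* Worked example (illustrative): a vect_body statement at loop nest
   depth 2 with a base cost of 1 and a COUNT of 4 contributes
   4 * 2 * 1 = 8 to the vect_body bucket, so inner-loop statements
   weigh linearly more than those in the outermost vectorized loop.  */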
static void initialize_aarch64_code_model (void);

/* Parse the architecture extension string.  */

static void
aarch64_parse_extension (char *str)
{
  /* The extension string is parsed left to right.  */
  const struct aarch64_option_extension *opt = NULL;

  /* Flag to say whether we are adding or removing an extension.  */
  int adding_ext = -1;

  while (str != NULL && *str != 0)
    {
      char *ext;
      size_t len;

      str++;
      ext = strchr (str, '+');

      if (ext != NULL)
	len = ext - str;
      else
	len = strlen (str);

      if (len >= 2 && strncmp (str, "no", 2) == 0)
	{
	  adding_ext = 0;
	  len -= 2;
	  str += 2;
	}
      else if (len > 0)
	adding_ext = 1;

      if (len == 0)
	{
	  error ("missing feature modifier after %qs", "+no");
	  return;
	}

      /* Scan over the extensions table trying to find an exact match.  */
      for (opt = all_extensions; opt->name != NULL; opt++)
	{
	  if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
	    {
	      /* Add or remove the extension.  */
	      if (adding_ext)
		aarch64_isa_flags |= opt->flags_on;
	      else
		aarch64_isa_flags &= ~(opt->flags_off);
	      break;
	    }
	}

      if (opt->name == NULL)
	{
	  /* Extension not found in list.  */
	  error ("unknown feature modifier %qs", str);
	  return;
	}

      str = ext;
    }

  return;
}
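
/* For example (illustrative): given "-march=armv8-a+crypto+nofp" this
   parser walks "+crypto+nofp" left to right, first ORing in the
   flags_on mask of the "crypto" table entry, then clearing the
   flags_off mask of the "fp" entry because of its "no" prefix.  */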
/* Parse the ARCH string.  */

static void
aarch64_parse_arch (void)
{
  char *ext;
  const struct processor *arch;
  char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
  size_t len;

  strcpy (str, aarch64_arch_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing arch name in -march=%qs", str);
      return;
    }

  /* Loop through the list of supported ARCHs to find a match.  */
  for (arch = all_architectures; arch->name != NULL; arch++)
    {
      if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
	{
	  selected_arch = arch;
	  aarch64_isa_flags = selected_arch->flags;
	  selected_cpu = &all_cores[selected_arch->core];

	  if (ext != NULL)
	    {
	      /* ARCH string contains at least one extension.  */
	      aarch64_parse_extension (ext);
	    }

	  return;
	}
    }

  /* ARCH name not found in list.  */
  error ("unknown value %qs for -march", str);
  return;
}

/* Parse the CPU string.  */

static void
aarch64_parse_cpu (void)
{
  char *ext;
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
  size_t len;

  strcpy (str, aarch64_cpu_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing cpu name in -mcpu=%qs", str);
      return;
    }

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
	{
	  selected_cpu = cpu;
	  aarch64_isa_flags = selected_cpu->flags;

	  if (ext != NULL)
	    {
	      /* CPU string contains at least one extension.  */
	      aarch64_parse_extension (ext);
	    }

	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mcpu", str);
  return;
}

/* Parse the TUNE string.  */

static void
aarch64_parse_tune (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
  strcpy (str, aarch64_tune_string);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
	{
	  selected_tune = cpu;
	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mtune", str);
  return;
}
/* Implement TARGET_OPTION_OVERRIDE.  */

static void
aarch64_override_options (void)
{
  /* march wins over mcpu, so when march is defined, mcpu takes the same value,
     otherwise march remains undefined.  mtune can be used with either march or
     mcpu.  */

  if (aarch64_arch_string)
    {
      aarch64_parse_arch ();
      aarch64_cpu_string = NULL;
    }

  if (aarch64_cpu_string)
    {
      aarch64_parse_cpu ();
      selected_arch = NULL;
    }

  if (aarch64_tune_string)
    {
      aarch64_parse_tune ();
    }

  initialize_aarch64_code_model ();

  aarch64_build_bitmask_table ();

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "cortex-a53".  */
  if (!selected_cpu)
    {
      selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
      aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
    }

  gcc_assert (selected_cpu);

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!selected_tune)
    selected_tune = &all_cores[selected_cpu->core];

  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->core;
  aarch64_tune_params = selected_tune->tune;

  aarch64_override_options_after_change ();
}

/* Implement targetm.override_options_after_change.  */

static void
aarch64_override_options_after_change (void)
{
  faked_omit_frame_pointer = false;

  /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
     that aarch64_frame_pointer_required will be called.  We need to remember
     whether flag_omit_frame_pointer was turned on normally or just faked.  */

  if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
    {
      flag_omit_frame_pointer = true;
      faked_omit_frame_pointer = true;
    }
}

static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
/* A checking mechanism for the implementation of the various code models.  */

static void
initialize_aarch64_code_model (void)
{
  if (flag_pic)
    {
      switch (aarch64_cmodel_var)
	{
	case AARCH64_CMODEL_TINY:
	  aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
	  break;
	case AARCH64_CMODEL_SMALL:
	  aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
	  break;
	case AARCH64_CMODEL_LARGE:
	  sorry ("code model %qs with -f%s", "large",
		 flag_pic > 1 ? "PIC" : "pic");
	default:
	  gcc_unreachable ();
	}
    }
  else
    aarch64_cmodel = aarch64_cmodel_var;
}
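
/* For example (illustrative): compiling with "-mcmodel=small -fpic"
   leaves aarch64_cmodel_var at AARCH64_CMODEL_SMALL and the switch
   above promotes the effective model to AARCH64_CMODEL_SMALL_PIC;
   without -fpic the user's choice is taken unchanged.  */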
/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	  : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local.  */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_SMALL_TPREL;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X in context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x,
			 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_LARGE:
	  return SYMBOL_FORCE_TO_MEM;

	case AARCH64_CMODEL_TINY_PIC:
	case AARCH64_CMODEL_TINY:
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	case AARCH64_CMODEL_SMALL:
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE
	  || CONSTANT_POOL_ADDRESS_P (x))
	return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
	return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_SMALL_ABSOLUTE;

	case AARCH64_CMODEL_TINY_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_TINY_GOT;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_SMALL_GOT;
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}

/* Return true if X holds either a quarter-precision or
     floating-point +0.0 constant.  */
static bool
aarch64_valid_floating_const (enum machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  /* TODO: We could handle moving 0.0 to a TFmode register,
     but first we would like to refactor the movtf_aarch64
     to be more amicable to split moves properly and
     correctly gate on TARGET_SIMD.  For now - reject all
     constants which are not to SFmode or DFmode registers.  */
  if (!(mode == SFmode || mode == DFmode))
    return false;

  if (aarch64_float_const_zero_rtx_p (x))
    return true;
  return aarch64_float_const_representable_p (x);
}

static bool
aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  /* Do not allow vector struct mode constants.  We could support
     0 and -1 easily, but they need support in aarch64-simd.md.  */
  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
    return false;

  /* This could probably go away because
     we now decompose CONST_INTs according to expand_mov_immediate.  */
  if ((GET_CODE (x) == CONST_VECTOR
       && aarch64_simd_valid_immediate (x, mode, false, NULL))
      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
    return !targetm.cannot_force_const_mem (mode, x);

  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  return aarch64_constant_address_p (x);
}

rtx
aarch64_load_tp (rtx target)
{
  if (!target
      || GET_MODE (target) != Pmode
      || !register_operand (target, Pmode))
    target = gen_reg_rtx (Pmode);

  /* Can return in any reg.  */
  emit_insn (gen_aarch64_load_tp_hard (target));
  return target;
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:

     struct  __va_list
     {
       void *__stack;
       void *__gr_top;
       void *__vr_top;
       int   __gr_offs;
       int   __vr_offs;
     };  */

static tree
aarch64_build_builtin_va_list (void)
{
  tree va_list_name;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;

  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;

  /* Create the fields.  */
  f_stack = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__stack"),
			ptr_type_node);
  f_grtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_top"),
			ptr_type_node);
  f_vrtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_top"),
			ptr_type_node);
  f_groff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_offs"),
			integer_type_node);
  f_vroff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_offs"),
			integer_type_node);

  DECL_ARTIFICIAL (f_stack) = 1;
  DECL_ARTIFICIAL (f_grtop) = 1;
  DECL_ARTIFICIAL (f_vrtop) = 1;
  DECL_ARTIFICIAL (f_groff) = 1;
  DECL_ARTIFICIAL (f_vroff) = 1;

  DECL_FIELD_CONTEXT (f_stack) = va_list_type;
  DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_groff) = va_list_type;
  DECL_FIELD_CONTEXT (f_vroff) = va_list_type;

  TYPE_FIELDS (va_list_type) = f_stack;
  DECL_CHAIN (f_stack) = f_grtop;
  DECL_CHAIN (f_grtop) = f_vrtop;
  DECL_CHAIN (f_vrtop) = f_groff;
  DECL_CHAIN (f_groff) = f_vroff;

  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  const CUMULATIVE_ARGS *cum;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, grtop, vrtop, groff, vroff;
  tree t;
  int gr_save_area_size;
  int vr_save_area_size;
  int vr_offset;

  cum = &crtl->args.info;
  gr_save_area_size
    = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
  vr_save_area_size
    = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (cum->aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");
      vr_save_area_size = 0;
    }

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
		  NULL_TREE);
  grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
		  NULL_TREE);
  vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
		  NULL_TREE);
  groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
		  NULL_TREE);
  vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
		  NULL_TREE);

  /* Emit code to initialize STACK, which points to the next varargs stack
     argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
     by named arguments.  STACK is 8-byte aligned.  */
  t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
  if (cum->aapcs_stack_size > 0)
    t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GRTOP, the top of the GR save area.
     virtual_incoming_args_rtx should have been 16 byte aligned.  */
  t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
  t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize VRTOP, the top of the VR save area.
     This address is gr_save_area_bytes below GRTOP, rounded
     down to the next 16-byte boundary.  */
  t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
  vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
				STACK_BOUNDARY / BITS_PER_UNIT);

  if (vr_offset)
    t = fold_build_pointer_plus_hwi (t, -vr_offset);
  t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GROFF, the offset from GRTOP of the
     next GPR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
	      build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Likewise emit code to initialize VROFF, the offset from FTOP
     of the next VR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
	      build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
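
/* Worked example (illustrative): for a variadic callee that has consumed
   two of the eight GP argument registers and none of the eight VRs,
   gr_save_area_size = (8 - 2) * 8 = 48 and vr_save_area_size
   = 8 * 16 = 128, so __gr_offs is initialized to -48 and __vr_offs to
   -128; va_arg then walks these offsets up towards zero.  */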
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */

static tree
aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			      gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree addr;
  bool indirect_p;
  bool is_ha;		/* is HFA or HVA.  */
  bool dw_align;	/* double-word align.  */
  enum machine_mode ag_mode = VOIDmode;
  int nregs;
  enum machine_mode mode;

  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, f_top, f_off, off, arg, roundup, on_stack;
  HOST_WIDE_INT size, rsize, adjust, align;
  tree t, u, cond1, cond2;

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);

  mode = TYPE_MODE (type);

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
		  f_stack, NULL_TREE);
  size = int_size_in_bytes (type);
  align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;

  dw_align = false;
  adjust = 0;
  if (aarch64_vfp_is_call_or_return_candidate (mode,
					       type,
					       &ag_mode,
					       &nregs,
					       &is_ha))
    {
      /* TYPE passed in fp/simd registers.  */
      if (TARGET_GENERAL_REGS_ONLY)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");

      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
		      unshare_expr (valist), f_vrtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
		      unshare_expr (valist), f_vroff, NULL_TREE);

      rsize = nregs * UNITS_PER_VREG;

      if (is_ha)
	{
	  if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
	    adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
	}
      else if (BLOCK_REG_PADDING (mode, type, 1) == downward
	       && size < UNITS_PER_VREG)
	{
	  adjust = UNITS_PER_VREG - size;
	}
    }
  else
    {
      /* TYPE passed in general registers.  */
      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
		      unshare_expr (valist), f_grtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
		      unshare_expr (valist), f_groff, NULL_TREE);
      rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
      nregs = rsize / UNITS_PER_WORD;

      if (align > 8)
	dw_align = true;

      if (BLOCK_REG_PADDING (mode, type, 1) == downward
	  && size < UNITS_PER_WORD)
	{
	  adjust = UNITS_PER_WORD - size;
	}
    }

  /* Get a local temporary for the field value.  */
  off = get_initialized_tmp_var (f_off, pre_p, NULL);

  /* Emit code to branch if off >= 0.  */
  t = build2 (GE_EXPR, boolean_type_node, off,
	      build_int_cst (TREE_TYPE (off), 0));
  cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);

  if (dw_align)
    {
      /* Emit: offs = (offs + 15) & -16.  */
      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
		  build_int_cst (TREE_TYPE (off), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
		  build_int_cst (TREE_TYPE (off), -16));
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
    }
  else
    roundup = NULL;

  /* Update ap.__[g|v]r_offs  */
  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
	      build_int_cst (TREE_TYPE (off), rsize));
  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);

  /* String up.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);

  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
	      build_int_cst (TREE_TYPE (f_off), 0));
  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);

  /* String up: make sure the assignment happens before the use.  */
  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
  COND_EXPR_ELSE (cond1) = t;

  /* Prepare the trees handling the argument that is passed on the stack;
     the top level node will store in ON_STACK.  */
  arg = get_initialized_tmp_var (stack, pre_p, NULL);
  if (align > 8)
    {
      /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
      t = fold_convert (intDI_type_node, arg);
      t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -16));
      t = fold_convert (TREE_TYPE (arg), t);
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
    }
  else
    roundup = NULL;
  /* Advance ap.__stack  */
  t = fold_convert (intDI_type_node, arg);
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), size + 7));
  t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), -8));
  t = fold_convert (TREE_TYPE (arg), t);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
  /* String up roundup and advance.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
  /* String up with arg  */
  on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
  /* Big-endianness related address adjustment.  */
  if (BLOCK_REG_PADDING (mode, type, 1) == downward
      && size < UNITS_PER_WORD)
    {
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
		  size_int (UNITS_PER_WORD - size));
      on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
    }

  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);

  /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
  t = off;
  if (adjust)
    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
		build_int_cst (TREE_TYPE (off), adjust));

  t = fold_convert (sizetype, t);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);

  if (is_ha)
    {
      /* type ha; // treat as "struct {ftype field[n];}"
	 ... [computing offs]
	 for (i = 0; i <nregs; ++i, offs += 16)
	   ha.field[i] = *((ftype *)(ap.__vr_top + offs));
	 return ha;  */
      int i;
      tree tmp_ha, field_t, field_ptr_t;

      /* Declare a local variable.  */
      tmp_ha = create_tmp_var_raw (type, "ha");
      gimple_add_tmp_var (tmp_ha);

      /* Establish the base type.  */
      switch (ag_mode)
	{
	case SFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
	case DFmode:
	  field_t = double_type_node;
	  field_ptr_t = double_ptr_type_node;
	  break;
	case TFmode:
	  field_t = long_double_type_node;
	  field_ptr_t = long_double_ptr_type_node;
	  break;
/* The half precision and quad precision are not fully supported yet.  Enable
   the following code after the support is complete.  Need to find the correct
   type node for __fp16 *.  */
#if 0
	case HFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
#endif
	case V2SImode:
	case V4SImode:
	    {
	      tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
	      field_t = build_vector_type_for_mode (innertype, ag_mode);
	      field_ptr_t = build_pointer_type (field_t);
	    }
	  break;
	default:
	  gcc_assert (0);
	}

      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
      tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
      addr = t;
      t = fold_convert (field_ptr_t, addr);
      t = build2 (MODIFY_EXPR, field_t,
		  build1 (INDIRECT_REF, field_t, tmp_ha),
		  build1 (INDIRECT_REF, field_t, t));

      /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
      for (i = 1; i < nregs; ++i)
	{
	  addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
	  u = fold_convert (field_ptr_t, addr);
	  u = build2 (MODIFY_EXPR, field_t,
		      build2 (MEM_REF, field_t, tmp_ha,
			      build_int_cst (field_ptr_t,
					     (i *
					      int_size_in_bytes (field_t)))),
		      build1 (INDIRECT_REF, field_t, u));
	  t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
	}

      u = fold_convert (TREE_TYPE (f_top), tmp_ha);
      t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
    }

  COND_EXPR_ELSE (cond2) = t;
  addr = fold_convert (build_pointer_type (type), cond1);
  addr = build_va_arg_indirect_ref (addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);

  return addr;
}
/* Implement TARGET_SETUP_INCOMING_VARARGS.  */

static void
aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
				tree type, int *pretend_size ATTRIBUTE_UNUSED,
				int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS local_cum;
  int gr_saved, vr_saved;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  local_cum = *cum;
  aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);

  /* Found out how many registers we need to save.  */
  gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
  vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (local_cum.aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");
      vr_saved = 0;
    }

  if (!no_rtl)
    {
      if (gr_saved > 0)
	{
	  rtx ptr, mem;

	  /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
	  ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
			       - gr_saved * UNITS_PER_WORD);
	  mem = gen_frame_mem (BLKmode, ptr);
	  set_mem_alias_set (mem, get_varargs_alias_set ());

	  move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
			       mem, gr_saved);
	}
      if (vr_saved > 0)
	{
	  /* We can't use move_block_from_reg, because it will use
	     the wrong mode, storing D regs only.  */
	  enum machine_mode mode = TImode;
	  int off, i;

	  /* Set OFF to the offset from virtual_incoming_args_rtx of
	     the first vector register.  The VR save area lies below
	     the GR one, and is aligned to 16 bytes.  */
	  off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
				   STACK_BOUNDARY / BITS_PER_UNIT);
	  off -= vr_saved * UNITS_PER_VREG;

	  for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
	    {
	      rtx ptr, mem;

	      ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
	      mem = gen_frame_mem (mode, ptr);
	      set_mem_alias_set (mem, get_varargs_alias_set ());
	      aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
	      off += UNITS_PER_VREG;
	    }
	}
    }

  /* We don't save the size into *PRETEND_SIZE because we want to avoid
     any complication of having crtl->args.pretend_args_size changed.  */
  cfun->machine->saved_varargs_size
    = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
			 STACK_BOUNDARY / BITS_PER_UNIT)
       + vr_saved * UNITS_PER_VREG);
}
static void
aarch64_conditional_register_usage (void)
{
  int i;
  if (!TARGET_SIMD)
    {
      for (i = V0_REGNUM; i <= V31_REGNUM; i++)
	{
	  fixed_regs[i] = 1;
	  call_used_regs[i] = 1;
	}
    }
}

/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */

static int
aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
{
  enum machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
6129 aarch64_lra_p (void)
6131 return aarch64_lra_flag
;
6134 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6135 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6136 array types. The C99 floating-point complex types are also considered
6137 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6138 types, which are GCC extensions and out of the scope of AAPCS64, are
6139 treated as composite types here as well.
6141 Note that MODE itself is not sufficient in determining whether a type
6142 is such a composite type or not. This is because
6143 stor-layout.c:compute_record_mode may have already changed the MODE
6144 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6145 structure with only one field may have its MODE set to the mode of the
6146 field. Also an integer mode whose size matches the size of the
6147 RECORD_TYPE type may be used to substitute the original mode
6148 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6149 solely relied on. */
6152 aarch64_composite_type_p (const_tree type
,
6153 enum machine_mode mode
)
6155 if (type
&& (AGGREGATE_TYPE_P (type
) || TREE_CODE (type
) == COMPLEX_TYPE
))
6159 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
6160 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
6166 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6167 type as described in AAPCS64 \S 4.1.2.
6169 See the comment above aarch64_composite_type_p for the notes on MODE. */
6172 aarch64_short_vector_p (const_tree type
,
6173 enum machine_mode mode
)
6175 HOST_WIDE_INT size
= -1;
6177 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
6178 size
= int_size_in_bytes (type
);
6179 else if (!aarch64_composite_type_p (type
, mode
)
6180 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
6181 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
))
6182 size
= GET_MODE_SIZE (mode
);
6184 return (size
== 8 || size
== 16) ? true : false;
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */

static bool
aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
					 const_tree type,
					 enum machine_mode *base_mode,
					 int *count,
					 bool *is_ha)
{
  enum machine_mode new_mode = VOIDmode;
  bool composite_p = aarch64_composite_type_p (type, mode);

  if (is_ha != NULL) *is_ha = false;

  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || aarch64_short_vector_p (type, mode))
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      if (is_ha != NULL) *is_ha = true;
      *count = 2;
      new_mode = GET_MODE_INNER (mode);
    }
  else if (type && composite_p)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
	{
	  if (is_ha != NULL) *is_ha = true;
	  *count = ag_count;
	}
      else
	return false;
    }
  else
    return false;

  *base_mode = new_mode;
  return true;
}

/* Implement TARGET_STRUCT_VALUE_RTX.  */

static rtx
aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
			  int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
aarch64_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SIMD
      && (mode == V4SImode || mode == V8HImode
	  || mode == V16QImode || mode == V2DImode
	  || mode == V2SImode || mode == V4HImode
	  || mode == V8QImode || mode == V2SFmode
	  || mode == V4SFmode || mode == V2DFmode))
    return true;

  return false;
}

/* Return appropriate SIMD container
   for MODE within a vector of WIDTH bits.  */
static enum machine_mode
aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
{
  gcc_assert (width == 64 || width == 128);
  if (TARGET_SIMD)
    {
      if (width == 128)
	switch (mode)
	  {
	  case DFmode:
	    return V2DFmode;
	  case SFmode:
	    return V4SFmode;
	  case SImode:
	    return V4SImode;
	  case HImode:
	    return V8HImode;
	  case QImode:
	    return V16QImode;
	  case DImode:
	    return V2DImode;
	  default:
	    break;
	  }
      else
	switch (mode)
	  {
	  case SFmode:
	    return V2SFmode;
	  case SImode:
	    return V2SImode;
	  case HImode:
	    return V4HImode;
	  case QImode:
	    return V8QImode;
	  default:
	    break;
	  }
    }
  return word_mode;
}

/* Return 128-bit container as the preferred SIMD mode for MODE.  */
static enum machine_mode
aarch64_preferred_simd_mode (enum machine_mode mode)
{
  return aarch64_simd_container_mode (mode, 128);
}

/* Return the bitmask of possible vector sizes for the vectorizer
   to iterate over.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  return (16 | 8);
}
/* A table to help perform AArch64-specific name mangling for AdvSIMD
   vector types in order to conform to the AAPCS64 (see "Procedure
   Call Standard for the ARM 64-bit Architecture", Appendix A).  To
   qualify for emission with the mangled names defined in that document,
   a vector type must not only be of the correct mode but also be
   composed of AdvSIMD vector element types (e.g.
   __builtin_aarch64_simd_qi); these types are registered by
   aarch64_init_simd_builtins ().  In other words, vector types defined
   in other ways e.g. via vector_size attribute will get default
   mangled names.  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *mangled_name;
} aarch64_simd_mangle_map_entry;

static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
  { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
  { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
  { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
  { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
  { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
  { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
  { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
  { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
  { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
  { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
  { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
  { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
  { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
  { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
  { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
  { VOIDmode, NULL, NULL }
};

/* Implement TARGET_MANGLE_TYPE.  */

static const char *
aarch64_mangle_type (const_tree type)
{
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;

      while (pos->mode != VOIDmode)
	{
	  tree elt_type = TREE_TYPE (type);

	  if (pos->mode == TYPE_MODE (type)
	      && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	      && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
			  pos->element_type_name))
	    return pos->mangled_name;

	  pos++;
	}
    }

  /* Use the default mangling.  */
  return NULL;
}
/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}

/* Return true iff x is a uniform vector of floating-point
   constants, and the constant can be represented in
   quarter-precision form.  Note, as aarch64_float_const_representable
   rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
static bool
aarch64_vect_float_const_representable_p (rtx x)
{
  int i = 0;
  REAL_VALUE_TYPE r0, ri;
  rtx x0, xi;

  if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
    return false;

  x0 = CONST_VECTOR_ELT (x, 0);
  if (!CONST_DOUBLE_P (x0))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);

  for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
    {
      xi = CONST_VECTOR_ELT (x, i);
      if (!CONST_DOUBLE_P (xi))
	return false;

      REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
      if (!REAL_VALUES_EQUAL (r0, ri))
	return false;
    }

  return aarch64_float_const_representable_p (x0);
}
/* Return true for valid and false for invalid.  */
bool
aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
			      struct simd_immediate_info *info)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
  matches = 1;						\
  for (i = 0; i < idx; i += (STRIDE))			\
    if (!(TEST))					\
      matches = 0;					\
  if (matches)						\
    {							\
      immtype = (CLASS);				\
      elsize = (ELSIZE);				\
      eshift = (SHIFT);					\
      emvn = (NEG);					\
      break;						\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  int eshift, emvn;

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      if (! (aarch64_simd_imm_zero_p (op, mode)
	     || aarch64_vect_float_const_representable_p (op)))
	return false;

      if (info)
	{
	  info->value = CONST_VECTOR_ELT (op, 0);
	  info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
	  info->mvn = false;
	  info->shift = 0;
	}

      return true;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (GET_CODE (el) == CONST_INT)
	{
	  elpart = INTVAL (el);
	  parts = 1;
	}
      else if (GET_CODE (el) == CONST_DOUBLE)
	{
	  elpart = CONST_DOUBLE_LOW (el);
	  parts = 2;
	}
      else
	gcc_unreachable ();

      for (part = 0; part < parts; part++)
	{
	  unsigned int byte;
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (GET_CODE (el) == CONST_DOUBLE)
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
    }
  while (0);

  if (immtype == -1)
    return false;

  if (info)
    {
      info->element_width = elsize;
      info->mvn = emvn != 0;
      info->shift = eshift;

      unsigned HOST_WIDE_INT imm = 0;

      if (immtype >= 12 && immtype <= 15)
	info->msl = true;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
	      << (i * BITS_PER_UNIT);

	  info->value = GEN_INT (imm);
	}
      else
	{
	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  /* Construct 'abcdefgh' because the assembler cannot handle
	     generic constants.  */
	  if (info->mvn)
	    imm = ~imm;
	  imm = (imm >> info->shift) & 0xff;
	  info->value = GEN_INT (imm);
	}
    }

  return true;
#undef CHECK
}
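
/* For example (illustrative): a V4SI splat of 0x0000ab00 has the byte
   pattern { 0x00, 0xab, 0x00, 0x00 } repeated, which matches the
   CHECK (4, 32, 1, ...) case above, giving element_width 32, shift 8
   and mvn false, i.e. a single "movi v0.4s, #0xab, lsl #8".  */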
static bool
aarch64_const_vec_all_same_int_p (rtx x,
				  HOST_WIDE_INT minval,
				  HOST_WIDE_INT maxval)
{
  HOST_WIDE_INT firstval;
  int count, i;

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (firstval < minval || firstval > maxval)
    return false;

  count = CONST_VECTOR_NUNITS (x);
  for (i = 1; i < count; i++)
    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
      return false;

  return true;
}

/* Check if immediate shift constants are within range.  */
bool
aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
{
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
  if (left)
    return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
  else
    return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
}

/* Return true if X is a uniform vector where all elements
   are either the floating-point constant 0.0 or the
   integer constant 0.  */
bool
aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
{
  return x == CONST0_RTX (mode);
}
bool
aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT imm = INTVAL (x);
  int i;

  for (i = 0; i < 8; i++)
    {
      unsigned int byte = imm & 0xff;
      if (byte != 0xff && byte != 0)
	return false;
      imm >>= 8;
    }

  return true;
}
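
/* For example (illustrative): 0x00ff00ff00ff00ff passes the byte test
   above, since every byte is 0x00 or 0xff, and so qualifies for a
   single scalar MOVI; 0x0012000000000000 fails on its 0x12 byte.  */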
bool
aarch64_mov_operand_p (rtx x,
		       enum aarch64_symbol_context context,
		       enum machine_mode mode)
{
  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
    return true;

  if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
    return true;

  return aarch64_classify_symbolic_expression (x, context)
    == SYMBOL_TINY_ABSOLUTE;
}

/* Return a const_int vector of VAL.  */
rtx
aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits);
  int i;

  for (i=0; i < nunits; i++)
    RTVEC_ELT (v, i) = GEN_INT (val);

  return gen_rtx_CONST_VECTOR (mode, v);
}
/* Check OP is a legal scalar immediate for the MOVI instruction.  */

bool
aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_preferred_simd_mode (mode);
  rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
  return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
}

/* Construct and return a PARALLEL RTX vector.  */
rtx
aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int base = high ? nunits / 2 : 0;
  rtx t1;
  int i;

  for (i=0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
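
/* For example (illustrative): for V4SImode with HIGH true this builds
   the PARALLEL [(const_int 2) (const_int 3)], the lane indices of the
   upper half of the vector; with HIGH false it builds
   [(const_int 0) (const_int 1)] for the lower half.  */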
/* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
   HIGH (exclusive).  */
void
aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT lane;
  gcc_assert (GET_CODE (operand) == CONST_INT);
  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("lane out of range");
}

void
aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  gcc_assert (GET_CODE (operand) == CONST_INT);
  HOST_WIDE_INT lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("constant out of range");
}

/* Emit code to reinterpret one AdvSIMD type as another,
   without altering bits.  */
void
aarch64_simd_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}

/* Emit code to place a AdvSIMD pair result in memory locations (with equal
   registers).  */
void
aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
				    rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
				    rtx op1)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, tmp2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}

/* Return TRUE if OP is a valid vector addressing mode.  */
bool
aarch64_simd_mem_operand_p (rtx op)
{
  return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
			|| GET_CODE (XEXP (op, 0)) == REG);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
				rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
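
/* For example (illustrative): copying the register pair {v1,v2} to
   {v2,v3} overlaps with the destination numbered above the source, so
   the else branch above emits the component moves in reverse order,
   v2 -> v3 first and then v1 -> v2, to avoid clobbering v2 before it
   has been read.  */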
/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
   one of VSTRUCT modes: OI, CI or XI.  */
int
aarch64_simd_attr_length_move (rtx insn)
{
  enum machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }
  return 4;
}

/* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
   alignment of a vector to 128 bits.  */
static HOST_WIDE_INT
aarch64_simd_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
  return MIN (align, 128);
}

/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
static bool
aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
{
  if (is_packed)
    return false;

  /* We guarantee alignment for vectors up to 128-bits.  */
  if (tree_int_cst_compare (TYPE_SIZE (type),
			    bitsize_int (BIGGEST_ALIGNMENT)) > 0)
    return false;

  /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
  return true;
}
/* If VALS is a vector constant that can be loaded into a register
   using DUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */
static rtx
aarch64_simd_dup_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR)
    return NULL_RTX;

  for (i = 1; i < n_elts; ++i)
    {
      x = CONST_VECTOR_ELT (vals, i);
      if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
	all_same = false;
    }

  if (!all_same)
    return NULL_RTX;

  /* We can load this constant by using DUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */
  x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}

/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */
static rtx
aarch64_simd_make_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  rtx const_dup;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL_RTX
      && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
    /* Load using MOVI/MVNI.  */
    return const_vec;
  else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
    /* Loaded using DUP.  */
    return const_dup;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  We can not take advantage of single-cycle
       LD1 because we need a PC-relative addressing mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
void
aarch64_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  x = XVECEXP (vals, 0, 0);
  if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
    n_var = 1, one_var = 0;

  for (i = 1; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
	++n_var, one_var = i;

      if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = aarch64_simd_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);
      enum insn_code icode;

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
      aarch64_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      icode = optab_handler (vec_set_optab, mode);
      gcc_assert (icode != CODE_FOR_nothing);
      emit_insn (GEN_FCN (icode) (target, x, index));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
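/* Illustrative note (added, not part of the original source): for a
   vec_init of {x, 1, 2, 3} with a single non-constant x, the n_var == 1
   path above first materialises the constant vector {1, 1, 2, 3} (the
   neighbouring value 1 stands in for lane 0, via one_var ^ 1) and then
   overwrites lane 0 with x through the vec_set pattern, avoiding a
   round trip through the stack.  */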
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  */
static unsigned HOST_WIDE_INT
aarch64_shift_truncation_mask (enum machine_mode mode)
{
  return
    (aarch64_vector_mode_supported_p (mode)
     || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
}
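/* Note (added for illustration): returning GET_MODE_BITSIZE (mode) - 1
   (e.g. 31 for SImode) lets the middle end omit an explicit AND on
   variable shift amounts for scalar shifts, which already truncate the
   count on AArch64; returning 0 for vector modes keeps the masking,
   since the Advanced SIMD shift instructions do not truncate.  */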
#ifndef TLS_SECTION_ASM_FLAG
#define TLS_SECTION_ASM_FLAG 'T'
#endif

void
aarch64_elf_asm_named_section (const char *name, unsigned int flags,
			       tree decl ATTRIBUTE_UNUSED)
{
  char flagchars[10], *f = flagchars;

  /* If we have already declared this section, we can use an
     abbreviated form to switch back to it -- unless this section is
     part of a COMDAT group, in which case GAS requires the full
     declaration every time.  */
  if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
      && (flags & SECTION_DECLARED))
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  if (!(flags & SECTION_DEBUG))
    *f++ = 'a';
  if (flags & SECTION_WRITE)
    *f++ = 'w';
  if (flags & SECTION_CODE)
    *f++ = 'x';
  if (flags & SECTION_SMALL)
    *f++ = 's';
  if (flags & SECTION_MERGE)
    *f++ = 'M';
  if (flags & SECTION_STRINGS)
    *f++ = 'S';
  if (flags & SECTION_TLS)
    *f++ = TLS_SECTION_ASM_FLAG;
  if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
    *f++ = 'G';
  *f = '\0';

  fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);

  if (!(flags & SECTION_NOTYPE))
    {
      const char *type;
      const char *format;

      if (flags & SECTION_BSS)
	type = "nobits";
      else
	type = "progbits";

      format = ",@%s";
#ifdef TYPE_OPERAND_FMT
      format = "," TYPE_OPERAND_FMT;
#endif

      fprintf (asm_out_file, format, type);

      if (flags & SECTION_ENTSIZE)
	fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	{
	  if (TREE_CODE (decl) == IDENTIFIER_NODE)
	    fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
	  else
	    fprintf (asm_out_file, ",%s,comdat",
		     IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
	}
    }

  putc ('\n', asm_out_file);
}
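/* Example output (added for illustration, not from the original source):
   for a writable, allocated TLS bss-style section the function above
   emits a directive of the form

	.section	.tbss,"awT",@nobits

   and a COMDAT group member additionally receives a
   ",<group-name>,comdat" suffix.  */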
/* Select a format to encode pointers in exception handling data.  */
int
aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
{
  int type;
  switch (aarch64_cmodel)
    {
    case AARCH64_CMODEL_TINY:
    case AARCH64_CMODEL_TINY_PIC:
    case AARCH64_CMODEL_SMALL:
    case AARCH64_CMODEL_SMALL_PIC:
      /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
	 for everything.  */
      type = DW_EH_PE_sdata4;
      break;
    default:
      /* No assumptions here.  8-byte relocs required.  */
      type = DW_EH_PE_sdata8;
      break;
    }
  return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}
/* Emit load exclusive.  */

static void
aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
			     rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_load_exclusiveqi; break;
    case HImode: gen = gen_aarch64_load_exclusivehi; break;
    case SImode: gen = gen_aarch64_load_exclusivesi; break;
    case DImode: gen = gen_aarch64_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, model_rtx));
}
/* Emit store exclusive.  */

static void
aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
			      rtx rval, rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_store_exclusiveqi; break;
    case HImode: gen = gen_aarch64_store_exclusivehi; break;
    case SImode: gen = gen_aarch64_store_exclusivesi; break;
    case DImode: gen = gen_aarch64_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem, model_rtx));
}
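/* Note (added): MODEL_RTX carries the C11 memory model for the access.
   The exclusive-access patterns in the machine description inspect it
   to choose between the plain (LDXR/STXR) and the acquire/release
   (LDAXR/STLXR) forms of the instructions.  */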
/* Mark the previous jump instruction as unlikely.  */

static void
aarch64_emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
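/* Note (added): REG_BR_PROB_BASE is 10000, so the note records a taken
   probability of 99/10000, just under 1%, which steers basic-block
   reordering to keep the retry path of the atomic loops out of line.  */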
/* Expand a compare and swap pattern.  */

void
aarch64_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  enum machine_mode mode, cmp_mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);
  cmp_mode = mode;

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
      && INTVAL (mod_s) == MEMMODEL_RELEASE)
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For short modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      cmp_mode = SImode;
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* Fall through.  */

    case SImode:
    case DImode:
      /* Force the value into a register if needed.  */
      if (!aarch64_plus_operand (oldval, mode))
	oldval = force_reg (cmp_mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
/* Split a compare and swap pattern.  */

void
aarch64_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  enum machine_mode mode;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  scratch = operands[7];
  mode = GET_MODE (mem);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);

  cond = aarch64_gen_compare_reg (NE, rval, oldval);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
    }

  emit_label (label2);
}
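/* Illustrative sketch (added, not from the original source): for a
   strong SImode compare-and-swap with acquire semantics, the split
   above corresponds roughly to

	1:	ldaxr	w1, [x2]	// load exclusive
		cmp	w1, w3		// compare with expected
		b.ne	2f		// mismatch: exit
		stxr	w4, w5, [x2]	// store exclusive
		cbnz	w4, 1b		// monitor lost: retry
	2:

   where the store-exclusive status register drives the backward branch.
   A weak CAS omits the retry branch and instead publishes the
   store-exclusive result through the condition flags.  */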
/* Split an atomic operation.  */

void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
			 rtx value, rtx model_rtx, rtx cond)
{
  enum machine_mode mode = GET_MODE (mem);
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx label, x;

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* Fall through.  */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  aarch64_emit_store_exclusive (mode, cond, mem,
				gen_lowpart (mode, new_out), model_rtx);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
}
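/* Illustrative sketch (added): an SImode atomic fetch-and-add therefore
   splits into the usual load-exclusive/modify/store-exclusive loop:

	1:	ldxr	w0, [x1]
		add	w2, w0, w3
		stxr	w4, w2, [x1]
		cbnz	w4, 1b

   The NOT case above emits two instructions (AND, then NOT of the
   result) because there is no single NAND instruction.  */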
static void
aarch64_print_extension (void)
{
  const struct aarch64_option_extension *opt = NULL;

  for (opt = all_extensions; opt->name != NULL; opt++)
    if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
      asm_fprintf (asm_out_file, "+%s", opt->name);

  asm_fprintf (asm_out_file, "\n");
}

static void
aarch64_start_file (void)
{
  if (selected_arch)
    {
      asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
      aarch64_print_extension ();
    }
  else if (selected_cpu)
    {
      asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
      aarch64_print_extension ();
    }
  default_file_start ();
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
aarch64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;

  return VOIDmode;
}

/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent.  They are given
   by:

   (-1)^s * (n/16) * 2^r

   Where:
     's' is the sign bit.
     'n' is an integer in the range 16 <= n <= 31.
     'r' is an integer in the range -3 <= r <= 4.  */
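/* Worked example (added for illustration, not part of the original
   source): 0.25 is representable, since 0.25 = (16/16) * 2^-2 gives
   s = 0, n = 16, r = -2; likewise 17.0 = (17/16) * 2^4.  A value such
   as 0.1 has no exact n/16 * 2^r form and is rejected by the predicate
   below.  */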
/* Return true iff X can be represented by a quarter-precision
   floating point immediate operand X.  Note, we cannot represent 0.0.  */
bool
aarch64_float_const_representable_p (rtx x)
{
  /* This represents our current view of how many bits
     make up the mantissa.  */
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  int exponent;
  unsigned HOST_WIDE_INT mantissa, mask;
  HOST_WIDE_INT m1, m2;
  REAL_VALUE_TYPE r, m;

  if (!CONST_DOUBLE_P (x))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We cannot represent infinities, NaNs or +/-zero.  We won't
     know if we have +zero until we analyse the mantissa, but we
     can reject the other invalid values.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
      || REAL_VALUE_MINUS_ZERO (r))
    return false;

  /* Extract exponent.  */
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     m1 holds the low part of the mantissa, m2 the high part.
     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
     bits for the mantissa, this can fail (low bits will be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  REAL_VALUE_TO_INT (&m1, &m2, m);

  /* If the low part of the mantissa has bits set we cannot represent
     the value.  */
  if (m1 != 0)
    return false;
  /* We have rejected the lower HOST_WIDE_INT, so update our
     understanding of how many bits lie in the mantissa and
     look only at the high HOST_WIDE_INT.  */
  mantissa = m2;
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can only represent values with a mantissa of the form 1.xxxx.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return false;

  /* Having filtered unrepresentable values, we may now remove all
     but the highest 5 bits.  */
  mantissa >>= point_pos - 5;

  /* We cannot represent the value 0.0, so reject it.  This is handled
     elsewhere.  */
  if (mantissa == 0)
    return false;

  /* Then, as bit 4 is always set, we can mask it off, leaving
     the mantissa in the range [0, 15].  */
  mantissa &= ~(1 << 4);
  gcc_assert (mantissa <= 15);

  /* GCC internally does not use IEEE754-like encoding (where normalized
     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
     Our mantissa values are shifted 4 places to the left relative to
     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
     by 5 places to correct for GCC's representation.  */
  exponent = 5 - exponent;

  return (exponent >= 0 && exponent <= 7);
}
char*
aarch64_output_simd_mov_immediate (rtx const_vector,
				   enum machine_mode mode,
				   unsigned width)
{
  bool is_valid;
  static char templ[40];
  const char *mnemonic;
  const char *shift_op;
  unsigned int lane_count = 0;
  char element_char;

  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };

  /* This will return true to show const_vector is legal for use as either
     an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
  gcc_assert (is_valid);

  element_char = sizetochar (info.element_width);
  lane_count = width / info.element_width;

  mode = GET_MODE_INNER (mode);
  if (mode == SFmode || mode == DFmode)
    {
      gcc_assert (info.shift == 0 && ! info.mvn);
      if (aarch64_float_const_zero_rtx_p (info.value))
	info.value = GEN_INT (0);
      else
	{
#define buf_size 20
	  REAL_VALUE_TYPE r;
	  REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
	  char float_buf[buf_size] = {'\0'};
	  real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
#undef buf_size

	  if (lane_count == 1)
	    snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
	  else
	    snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
		      lane_count, element_char, float_buf);
	  return templ;
	}
    }

  mnemonic = info.mvn ? "mvni" : "movi";
  shift_op = info.msl ? "msl" : "lsl";

  if (lane_count == 1)
    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, UINTVAL (info.value));
  else if (info.shift)
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
	      ", %s %d", mnemonic, lane_count, element_char,
	      UINTVAL (info.value), shift_op, info.shift);
  else
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, lane_count, element_char, UINTVAL (info.value));
  return templ;
}
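/* Example outputs after operand substitution (added for illustration;
   the exact operands depend on the immediate):
     shifted integer splat:  "movi	v0.4s, 0x1, lsl 16"
     inverted pattern:       "mvni	v0.8h, 0xff"
     float splat:            "fmov	v0.2d, 1.0e+0"  */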
char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate,
					  enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_simd_container_mode (mode, 64);
  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
}
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  enum machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  The three XORs exchange
     the two source registers without needing a scratch.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* vec_perm support.  */

#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_SIMD);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	{
	  /* Expand the argument to a V16QI mode by duplicating it.  */
	  rtx pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
	}
    }
}
void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
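/* Note (added): TBL writes zero to any result byte whose selector index
   is out of range, whereas the vec_perm optab requires indices to wrap.
   The AND with nelt - 1 (or 2 * nelt - 1 for the two-vector form) above
   implements that wrapping before the table lookup.  */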
/* Recognize patterns suitable for the TRN instructions.  */
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn2v16qi; break;
	case V8QImode: gen = gen_aarch64_trn2v8qi; break;
	case V8HImode: gen = gen_aarch64_trn2v8hi; break;
	case V4HImode: gen = gen_aarch64_trn2v4hi; break;
	case V4SImode: gen = gen_aarch64_trn2v4si; break;
	case V2SImode: gen = gen_aarch64_trn2v2si; break;
	case V2DImode: gen = gen_aarch64_trn2v2di; break;
	case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn1v16qi; break;
	case V8QImode: gen = gen_aarch64_trn1v8qi; break;
	case V8HImode: gen = gen_aarch64_trn1v8hi; break;
	case V4HImode: gen = gen_aarch64_trn1v4hi; break;
	case V4SImode: gen = gen_aarch64_trn1v4si; break;
	case V2SImode: gen = gen_aarch64_trn1v2si; break;
	case V2DImode: gen = gen_aarch64_trn1v2di; break;
	case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
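/* Worked example (added): on V4SI, TRN1 interleaves the even lanes of
   the two inputs and matches the permute vector {0, 4, 2, 6} (odd == 0),
   while TRN2 takes the odd lanes and matches {1, 5, 3, 7} (odd == 1).  */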
/* Recognize patterns suitable for the UZP instructions.  */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp2v4si; break;
	case V2SImode: gen = gen_aarch64_uzp2v2si; break;
	case V2DImode: gen = gen_aarch64_uzp2v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp1v4si; break;
	case V2SImode: gen = gen_aarch64_uzp1v2si; break;
	case V2DImode: gen = gen_aarch64_uzp1v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
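/* Worked example (added): on V4SI, UZP1 keeps the even-indexed elements
   of the concatenated inputs and matches the permute vector {0, 2, 4, 6}
   (odd == 0); UZP2 keeps the odd-indexed ones, {1, 3, 5, 7}.  */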
/* Recognize patterns suitable for the ZIP instructions.  */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    /* Do Nothing.  */
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip2v16qi; break;
	case V8QImode: gen = gen_aarch64_zip2v8qi; break;
	case V8HImode: gen = gen_aarch64_zip2v8hi; break;
	case V4HImode: gen = gen_aarch64_zip2v4hi; break;
	case V4SImode: gen = gen_aarch64_zip2v4si; break;
	case V2SImode: gen = gen_aarch64_zip2v2si; break;
	case V2DImode: gen = gen_aarch64_zip2v2di; break;
	case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip1v16qi; break;
	case V8QImode: gen = gen_aarch64_zip1v8qi; break;
	case V8HImode: gen = gen_aarch64_zip1v8hi; break;
	case V4HImode: gen = gen_aarch64_zip1v4hi; break;
	case V4SImode: gen = gen_aarch64_zip1v4si; break;
	case V2SImode: gen = gen_aarch64_zip1v2si; break;
	case V2DImode: gen = gen_aarch64_zip1v2di; break;
	case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
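/* Worked example (added): on V4SI, ZIP1 interleaves the low halves of
   the two inputs and matches the permute vector {0, 4, 1, 5}
   (high == 0); ZIP2 interleaves the high halves, {2, 6, 3, 7}.  */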
/* Recognize patterns suitable for the DUP (element) instruction.  */
static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
  rtx (*gen) (rtx, rtx, rtx);
  rtx out = d->target;
  rtx in0;
  enum machine_mode vmode = d->vmode;
  unsigned int i, elt, nelt = d->nelt;
  rtx lane;

  /* TODO: This may not be big-endian safe.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; i++)
    {
      if (elt != d->perm[i])
	return false;
    }

  /* The generic preparation in aarch64_expand_vec_perm_const_1
     swaps the operand order and the permute indices if it finds
     d->perm[0] to be in the second operand.  Thus, we can always
     use d->op0 and need not do any extra arithmetic to get the
     correct lane number.  */
  in0 = d->op0;
  lane = GEN_INT (elt);

  switch (vmode)
    {
    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
    default:
      return false;
    }

  emit_insn (gen (out, in0, lane));
  return true;
}
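/* Worked example (added): the splat permute {3, 3, 3, 3} on V4SI is
   matched here and becomes a single lane duplicate, e.g.
   "dup v0.4s, v1.s[3]".  */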
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;

      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_zip (d))
	return true;
      else if (aarch64_evpc_uzp (d))
	return true;
      else if (aarch64_evpc_trn (d))
	return true;
      else if (aarch64_evpc_dup (d))
	return true;
      return aarch64_evpc_tbl (d);
    }

  return false;
}
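/* Worked example (added): for V4SI with permute vector {5, 1, 7, 3},
   perm[0] >= nelt, so the swap above exchanges op0 and op1 and rewrites
   the indices to {1, 5, 3, 7}, which aarch64_evpc_trn then recognizes
   as TRN2 on the exchanged operands.  */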
/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* Fall through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}
static bool
aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  /* Generate into a throwaway sequence so that this hook, which only
     tests for support, has no side effects on the insn stream.  */
  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P aarch64_lra_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"