1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
51 #include "vec.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
56 #include "tree-eh.h"
57 #include "gimple-expr.h"
58 #include "is-a.h"
59 #include "gimple.h"
60 #include "gimplify.h"
61 #include "optabs.h"
62 #include "dwarf2.h"
63 #include "cfgloop.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
66
67 /* Defined for convenience. */
68 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
69
70 /* Classifies an address.
71
72 ADDRESS_REG_IMM
73 A simple base register plus immediate offset.
74
75 ADDRESS_REG_WB
76 A base register indexed by immediate offset with writeback.
77
78 ADDRESS_REG_REG
79 A base register indexed by (optionally scaled) register.
80
81 ADDRESS_REG_UXTW
82 A base register indexed by (optionally scaled) zero-extended register.
83
84 ADDRESS_REG_SXTW
85 A base register indexed by (optionally scaled) sign-extended register.
86
87 ADDRESS_LO_SUM
88 A LO_SUM rtx with a base register and "LO12" symbol relocation.
89
90 ADDRESS_SYMBOLIC:
91 A constant symbolic address, in pc-relative literal pool. */
92
93 enum aarch64_address_type {
94 ADDRESS_REG_IMM,
95 ADDRESS_REG_WB,
96 ADDRESS_REG_REG,
97 ADDRESS_REG_UXTW,
98 ADDRESS_REG_SXTW,
99 ADDRESS_LO_SUM,
100 ADDRESS_SYMBOLIC
101 };
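/* Illustrative examples of the address classes above, in approximate
   assembly syntax; these are examples only, not an exhaustive list:

     ADDRESS_REG_IMM     ldr  x0, [x1, #16]
     ADDRESS_REG_WB      ldr  x0, [x1, #16]!   or   ldr x0, [x1], #16
     ADDRESS_REG_REG     ldr  x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW    ldr  x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW    ldr  x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM      ldr  x0, [x1, #:lo12:foo]
     ADDRESS_SYMBOLIC    ldr  x0, .LC0   (PC-relative literal load)  */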
102
103 struct aarch64_address_info {
104 enum aarch64_address_type type;
105 rtx base;
106 rtx offset;
107 int shift;
108 enum aarch64_symbol_type symbol_type;
109 };
110
111 struct simd_immediate_info
112 {
113 rtx value;
114 int shift;
115 int element_width;
116 bool mvn;
117 bool msl;
118 };
119
120 /* The current code model. */
121 enum aarch64_code_model aarch64_cmodel;
122
123 #ifdef HAVE_AS_TLS
124 #undef TARGET_HAVE_TLS
125 #define TARGET_HAVE_TLS 1
126 #endif
127
128 static bool aarch64_lra_p (void);
129 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
130 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
131 const_tree,
132 enum machine_mode *, int *,
133 bool *);
134 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
135 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_override_options_after_change (void);
137 static bool aarch64_vector_mode_supported_p (enum machine_mode);
138 static unsigned bit_count (unsigned HOST_WIDE_INT);
139 static bool aarch64_const_vec_all_same_int_p (rtx,
140 HOST_WIDE_INT, HOST_WIDE_INT);
141
142 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
143 const unsigned char *sel);
144
145 /* The processor for which instructions should be scheduled. */
146 enum aarch64_processor aarch64_tune = cortexa53;
147
148 /* The current tuning set. */
149 const struct tune_params *aarch64_tune_params;
150
151 /* Mask to specify which instructions we are allowed to generate. */
152 unsigned long aarch64_isa_flags = 0;
153
154 /* Mask to specify which instruction scheduling options should be used. */
155 unsigned long aarch64_tune_flags = 0;
156
157 /* Tuning parameters. */
158
159 #if HAVE_DESIGNATED_INITIALIZERS
160 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
161 #else
162 #define NAMED_PARAM(NAME, VAL) (VAL)
163 #endif
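/* For example (illustrative only): with designated initializers,
   NAMED_PARAM (GP2GP, 1) expands to ".GP2GP = (1)"; without them it
   expands to just "(1)" and relies on positional initialization, so the
   fields in the tables below must stay in declaration order.  */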
164
169 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
170 __extension__
171 #endif
172 static const struct cpu_addrcost_table generic_addrcost_table =
173 {
174 NAMED_PARAM (pre_modify, 0),
175 NAMED_PARAM (post_modify, 0),
176 NAMED_PARAM (register_offset, 0),
177 NAMED_PARAM (register_extend, 0),
178 NAMED_PARAM (imm_offset, 0)
179 };
180
181 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182 __extension__
183 #endif
184 static const struct cpu_regmove_cost generic_regmove_cost =
185 {
186 NAMED_PARAM (GP2GP, 1),
187 NAMED_PARAM (GP2FP, 2),
188 NAMED_PARAM (FP2GP, 2),
189 /* We currently do not provide direct support for TFmode Q->Q move.
190 Therefore we need to raise the cost above 2 in order to have
191 reload handle the situation. */
192 NAMED_PARAM (FP2FP, 4)
193 };
194
195 /* Generic costs for vector insn classes. */
196 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
197 __extension__
198 #endif
199 static const struct cpu_vector_cost generic_vector_cost =
200 {
201 NAMED_PARAM (scalar_stmt_cost, 1),
202 NAMED_PARAM (scalar_load_cost, 1),
203 NAMED_PARAM (scalar_store_cost, 1),
204 NAMED_PARAM (vec_stmt_cost, 1),
205 NAMED_PARAM (vec_to_scalar_cost, 1),
206 NAMED_PARAM (scalar_to_vec_cost, 1),
207 NAMED_PARAM (vec_align_load_cost, 1),
208 NAMED_PARAM (vec_unalign_load_cost, 1),
209 NAMED_PARAM (vec_unalign_store_cost, 1),
210 NAMED_PARAM (vec_store_cost, 1),
211 NAMED_PARAM (cond_taken_branch_cost, 3),
212 NAMED_PARAM (cond_not_taken_branch_cost, 1)
213 };
214
215 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
216 __extension__
217 #endif
218 static const struct tune_params generic_tunings =
219 {
220 &generic_extra_costs,
221 &generic_addrcost_table,
222 &generic_regmove_cost,
223 &generic_vector_cost,
224 NAMED_PARAM (memmov_cost, 4)
225 };
226
227 /* A processor implementing AArch64. */
228 struct processor
229 {
230 const char *const name;
231 enum aarch64_processor core;
232 const char *arch;
233 const unsigned long flags;
234 const struct tune_params *const tune;
235 };
236
237 /* Processor cores implementing AArch64. */
238 static const struct processor all_cores[] =
239 {
240 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
241 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
242 #include "aarch64-cores.def"
243 #undef AARCH64_CORE
244 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
245 {NULL, aarch64_none, NULL, 0, NULL}
246 };
247
248 /* Architectures implementing AArch64. */
249 static const struct processor all_architectures[] =
250 {
251 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
252 {NAME, CORE, #ARCH, FLAGS, NULL},
253 #include "aarch64-arches.def"
254 #undef AARCH64_ARCH
255 {NULL, aarch64_none, NULL, 0, NULL}
256 };
257
258 /* Target specification.  These are populated as command-line arguments
259 are processed, or NULL if not specified. */
260 static const struct processor *selected_arch;
261 static const struct processor *selected_cpu;
262 static const struct processor *selected_tune;
263
264 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
265
266 /* An ISA extension in the co-processor and main instruction set space. */
267 struct aarch64_option_extension
268 {
269 const char *const name;
270 const unsigned long flags_on;
271 const unsigned long flags_off;
272 };
273
274 /* ISA extensions in AArch64. */
275 static const struct aarch64_option_extension all_extensions[] =
276 {
277 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
278 {NAME, FLAGS_ON, FLAGS_OFF},
279 #include "aarch64-option-extensions.def"
280 #undef AARCH64_OPT_EXTENSION
281 {NULL, 0, 0}
282 };
283
284 /* Used to track the size of an address when generating a pre/post
285 increment address. */
286 static enum machine_mode aarch64_memory_reference_mode;
287
288 /* Used to force GTY into this file. */
289 static GTY(()) int gty_dummy;
290
291 /* A table of valid AArch64 "bitmask immediate" values for
292 logical instructions. */
293
294 #define AARCH64_NUM_BITMASKS 5334
295 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
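/* A bitmask immediate is a repeating 2-, 4-, 8-, 16-, 32- or 64-bit
   element containing a single (possibly rotated) run of set bits;
   illustrative examples are 0x5555555555555555, 0x00ff00ff00ff00ff and
   0x7fffffffffffffff.  All-zeros and all-ones are not encodable.  */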
296
297 /* Did we set flag_omit_frame_pointer just so
298 aarch64_frame_pointer_required would be called? */
299 static bool faked_omit_frame_pointer;
300
301 typedef enum aarch64_cond_code
302 {
303 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
304 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
305 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
306 }
307 aarch64_cc;
308
309 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
310
311 /* The condition codes of the processor, and the inverse function. */
312 static const char * const aarch64_condition_codes[] =
313 {
314 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
315 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
316 };
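/* AARCH64_INVERSE_CONDITION_CODE above simply flips the low bit, which
   pairs each condition with its inverse; for example (illustrative),
   AARCH64_EQ (0) maps to AARCH64_NE (1) and AARCH64_GE (10) maps to
   AARCH64_LT (11).  */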
317
318 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
319 unsigned
320 aarch64_dbx_register_number (unsigned regno)
321 {
322 if (GP_REGNUM_P (regno))
323 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
324 else if (regno == SP_REGNUM)
325 return AARCH64_DWARF_SP;
326 else if (FP_REGNUM_P (regno))
327 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
328
329 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
330 equivalent DWARF register. */
331 return DWARF_FRAME_REGISTERS;
332 }
333
334 /* Return TRUE if MODE is any of the large INT modes. */
335 static bool
336 aarch64_vect_struct_mode_p (enum machine_mode mode)
337 {
338 return mode == OImode || mode == CImode || mode == XImode;
339 }
340
341 /* Return TRUE if MODE is any of the vector modes. */
342 static bool
343 aarch64_vector_mode_p (enum machine_mode mode)
344 {
345 return aarch64_vector_mode_supported_p (mode)
346 || aarch64_vect_struct_mode_p (mode);
347 }
348
349 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
350 static bool
351 aarch64_array_mode_supported_p (enum machine_mode mode,
352 unsigned HOST_WIDE_INT nelems)
353 {
354 if (TARGET_SIMD
355 && AARCH64_VALID_SIMD_QREG_MODE (mode)
356 && (nelems >= 2 && nelems <= 4))
357 return true;
358
359 return false;
360 }
361
362 /* Implement HARD_REGNO_NREGS. */
363
364 int
365 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
366 {
367 switch (aarch64_regno_regclass (regno))
368 {
369 case FP_REGS:
370 case FP_LO_REGS:
371 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
372 default:
373 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
374 }
375 gcc_unreachable ();
376 }
377
378 /* Implement HARD_REGNO_MODE_OK. */
379
380 int
381 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
382 {
383 if (GET_MODE_CLASS (mode) == MODE_CC)
384 return regno == CC_REGNUM;
385
386 if (regno == SP_REGNUM)
387 /* The purpose of comparing with ptr_mode is to support the
388 global register variable associated with the stack pointer
389 register via the syntax of asm ("wsp") in ILP32. */
390 return mode == Pmode || mode == ptr_mode;
391
392 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
393 return mode == Pmode;
394
395 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
396 return 1;
397
398 if (FP_REGNUM_P (regno))
399 {
400 if (aarch64_vect_struct_mode_p (mode))
401 return
402 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
403 else
404 return 1;
405 }
406
407 return 0;
408 }
409
410 /* Return true if calls to DECL should be treated as
411    long-calls (i.e. called via a register).  */
412 static bool
413 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
414 {
415 return false;
416 }
417
418 /* Return true if calls to symbol-ref SYM should be treated as
419    long-calls (i.e. called via a register).  */
420 bool
421 aarch64_is_long_call_p (rtx sym)
422 {
423 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
424 }
425
426 /* Return true if the offsets to a zero/sign-extract operation
427 represent an expression that matches an extend operation. The
428    operands represent the parameters from
429
430 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
431 bool
432 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
433 rtx extract_imm)
434 {
435 HOST_WIDE_INT mult_val, extract_val;
436
437 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
438 return false;
439
440 mult_val = INTVAL (mult_imm);
441 extract_val = INTVAL (extract_imm);
442
443 if (extract_val > 8
444 && extract_val < GET_MODE_BITSIZE (mode)
445 && exact_log2 (extract_val & ~7) > 0
446 && (extract_val & 7) <= 4
447 && mult_val == (1 << (extract_val & 7)))
448 return true;
449
450 return false;
451 }
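/* A worked example (illustrative): for DImode with MULT_IMM 4 and
   EXTRACT_IMM 34, the checks above pass, since 34 & ~7 == 32 is a power
   of two, 34 & 7 == 2 <= 4 and 4 == 1 << 2; i.e. the extract describes
   a 32-bit extend scaled by 4, as found in extended-register address
   and arithmetic operands.  */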
452
453 /* Emit an insn that's a simple single-set.  Both operands must be
454 known to be valid. */
455 inline static rtx
456 emit_set_insn (rtx x, rtx y)
457 {
458 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
459 }
460
461 /* X and Y are two things to compare using CODE. Emit the compare insn and
462 return the rtx for register 0 in the proper mode. */
463 rtx
464 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
465 {
466 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
467 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
468
469 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
470 return cc_reg;
471 }
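/* A minimal usage sketch (illustrative; roughly what the cbranch/cstore
   expanders in aarch64.md do):

     rtx cc_reg = aarch64_gen_compare_reg (NE, x, y);
     rtx cond = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);

   COND can then be used as the condition of an if_then_else or a
   conditional set.  */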
472
473 /* Build the SYMBOL_REF for __tls_get_addr. */
474
475 static GTY(()) rtx tls_get_addr_libfunc;
476
477 rtx
478 aarch64_tls_get_addr (void)
479 {
480 if (!tls_get_addr_libfunc)
481 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
482 return tls_get_addr_libfunc;
483 }
484
485 /* Return the TLS model to use for ADDR. */
486
487 static enum tls_model
488 tls_symbolic_operand_type (rtx addr)
489 {
490 enum tls_model tls_kind = TLS_MODEL_NONE;
491 rtx sym, addend;
492
493 if (GET_CODE (addr) == CONST)
494 {
495 split_const (addr, &sym, &addend);
496 if (GET_CODE (sym) == SYMBOL_REF)
497 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
498 }
499 else if (GET_CODE (addr) == SYMBOL_REF)
500 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
501
502 return tls_kind;
503 }
504
505 /* We'll allow lo_sums in addresses in our legitimate addresses,
506    so that combine can take care of combining addresses where
507    necessary, but for generation purposes, we'll generate the
508    address as:
509 RTL Absolute
510 tmp = hi (symbol_ref); adrp x1, foo
511 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
512 nop
513
514 PIC TLS
515 adrp x1, :got:foo adrp tmp, :tlsgd:foo
516 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
517 bl __tls_get_addr
518 nop
519
520 Load TLS symbol, depending on TLS mechanism and TLS access model.
521
522 Global Dynamic - Traditional TLS:
523 adrp tmp, :tlsgd:imm
524 add dest, tmp, #:tlsgd_lo12:imm
525 bl __tls_get_addr
526
527 Global Dynamic - TLS Descriptors:
528 adrp dest, :tlsdesc:imm
529 ldr tmp, [dest, #:tlsdesc_lo12:imm]
530 add dest, dest, #:tlsdesc_lo12:imm
531 blr tmp
532 mrs tp, tpidr_el0
533 add dest, dest, tp
534
535 Initial Exec:
536 mrs tp, tpidr_el0
537 adrp tmp, :gottprel:imm
538 ldr dest, [tmp, #:gottprel_lo12:imm]
539 add dest, dest, tp
540
541 Local Exec:
542 mrs tp, tpidr_el0
543 add t0, tp, #:tprel_hi12:imm
544 add t0, #:tprel_lo12_nc:imm
545 */
546
547 static void
548 aarch64_load_symref_appropriately (rtx dest, rtx imm,
549 enum aarch64_symbol_type type)
550 {
551 switch (type)
552 {
553 case SYMBOL_SMALL_ABSOLUTE:
554 {
555 /* In ILP32, the mode of dest can be either SImode or DImode. */
556 rtx tmp_reg = dest;
557 enum machine_mode mode = GET_MODE (dest);
558
559 gcc_assert (mode == Pmode || mode == ptr_mode);
560
561 if (can_create_pseudo_p ())
562 tmp_reg = gen_reg_rtx (mode);
563
564 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
565 emit_insn (gen_add_losym (dest, tmp_reg, imm));
566 return;
567 }
568
569 case SYMBOL_TINY_ABSOLUTE:
570 emit_insn (gen_rtx_SET (Pmode, dest, imm));
571 return;
572
573 case SYMBOL_SMALL_GOT:
574 {
575 /* In ILP32, the mode of dest can be either SImode or DImode,
576 	   while the GOT entry is always of SImode size.  The mode of
577 	   dest depends on how dest is used: if dest is assigned to a
578 	   pointer (e.g. in memory), it has SImode; it may have
579 	   DImode if dest is dereferenced to access the memory.
580 This is why we have to handle three different ldr_got_small
581 patterns here (two patterns for ILP32). */
582 rtx tmp_reg = dest;
583 enum machine_mode mode = GET_MODE (dest);
584
585 if (can_create_pseudo_p ())
586 tmp_reg = gen_reg_rtx (mode);
587
588 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
589 if (mode == ptr_mode)
590 {
591 if (mode == DImode)
592 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
593 else
594 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
595 }
596 else
597 {
598 gcc_assert (mode == Pmode);
599 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
600 }
601
602 return;
603 }
604
605 case SYMBOL_SMALL_TLSGD:
606 {
607 rtx insns;
608 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
609
610 start_sequence ();
611 emit_call_insn (gen_tlsgd_small (result, imm));
612 insns = get_insns ();
613 end_sequence ();
614
615 RTL_CONST_CALL_P (insns) = 1;
616 emit_libcall_block (insns, dest, result, imm);
617 return;
618 }
619
620 case SYMBOL_SMALL_TLSDESC:
621 {
622 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
623 rtx tp;
624
625 emit_insn (gen_tlsdesc_small (imm));
626 tp = aarch64_load_tp (NULL);
627 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
628 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
629 return;
630 }
631
632 case SYMBOL_SMALL_GOTTPREL:
633 {
634 rtx tmp_reg = gen_reg_rtx (Pmode);
635 rtx tp = aarch64_load_tp (NULL);
636 emit_insn (gen_tlsie_small (tmp_reg, imm));
637 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
638 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
639 return;
640 }
641
642 case SYMBOL_SMALL_TPREL:
643 {
644 rtx tp = aarch64_load_tp (NULL);
645 emit_insn (gen_tlsle_small (dest, tp, imm));
646 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
647 return;
648 }
649
650 case SYMBOL_TINY_GOT:
651 emit_insn (gen_ldr_got_tiny (dest, imm));
652 return;
653
654 default:
655 gcc_unreachable ();
656 }
657 }
658
659 /* Emit a move from SRC to DEST. Assume that the move expanders can
660 handle all moves if !can_create_pseudo_p (). The distinction is
661 important because, unlike emit_move_insn, the move expanders know
662 how to force Pmode objects into the constant pool even when the
663 constant pool address is not itself legitimate. */
664 static rtx
665 aarch64_emit_move (rtx dest, rtx src)
666 {
667 return (can_create_pseudo_p ()
668 ? emit_move_insn (dest, src)
669 : emit_move_insn_1 (dest, src));
670 }
671
672 void
673 aarch64_split_128bit_move (rtx dst, rtx src)
674 {
675 rtx low_dst;
676
677 enum machine_mode src_mode = GET_MODE (src);
678 enum machine_mode dst_mode = GET_MODE (dst);
679 int src_regno = REGNO (src);
680 int dst_regno = REGNO (dst);
681
682 gcc_assert (dst_mode == TImode || dst_mode == TFmode);
683
684 if (REG_P (dst) && REG_P (src))
685 {
686 gcc_assert (src_mode == TImode || src_mode == TFmode);
687
688 /* Handle r -> w, w -> r. */
689 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
690 {
691 switch (src_mode) {
692 case TImode:
693 emit_insn
694 (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
695 emit_insn
696 (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
697 return;
698 case TFmode:
699 emit_insn
700 (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
701 emit_insn
702 (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
703 return;
704 default:
705 gcc_unreachable ();
706 }
707 }
708 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
709 {
710 switch (src_mode) {
711 case TImode:
712 emit_insn
713 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
714 emit_insn
715 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
716 return;
717 case TFmode:
718 emit_insn
719 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
720 emit_insn
721 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
722 return;
723 default:
724 gcc_unreachable ();
725 }
726 }
727 /* Fall through to r -> r cases. */
728 }
729
730 switch (dst_mode) {
731 case TImode:
732 low_dst = gen_lowpart (word_mode, dst);
733 if (REG_P (low_dst)
734 && reg_overlap_mentioned_p (low_dst, src))
735 {
736 aarch64_emit_move (gen_highpart (word_mode, dst),
737 gen_highpart_mode (word_mode, TImode, src));
738 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
739 }
740 else
741 {
742 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
743 aarch64_emit_move (gen_highpart (word_mode, dst),
744 gen_highpart_mode (word_mode, TImode, src));
745 }
746 return;
747 case TFmode:
748 emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
749 gen_rtx_REG (DFmode, src_regno));
750 emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
751 gen_rtx_REG (DFmode, src_regno + 1));
752 return;
753 default:
754 gcc_unreachable ();
755 }
756 }
757
758 bool
759 aarch64_split_128bit_move_p (rtx dst, rtx src)
760 {
761 return (! REG_P (src)
762 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
763 }
764
765 /* Split a complex SIMD combine. */
766
767 void
768 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
769 {
770 enum machine_mode src_mode = GET_MODE (src1);
771 enum machine_mode dst_mode = GET_MODE (dst);
772
773 gcc_assert (VECTOR_MODE_P (dst_mode));
774
775 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
776 {
777 rtx (*gen) (rtx, rtx, rtx);
778
779 switch (src_mode)
780 {
781 case V8QImode:
782 gen = gen_aarch64_simd_combinev8qi;
783 break;
784 case V4HImode:
785 gen = gen_aarch64_simd_combinev4hi;
786 break;
787 case V2SImode:
788 gen = gen_aarch64_simd_combinev2si;
789 break;
790 case V2SFmode:
791 gen = gen_aarch64_simd_combinev2sf;
792 break;
793 case DImode:
794 gen = gen_aarch64_simd_combinedi;
795 break;
796 case DFmode:
797 gen = gen_aarch64_simd_combinedf;
798 break;
799 default:
800 gcc_unreachable ();
801 }
802
803 emit_insn (gen (dst, src1, src2));
804 return;
805 }
806 }
807
808 /* Split a complex SIMD move. */
809
810 void
811 aarch64_split_simd_move (rtx dst, rtx src)
812 {
813 enum machine_mode src_mode = GET_MODE (src);
814 enum machine_mode dst_mode = GET_MODE (dst);
815
816 gcc_assert (VECTOR_MODE_P (dst_mode));
817
818 if (REG_P (dst) && REG_P (src))
819 {
820 rtx (*gen) (rtx, rtx);
821
822 gcc_assert (VECTOR_MODE_P (src_mode));
823
824 switch (src_mode)
825 {
826 case V16QImode:
827 gen = gen_aarch64_split_simd_movv16qi;
828 break;
829 case V8HImode:
830 gen = gen_aarch64_split_simd_movv8hi;
831 break;
832 case V4SImode:
833 gen = gen_aarch64_split_simd_movv4si;
834 break;
835 case V2DImode:
836 gen = gen_aarch64_split_simd_movv2di;
837 break;
838 case V4SFmode:
839 gen = gen_aarch64_split_simd_movv4sf;
840 break;
841 case V2DFmode:
842 gen = gen_aarch64_split_simd_movv2df;
843 break;
844 default:
845 gcc_unreachable ();
846 }
847
848 emit_insn (gen (dst, src));
849 return;
850 }
851 }
852
853 static rtx
854 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
855 {
856 if (can_create_pseudo_p ())
857 return force_reg (mode, value);
858 else
859 {
860 x = aarch64_emit_move (x, value);
861 return x;
862 }
863 }
864
865
866 static rtx
867 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
868 {
869 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
870 {
871 rtx high;
872 /* Load the full offset into a register. This
873 might be improvable in the future. */
874 high = GEN_INT (offset);
875 offset = 0;
876 high = aarch64_force_temporary (mode, temp, high);
877 reg = aarch64_force_temporary (mode, temp,
878 gen_rtx_PLUS (mode, high, reg));
879 }
880 return plus_constant (mode, reg, offset);
881 }
882
883 void
884 aarch64_expand_mov_immediate (rtx dest, rtx imm)
885 {
886 enum machine_mode mode = GET_MODE (dest);
887 unsigned HOST_WIDE_INT mask;
888 int i;
889 bool first;
890 unsigned HOST_WIDE_INT val;
891 bool subtargets;
892 rtx subtarget;
893 int one_match, zero_match;
894
895 gcc_assert (mode == SImode || mode == DImode);
896
897 /* Check on what type of symbol it is. */
898 if (GET_CODE (imm) == SYMBOL_REF
899 || GET_CODE (imm) == LABEL_REF
900 || GET_CODE (imm) == CONST)
901 {
902 rtx mem, base, offset;
903 enum aarch64_symbol_type sty;
904
905 /* If we have (const (plus symbol offset)), separate out the offset
906 before we start classifying the symbol. */
907 split_const (imm, &base, &offset);
908
909 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
910 switch (sty)
911 {
912 case SYMBOL_FORCE_TO_MEM:
913 if (offset != const0_rtx
914 && targetm.cannot_force_const_mem (mode, imm))
915 {
916 gcc_assert(can_create_pseudo_p ());
917 base = aarch64_force_temporary (mode, dest, base);
918 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
919 aarch64_emit_move (dest, base);
920 return;
921 }
922 mem = force_const_mem (ptr_mode, imm);
923 gcc_assert (mem);
924 if (mode != ptr_mode)
925 mem = gen_rtx_ZERO_EXTEND (mode, mem);
926 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
927 return;
928
929 case SYMBOL_SMALL_TLSGD:
930 case SYMBOL_SMALL_TLSDESC:
931 case SYMBOL_SMALL_GOTTPREL:
932 case SYMBOL_SMALL_GOT:
933 case SYMBOL_TINY_GOT:
934 if (offset != const0_rtx)
935 {
936 gcc_assert(can_create_pseudo_p ());
937 base = aarch64_force_temporary (mode, dest, base);
938 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
939 aarch64_emit_move (dest, base);
940 return;
941 }
942 /* FALLTHRU */
943
944 case SYMBOL_SMALL_TPREL:
945 case SYMBOL_SMALL_ABSOLUTE:
946 case SYMBOL_TINY_ABSOLUTE:
947 aarch64_load_symref_appropriately (dest, imm, sty);
948 return;
949
950 default:
951 gcc_unreachable ();
952 }
953 }
954
955 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
956 {
957 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
958 return;
959 }
960
961 if (!CONST_INT_P (imm))
962 {
963 if (GET_CODE (imm) == HIGH)
964 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
965 else
966 {
967 rtx mem = force_const_mem (mode, imm);
968 gcc_assert (mem);
969 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
970 }
971
972 return;
973 }
974
975 if (mode == SImode)
976 {
977 /* We know we can't do this in 1 insn, and we must be able to do it
978 in two; so don't mess around looking for sequences that don't buy
979 us anything. */
980 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
981 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
982 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
983 return;
984 }
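  /* For example (illustrative), moving the SImode constant 0x12345678
     into w0 becomes:
         mov  w0, #0x5678
         movk w0, #0x1234, lsl #16  */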
985
986 /* Remaining cases are all for DImode. */
987
988 val = INTVAL (imm);
989 subtargets = optimize && can_create_pseudo_p ();
990
991 one_match = 0;
992 zero_match = 0;
993 mask = 0xffff;
994
995 for (i = 0; i < 64; i += 16, mask <<= 16)
996 {
997 if ((val & mask) == 0)
998 zero_match++;
999 else if ((val & mask) == mask)
1000 one_match++;
1001 }
1002
1003 if (one_match == 2)
1004 {
1005 mask = 0xffff;
1006 for (i = 0; i < 64; i += 16, mask <<= 16)
1007 {
1008 if ((val & mask) != mask)
1009 {
1010 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1011 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1012 GEN_INT ((val >> i) & 0xffff)));
1013 return;
1014 }
1015 }
1016 gcc_unreachable ();
1017 }
1018
1019 if (zero_match == 2)
1020 goto simple_sequence;
1021
1022 mask = 0x0ffff0000UL;
1023 for (i = 16; i < 64; i += 16, mask <<= 16)
1024 {
1025 HOST_WIDE_INT comp = mask & ~(mask - 1);
1026
1027 if (aarch64_uimm12_shift (val - (val & mask)))
1028 {
1029 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1030
1031 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1032 emit_insn (gen_adddi3 (dest, subtarget,
1033 GEN_INT (val - (val & mask))));
1034 return;
1035 }
1036 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1037 {
1038 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1039
1040 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1041 GEN_INT ((val + comp) & mask)));
1042 emit_insn (gen_adddi3 (dest, subtarget,
1043 GEN_INT (val - ((val + comp) & mask))));
1044 return;
1045 }
1046 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1047 {
1048 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1049
1050 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1051 GEN_INT ((val - comp) | ~mask)));
1052 emit_insn (gen_adddi3 (dest, subtarget,
1053 GEN_INT (val - ((val - comp) | ~mask))));
1054 return;
1055 }
1056 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1057 {
1058 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1059
1060 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1061 GEN_INT (val | ~mask)));
1062 emit_insn (gen_adddi3 (dest, subtarget,
1063 GEN_INT (val - (val | ~mask))));
1064 return;
1065 }
1066 }
1067
1068 /* See if we can do it by arithmetically combining two
1069 immediates. */
1070 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1071 {
1072 int j;
1073 mask = 0xffff;
1074
1075 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1076 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1077 {
1078 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1079 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1080 GEN_INT (aarch64_bitmasks[i])));
1081 emit_insn (gen_adddi3 (dest, subtarget,
1082 GEN_INT (val - aarch64_bitmasks[i])));
1083 return;
1084 }
1085
1086 for (j = 0; j < 64; j += 16, mask <<= 16)
1087 {
1088 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1089 {
1090 emit_insn (gen_rtx_SET (VOIDmode, dest,
1091 GEN_INT (aarch64_bitmasks[i])));
1092 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1093 GEN_INT ((val >> j) & 0xffff)));
1094 return;
1095 }
1096 }
1097 }
1098
1099 /* See if we can do it by logically combining two immediates. */
1100 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1101 {
1102 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1103 {
1104 int j;
1105
1106 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1107 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1108 {
1109 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1110 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1111 GEN_INT (aarch64_bitmasks[i])));
1112 emit_insn (gen_iordi3 (dest, subtarget,
1113 GEN_INT (aarch64_bitmasks[j])));
1114 return;
1115 }
1116 }
1117 else if ((val & aarch64_bitmasks[i]) == val)
1118 {
1119 int j;
1120
1121 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1122 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1123 {
1124
1125 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1126 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1127 GEN_INT (aarch64_bitmasks[j])));
1128 emit_insn (gen_anddi3 (dest, subtarget,
1129 GEN_INT (aarch64_bitmasks[i])));
1130 return;
1131 }
1132 }
1133 }
1134
1135 simple_sequence:
1136 first = true;
1137 mask = 0xffff;
1138 for (i = 0; i < 64; i += 16, mask <<= 16)
1139 {
1140 if ((val & mask) != 0)
1141 {
1142 if (first)
1143 {
1144 emit_insn (gen_rtx_SET (VOIDmode, dest,
1145 GEN_INT (val & mask)));
1146 first = false;
1147 }
1148 else
1149 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1150 GEN_INT ((val >> i) & 0xffff)));
1151 }
1152 }
1153 }
1154
1155 static bool
1156 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1157 {
1158 /* Indirect calls are not currently supported. */
1159 if (decl == NULL)
1160 return false;
1161
1162 /* Cannot tail-call to long-calls, since these are outside of the
1163 range of a branch instruction (we could handle this if we added
1164      support for indirect tail-calls).  */
1165 if (aarch64_decl_is_long_call_p (decl))
1166 return false;
1167
1168 return true;
1169 }
1170
1171 /* Implement TARGET_PASS_BY_REFERENCE. */
1172
1173 static bool
1174 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1175 enum machine_mode mode,
1176 const_tree type,
1177 bool named ATTRIBUTE_UNUSED)
1178 {
1179 HOST_WIDE_INT size;
1180 enum machine_mode dummymode;
1181 int nregs;
1182
1183 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1184 size = (mode == BLKmode && type)
1185 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1186
1187 if (type)
1188 {
1189 /* Arrays always passed by reference. */
1190 if (TREE_CODE (type) == ARRAY_TYPE)
1191 return true;
1192 /* Other aggregates based on their size. */
1193 if (AGGREGATE_TYPE_P (type))
1194 size = int_size_in_bytes (type);
1195 }
1196
1197   /* Variable sized arguments are always passed by reference.  */
1198 if (size < 0)
1199 return true;
1200
1201 /* Can this be a candidate to be passed in fp/simd register(s)? */
1202 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1203 &dummymode, &nregs,
1204 NULL))
1205 return false;
1206
1207 /* Arguments which are variable sized or larger than 2 registers are
1208      passed by reference unless they are a homogeneous floating-point
1209 aggregate. */
1210 return size > 2 * UNITS_PER_WORD;
1211 }
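/* For example (illustrative): a structure of three doubles is a
   homogeneous floating-point aggregate and is passed in SIMD/FP
   registers, whereas a structure of three 64-bit integers (24 bytes,
   more than two GP registers) is passed by reference.  */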
1212
1213 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1214 static bool
1215 aarch64_return_in_msb (const_tree valtype)
1216 {
1217 enum machine_mode dummy_mode;
1218 int dummy_int;
1219
1220 /* Never happens in little-endian mode. */
1221 if (!BYTES_BIG_ENDIAN)
1222 return false;
1223
1224 /* Only composite types smaller than or equal to 16 bytes can
1225 be potentially returned in registers. */
1226 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1227 || int_size_in_bytes (valtype) <= 0
1228 || int_size_in_bytes (valtype) > 16)
1229 return false;
1230
1231 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1232 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1233 is always passed/returned in the least significant bits of fp/simd
1234 register(s). */
1235 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1236 &dummy_mode, &dummy_int, NULL))
1237 return false;
1238
1239 return true;
1240 }
1241
1242 /* Implement TARGET_FUNCTION_VALUE.
1243 Define how to find the value returned by a function. */
1244
1245 static rtx
1246 aarch64_function_value (const_tree type, const_tree func,
1247 bool outgoing ATTRIBUTE_UNUSED)
1248 {
1249 enum machine_mode mode;
1250 int unsignedp;
1251 int count;
1252 enum machine_mode ag_mode;
1253
1254 mode = TYPE_MODE (type);
1255 if (INTEGRAL_TYPE_P (type))
1256 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1257
1258 if (aarch64_return_in_msb (type))
1259 {
1260 HOST_WIDE_INT size = int_size_in_bytes (type);
1261
1262 if (size % UNITS_PER_WORD != 0)
1263 {
1264 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1265 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1266 }
1267 }
1268
1269 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1270 &ag_mode, &count, NULL))
1271 {
1272 if (!aarch64_composite_type_p (type, mode))
1273 {
1274 gcc_assert (count == 1 && mode == ag_mode);
1275 return gen_rtx_REG (mode, V0_REGNUM);
1276 }
1277 else
1278 {
1279 int i;
1280 rtx par;
1281
1282 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1283 for (i = 0; i < count; i++)
1284 {
1285 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1286 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1287 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1288 XVECEXP (par, 0, i) = tmp;
1289 }
1290 return par;
1291 }
1292 }
1293 else
1294 return gen_rtx_REG (mode, R0_REGNUM);
1295 }
1296
1297 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1298 Return true if REGNO is the number of a hard register in which the values
1299 of called function may come back. */
1300
1301 static bool
1302 aarch64_function_value_regno_p (const unsigned int regno)
1303 {
1304 /* Maximum of 16 bytes can be returned in the general registers. Examples
1305 of 16-byte return values are: 128-bit integers and 16-byte small
1306 structures (excluding homogeneous floating-point aggregates). */
1307 if (regno == R0_REGNUM || regno == R1_REGNUM)
1308 return true;
1309
1310 /* Up to four fp/simd registers can return a function value, e.g. a
1311 homogeneous floating-point aggregate having four members. */
1312 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1313 return !TARGET_GENERAL_REGS_ONLY;
1314
1315 return false;
1316 }
1317
1318 /* Implement TARGET_RETURN_IN_MEMORY.
1319
1320 If the type T of the result of a function is such that
1321 void func (T arg)
1322 would require that arg be passed as a value in a register (or set of
1323 registers) according to the parameter passing rules, then the result
1324 is returned in the same registers as would be used for such an
1325 argument. */
1326
1327 static bool
1328 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1329 {
1330 HOST_WIDE_INT size;
1331 enum machine_mode ag_mode;
1332 int count;
1333
1334 if (!AGGREGATE_TYPE_P (type)
1335 && TREE_CODE (type) != COMPLEX_TYPE
1336 && TREE_CODE (type) != VECTOR_TYPE)
1337 /* Simple scalar types always returned in registers. */
1338 return false;
1339
1340 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1341 type,
1342 &ag_mode,
1343 &count,
1344 NULL))
1345 return false;
1346
1347 /* Types larger than 2 registers returned in memory. */
1348 size = int_size_in_bytes (type);
1349 return (size < 0 || size > 2 * UNITS_PER_WORD);
1350 }
1351
1352 static bool
1353 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1354 const_tree type, int *nregs)
1355 {
1356 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1357 return aarch64_vfp_is_call_or_return_candidate (mode,
1358 type,
1359 &pcum->aapcs_vfp_rmode,
1360 nregs,
1361 NULL);
1362 }
1363
1364 /* Given MODE and TYPE of a function argument, return the alignment in
1365 bits. The idea is to suppress any stronger alignment requested by
1366 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1367 This is a helper function for local use only. */
1368
1369 static unsigned int
1370 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1371 {
1372 unsigned int alignment;
1373
1374 if (type)
1375 {
1376 if (!integer_zerop (TYPE_SIZE (type)))
1377 {
1378 if (TYPE_MODE (type) == mode)
1379 alignment = TYPE_ALIGN (type);
1380 else
1381 alignment = GET_MODE_ALIGNMENT (mode);
1382 }
1383 else
1384 alignment = 0;
1385 }
1386 else
1387 alignment = GET_MODE_ALIGNMENT (mode);
1388
1389 return alignment;
1390 }
1391
1392 /* Layout a function argument according to the AAPCS64 rules. The rule
1393 numbers refer to the rule numbers in the AAPCS64. */
1394
1395 static void
1396 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1397 const_tree type,
1398 bool named ATTRIBUTE_UNUSED)
1399 {
1400 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1401 int ncrn, nvrn, nregs;
1402 bool allocate_ncrn, allocate_nvrn;
1403
1404 /* We need to do this once per argument. */
1405 if (pcum->aapcs_arg_processed)
1406 return;
1407
1408 pcum->aapcs_arg_processed = true;
1409
1410 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1411 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1412 mode,
1413 type,
1414 &nregs);
1415
1416   /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1417 The following code thus handles passing by SIMD/FP registers first. */
1418
1419 nvrn = pcum->aapcs_nvrn;
1420
1421   /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1422      and homogeneous short-vector aggregates (HVA).  */
1423 if (allocate_nvrn)
1424 {
1425 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1426 {
1427 pcum->aapcs_nextnvrn = nvrn + nregs;
1428 if (!aarch64_composite_type_p (type, mode))
1429 {
1430 gcc_assert (nregs == 1);
1431 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1432 }
1433 else
1434 {
1435 rtx par;
1436 int i;
1437 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1438 for (i = 0; i < nregs; i++)
1439 {
1440 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1441 V0_REGNUM + nvrn + i);
1442 tmp = gen_rtx_EXPR_LIST
1443 (VOIDmode, tmp,
1444 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1445 XVECEXP (par, 0, i) = tmp;
1446 }
1447 pcum->aapcs_reg = par;
1448 }
1449 return;
1450 }
1451 else
1452 {
1453 /* C.3 NSRN is set to 8. */
1454 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1455 goto on_stack;
1456 }
1457 }
1458
1459 ncrn = pcum->aapcs_ncrn;
1460 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1461 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1462
1463
1464   /* C6 - C9, though the sign and zero extension semantics are
1465      handled elsewhere.  This is the case where the argument fits
1466      entirely in general registers.  */
1467 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1468 {
1469 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1470
1471 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1472
1473 /* C.8 if the argument has an alignment of 16 then the NGRN is
1474 rounded up to the next even number. */
1475 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1476 {
1477 ++ncrn;
1478 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1479 }
1480 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1481 A reg is still generated for it, but the caller should be smart
1482 enough not to use it. */
1483 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1484 {
1485 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1486 }
1487 else
1488 {
1489 rtx par;
1490 int i;
1491
1492 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1493 for (i = 0; i < nregs; i++)
1494 {
1495 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1496 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1497 GEN_INT (i * UNITS_PER_WORD));
1498 XVECEXP (par, 0, i) = tmp;
1499 }
1500 pcum->aapcs_reg = par;
1501 }
1502
1503 pcum->aapcs_nextncrn = ncrn + nregs;
1504 return;
1505 }
1506
1507 /* C.11 */
1508 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1509
1510 /* The argument is passed on stack; record the needed number of words for
1511 this argument (we can re-use NREGS) and align the total size if
1512 necessary. */
1513 on_stack:
1514 pcum->aapcs_stack_words = nregs;
1515 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1516 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1517 16 / UNITS_PER_WORD) + 1;
1518 return;
1519 }
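/* An illustrative example of the C.8 rule above: for a call such as
   f (int a, __int128 b), A is passed in W0; B needs two registers and
   has 16-byte alignment, so the NGRN is rounded up from 1 to 2 and B is
   passed in X2 and X3.  */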
1520
1521 /* Implement TARGET_FUNCTION_ARG. */
1522
1523 static rtx
1524 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1525 const_tree type, bool named)
1526 {
1527 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1528 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1529
1530 if (mode == VOIDmode)
1531 return NULL_RTX;
1532
1533 aarch64_layout_arg (pcum_v, mode, type, named);
1534 return pcum->aapcs_reg;
1535 }
1536
1537 void
1538 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1539 const_tree fntype ATTRIBUTE_UNUSED,
1540 rtx libname ATTRIBUTE_UNUSED,
1541 const_tree fndecl ATTRIBUTE_UNUSED,
1542 unsigned n_named ATTRIBUTE_UNUSED)
1543 {
1544 pcum->aapcs_ncrn = 0;
1545 pcum->aapcs_nvrn = 0;
1546 pcum->aapcs_nextncrn = 0;
1547 pcum->aapcs_nextnvrn = 0;
1548 pcum->pcs_variant = ARM_PCS_AAPCS64;
1549 pcum->aapcs_reg = NULL_RTX;
1550 pcum->aapcs_arg_processed = false;
1551 pcum->aapcs_stack_words = 0;
1552 pcum->aapcs_stack_size = 0;
1553
1554 return;
1555 }
1556
1557 static void
1558 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1559 enum machine_mode mode,
1560 const_tree type,
1561 bool named)
1562 {
1563 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1564 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1565 {
1566 aarch64_layout_arg (pcum_v, mode, type, named);
1567 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1568 != (pcum->aapcs_stack_words != 0));
1569 pcum->aapcs_arg_processed = false;
1570 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1571 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1572 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1573 pcum->aapcs_stack_words = 0;
1574 pcum->aapcs_reg = NULL_RTX;
1575 }
1576 }
1577
1578 bool
1579 aarch64_function_arg_regno_p (unsigned regno)
1580 {
1581 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1582 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1583 }
1584
1585 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1586 PARM_BOUNDARY bits of alignment, but will be given anything up
1587 to STACK_BOUNDARY bits if the type requires it. This makes sure
1588 that both before and after the layout of each argument, the Next
1589 Stacked Argument Address (NSAA) will have a minimum alignment of
1590 8 bytes. */
1591
1592 static unsigned int
1593 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1594 {
1595 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1596
1597 if (alignment < PARM_BOUNDARY)
1598 alignment = PARM_BOUNDARY;
1599 if (alignment > STACK_BOUNDARY)
1600 alignment = STACK_BOUNDARY;
1601 return alignment;
1602 }
1603
1604 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1605
1606 Return true if an argument passed on the stack should be padded upwards,
1607 i.e. if the least-significant byte of the stack slot has useful data.
1608
1609 Small aggregate types are placed in the lowest memory address.
1610
1611 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1612
1613 bool
1614 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1615 {
1616 /* On little-endian targets, the least significant byte of every stack
1617 argument is passed at the lowest byte address of the stack slot. */
1618 if (!BYTES_BIG_ENDIAN)
1619 return true;
1620
1621 /* Otherwise, integral, floating-point and pointer types are padded downward:
1622 the least significant byte of a stack argument is passed at the highest
1623 byte address of the stack slot. */
1624 if (type
1625 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1626 || POINTER_TYPE_P (type))
1627 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1628 return false;
1629
1630 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1631 return true;
1632 }
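/* For example (illustrative): on a big-endian target a scalar int
   argument is padded downward, so its least significant byte sits at
   the highest address of its stack slot, whereas a small structure is
   padded upward and its data starts at the lowest address.  */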
1633
1634 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1635
1636    It specifies padding for the last (and possibly only)
1637    element of a block move between registers and memory.  If
1638    the block is assumed to be in memory, padding upward means
1639    that the last element is padded after its most significant
1640    byte, while in downward padding the last element is padded
1641    on its least significant byte side.
1642
1643 Small aggregates and small complex types are always padded
1644 upwards.
1645
1646 We don't need to worry about homogeneous floating-point or
1647 short-vector aggregates; their move is not affected by the
1648 padding direction determined here. Regardless of endianness,
1649 each element of such an aggregate is put in the least
1650 significant bits of a fp/simd register.
1651
1652 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1653 register has useful data, and return the opposite if the most
1654 significant byte does. */
1655
1656 bool
1657 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1658 bool first ATTRIBUTE_UNUSED)
1659 {
1660
1661 /* Small composite types are always padded upward. */
1662 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1663 {
1664 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1665 : GET_MODE_SIZE (mode));
1666 if (size < 2 * UNITS_PER_WORD)
1667 return true;
1668 }
1669
1670 /* Otherwise, use the default padding. */
1671 return !BYTES_BIG_ENDIAN;
1672 }
1673
1674 static enum machine_mode
1675 aarch64_libgcc_cmp_return_mode (void)
1676 {
1677 return SImode;
1678 }
1679
1680 static bool
1681 aarch64_frame_pointer_required (void)
1682 {
1683 /* If the function contains dynamic stack allocations, we need to
1684 use the frame pointer to access the static parts of the frame. */
1685 if (cfun->calls_alloca)
1686 return true;
1687
1688 /* We may have turned flag_omit_frame_pointer on in order to have this
1689 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1690 and we'll check it here.
1691      If flag_omit_frame_pointer was really set by the user, then we return false
1692 (no frame pointer required) in all cases. */
1693
1694 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1695 return false;
1696 else if (flag_omit_leaf_frame_pointer)
1697 return !crtl->is_leaf;
1698 return true;
1699 }
1700
1701 /* Mark the registers that need to be saved by the callee and calculate
1702 the size of the callee-saved registers area and frame record (both FP
1703 and LR may be omitted). */
1704 static void
1705 aarch64_layout_frame (void)
1706 {
1707 HOST_WIDE_INT offset = 0;
1708 int regno;
1709
1710 if (reload_completed && cfun->machine->frame.laid_out)
1711 return;
1712
1713 cfun->machine->frame.fp_lr_offset = 0;
1714
1715 /* First mark all the registers that really need to be saved... */
1716 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1717 cfun->machine->frame.reg_offset[regno] = -1;
1718
1719 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1720 cfun->machine->frame.reg_offset[regno] = -1;
1721
1722 /* ... that includes the eh data registers (if needed)... */
1723 if (crtl->calls_eh_return)
1724 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1725 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1726
1727 /* ... and any callee saved register that dataflow says is live. */
1728 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1729 if (df_regs_ever_live_p (regno)
1730 && !call_used_regs[regno])
1731 cfun->machine->frame.reg_offset[regno] = 0;
1732
1733 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1734 if (df_regs_ever_live_p (regno)
1735 && !call_used_regs[regno])
1736 cfun->machine->frame.reg_offset[regno] = 0;
1737
1738 if (frame_pointer_needed)
1739 {
1740 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1741 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1742 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1743 }
1744
1745 /* Now assign stack slots for them. */
1746 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1747 if (cfun->machine->frame.reg_offset[regno] != -1)
1748 {
1749 cfun->machine->frame.reg_offset[regno] = offset;
1750 offset += UNITS_PER_WORD;
1751 }
1752
1753 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1754 if (cfun->machine->frame.reg_offset[regno] != -1)
1755 {
1756 cfun->machine->frame.reg_offset[regno] = offset;
1757 offset += UNITS_PER_WORD;
1758 }
1759
1760 if (frame_pointer_needed)
1761 {
1762 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1763 offset += UNITS_PER_WORD;
1764 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1765 }
1766
1767 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1768 {
1769 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1770 offset += UNITS_PER_WORD;
1771 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1772 }
1773
1774 cfun->machine->frame.padding0 =
1775 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1776 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1777
1778 cfun->machine->frame.saved_regs_size = offset;
1779 cfun->machine->frame.laid_out = true;
1780 }
1781
1782 /* Make the last instruction frame-related and note that it performs
1783 the operation described by FRAME_PATTERN. */
1784
1785 static void
1786 aarch64_set_frame_expr (rtx frame_pattern)
1787 {
1788 rtx insn;
1789
1790 insn = get_last_insn ();
1791 RTX_FRAME_RELATED_P (insn) = 1;
1792 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1793 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1794 frame_pattern,
1795 REG_NOTES (insn));
1796 }
1797
1798 static bool
1799 aarch64_register_saved_on_entry (int regno)
1800 {
1801 return cfun->machine->frame.reg_offset[regno] != -1;
1802 }
1803
1804
1805 static void
1806 aarch64_save_or_restore_fprs (int start_offset, int increment,
1807 bool restore, rtx base_rtx)
1808
1809 {
1810 unsigned regno;
1811 unsigned regno2;
1812 rtx insn;
1813 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1814 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1815
1816
1817 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1818 {
1819 if (aarch64_register_saved_on_entry (regno))
1820 {
1821 rtx mem;
1822 mem = gen_mem_ref (DFmode,
1823 plus_constant (Pmode,
1824 base_rtx,
1825 start_offset));
1826
1827 for (regno2 = regno + 1;
1828 regno2 <= V31_REGNUM
1829 && !aarch64_register_saved_on_entry (regno2);
1830 regno2++)
1831 {
1832 /* Empty loop. */
1833 }
1834 if (regno2 <= V31_REGNUM &&
1835 aarch64_register_saved_on_entry (regno2))
1836 {
1837 rtx mem2;
1838 /* Next highest register to be saved. */
1839 mem2 = gen_mem_ref (DFmode,
1840 plus_constant
1841 (Pmode,
1842 base_rtx,
1843 start_offset + increment));
1844 if (restore == false)
1845 {
1846 insn = emit_insn
1847 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1848 mem2, gen_rtx_REG (DFmode, regno2)));
1849
1850 }
1851 else
1852 {
1853 insn = emit_insn
1854 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1855 gen_rtx_REG (DFmode, regno2), mem2));
1856
1857 add_reg_note (insn, REG_CFA_RESTORE,
1858 gen_rtx_REG (DFmode, regno));
1859 add_reg_note (insn, REG_CFA_RESTORE,
1860 gen_rtx_REG (DFmode, regno2));
1861 }
1862
1863 /* The first part of a frame-related parallel insn
1864 is always assumed to be relevant to the frame
1865 	     calculations; subsequent parts are only
1866 frame-related if explicitly marked. */
1867 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1868 regno = regno2;
1869 start_offset += increment * 2;
1870 }
1871 else
1872 {
1873 if (restore == false)
1874 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1875 else
1876 {
1877 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1878 add_reg_note (insn, REG_CFA_RESTORE,
1879 gen_rtx_REG (DImode, regno));
1880 }
1881 start_offset += increment;
1882 }
1883 RTX_FRAME_RELATED_P (insn) = 1;
1884 }
1885 }
1886
1887 }
1888
1889
1890 /* Offset from the stack pointer of where the saves and
1891    restores have to happen.  */
1892 static void
1893 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1894 bool restore)
1895 {
1896 rtx insn;
1897 rtx base_rtx = stack_pointer_rtx;
1898 HOST_WIDE_INT start_offset = offset;
1899 HOST_WIDE_INT increment = UNITS_PER_WORD;
1900 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1901 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1902 unsigned regno;
1903 unsigned regno2;
1904
1905 for (regno = R0_REGNUM; regno <= limit; regno++)
1906 {
1907 if (aarch64_register_saved_on_entry (regno))
1908 {
1909 rtx mem;
1910 mem = gen_mem_ref (Pmode,
1911 plus_constant (Pmode,
1912 base_rtx,
1913 start_offset));
1914
1915 for (regno2 = regno + 1;
1916 regno2 <= limit
1917 && !aarch64_register_saved_on_entry (regno2);
1918 regno2++)
1919 {
1920 /* Empty loop. */
1921 }
1922 if (regno2 <= limit &&
1923 aarch64_register_saved_on_entry (regno2))
1924 {
1925 rtx mem2;
1926 /* Next highest register to be saved. */
1927 mem2 = gen_mem_ref (Pmode,
1928 plus_constant
1929 (Pmode,
1930 base_rtx,
1931 start_offset + increment));
1932 if (restore == false)
1933 {
1934 insn = emit_insn
1935 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1936 mem2, gen_rtx_REG (DImode, regno2)));
1937
1938 }
1939 else
1940 {
1941 insn = emit_insn
1942 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1943 gen_rtx_REG (DImode, regno2), mem2));
1944
1945 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1946 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1947 }
1948
1949 /* The first part of a frame-related parallel insn
1950 is always assumed to be relevant to the frame
1951 calculations; subsequent parts are only
1952 frame-related if explicitly marked. */
1953 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1954 1)) = 1;
1955 regno = regno2;
1956 start_offset += increment * 2;
1957 }
1958 else
1959 {
1960 if (restore == false)
1961 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1962 else
1963 {
1964 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1965 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1966 }
1967 start_offset += increment;
1968 }
1969 RTX_FRAME_RELATED_P (insn) = 1;
1970 }
1971 }
1972
1973 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1974
1975 }
1976
1977 /* AArch64 stack frames generated by this compiler look like:
1978
1979 +-------------------------------+
1980 | |
1981 | incoming stack arguments |
1982 | |
1983 +-------------------------------+ <-- arg_pointer_rtx
1984 | |
1985 | callee-allocated save area |
1986 | for register varargs |
1987 | |
1988 +-------------------------------+
1989 | |
1990 | local variables |
1991 | |
1992 +-------------------------------+ <-- frame_pointer_rtx
1993 | |
1994 | callee-saved registers |
1995 | |
1996 +-------------------------------+
1997 | LR' |
1998 +-------------------------------+
1999 | FP' |
2000 P +-------------------------------+ <-- hard_frame_pointer_rtx
2001 | dynamic allocation |
2002 +-------------------------------+
2003 | |
2004 | outgoing stack arguments |
2005 | |
2006 +-------------------------------+ <-- stack_pointer_rtx
2007
2008 Dynamic stack allocations such as alloca insert data at point P.
2009 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2010 hard_frame_pointer_rtx unchanged. */
2011
2012 /* Generate the prologue instructions for entry into a function.
2013 Establish the stack frame by decreasing the stack pointer with a
2014 properly calculated size and, if necessary, create a frame record
2015 filled with the values of LR and previous frame pointer. The
2016 current FP is also set up if it is in use. */
2017
2018 void
2019 aarch64_expand_prologue (void)
2020 {
2021 /* sub sp, sp, #<frame_size>
2022 stp {fp, lr}, [sp, #<frame_size> - 16]
2023 add fp, sp, #<frame_size> - hardfp_offset
2024 stp {cs_reg}, [fp, #-16] etc.
2025
2026 sub sp, sp, <final_adjustment_if_any>
2027 */
2028 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2029 HOST_WIDE_INT frame_size, offset;
2030 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2031 rtx insn;
2032
2033 aarch64_layout_frame ();
2034 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2035 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2036 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2037 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2038 + crtl->outgoing_args_size);
2039 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2040 STACK_BOUNDARY / BITS_PER_UNIT);
2041
2042 if (flag_stack_usage_info)
2043 current_function_static_stack_size = frame_size;
2044
2045 fp_offset = (offset
2046 - original_frame_size
2047 - cfun->machine->frame.saved_regs_size);
2048
2049 /* Store pairs and load pairs have a range of only -512 to 504. */
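/* (The LDP/STP immediate is a signed 7-bit value scaled by the access
size; with 8-byte registers that is -64*8 .. 63*8, i.e. -512 .. 504.) */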
2050 if (offset >= 512)
2051 {
2052 /* When the frame has a large size, an initial decrease is done on
2053 the stack pointer to jump over the callee-allocated save area for
2054 register varargs, the local variable area and/or the callee-saved
2055 register area. This will allow the pre-index write-back
2056 store pair instructions to be used for setting up the stack frame
2057 efficiently. */
2058 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2059 if (offset >= 512)
2060 offset = cfun->machine->frame.saved_regs_size;
2061
2062 frame_size -= (offset + crtl->outgoing_args_size);
2063 fp_offset = 0;
2064
2065 if (frame_size >= 0x1000000)
2066 {
2067 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2068 emit_move_insn (op0, GEN_INT (-frame_size));
2069 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2070 aarch64_set_frame_expr (gen_rtx_SET
2071 (Pmode, stack_pointer_rtx,
2072 plus_constant (Pmode,
2073 stack_pointer_rtx,
2074 -frame_size)));
2075 }
2076 else if (frame_size > 0)
2077 {
2078 if ((frame_size & 0xfff) != frame_size)
2079 {
2080 insn = emit_insn (gen_add2_insn
2081 (stack_pointer_rtx,
2082 GEN_INT (-(frame_size
2083 & ~(HOST_WIDE_INT)0xfff))));
2084 RTX_FRAME_RELATED_P (insn) = 1;
2085 }
2086 if ((frame_size & 0xfff) != 0)
2087 {
2088 insn = emit_insn (gen_add2_insn
2089 (stack_pointer_rtx,
2090 GEN_INT (-(frame_size
2091 & (HOST_WIDE_INT)0xfff))));
2092 RTX_FRAME_RELATED_P (insn) = 1;
2093 }
2094 }
2095 }
2096 else
2097 frame_size = -1;
2098
2099 if (offset > 0)
2100 {
2101 /* Save the frame pointer and lr if the frame pointer is needed
2102 first. Make the frame pointer point to the location of the
2103 old frame pointer on the stack. */
2104 if (frame_pointer_needed)
2105 {
2106 rtx mem_fp, mem_lr;
2107
2108 if (fp_offset)
2109 {
2110 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2111 GEN_INT (-offset)));
2112 RTX_FRAME_RELATED_P (insn) = 1;
2113 aarch64_set_frame_expr (gen_rtx_SET
2114 (Pmode, stack_pointer_rtx,
2115 gen_rtx_MINUS (Pmode,
2116 stack_pointer_rtx,
2117 GEN_INT (offset))));
2118 mem_fp = gen_frame_mem (DImode,
2119 plus_constant (Pmode,
2120 stack_pointer_rtx,
2121 fp_offset));
2122 mem_lr = gen_frame_mem (DImode,
2123 plus_constant (Pmode,
2124 stack_pointer_rtx,
2125 fp_offset
2126 + UNITS_PER_WORD));
2127 insn = emit_insn (gen_store_pairdi (mem_fp,
2128 hard_frame_pointer_rtx,
2129 mem_lr,
2130 gen_rtx_REG (DImode,
2131 LR_REGNUM)));
2132 }
2133 else
2134 {
2135 insn = emit_insn (gen_storewb_pairdi_di
2136 (stack_pointer_rtx, stack_pointer_rtx,
2137 hard_frame_pointer_rtx,
2138 gen_rtx_REG (DImode, LR_REGNUM),
2139 GEN_INT (-offset),
2140 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2141 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2142 }
2143
2144 /* The first part of a frame-related parallel insn is always
2145 assumed to be relevant to the frame calculations;
2146 subsequent parts are only frame-related if explicitly
2147 marked. */
2148 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2149 RTX_FRAME_RELATED_P (insn) = 1;
2150
2151 /* Set up frame pointer to point to the location of the
2152 previous frame pointer on the stack. */
2153 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2154 stack_pointer_rtx,
2155 GEN_INT (fp_offset)));
2156 aarch64_set_frame_expr (gen_rtx_SET
2157 (Pmode, hard_frame_pointer_rtx,
2158 plus_constant (Pmode,
2159 stack_pointer_rtx,
2160 fp_offset)));
2161 RTX_FRAME_RELATED_P (insn) = 1;
2162 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2163 hard_frame_pointer_rtx));
2164 }
2165 else
2166 {
2167 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2168 GEN_INT (-offset)));
2169 RTX_FRAME_RELATED_P (insn) = 1;
2170 }
2171
2172 aarch64_save_or_restore_callee_save_registers
2173 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2174 }
2175
2176 /* When offset >= 512,
2177 sub sp, sp, #<outgoing_args_size> */
2178 if (frame_size > -1)
2179 {
2180 if (crtl->outgoing_args_size > 0)
2181 {
2182 insn = emit_insn (gen_add2_insn
2183 (stack_pointer_rtx,
2184 GEN_INT (- crtl->outgoing_args_size)));
2185 RTX_FRAME_RELATED_P (insn) = 1;
2186 }
2187 }
2188 }
2189
2190 /* Generate the epilogue instructions for returning from a function. */
2191 void
2192 aarch64_expand_epilogue (bool for_sibcall)
2193 {
2194 HOST_WIDE_INT original_frame_size, frame_size, offset;
2195 HOST_WIDE_INT fp_offset;
2196 rtx insn;
2197 rtx cfa_reg;
2198
2199 aarch64_layout_frame ();
2200 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2201 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2202 + crtl->outgoing_args_size);
2203 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2204 STACK_BOUNDARY / BITS_PER_UNIT);
2205
2206 fp_offset = (offset
2207 - original_frame_size
2208 - cfun->machine->frame.saved_regs_size);
2209
2210 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2211
2212 /* Store pairs and load pairs have a range of only -512 to 504. */
2213 if (offset >= 512)
2214 {
2215 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2216 if (offset >= 512)
2217 offset = cfun->machine->frame.saved_regs_size;
2218
2219 frame_size -= (offset + crtl->outgoing_args_size);
2220 fp_offset = 0;
2221 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2222 {
2223 insn = emit_insn (gen_add2_insn
2224 (stack_pointer_rtx,
2225 GEN_INT (crtl->outgoing_args_size)));
2226 RTX_FRAME_RELATED_P (insn) = 1;
2227 }
2228 }
2229 else
2230 frame_size = -1;
2231
2232 /* If there were outgoing arguments or we've done dynamic stack
2233 allocation, then restore the stack pointer from the frame
2234 pointer. This is at most one insn and more efficient than using
2235 GCC's internal mechanism. */
2236 if (frame_pointer_needed
2237 && (crtl->outgoing_args_size || cfun->calls_alloca))
2238 {
2239 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2240 hard_frame_pointer_rtx,
2241 GEN_INT (- fp_offset)));
2242 RTX_FRAME_RELATED_P (insn) = 1;
2243 /* As SP is set to (FP - fp_offset), according to the rules in
2244 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2245 from the value of SP from now on. */
2246 cfa_reg = stack_pointer_rtx;
2247 }
2248
2249 aarch64_save_or_restore_callee_save_registers
2250 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2251
2252 /* Restore the frame pointer and lr if the frame pointer is needed. */
2253 if (offset > 0)
2254 {
2255 if (frame_pointer_needed)
2256 {
2257 rtx mem_fp, mem_lr;
2258
2259 if (fp_offset)
2260 {
2261 mem_fp = gen_frame_mem (DImode,
2262 plus_constant (Pmode,
2263 stack_pointer_rtx,
2264 fp_offset));
2265 mem_lr = gen_frame_mem (DImode,
2266 plus_constant (Pmode,
2267 stack_pointer_rtx,
2268 fp_offset
2269 + UNITS_PER_WORD));
2270 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2271 mem_fp,
2272 gen_rtx_REG (DImode,
2273 LR_REGNUM),
2274 mem_lr));
2275 }
2276 else
2277 {
2278 insn = emit_insn (gen_loadwb_pairdi_di
2279 (stack_pointer_rtx,
2280 stack_pointer_rtx,
2281 hard_frame_pointer_rtx,
2282 gen_rtx_REG (DImode, LR_REGNUM),
2283 GEN_INT (offset),
2284 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2285 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2286 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2287 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2288 plus_constant (Pmode, cfa_reg,
2289 offset))));
2290 }
2291
2292 /* The first part of a frame-related parallel insn
2293 is always assumed to be relevant to the frame
2294 calculations; subsequent parts are only
2295 frame-related if explicitly marked. */
2296 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2297 RTX_FRAME_RELATED_P (insn) = 1;
2298 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2299 add_reg_note (insn, REG_CFA_RESTORE,
2300 gen_rtx_REG (DImode, LR_REGNUM));
2301
2302 if (fp_offset)
2303 {
2304 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2305 GEN_INT (offset)));
2306 RTX_FRAME_RELATED_P (insn) = 1;
2307 }
2308 }
2309 else
2310 {
2311 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2312 GEN_INT (offset)));
2313 RTX_FRAME_RELATED_P (insn) = 1;
2314 }
2315 }
2316
2317 /* Stack adjustment for exception handler. */
2318 if (crtl->calls_eh_return)
2319 {
2320 /* We need to unwind the stack by the offset computed by
2321 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2322 based on SP. Ideally we would update the SP and define the
2323 CFA along the lines of:
2324
2325 SP = SP + EH_RETURN_STACKADJ_RTX
2326 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2327
2328 However the dwarf emitter only understands a constant
2329 register offset.
2330
2331 The solution chosen here is to use the otherwise unused IP0
2332 as a temporary register to hold the current SP value. The
2333 CFA is described using IP0 then SP is modified. */
2334
2335 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2336
2337 insn = emit_move_insn (ip0, stack_pointer_rtx);
2338 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2339 RTX_FRAME_RELATED_P (insn) = 1;
2340
2341 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2342
2343 /* Ensure the assignment to IP0 does not get optimized away. */
2344 emit_use (ip0);
2345 }
2346
2347 if (frame_size > -1)
2348 {
2349 if (frame_size >= 0x1000000)
2350 {
2351 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2352 emit_move_insn (op0, GEN_INT (frame_size));
2353 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2354 aarch64_set_frame_expr (gen_rtx_SET
2355 (Pmode, stack_pointer_rtx,
2356 plus_constant (Pmode,
2357 stack_pointer_rtx,
2358 frame_size)));
2359 }
2360 else if (frame_size > 0)
2361 {
2362 if ((frame_size & 0xfff) != 0)
2363 {
2364 insn = emit_insn (gen_add2_insn
2365 (stack_pointer_rtx,
2366 GEN_INT ((frame_size
2367 & (HOST_WIDE_INT) 0xfff))));
2368 RTX_FRAME_RELATED_P (insn) = 1;
2369 }
2370 if ((frame_size & 0xfff) != frame_size)
2371 {
2372 insn = emit_insn (gen_add2_insn
2373 (stack_pointer_rtx,
2374 GEN_INT ((frame_size
2375 & ~ (HOST_WIDE_INT) 0xfff))));
2376 RTX_FRAME_RELATED_P (insn) = 1;
2377 }
2378 }
2379
2380 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2381 plus_constant (Pmode,
2382 stack_pointer_rtx,
2383 offset)));
2384 }
2385
2386 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2387 if (!for_sibcall)
2388 emit_jump_insn (ret_rtx);
2389 }
2390
2391 /* Return the place to copy the exception unwinding return address to.
2392 This will probably be a stack slot, but could (in theory) be the
2393 return register. */
2394 rtx
2395 aarch64_final_eh_return_addr (void)
2396 {
2397 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2398 aarch64_layout_frame ();
2399 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2400 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2401 + crtl->outgoing_args_size);
2402 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2403 STACK_BOUNDARY / BITS_PER_UNIT);
2404 fp_offset = offset
2405 - original_frame_size
2406 - cfun->machine->frame.saved_regs_size;
2407
2408 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2409 return gen_rtx_REG (DImode, LR_REGNUM);
2410
2411 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2412 result in a store to save LR introduced by builtin_eh_return () being
2413 incorrectly deleted because the alias is not detected.
2414 So in the calculation of the address to copy the exception unwinding
2415 return address to, we note two cases.
2416 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2417 we return a SP-relative location since all the addresses are SP-relative
2418 in this case. This prevents the store from being optimized away.
2419 If the fp_offset is not 0, then the addresses will be FP-relative and
2420 therefore we return a FP-relative location. */
2421
2422 if (frame_pointer_needed)
2423 {
2424 if (fp_offset)
2425 return gen_frame_mem (DImode,
2426 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2427 else
2428 return gen_frame_mem (DImode,
2429 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2430 }
2431
2432 /* If FP is not needed, we calculate the location of LR, which would be
2433 at the top of the saved registers block. */
2434
2435 return gen_frame_mem (DImode,
2436 plus_constant (Pmode,
2437 stack_pointer_rtx,
2438 fp_offset
2439 + cfun->machine->frame.saved_regs_size
2440 - 2 * UNITS_PER_WORD));
2441 }
2442
2443 /* Output code to build up a constant in a register. */
2444 static void
2445 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2446 {
2447 if (aarch64_bitmask_imm (val, DImode))
2448 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2449 else
2450 {
2451 int i;
2452 int ncount = 0;
2453 int zcount = 0;
2454 HOST_WIDE_INT valp = val >> 16;
2455 HOST_WIDE_INT valm;
2456 HOST_WIDE_INT tval;
2457
2458 for (i = 16; i < 64; i += 16)
2459 {
2460 valm = (valp & 0xffff);
2461
2462 if (valm != 0)
2463 ++ zcount;
2464
2465 if (valm != 0xffff)
2466 ++ ncount;
2467
2468 valp >>= 16;
2469 }
2470
2471 /* zcount contains the number of additional MOVK instructions
2472 required if the constant is built up with an initial MOVZ instruction,
2473 while ncount is the number of MOVK instructions required if starting
2474 with a MOVN instruction. Choose the sequence that yields the fewest
2475 instructions, preferring MOVZ instructions when the two counts are
2476 the same. */
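/* Illustrative example: for 0xffffffffffff1234 the three upper 16-bit
chunks are all 0xffff, so ncount is 0 and zcount is 3; a single MOVN
materialises the value (all upper bits set) with no follow-up MOVKs,
whereas a MOVZ-based sequence would need three MOVKs. */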
2477 if (ncount < zcount)
2478 {
2479 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2480 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2481 tval = 0xffff;
2482 }
2483 else
2484 {
2485 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2486 GEN_INT (val & 0xffff));
2487 tval = 0;
2488 }
2489
2490 val >>= 16;
2491
2492 for (i = 16; i < 64; i += 16)
2493 {
2494 if ((val & 0xffff) != tval)
2495 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2496 GEN_INT (i), GEN_INT (val & 0xffff)));
2497 val >>= 16;
2498 }
2499 }
2500 }
2501
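/* Add DELTA to register REGNUM, clobbering SCRATCHREG where an
intermediate value is needed.  Sketch of the expected expansion
(illustrative, not verbatim compiler output): a delta of 0x12345
becomes "mov scratch, #0x12; add this, this, scratch, lsl #12;
add this, this, #0x345", while a delta of magnitude 2^24 or more is
first built in SCRATCHREG via aarch64_build_constant and then added. */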
2502 static void
2503 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2504 {
2505 HOST_WIDE_INT mdelta = delta;
2506 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2507 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2508
2509 if (mdelta < 0)
2510 mdelta = -mdelta;
2511
2512 if (mdelta >= 4096 * 4096)
2513 {
2514 aarch64_build_constant (scratchreg, delta);
2515 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2516 }
2517 else if (mdelta > 0)
2518 {
2519 if (mdelta >= 4096)
2520 {
2521 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2522 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2523 if (delta < 0)
2524 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2525 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2526 else
2527 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2528 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2529 }
2530 if (mdelta % 4096 != 0)
2531 {
2532 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2533 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2534 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2535 }
2536 }
2537 }
2538
2539 /* Output code to add DELTA to the first argument, and then jump
2540 to FUNCTION. Used for C++ multiple inheritance. */
2541 static void
2542 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2543 HOST_WIDE_INT delta,
2544 HOST_WIDE_INT vcall_offset,
2545 tree function)
2546 {
2547 /* The this pointer is always in x0. Note that this differs from
2548 Arm where the this pointer may be bumped to r1 if r0 is required
2549 to return a pointer to an aggregate. On AArch64 a result value
2550 pointer will be in x8. */
2551 int this_regno = R0_REGNUM;
2552 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2553
2554 reload_completed = 1;
2555 emit_note (NOTE_INSN_PROLOGUE_END);
2556
2557 if (vcall_offset == 0)
2558 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2559 else
2560 {
2561 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2562
2563 this_rtx = gen_rtx_REG (Pmode, this_regno);
2564 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2565 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2566
2567 addr = this_rtx;
2568 if (delta != 0)
2569 {
2570 if (delta >= -256 && delta < 256)
2571 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2572 plus_constant (Pmode, this_rtx, delta));
2573 else
2574 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2575 }
2576
2577 if (Pmode == ptr_mode)
2578 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2579 else
2580 aarch64_emit_move (temp0,
2581 gen_rtx_ZERO_EXTEND (Pmode,
2582 gen_rtx_MEM (ptr_mode, addr)));
2583
2584 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2585 addr = plus_constant (Pmode, temp0, vcall_offset);
2586 else
2587 {
2588 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2589 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2590 }
2591
2592 if (Pmode == ptr_mode)
2593 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
2594 else
2595 aarch64_emit_move (temp1,
2596 gen_rtx_SIGN_EXTEND (Pmode,
2597 gen_rtx_MEM (ptr_mode, addr)));
2598
2599 emit_insn (gen_add2_insn (this_rtx, temp1));
2600 }
2601
2602 /* Generate a tail call to the target function. */
2603 if (!TREE_USED (function))
2604 {
2605 assemble_external (function);
2606 TREE_USED (function) = 1;
2607 }
2608 funexp = XEXP (DECL_RTL (function), 0);
2609 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2610 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2611 SIBLING_CALL_P (insn) = 1;
2612
2613 insn = get_insns ();
2614 shorten_branches (insn);
2615 final_start_function (insn, file, 1);
2616 final (insn, file, 1);
2617 final_end_function ();
2618
2619 /* Stop pretending to be a post-reload pass. */
2620 reload_completed = 0;
2621 }
2622
2623 static int
2624 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2625 {
2626 if (GET_CODE (*x) == SYMBOL_REF)
2627 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2628
2629 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2630 TLS offsets, not real symbol references. */
2631 if (GET_CODE (*x) == UNSPEC
2632 && XINT (*x, 1) == UNSPEC_TLS)
2633 return -1;
2634
2635 return 0;
2636 }
2637
2638 static bool
2639 aarch64_tls_referenced_p (rtx x)
2640 {
2641 if (!TARGET_HAVE_TLS)
2642 return false;
2643
2644 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2645 }
2646
2647
2648 static int
2649 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2650 {
2651 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2652 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2653
2654 if (*imm1 < *imm2)
2655 return -1;
2656 if (*imm1 > *imm2)
2657 return +1;
2658 return 0;
2659 }
2660
2661
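/* Populate aarch64_bitmasks with every value representable as an
AArch64 logical (bitmask) immediate: an E-bit element (E = 2, 4, 8,
16, 32 or 64) holding S consecutive set bits rotated right by R,
replicated across all 64 bits.  Worked example: E = 8, S = 2, R = 1
gives the element 0x81 and hence the value 0x8181818181818181. */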
2662 static void
2663 aarch64_build_bitmask_table (void)
2664 {
2665 unsigned HOST_WIDE_INT mask, imm;
2666 unsigned int log_e, e, s, r;
2667 unsigned int nimms = 0;
2668
2669 for (log_e = 1; log_e <= 6; log_e++)
2670 {
2671 e = 1 << log_e;
2672 if (e == 64)
2673 mask = ~(HOST_WIDE_INT) 0;
2674 else
2675 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2676 for (s = 1; s < e; s++)
2677 {
2678 for (r = 0; r < e; r++)
2679 {
2680 /* Set S consecutive bits to 1 (S < 64). */
2681 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2682 /* Rotate right by R. */
2683 if (r != 0)
2684 imm = ((imm >> r) | (imm << (e - r))) & mask;
2685 /* Replicate the element to fill 64 bits; the cases fall through deliberately. */
2686 switch (log_e) {
2687 case 1: imm |= (imm << 2);
2688 case 2: imm |= (imm << 4);
2689 case 3: imm |= (imm << 8);
2690 case 4: imm |= (imm << 16);
2691 case 5: imm |= (imm << 32);
2692 case 6:
2693 break;
2694 default:
2695 gcc_unreachable ();
2696 }
2697 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2698 aarch64_bitmasks[nimms++] = imm;
2699 }
2700 }
2701 }
2702
2703 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2704 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2705 aarch64_bitmasks_cmp);
2706 }
2707
2708
2709 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2710 a left shift of 0 or 12 bits. */
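/* For example, 0xabc and 0xabc000 are accepted, while 0xabc00 is not,
since its non-zero bits straddle the two 12-bit windows. */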
2711 bool
2712 aarch64_uimm12_shift (HOST_WIDE_INT val)
2713 {
2714 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2715 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2716 );
2717 }
2718
2719
2720 /* Return true if val is an immediate that can be loaded into a
2721 register by a MOVZ instruction. */
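/* That is, a single 16-bit chunk at bit position 0, 16, 32 or 48 (the
upper two positions only for 64-bit modes); e.g. 0x12340000 is
accepted, 0x12345678 is not. */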
2722 static bool
2723 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2724 {
2725 if (GET_MODE_SIZE (mode) > 4)
2726 {
2727 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2728 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2729 return 1;
2730 }
2731 else
2732 {
2733 /* Ignore sign extension. */
2734 val &= (HOST_WIDE_INT) 0xffffffff;
2735 }
2736 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2737 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2738 }
2739
2740
2741 /* Return true if val is a valid bitmask immediate. */
2742 bool
2743 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2744 {
2745 if (GET_MODE_SIZE (mode) < 8)
2746 {
2747 /* Replicate bit pattern. */
2748 val &= (HOST_WIDE_INT) 0xffffffff;
2749 val |= val << 32;
2750 }
2751 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2752 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2753 }
2754
2755
2756 /* Return true if val is an immediate that can be loaded into a
2757 register in a single instruction. */
2758 bool
2759 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2760 {
2761 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2762 return 1;
2763 return aarch64_bitmask_imm (val, mode);
2764 }
2765
2766 static bool
2767 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2768 {
2769 rtx base, offset;
2770
2771 if (GET_CODE (x) == HIGH)
2772 return true;
2773
2774 split_const (x, &base, &offset);
2775 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2776 {
2777 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2778 != SYMBOL_FORCE_TO_MEM)
2779 return true;
2780 else
2781 /* Avoid generating a 64-bit relocation in ILP32; leave it
2782 to aarch64_expand_mov_immediate to handle properly. */
2783 return mode != ptr_mode;
2784 }
2785
2786 return aarch64_tls_referenced_p (x);
2787 }
2788
2789 /* Return true if register REGNO is a valid index register.
2790 STRICT_P is true if REG_OK_STRICT is in effect. */
2791
2792 bool
2793 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2794 {
2795 if (!HARD_REGISTER_NUM_P (regno))
2796 {
2797 if (!strict_p)
2798 return true;
2799
2800 if (!reg_renumber)
2801 return false;
2802
2803 regno = reg_renumber[regno];
2804 }
2805 return GP_REGNUM_P (regno);
2806 }
2807
2808 /* Return true if register REGNO is a valid base register.
2809 STRICT_P is true if REG_OK_STRICT is in effect. */
2810
2811 bool
2812 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2813 {
2814 if (!HARD_REGISTER_NUM_P (regno))
2815 {
2816 if (!strict_p)
2817 return true;
2818
2819 if (!reg_renumber)
2820 return false;
2821
2822 regno = reg_renumber[regno];
2823 }
2824
2825 /* The fake registers will be eliminated to either the stack or
2826 hard frame pointer, both of which are usually valid base registers.
2827 Reload deals with the cases where the eliminated form isn't valid. */
2828 return (GP_REGNUM_P (regno)
2829 || regno == SP_REGNUM
2830 || regno == FRAME_POINTER_REGNUM
2831 || regno == ARG_POINTER_REGNUM);
2832 }
2833
2834 /* Return true if X is a valid base register.
2835 STRICT_P is true if REG_OK_STRICT is in effect. */
2836
2837 static bool
2838 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2839 {
2840 if (!strict_p && GET_CODE (x) == SUBREG)
2841 x = SUBREG_REG (x);
2842
2843 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2844 }
2845
2846 /* Return true if address offset is a valid index. If it is, fill in INFO
2847 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2848
2849 static bool
2850 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2851 enum machine_mode mode, bool strict_p)
2852 {
2853 enum aarch64_address_type type;
2854 rtx index;
2855 int shift;
2856
2857 /* (reg:P) */
2858 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2859 && GET_MODE (x) == Pmode)
2860 {
2861 type = ADDRESS_REG_REG;
2862 index = x;
2863 shift = 0;
2864 }
2865 /* (sign_extend:DI (reg:SI)) */
2866 else if ((GET_CODE (x) == SIGN_EXTEND
2867 || GET_CODE (x) == ZERO_EXTEND)
2868 && GET_MODE (x) == DImode
2869 && GET_MODE (XEXP (x, 0)) == SImode)
2870 {
2871 type = (GET_CODE (x) == SIGN_EXTEND)
2872 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2873 index = XEXP (x, 0);
2874 shift = 0;
2875 }
2876 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2877 else if (GET_CODE (x) == MULT
2878 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2879 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2880 && GET_MODE (XEXP (x, 0)) == DImode
2881 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2882 && CONST_INT_P (XEXP (x, 1)))
2883 {
2884 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2885 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2886 index = XEXP (XEXP (x, 0), 0);
2887 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2888 }
2889 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2890 else if (GET_CODE (x) == ASHIFT
2891 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2892 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2893 && GET_MODE (XEXP (x, 0)) == DImode
2894 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2895 && CONST_INT_P (XEXP (x, 1)))
2896 {
2897 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2898 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2899 index = XEXP (XEXP (x, 0), 0);
2900 shift = INTVAL (XEXP (x, 1));
2901 }
2902 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2903 else if ((GET_CODE (x) == SIGN_EXTRACT
2904 || GET_CODE (x) == ZERO_EXTRACT)
2905 && GET_MODE (x) == DImode
2906 && GET_CODE (XEXP (x, 0)) == MULT
2907 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2908 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2909 {
2910 type = (GET_CODE (x) == SIGN_EXTRACT)
2911 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2912 index = XEXP (XEXP (x, 0), 0);
2913 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2914 if (INTVAL (XEXP (x, 1)) != 32 + shift
2915 || INTVAL (XEXP (x, 2)) != 0)
2916 shift = -1;
2917 }
2918 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2919 (const_int 0xffffffff<<shift)) */
2920 else if (GET_CODE (x) == AND
2921 && GET_MODE (x) == DImode
2922 && GET_CODE (XEXP (x, 0)) == MULT
2923 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2924 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2925 && CONST_INT_P (XEXP (x, 1)))
2926 {
2927 type = ADDRESS_REG_UXTW;
2928 index = XEXP (XEXP (x, 0), 0);
2929 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2930 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2931 shift = -1;
2932 }
2933 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2934 else if ((GET_CODE (x) == SIGN_EXTRACT
2935 || GET_CODE (x) == ZERO_EXTRACT)
2936 && GET_MODE (x) == DImode
2937 && GET_CODE (XEXP (x, 0)) == ASHIFT
2938 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2939 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2940 {
2941 type = (GET_CODE (x) == SIGN_EXTRACT)
2942 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2943 index = XEXP (XEXP (x, 0), 0);
2944 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2945 if (INTVAL (XEXP (x, 1)) != 32 + shift
2946 || INTVAL (XEXP (x, 2)) != 0)
2947 shift = -1;
2948 }
2949 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2950 (const_int 0xffffffff<<shift)) */
2951 else if (GET_CODE (x) == AND
2952 && GET_MODE (x) == DImode
2953 && GET_CODE (XEXP (x, 0)) == ASHIFT
2954 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2955 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2956 && CONST_INT_P (XEXP (x, 1)))
2957 {
2958 type = ADDRESS_REG_UXTW;
2959 index = XEXP (XEXP (x, 0), 0);
2960 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2961 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2962 shift = -1;
2963 }
2964 /* (mult:P (reg:P) (const_int scale)) */
2965 else if (GET_CODE (x) == MULT
2966 && GET_MODE (x) == Pmode
2967 && GET_MODE (XEXP (x, 0)) == Pmode
2968 && CONST_INT_P (XEXP (x, 1)))
2969 {
2970 type = ADDRESS_REG_REG;
2971 index = XEXP (x, 0);
2972 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2973 }
2974 /* (ashift:P (reg:P) (const_int shift)) */
2975 else if (GET_CODE (x) == ASHIFT
2976 && GET_MODE (x) == Pmode
2977 && GET_MODE (XEXP (x, 0)) == Pmode
2978 && CONST_INT_P (XEXP (x, 1)))
2979 {
2980 type = ADDRESS_REG_REG;
2981 index = XEXP (x, 0);
2982 shift = INTVAL (XEXP (x, 1));
2983 }
2984 else
2985 return false;
2986
2987 if (GET_CODE (index) == SUBREG)
2988 index = SUBREG_REG (index);
2989
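/* A scaled index is only accepted when the scale matches the access
size, e.g. an LSL #3 index is valid for an 8-byte access but not for
a 4-byte one. */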
2990 if ((shift == 0
2991 || (shift > 0 && shift <= 3
2992 && (1 << shift) == GET_MODE_SIZE (mode)))
2993 && REG_P (index)
2994 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2995 {
2996 info->type = type;
2997 info->offset = index;
2998 info->shift = shift;
2999 return true;
3000 }
3001
3002 return false;
3003 }
3004
3005 static inline bool
3006 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3007 {
3008 return (offset >= -64 * GET_MODE_SIZE (mode)
3009 && offset < 64 * GET_MODE_SIZE (mode)
3010 && offset % GET_MODE_SIZE (mode) == 0);
3011 }
3012
3013 static inline bool
3014 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3015 HOST_WIDE_INT offset)
3016 {
3017 return offset >= -256 && offset < 256;
3018 }
3019
3020 static inline bool
3021 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3022 {
3023 return (offset >= 0
3024 && offset < 4096 * GET_MODE_SIZE (mode)
3025 && offset % GET_MODE_SIZE (mode) == 0);
3026 }
3027
3028 /* Return true if X is a valid address for machine mode MODE. If it is,
3029 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3030 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3031
3032 static bool
3033 aarch64_classify_address (struct aarch64_address_info *info,
3034 rtx x, enum machine_mode mode,
3035 RTX_CODE outer_code, bool strict_p)
3036 {
3037 enum rtx_code code = GET_CODE (x);
3038 rtx op0, op1;
3039 bool allow_reg_index_p =
3040 outer_code != PARALLEL && GET_MODE_SIZE (mode) != 16;
3041
3042 /* Don't support anything other than POST_INC or REG addressing for
3043 AdvSIMD. */
3044 if (aarch64_vector_mode_p (mode)
3045 && (code != POST_INC && code != REG))
3046 return false;
3047
3048 switch (code)
3049 {
3050 case REG:
3051 case SUBREG:
3052 info->type = ADDRESS_REG_IMM;
3053 info->base = x;
3054 info->offset = const0_rtx;
3055 return aarch64_base_register_rtx_p (x, strict_p);
3056
3057 case PLUS:
3058 op0 = XEXP (x, 0);
3059 op1 = XEXP (x, 1);
3060 if (GET_MODE_SIZE (mode) != 0
3061 && CONST_INT_P (op1)
3062 && aarch64_base_register_rtx_p (op0, strict_p))
3063 {
3064 HOST_WIDE_INT offset = INTVAL (op1);
3065
3066 info->type = ADDRESS_REG_IMM;
3067 info->base = op0;
3068 info->offset = op1;
3069
3070 /* TImode and TFmode values are allowed in both pairs of X
3071 registers and individual Q registers. The available
3072 address modes are:
3073 X,X: 7-bit signed scaled offset
3074 Q: 9-bit signed offset
3075 We conservatively require an offset representable in either mode.
3076 */
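/* E.g. for TImode an offset of 384 satisfies the 7-bit scaled test but
not the 9-bit unscaled one, so it is conservatively rejected here. */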
3077 if (mode == TImode || mode == TFmode)
3078 return (offset_7bit_signed_scaled_p (mode, offset)
3079 && offset_9bit_signed_unscaled_p (mode, offset));
3080
3081 if (outer_code == PARALLEL)
3082 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3083 && offset_7bit_signed_scaled_p (mode, offset));
3084 else
3085 return (offset_9bit_signed_unscaled_p (mode, offset)
3086 || offset_12bit_unsigned_scaled_p (mode, offset));
3087 }
3088
3089 if (allow_reg_index_p)
3090 {
3091 /* Look for base + (scaled/extended) index register. */
3092 if (aarch64_base_register_rtx_p (op0, strict_p)
3093 && aarch64_classify_index (info, op1, mode, strict_p))
3094 {
3095 info->base = op0;
3096 return true;
3097 }
3098 if (aarch64_base_register_rtx_p (op1, strict_p)
3099 && aarch64_classify_index (info, op0, mode, strict_p))
3100 {
3101 info->base = op1;
3102 return true;
3103 }
3104 }
3105
3106 return false;
3107
3108 case POST_INC:
3109 case POST_DEC:
3110 case PRE_INC:
3111 case PRE_DEC:
3112 info->type = ADDRESS_REG_WB;
3113 info->base = XEXP (x, 0);
3114 info->offset = NULL_RTX;
3115 return aarch64_base_register_rtx_p (info->base, strict_p);
3116
3117 case POST_MODIFY:
3118 case PRE_MODIFY:
3119 info->type = ADDRESS_REG_WB;
3120 info->base = XEXP (x, 0);
3121 if (GET_CODE (XEXP (x, 1)) == PLUS
3122 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3123 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3124 && aarch64_base_register_rtx_p (info->base, strict_p))
3125 {
3126 HOST_WIDE_INT offset;
3127 info->offset = XEXP (XEXP (x, 1), 1);
3128 offset = INTVAL (info->offset);
3129
3130 /* TImode and TFmode values are allowed in both pairs of X
3131 registers and individual Q registers. The available
3132 address modes are:
3133 X,X: 7-bit signed scaled offset
3134 Q: 9-bit signed offset
3135 We conservatively require an offset representable in either mode.
3136 */
3137 if (mode == TImode || mode == TFmode)
3138 return (offset_7bit_signed_scaled_p (mode, offset)
3139 && offset_9bit_signed_unscaled_p (mode, offset));
3140
3141 if (outer_code == PARALLEL)
3142 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3143 && offset_7bit_signed_scaled_p (mode, offset));
3144 else
3145 return offset_9bit_signed_unscaled_p (mode, offset);
3146 }
3147 return false;
3148
3149 case CONST:
3150 case SYMBOL_REF:
3151 case LABEL_REF:
3152 /* Load literal: pc-relative constant pool entry. Only supported
3153 for SI mode or larger. */
3154 info->type = ADDRESS_SYMBOLIC;
3155 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3156 {
3157 rtx sym, addend;
3158
3159 split_const (x, &sym, &addend);
3160 return (GET_CODE (sym) == LABEL_REF
3161 || (GET_CODE (sym) == SYMBOL_REF
3162 && CONSTANT_POOL_ADDRESS_P (sym)));
3163 }
3164 return false;
3165
3166 case LO_SUM:
3167 info->type = ADDRESS_LO_SUM;
3168 info->base = XEXP (x, 0);
3169 info->offset = XEXP (x, 1);
3170 if (allow_reg_index_p
3171 && aarch64_base_register_rtx_p (info->base, strict_p))
3172 {
3173 rtx sym, offs;
3174 split_const (info->offset, &sym, &offs);
3175 if (GET_CODE (sym) == SYMBOL_REF
3176 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3177 == SYMBOL_SMALL_ABSOLUTE))
3178 {
3179 /* The symbol and offset must be aligned to the access size. */
3180 unsigned int align;
3181 unsigned int ref_size;
3182
3183 if (CONSTANT_POOL_ADDRESS_P (sym))
3184 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3185 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3186 {
3187 tree exp = SYMBOL_REF_DECL (sym);
3188 align = TYPE_ALIGN (TREE_TYPE (exp));
3189 align = CONSTANT_ALIGNMENT (exp, align);
3190 }
3191 else if (SYMBOL_REF_DECL (sym))
3192 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3193 else
3194 align = BITS_PER_UNIT;
3195
3196 ref_size = GET_MODE_SIZE (mode);
3197 if (ref_size == 0)
3198 ref_size = GET_MODE_SIZE (DImode);
3199
3200 return ((INTVAL (offs) & (ref_size - 1)) == 0
3201 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3202 }
3203 }
3204 return false;
3205
3206 default:
3207 return false;
3208 }
3209 }
3210
3211 bool
3212 aarch64_symbolic_address_p (rtx x)
3213 {
3214 rtx offset;
3215
3216 split_const (x, &x, &offset);
3217 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3218 }
3219
3220 /* Classify the base of symbolic expression X, given that X appears in
3221 context CONTEXT. */
3222
3223 enum aarch64_symbol_type
3224 aarch64_classify_symbolic_expression (rtx x,
3225 enum aarch64_symbol_context context)
3226 {
3227 rtx offset;
3228
3229 split_const (x, &x, &offset);
3230 return aarch64_classify_symbol (x, context);
3231 }
3232
3233
3234 /* Return TRUE if X is a legitimate address for accessing memory in
3235 mode MODE. */
3236 static bool
3237 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3238 {
3239 struct aarch64_address_info addr;
3240
3241 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3242 }
3243
3244 /* Return TRUE if X is a legitimate address for accessing memory in
3245 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3246 pair operation. */
3247 bool
3248 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3249 RTX_CODE outer_code, bool strict_p)
3250 {
3251 struct aarch64_address_info addr;
3252
3253 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3254 }
3255
3256 /* Return TRUE if rtx X is the immediate constant 0.0. */
3257 bool
3258 aarch64_float_const_zero_rtx_p (rtx x)
3259 {
3260 REAL_VALUE_TYPE r;
3261
3262 if (GET_MODE (x) == VOIDmode)
3263 return false;
3264
3265 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3266 if (REAL_VALUE_MINUS_ZERO (r))
3267 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3268 return REAL_VALUES_EQUAL (r, dconst0);
3269 }
3270
3271 /* Return the fixed registers used for condition codes. */
3272
3273 static bool
3274 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3275 {
3276 *p1 = CC_REGNUM;
3277 *p2 = INVALID_REGNUM;
3278 return true;
3279 }
3280
3281 enum machine_mode
3282 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3283 {
3284 /* All floating point compares return CCFP if it is an equality
3285 comparison, and CCFPE otherwise. */
3286 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3287 {
3288 switch (code)
3289 {
3290 case EQ:
3291 case NE:
3292 case UNORDERED:
3293 case ORDERED:
3294 case UNLT:
3295 case UNLE:
3296 case UNGT:
3297 case UNGE:
3298 case UNEQ:
3299 case LTGT:
3300 return CCFPmode;
3301
3302 case LT:
3303 case LE:
3304 case GT:
3305 case GE:
3306 return CCFPEmode;
3307
3308 default:
3309 gcc_unreachable ();
3310 }
3311 }
3312
3313 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3314 && y == const0_rtx
3315 && (code == EQ || code == NE || code == LT || code == GE)
3316 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3317 || GET_CODE (x) == NEG))
3318 return CC_NZmode;
3319
3320 /* A compare with a shifted or negated operand. Because of canonicalization,
3321 the comparison will have to be swapped when we emit the assembly
3322 code. */
3323 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3324 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3325 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3326 || GET_CODE (x) == LSHIFTRT
3327 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND
3328 || GET_CODE (x) == NEG))
3329 return CC_SWPmode;
3330
3331 /* A compare of a mode narrower than SI mode against zero can be done
3332 by extending the value in the comparison. */
3333 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3334 && y == const0_rtx)
3335 /* Only use sign-extension if we really need it. */
3336 return ((code == GT || code == GE || code == LE || code == LT)
3337 ? CC_SESWPmode : CC_ZESWPmode);
3338
3339 /* For everything else, return CCmode. */
3340 return CCmode;
3341 }
3342
3343 static unsigned
3344 aarch64_get_condition_code (rtx x)
3345 {
3346 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3347 enum rtx_code comp_code = GET_CODE (x);
3348
3349 if (GET_MODE_CLASS (mode) != MODE_CC)
3350 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3351
3352 switch (mode)
3353 {
3354 case CCFPmode:
3355 case CCFPEmode:
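/* For floating-point compares the chosen condition must also be false
when the operands are unordered: FCMP reports an unordered result by
setting C and V, which is why e.g. LE maps to LS and LT to MI rather
than to the integer LE/LT encodings. */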
3356 switch (comp_code)
3357 {
3358 case GE: return AARCH64_GE;
3359 case GT: return AARCH64_GT;
3360 case LE: return AARCH64_LS;
3361 case LT: return AARCH64_MI;
3362 case NE: return AARCH64_NE;
3363 case EQ: return AARCH64_EQ;
3364 case ORDERED: return AARCH64_VC;
3365 case UNORDERED: return AARCH64_VS;
3366 case UNLT: return AARCH64_LT;
3367 case UNLE: return AARCH64_LE;
3368 case UNGT: return AARCH64_HI;
3369 case UNGE: return AARCH64_PL;
3370 default: gcc_unreachable ();
3371 }
3372 break;
3373
3374 case CCmode:
3375 switch (comp_code)
3376 {
3377 case NE: return AARCH64_NE;
3378 case EQ: return AARCH64_EQ;
3379 case GE: return AARCH64_GE;
3380 case GT: return AARCH64_GT;
3381 case LE: return AARCH64_LE;
3382 case LT: return AARCH64_LT;
3383 case GEU: return AARCH64_CS;
3384 case GTU: return AARCH64_HI;
3385 case LEU: return AARCH64_LS;
3386 case LTU: return AARCH64_CC;
3387 default: gcc_unreachable ();
3388 }
3389 break;
3390
3391 case CC_SWPmode:
3392 case CC_ZESWPmode:
3393 case CC_SESWPmode:
3394 switch (comp_code)
3395 {
3396 case NE: return AARCH64_NE;
3397 case EQ: return AARCH64_EQ;
3398 case GE: return AARCH64_LE;
3399 case GT: return AARCH64_LT;
3400 case LE: return AARCH64_GE;
3401 case LT: return AARCH64_GT;
3402 case GEU: return AARCH64_LS;
3403 case GTU: return AARCH64_CC;
3404 case LEU: return AARCH64_CS;
3405 case LTU: return AARCH64_HI;
3406 default: gcc_unreachable ();
3407 }
3408 break;
3409
3410 case CC_NZmode:
3411 switch (comp_code)
3412 {
3413 case NE: return AARCH64_NE;
3414 case EQ: return AARCH64_EQ;
3415 case GE: return AARCH64_PL;
3416 case LT: return AARCH64_MI;
3417 default: gcc_unreachable ();
3418 }
3419 break;
3420
3421 default:
3422 gcc_unreachable ();
3423 break;
3424 }
3425 }
3426
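/* Count the set bits in VALUE; each iteration of the loop below clears
the lowest set bit (value &= value - 1). */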
3427 static unsigned
3428 bit_count (unsigned HOST_WIDE_INT value)
3429 {
3430 unsigned count = 0;
3431
3432 while (value)
3433 {
3434 count++;
3435 value &= value - 1;
3436 }
3437
3438 return count;
3439 }
3440
3441 void
3442 aarch64_print_operand (FILE *f, rtx x, char code)
3443 {
3444 switch (code)
3445 {
3446 /* An integer or symbol address without a preceding # sign. */
3447 case 'c':
3448 switch (GET_CODE (x))
3449 {
3450 case CONST_INT:
3451 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3452 break;
3453
3454 case SYMBOL_REF:
3455 output_addr_const (f, x);
3456 break;
3457
3458 case CONST:
3459 if (GET_CODE (XEXP (x, 0)) == PLUS
3460 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3461 {
3462 output_addr_const (f, x);
3463 break;
3464 }
3465 /* Fall through. */
3466
3467 default:
3468 output_operand_lossage ("unsupported operand for code '%c'", code);
3469 }
3470 break;
3471
3472 case 'e':
3473 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3474 {
3475 int n;
3476
3477 if (GET_CODE (x) != CONST_INT
3478 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3479 {
3480 output_operand_lossage ("invalid operand for '%%%c'", code);
3481 return;
3482 }
3483
3484 switch (n)
3485 {
3486 case 3:
3487 fputc ('b', f);
3488 break;
3489 case 4:
3490 fputc ('h', f);
3491 break;
3492 case 5:
3493 fputc ('w', f);
3494 break;
3495 default:
3496 output_operand_lossage ("invalid operand for '%%%c'", code);
3497 return;
3498 }
3499 }
3500 break;
3501
3502 case 'p':
3503 {
3504 int n;
3505
3506 /* Print N such that 2^N == X. */
3507 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3508 {
3509 output_operand_lossage ("invalid operand for '%%%c'", code);
3510 return;
3511 }
3512
3513 asm_fprintf (f, "%d", n);
3514 }
3515 break;
3516
3517 case 'P':
3518 /* Print the number of non-zero bits in X (a const_int). */
3519 if (GET_CODE (x) != CONST_INT)
3520 {
3521 output_operand_lossage ("invalid operand for '%%%c'", code);
3522 return;
3523 }
3524
3525 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3526 break;
3527
3528 case 'H':
3529 /* Print the higher numbered register of a pair (TImode) of regs. */
3530 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3531 {
3532 output_operand_lossage ("invalid operand for '%%%c'", code);
3533 return;
3534 }
3535
3536 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3537 break;
3538
3539 case 'm':
3540 /* Print a condition (eq, ne, etc). */
3541
3542 /* CONST_TRUE_RTX means always -- that's the default. */
3543 if (x == const_true_rtx)
3544 return;
3545
3546 if (!COMPARISON_P (x))
3547 {
3548 output_operand_lossage ("invalid operand for '%%%c'", code);
3549 return;
3550 }
3551
3552 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3553 break;
3554
3555 case 'M':
3556 /* Print the inverse of a condition (eq <-> ne, etc). */
3557
3558 /* CONST_TRUE_RTX means never -- that's the default. */
3559 if (x == const_true_rtx)
3560 {
3561 fputs ("nv", f);
3562 return;
3563 }
3564
3565 if (!COMPARISON_P (x))
3566 {
3567 output_operand_lossage ("invalid operand for '%%%c'", code);
3568 return;
3569 }
3570
3571 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3572 (aarch64_get_condition_code (x))], f);
3573 break;
3574
3575 case 'b':
3576 case 'h':
3577 case 's':
3578 case 'd':
3579 case 'q':
3580 /* Print a scalar FP/SIMD register name. */
3581 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3582 {
3583 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3584 return;
3585 }
3586 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3587 break;
3588
3589 case 'S':
3590 case 'T':
3591 case 'U':
3592 case 'V':
3593 /* Print the first FP/SIMD register name in a list. */
3594 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3595 {
3596 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3597 return;
3598 }
3599 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3600 break;
3601
3602 case 'X':
3603 /* Print bottom 16 bits of integer constant in hex. */
3604 if (GET_CODE (x) != CONST_INT)
3605 {
3606 output_operand_lossage ("invalid operand for '%%%c'", code);
3607 return;
3608 }
3609 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3610 break;
3611
3612 case 'w':
3613 case 'x':
3614 /* Print a general register name or the zero register (32-bit or
3615 64-bit). */
3616 if (x == const0_rtx
3617 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3618 {
3619 asm_fprintf (f, "%czr", code);
3620 break;
3621 }
3622
3623 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3624 {
3625 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3626 break;
3627 }
3628
3629 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3630 {
3631 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3632 break;
3633 }
3634
3635 /* Fall through */
3636
3637 case 0:
3638 /* Print a normal operand; if it's a general register, then we
3639 assume DImode. */
3640 if (x == NULL)
3641 {
3642 output_operand_lossage ("missing operand");
3643 return;
3644 }
3645
3646 switch (GET_CODE (x))
3647 {
3648 case REG:
3649 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3650 break;
3651
3652 case MEM:
3653 aarch64_memory_reference_mode = GET_MODE (x);
3654 output_address (XEXP (x, 0));
3655 break;
3656
3657 case LABEL_REF:
3658 case SYMBOL_REF:
3659 output_addr_const (asm_out_file, x);
3660 break;
3661
3662 case CONST_INT:
3663 asm_fprintf (f, "%wd", INTVAL (x));
3664 break;
3665
3666 case CONST_VECTOR:
3667 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3668 {
3669 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3670 HOST_WIDE_INT_MIN,
3671 HOST_WIDE_INT_MAX));
3672 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3673 }
3674 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3675 {
3676 fputc ('0', f);
3677 }
3678 else
3679 gcc_unreachable ();
3680 break;
3681
3682 case CONST_DOUBLE:
3683 /* CONST_DOUBLE can represent a double-width integer.
3684 In this case, the mode of x is VOIDmode. */
3685 if (GET_MODE (x) == VOIDmode)
3686 ; /* Do Nothing. */
3687 else if (aarch64_float_const_zero_rtx_p (x))
3688 {
3689 fputc ('0', f);
3690 break;
3691 }
3692 else if (aarch64_float_const_representable_p (x))
3693 {
3694 #define buf_size 20
3695 char float_buf[buf_size] = {'\0'};
3696 REAL_VALUE_TYPE r;
3697 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3698 real_to_decimal_for_mode (float_buf, &r,
3699 buf_size, buf_size,
3700 1, GET_MODE (x));
3701 asm_fprintf (asm_out_file, "%s", float_buf);
3702 break;
3703 #undef buf_size
3704 }
3705 output_operand_lossage ("invalid constant");
3706 return;
3707 default:
3708 output_operand_lossage ("invalid operand");
3709 return;
3710 }
3711 break;
3712
3713 case 'A':
3714 if (GET_CODE (x) == HIGH)
3715 x = XEXP (x, 0);
3716
3717 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3718 {
3719 case SYMBOL_SMALL_GOT:
3720 asm_fprintf (asm_out_file, ":got:");
3721 break;
3722
3723 case SYMBOL_SMALL_TLSGD:
3724 asm_fprintf (asm_out_file, ":tlsgd:");
3725 break;
3726
3727 case SYMBOL_SMALL_TLSDESC:
3728 asm_fprintf (asm_out_file, ":tlsdesc:");
3729 break;
3730
3731 case SYMBOL_SMALL_GOTTPREL:
3732 asm_fprintf (asm_out_file, ":gottprel:");
3733 break;
3734
3735 case SYMBOL_SMALL_TPREL:
3736 asm_fprintf (asm_out_file, ":tprel:");
3737 break;
3738
3739 case SYMBOL_TINY_GOT:
3740 gcc_unreachable ();
3741 break;
3742
3743 default:
3744 break;
3745 }
3746 output_addr_const (asm_out_file, x);
3747 break;
3748
3749 case 'L':
3750 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3751 {
3752 case SYMBOL_SMALL_GOT:
3753 asm_fprintf (asm_out_file, ":lo12:");
3754 break;
3755
3756 case SYMBOL_SMALL_TLSGD:
3757 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3758 break;
3759
3760 case SYMBOL_SMALL_TLSDESC:
3761 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3762 break;
3763
3764 case SYMBOL_SMALL_GOTTPREL:
3765 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3766 break;
3767
3768 case SYMBOL_SMALL_TPREL:
3769 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3770 break;
3771
3772 case SYMBOL_TINY_GOT:
3773 asm_fprintf (asm_out_file, ":got:");
3774 break;
3775
3776 default:
3777 break;
3778 }
3779 output_addr_const (asm_out_file, x);
3780 break;
3781
3782 case 'G':
3783
3784 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3785 {
3786 case SYMBOL_SMALL_TPREL:
3787 asm_fprintf (asm_out_file, ":tprel_hi12:");
3788 break;
3789 default:
3790 break;
3791 }
3792 output_addr_const (asm_out_file, x);
3793 break;
3794
3795 default:
3796 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3797 return;
3798 }
3799 }
3800
3801 void
3802 aarch64_print_operand_address (FILE *f, rtx x)
3803 {
3804 struct aarch64_address_info addr;
3805
3806 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3807 MEM, true))
3808 switch (addr.type)
3809 {
3810 case ADDRESS_REG_IMM:
3811 if (addr.offset == const0_rtx)
3812 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3813 else
3814 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3815 INTVAL (addr.offset));
3816 return;
3817
3818 case ADDRESS_REG_REG:
3819 if (addr.shift == 0)
3820 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3821 reg_names [REGNO (addr.offset)]);
3822 else
3823 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3824 reg_names [REGNO (addr.offset)], addr.shift);
3825 return;
3826
3827 case ADDRESS_REG_UXTW:
3828 if (addr.shift == 0)
3829 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3830 REGNO (addr.offset) - R0_REGNUM);
3831 else
3832 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3833 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3834 return;
3835
3836 case ADDRESS_REG_SXTW:
3837 if (addr.shift == 0)
3838 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3839 REGNO (addr.offset) - R0_REGNUM);
3840 else
3841 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3842 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3843 return;
3844
3845 case ADDRESS_REG_WB:
3846 switch (GET_CODE (x))
3847 {
3848 case PRE_INC:
3849 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3850 GET_MODE_SIZE (aarch64_memory_reference_mode));
3851 return;
3852 case POST_INC:
3853 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3854 GET_MODE_SIZE (aarch64_memory_reference_mode));
3855 return;
3856 case PRE_DEC:
3857 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3858 GET_MODE_SIZE (aarch64_memory_reference_mode));
3859 return;
3860 case POST_DEC:
3861 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3862 GET_MODE_SIZE (aarch64_memory_reference_mode));
3863 return;
3864 case PRE_MODIFY:
3865 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3866 INTVAL (addr.offset));
3867 return;
3868 case POST_MODIFY:
3869 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3870 INTVAL (addr.offset));
3871 return;
3872 default:
3873 break;
3874 }
3875 break;
3876
3877 case ADDRESS_LO_SUM:
3878 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3879 output_addr_const (f, addr.offset);
3880 asm_fprintf (f, "]");
3881 return;
3882
3883 case ADDRESS_SYMBOLIC:
3884 break;
3885 }
3886
3887 output_addr_const (f, x);
3888 }
3889
3890 bool
3891 aarch64_label_mentioned_p (rtx x)
3892 {
3893 const char *fmt;
3894 int i;
3895
3896 if (GET_CODE (x) == LABEL_REF)
3897 return true;
3898
3899 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3900 referencing instruction, but they are constant offsets, not
3901 symbols. */
3902 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3903 return false;
3904
3905 fmt = GET_RTX_FORMAT (GET_CODE (x));
3906 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3907 {
3908 if (fmt[i] == 'E')
3909 {
3910 int j;
3911
3912 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3913 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3914 return 1;
3915 }
3916 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3917 return 1;
3918 }
3919
3920 return 0;
3921 }
3922
3923 /* Implement REGNO_REG_CLASS. */
3924
3925 enum reg_class
3926 aarch64_regno_regclass (unsigned regno)
3927 {
3928 if (GP_REGNUM_P (regno))
3929 return CORE_REGS;
3930
3931 if (regno == SP_REGNUM)
3932 return STACK_REG;
3933
3934 if (regno == FRAME_POINTER_REGNUM
3935 || regno == ARG_POINTER_REGNUM)
3936 return POINTER_REGS;
3937
3938 if (FP_REGNUM_P (regno))
3939 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3940
3941 return NO_REGS;
3942 }
3943
3944 /* Try a machine-dependent way of reloading an illegitimate address
3945 operand. If we find one, push the reload and return the new rtx. */
3946
3947 rtx
3948 aarch64_legitimize_reload_address (rtx *x_p,
3949 enum machine_mode mode,
3950 int opnum, int type,
3951 int ind_levels ATTRIBUTE_UNUSED)
3952 {
3953 rtx x = *x_p;
3954
3955 /* Do not allow mem (plus (reg, const)) if vector mode. */
3956 if (aarch64_vector_mode_p (mode)
3957 && GET_CODE (x) == PLUS
3958 && REG_P (XEXP (x, 0))
3959 && CONST_INT_P (XEXP (x, 1)))
3960 {
3961 rtx orig_rtx = x;
3962 x = copy_rtx (x);
3963 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3964 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3965 opnum, (enum reload_type) type);
3966 return x;
3967 }
3968
3969 /* We must recognize output that we have already generated ourselves. */
3970 if (GET_CODE (x) == PLUS
3971 && GET_CODE (XEXP (x, 0)) == PLUS
3972 && REG_P (XEXP (XEXP (x, 0), 0))
3973 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3974 && CONST_INT_P (XEXP (x, 1)))
3975 {
3976 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3977 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3978 opnum, (enum reload_type) type);
3979 return x;
3980 }
3981
3982 /* We wish to handle large displacements off a base register by splitting
3983 the addend across an add and the mem insn. This can cut the number of
3984 extra insns needed from 3 to 1. It is only useful for load/store of a
3985 single register with 12 bit offset field. */
3986 if (GET_CODE (x) == PLUS
3987 && REG_P (XEXP (x, 0))
3988 && CONST_INT_P (XEXP (x, 1))
3989 && HARD_REGISTER_P (XEXP (x, 0))
3990 && mode != TImode
3991 && mode != TFmode
3992 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3993 {
3994 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3995 HOST_WIDE_INT low = val & 0xfff;
3996 HOST_WIDE_INT high = val - low;
3997 HOST_WIDE_INT offs;
3998 rtx cst;
3999 enum machine_mode xmode = GET_MODE (x);
4000
4001 /* In ILP32, xmode can be either DImode or SImode. */
4002 gcc_assert (xmode == DImode || xmode == SImode);
4003
4004 /* Punt on BLKmode (size 0) offsets: we cannot ascertain BLKmode
4005 alignment, so leave such addresses to the generic reload code. */
4006 if (GET_MODE_SIZE (mode) == 0)
4007 return NULL_RTX;
4008
4009 offs = low % GET_MODE_SIZE (mode);
4010
4011 /* Align misaligned offset by adjusting high part to compensate. */
4012 if (offs != 0)
4013 {
4014 if (aarch64_uimm12_shift (high + offs))
4015 {
4016 /* Align down. */
4017 low = low - offs;
4018 high = high + offs;
4019 }
4020 else
4021 {
4022 /* Align up. */
4023 offs = GET_MODE_SIZE (mode) - offs;
4024 low = low + offs;
4025 high = high + (low & 0x1000) - offs;
4026 low &= 0xfff;
4027 }
4028 }
4029
4030 /* Check for overflow. */
4031 if (high + low != val)
4032 return NULL_RTX;
4033
4034 cst = GEN_INT (high);
4035 if (!aarch64_uimm12_shift (high))
4036 cst = force_const_mem (xmode, cst);
4037
4038 /* Reload high part into base reg, leaving the low part
4039 in the mem instruction.
4040 Note that replacing this gen_rtx_PLUS with plus_constant is
4041 wrong in this case because we rely on the
4042 (plus (plus reg c1) c2) structure being preserved so that
4043 XEXP (*p, 0) in push_reload below uses the correct term. */
4044 x = gen_rtx_PLUS (xmode,
4045 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4046 GEN_INT (low));
4047
4048 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4049 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4050 opnum, (enum reload_type) type);
4051 return x;
4052 }
4053
4054 return NULL_RTX;
4055 }
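/* A worked example of the splitting above (values chosen for illustration):
   reloading an SImode access to (plus (reg x1) (const_int 0x13008)) gives
   val = 0x13008, low = 0x008, high = 0x13000 and offs = 0x008 % 4 = 0, so no
   realignment is needed.  high = 0x13000 is 0x13 << 12, which
   aarch64_uimm12_shift accepts, so the address is rewritten as
   (plus (plus (reg x1) (const_int 0x13000)) (const_int 8)) and the inner sum
   is reloaded into a base register, giving something like

	add	x2, x1, #0x13, lsl #12
	ldr	w0, [x2, 8]

   i.e. one extra insn instead of materializing the whole constant.  */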
4056
4057
4058 static reg_class_t
4059 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4060 reg_class_t rclass,
4061 enum machine_mode mode,
4062 secondary_reload_info *sri)
4063 {
4064 /* Without the TARGET_SIMD instructions we cannot move a Q register
4065 to a Q register directly. We need a scratch. */
4066 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4067 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4068 && reg_class_subset_p (rclass, FP_REGS))
4069 {
4070 if (mode == TFmode)
4071 sri->icode = CODE_FOR_aarch64_reload_movtf;
4072 else if (mode == TImode)
4073 sri->icode = CODE_FOR_aarch64_reload_movti;
4074 return NO_REGS;
4075 }
4076
4077 /* A TFmode or TImode memory access should be handled via an FP register,
4078 because AArch64 has richer addressing modes for LDR/STR instructions
4079 than for LDP/STP instructions. */
4080 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4081 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4082 return FP_REGS;
4083
4084 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
4085 return CORE_REGS;
4086
4087 return NO_REGS;
4088 }
4089
4090 static bool
4091 aarch64_can_eliminate (const int from, const int to)
4092 {
4093 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4094 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4095
4096 if (frame_pointer_needed)
4097 {
4098 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4099 return true;
4100 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4101 return false;
4102 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4103 && !cfun->calls_alloca)
4104 return true;
4105 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4106 return true;
4107 return false;
4108 }
4109 else
4110 {
4111 /* If we decided that we didn't need a leaf frame pointer but then used
4112 LR in the function, then we'll want a frame pointer after all, so
4113 prevent this elimination to ensure a frame pointer is used.
4114
4115 NOTE: the original value of flag_omit_frame_pointer gets trashed
4116 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4117 of faked_omit_frame_pointer here (which is true when we always
4118 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4119 pointers when LR is clobbered). */
4120 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4121 && df_regs_ever_live_p (LR_REGNUM)
4122 && faked_omit_frame_pointer)
4123 return false;
4124 }
4125
4126 return true;
4127 }
4128
4129 HOST_WIDE_INT
4130 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4131 {
4132 HOST_WIDE_INT frame_size;
4133 HOST_WIDE_INT offset;
4134
4135 aarch64_layout_frame ();
4136 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4137 + crtl->outgoing_args_size
4138 + cfun->machine->saved_varargs_size);
4139
4140 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4141 offset = frame_size;
4142
4143 if (to == HARD_FRAME_POINTER_REGNUM)
4144 {
4145 if (from == ARG_POINTER_REGNUM)
4146 return offset - crtl->outgoing_args_size;
4147
4148 if (from == FRAME_POINTER_REGNUM)
4149 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4150 }
4151
4152 if (to == STACK_POINTER_REGNUM)
4153 {
4154 if (from == FRAME_POINTER_REGNUM)
4155 {
4156 HOST_WIDE_INT elim = crtl->outgoing_args_size
4157 + cfun->machine->frame.saved_regs_size
4158 + get_frame_size ()
4159 - cfun->machine->frame.fp_lr_offset;
4160 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4161 return elim;
4162 }
4163 }
4164
4165 return offset;
4166 }
4167
4168
4169 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4170 previous frame. */
4171
4172 rtx
4173 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4174 {
4175 if (count != 0)
4176 return const0_rtx;
4177 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4178 }
4179
4180
4181 static void
4182 aarch64_asm_trampoline_template (FILE *f)
4183 {
4184 if (TARGET_ILP32)
4185 {
4186 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4187 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4188 }
4189 else
4190 {
4191 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4192 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4193 }
4194 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4195 assemble_aligned_integer (4, const0_rtx);
4196 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4197 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4198 }
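/* For reference, with the usual register assignments for this port
   (IP1 is x17 and the static chain register is x18), the LP64 template
   above assembles to a 32-byte trampoline laid out roughly as:

	offset  0:  ldr  x17, .+16   // load the target function address
	offset  4:  ldr  x18, .+20   // load the static chain value
	offset  8:  br   x17
	offset 12:  .word 0          // padding
	offset 16:  <function address, filled in by aarch64_trampoline_init>
	offset 24:  <static chain value, filled in by aarch64_trampoline_init>

   so the two PC-relative loads read offsets 16 and 24 respectively.  */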
4199
4200 static void
4201 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4202 {
4203 rtx fnaddr, mem, a_tramp;
4204 const int tramp_code_sz = 16;
4205
4206 /* Don't need to copy the trailing D-words, we fill those in below. */
4207 emit_block_move (m_tramp, assemble_trampoline_template (),
4208 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4209 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4210 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4211 if (GET_MODE (fnaddr) != ptr_mode)
4212 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4213 emit_move_insn (mem, fnaddr);
4214
4215 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4216 emit_move_insn (mem, chain_value);
4217
4218 /* XXX We should really define a "clear_cache" pattern and use
4219 gen_clear_cache(). */
4220 a_tramp = XEXP (m_tramp, 0);
4221 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4222 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4223 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4224 ptr_mode);
4225 }
4226
4227 static unsigned char
4228 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4229 {
4230 switch (regclass)
4231 {
4232 case CORE_REGS:
4233 case POINTER_REGS:
4234 case GENERAL_REGS:
4235 case ALL_REGS:
4236 case FP_REGS:
4237 case FP_LO_REGS:
4238 return
4239 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4240 (GET_MODE_SIZE (mode) + 7) / 8;
4241 case STACK_REG:
4242 return 1;
4243
4244 case NO_REGS:
4245 return 0;
4246
4247 default:
4248 break;
4249 }
4250 gcc_unreachable ();
4251 }
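/* A few illustrative values implied by the formula above (hypothetical
   helper, illustrative only, not built; they assume TARGET_SIMD so that
   V4SImode counts as a vector mode).  */
#if 0
static void
aarch64_class_max_nregs_examples (void)
{
  gcc_assert (aarch64_class_max_nregs (GENERAL_REGS, DImode) == 1);
  gcc_assert (aarch64_class_max_nregs (GENERAL_REGS, TImode) == 2);
  gcc_assert (aarch64_class_max_nregs (FP_REGS, V4SImode) == 1);
  gcc_assert (aarch64_class_max_nregs (STACK_REG, DFmode) == 1);
}
#endif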
4252
4253 static reg_class_t
4254 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4255 {
4256 if (regclass == POINTER_REGS)
4257 return GENERAL_REGS;
4258
4259 if (regclass == STACK_REG)
4260 {
4261 if (REG_P (x)
4262 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4263 return regclass;
4264
4265 return NO_REGS;
4266 }
4267
4268 /* If it's an integer immediate that MOVI can't handle, then
4269 FP_REGS is not an option, so we return NO_REGS instead. */
4270 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4271 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4272 return NO_REGS;
4273
4274 /* Register elimination can result in a request for
4275 SP+constant->FP_REGS. We cannot support such operations, which
4276 use SP as source and an FP_REG as destination, so reject them
4277 outright. */
4278 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4279 {
4280 rtx lhs = XEXP (x, 0);
4281
4282 /* Look through a possible SUBREG introduced by ILP32. */
4283 if (GET_CODE (lhs) == SUBREG)
4284 lhs = SUBREG_REG (lhs);
4285
4286 gcc_assert (REG_P (lhs));
4287 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4288 POINTER_REGS));
4289 return NO_REGS;
4290 }
4291
4292 return regclass;
4293 }
4294
4295 void
4296 aarch64_asm_output_labelref (FILE* f, const char *name)
4297 {
4298 asm_fprintf (f, "%U%s", name);
4299 }
4300
4301 static void
4302 aarch64_elf_asm_constructor (rtx symbol, int priority)
4303 {
4304 if (priority == DEFAULT_INIT_PRIORITY)
4305 default_ctor_section_asm_out_constructor (symbol, priority);
4306 else
4307 {
4308 section *s;
4309 char buf[18];
4310 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4311 s = get_section (buf, SECTION_WRITE, NULL);
4312 switch_to_section (s);
4313 assemble_align (POINTER_SIZE);
4314 assemble_aligned_integer (POINTER_BYTES, symbol);
4315 }
4316 }
4317
4318 static void
4319 aarch64_elf_asm_destructor (rtx symbol, int priority)
4320 {
4321 if (priority == DEFAULT_INIT_PRIORITY)
4322 default_dtor_section_asm_out_destructor (symbol, priority);
4323 else
4324 {
4325 section *s;
4326 char buf[18];
4327 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4328 s = get_section (buf, SECTION_WRITE, NULL);
4329 switch_to_section (s);
4330 assemble_align (POINTER_SIZE);
4331 assemble_aligned_integer (POINTER_BYTES, symbol);
4332 }
4333 }
4334
4335 const char*
4336 aarch64_output_casesi (rtx *operands)
4337 {
4338 char buf[100];
4339 char label[100];
4340 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4341 int index;
4342 static const char *const patterns[4][2] =
4343 {
4344 {
4345 "ldrb\t%w3, [%0,%w1,uxtw]",
4346 "add\t%3, %4, %w3, sxtb #2"
4347 },
4348 {
4349 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4350 "add\t%3, %4, %w3, sxth #2"
4351 },
4352 {
4353 "ldr\t%w3, [%0,%w1,uxtw #2]",
4354 "add\t%3, %4, %w3, sxtw #2"
4355 },
4356 /* We assume that DImode is only generated when not optimizing and
4357 that we don't really need 64-bit address offsets. That would
4358 imply an object file with 8GB of code in a single function! */
4359 {
4360 "ldr\t%w3, [%0,%w1,uxtw #2]",
4361 "add\t%3, %4, %w3, sxtw #2"
4362 }
4363 };
4364
4365 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4366
4367 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4368
4369 gcc_assert (index >= 0 && index <= 3);
4370
4371 /* Need to implement table size reduction, by changing the code below. */
4372 output_asm_insn (patterns[index][0], operands);
4373 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4374 snprintf (buf, sizeof (buf),
4375 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4376 output_asm_insn (buf, operands);
4377 output_asm_insn (patterns[index][1], operands);
4378 output_asm_insn ("br\t%3", operands);
4379 assemble_label (asm_out_file, label);
4380 return "";
4381 }
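/* As an example, for a HImode dispatch table (index == 1 above) and a
   register allocation of x0 = table base, w1 = index, x3/x4 = scratch,
   the emitted sequence is along the lines of:

	ldrh	w3, [x0,w1,uxtw #1]
	adr	x4, .Lrtx<N>
	add	x3, x4, w3, sxth #2
	br	x3
   .Lrtx<N>:

   with the dispatch-table entries expected to hold label differences
   relative to .Lrtx<N>, scaled to match the final shift-by-2 add.  */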
4382
4383
4384 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4385 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4386 operator. */
4387
4388 int
4389 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4390 {
4391 if (shift >= 0 && shift <= 3)
4392 {
4393 int size;
4394 for (size = 8; size <= 32; size *= 2)
4395 {
4396 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4397 if (mask == bits << shift)
4398 return size;
4399 }
4400 }
4401 return 0;
4402 }
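/* Some example results of the search above (hypothetical helper,
   illustrative only, not built): a mask of 0xff at shift 0 or 0x1fe at
   shift 1 selects a UXTB-sized field, 0xffff at shift 0 a UXTH-sized one,
   and anything that is not a contiguous 8/16/32-bit field starting at the
   shift amount yields 0.  */
#if 0
static void
aarch64_uxt_size_examples (void)
{
  gcc_assert (aarch64_uxt_size (0, 0xff) == 8);
  gcc_assert (aarch64_uxt_size (1, 0x1fe) == 8);
  gcc_assert (aarch64_uxt_size (0, 0xffff) == 16);
  gcc_assert (aarch64_uxt_size (3, (HOST_WIDE_INT) 0xffffffff << 3) == 32);
  gcc_assert (aarch64_uxt_size (0, 0x7f) == 0);
}
#endif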
4403
4404 static bool
4405 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4406 const_rtx x ATTRIBUTE_UNUSED)
4407 {
4408 /* We can't use blocks for constants when we're using a per-function
4409 constant pool. */
4410 return false;
4411 }
4412
4413 static section *
4414 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4415 rtx x ATTRIBUTE_UNUSED,
4416 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4417 {
4418 /* Force all constant pool entries into the current function section. */
4419 return function_section (current_function_decl);
4420 }
4421
4422
4423 /* Costs. */
4424
4425 /* Helper function for rtx cost calculation. Strip a shift expression
4426 from X. Returns the inner operand if successful, or the original
4427 expression on failure. */
4428 static rtx
4429 aarch64_strip_shift (rtx x)
4430 {
4431 rtx op = x;
4432
4433 if ((GET_CODE (op) == ASHIFT
4434 || GET_CODE (op) == ASHIFTRT
4435 || GET_CODE (op) == LSHIFTRT)
4436 && CONST_INT_P (XEXP (op, 1)))
4437 return XEXP (op, 0);
4438
4439 if (GET_CODE (op) == MULT
4440 && CONST_INT_P (XEXP (op, 1))
4441 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4442 return XEXP (op, 0);
4443
4444 return x;
4445 }
4446
4447 /* Helper function for rtx cost calculation. Strip a shift or extend
4448 expression from X. Returns the inner operand if successful, or the
4449 original expression on failure. We deal with a number of possible
4450 canonicalization variations here. */
4451 static rtx
4452 aarch64_strip_shift_or_extend (rtx x)
4453 {
4454 rtx op = x;
4455
4456 /* Zero and sign extraction of a widened value. */
4457 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4458 && XEXP (op, 2) == const0_rtx
4459 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4460 XEXP (op, 1)))
4461 return XEXP (XEXP (op, 0), 0);
4462
4463 /* It can also be represented (for zero-extend) as an AND with an
4464 immediate. */
4465 if (GET_CODE (op) == AND
4466 && GET_CODE (XEXP (op, 0)) == MULT
4467 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4468 && CONST_INT_P (XEXP (op, 1))
4469 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4470 INTVAL (XEXP (op, 1))) != 0)
4471 return XEXP (XEXP (op, 0), 0);
4472
4473 /* Now handle extended register, as this may also have an optional
4474 left shift by 1..4. */
4475 if (GET_CODE (op) == ASHIFT
4476 && CONST_INT_P (XEXP (op, 1))
4477 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4478 op = XEXP (op, 0);
4479
4480 if (GET_CODE (op) == ZERO_EXTEND
4481 || GET_CODE (op) == SIGN_EXTEND)
4482 op = XEXP (op, 0);
4483
4484 if (op != x)
4485 return op;
4486
4487 return aarch64_strip_shift (x);
4488 }
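/* Some examples of what the two strip functions above accept, purely for
   illustration:

     (ashift (reg) (const_int 3))                        -> (reg)
     (mult (reg) (const_int 8))                          -> (reg)  [power of two]
     (and (mult (reg) (const_int 4)) (const_int 0x3fc))  -> (reg)  [zero-extend
					form, since 0xff << 2 == 0x3fc]
     (ashift (zero_extend (reg)) (const_int 2))          -> (reg)  [extended reg]

   Anything that does not match comes back unchanged, so callers can simply
   compare the result against the original rtx.  */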
4489
4490 /* Calculate the cost of calculating X, storing it in *COST. Result
4491 is true if the total cost of the operation has now been calculated. */
4492 static bool
4493 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4494 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4495 {
4496 rtx op0, op1;
4497 const struct cpu_cost_table *extra_cost
4498 = aarch64_tune_params->insn_extra_cost;
4499
4500 switch (code)
4501 {
4502 case SET:
4503 op0 = SET_DEST (x);
4504 op1 = SET_SRC (x);
4505
4506 switch (GET_CODE (op0))
4507 {
4508 case MEM:
4509 if (speed)
4510 *cost += extra_cost->ldst.store;
4511
4512 if (op1 != const0_rtx)
4513 *cost += rtx_cost (op1, SET, 1, speed);
4514 return true;
4515
4516 case SUBREG:
4517 if (! REG_P (SUBREG_REG (op0)))
4518 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4519 /* Fall through. */
4520 case REG:
4521 /* Cost is just the cost of the RHS of the set. */
4522 *cost += rtx_cost (op1, SET, 1, true);
4523 return true;
4524
4525 case ZERO_EXTRACT: /* Bit-field insertion. */
4526 case SIGN_EXTRACT:
4527 /* Strip any redundant widening of the RHS to meet the width of
4528 the target. */
4529 if (GET_CODE (op1) == SUBREG)
4530 op1 = SUBREG_REG (op1);
4531 if ((GET_CODE (op1) == ZERO_EXTEND
4532 || GET_CODE (op1) == SIGN_EXTEND)
4533 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4534 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4535 >= INTVAL (XEXP (op0, 1))))
4536 op1 = XEXP (op1, 0);
4537 *cost += rtx_cost (op1, SET, 1, speed);
4538 return true;
4539
4540 default:
4541 break;
4542 }
4543 return false;
4544
4545 case MEM:
4546 if (speed)
4547 *cost += extra_cost->ldst.load;
4548
4549 return true;
4550
4551 case NEG:
4552 op0 = CONST0_RTX (GET_MODE (x));
4553 op1 = XEXP (x, 0);
4554 goto cost_minus;
4555
4556 case COMPARE:
4557 op0 = XEXP (x, 0);
4558 op1 = XEXP (x, 1);
4559
4560 if (op1 == const0_rtx
4561 && GET_CODE (op0) == AND)
4562 {
4563 x = op0;
4564 goto cost_logic;
4565 }
4566
4567 /* Comparisons can work if the order is swapped.
4568 Canonicalization puts the more complex operation first, but
4569 we want it in op1. */
4570 if (! (REG_P (op0)
4571 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4572 {
4573 op0 = XEXP (x, 1);
4574 op1 = XEXP (x, 0);
4575 }
4576 goto cost_minus;
4577
4578 case MINUS:
4579 op0 = XEXP (x, 0);
4580 op1 = XEXP (x, 1);
4581
4582 cost_minus:
4583 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4584 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4585 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4586 {
4587 if (op0 != const0_rtx)
4588 *cost += rtx_cost (op0, MINUS, 0, speed);
4589
4590 if (CONST_INT_P (op1))
4591 {
4592 if (!aarch64_uimm12_shift (INTVAL (op1)))
4593 *cost += rtx_cost (op1, MINUS, 1, speed);
4594 }
4595 else
4596 {
4597 op1 = aarch64_strip_shift_or_extend (op1);
4598 *cost += rtx_cost (op1, MINUS, 1, speed);
4599 }
4600 return true;
4601 }
4602
4603 return false;
4604
4605 case PLUS:
4606 op0 = XEXP (x, 0);
4607 op1 = XEXP (x, 1);
4608
4609 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4610 {
4611 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4612 {
4613 *cost += rtx_cost (op0, PLUS, 0, speed);
4614 }
4615 else
4616 {
4617 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4618
4619 if (new_op0 == op0
4620 && GET_CODE (op0) == MULT)
4621 {
4622 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4623 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4624 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4625 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4626 {
4627 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4628 speed)
4629 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4630 speed)
4631 + rtx_cost (op1, PLUS, 1, speed));
4632 if (speed)
4633 *cost +=
4634 extra_cost->mult[GET_MODE (x) == DImode].extend_add;
4635 return true;
4636 }
4637 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4638 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4639 + rtx_cost (op1, PLUS, 1, speed));
4640
4641 if (speed)
4642 *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
return true;
4643 }
4644
4645 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4646 + rtx_cost (op1, PLUS, 1, speed));
4647 }
4648 return true;
4649 }
4650
4651 return false;
4652
4653 case IOR:
4654 case XOR:
4655 case AND:
4656 cost_logic:
4657 op0 = XEXP (x, 0);
4658 op1 = XEXP (x, 1);
4659
4660 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4661 {
4662 if (CONST_INT_P (op1)
4663 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4664 {
4665 *cost += rtx_cost (op0, AND, 0, speed);
4666 }
4667 else
4668 {
4669 if (GET_CODE (op0) == NOT)
4670 op0 = XEXP (op0, 0);
4671 op0 = aarch64_strip_shift (op0);
4672 *cost += (rtx_cost (op0, AND, 0, speed)
4673 + rtx_cost (op1, AND, 1, speed));
4674 }
4675 return true;
4676 }
4677 return false;
4678
4679 case ZERO_EXTEND:
4680 if ((GET_MODE (x) == DImode
4681 && GET_MODE (XEXP (x, 0)) == SImode)
4682 || GET_CODE (XEXP (x, 0)) == MEM)
4683 {
4684 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4685 return true;
4686 }
4687 return false;
4688
4689 case SIGN_EXTEND:
4690 if (GET_CODE (XEXP (x, 0)) == MEM)
4691 {
4692 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4693 return true;
4694 }
4695 return false;
4696
4697 case ROTATE:
4698 if (!CONST_INT_P (XEXP (x, 1)))
4699 *cost += COSTS_N_INSNS (2);
4700 /* Fall through. */
4701 case ROTATERT:
4702 case LSHIFTRT:
4703 case ASHIFT:
4704 case ASHIFTRT:
4705
4706 /* Shifting by a register often takes an extra cycle. */
4707 if (speed && !CONST_INT_P (XEXP (x, 1)))
4708 *cost += extra_cost->alu.arith_shift_reg;
4709
4710 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4711 return true;
4712
4713 case HIGH:
4714 if (!CONSTANT_P (XEXP (x, 0)))
4715 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4716 return true;
4717
4718 case LO_SUM:
4719 if (!CONSTANT_P (XEXP (x, 1)))
4720 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4721 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4722 return true;
4723
4724 case ZERO_EXTRACT:
4725 case SIGN_EXTRACT:
4726 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4727 return true;
4728
4729 case MULT:
4730 op0 = XEXP (x, 0);
4731 op1 = XEXP (x, 1);
4732
4733 *cost = COSTS_N_INSNS (1);
4734 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4735 {
4736 if (CONST_INT_P (op1)
4737 && exact_log2 (INTVAL (op1)) > 0)
4738 {
4739 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4740 return true;
4741 }
4742
4743 if ((GET_CODE (op0) == ZERO_EXTEND
4744 && GET_CODE (op1) == ZERO_EXTEND)
4745 || (GET_CODE (op0) == SIGN_EXTEND
4746 && GET_CODE (op1) == SIGN_EXTEND))
4747 {
4748 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4749 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4750 if (speed)
4751 *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
4752 return true;
4753 }
4754
4755 if (speed)
4756 *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
4757 }
4758 else if (speed)
4759 {
4760 if (GET_MODE (x) == DFmode)
4761 *cost += extra_cost->fp[1].mult;
4762 else if (GET_MODE (x) == SFmode)
4763 *cost += extra_cost->fp[0].mult;
4764 }
4765
4766 return false; /* All arguments need to be in registers. */
4767
4768 case MOD:
4769 case UMOD:
4770 *cost = COSTS_N_INSNS (2);
4771 if (speed)
4772 {
4773 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4774 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
4775 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
4776 else if (GET_MODE (x) == DFmode)
4777 *cost += (extra_cost->fp[1].mult
4778 + extra_cost->fp[1].div);
4779 else if (GET_MODE (x) == SFmode)
4780 *cost += (extra_cost->fp[0].mult
4781 + extra_cost->fp[0].div);
4782 }
4783 return false; /* All arguments need to be in registers. */
4784
4785 case DIV:
4786 case UDIV:
4787 *cost = COSTS_N_INSNS (1);
4788 if (speed)
4789 {
4790 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4791 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
4792 else if (GET_MODE (x) == DFmode)
4793 *cost += extra_cost->fp[1].div;
4794 else if (GET_MODE (x) == SFmode)
4795 *cost += extra_cost->fp[0].div;
4796 }
4797 return false; /* All arguments need to be in registers. */
4798
4799 default:
4800 break;
4801 }
4802 return false;
4803 }
4804
4805 static int
4806 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4807 enum machine_mode mode ATTRIBUTE_UNUSED,
4808 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4809 {
4810 enum rtx_code c = GET_CODE (x);
4811 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4812
4813 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4814 return addr_cost->pre_modify;
4815
4816 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4817 return addr_cost->post_modify;
4818
4819 if (c == PLUS)
4820 {
4821 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4822 return addr_cost->imm_offset;
4823 else if (GET_CODE (XEXP (x, 0)) == MULT
4824 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4825 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4826 return addr_cost->register_extend;
4827
4828 return addr_cost->register_offset;
4829 }
4830 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4831 return addr_cost->imm_offset;
4832
4833 return 0;
4834 }
4835
4836 static int
4837 aarch64_register_move_cost (enum machine_mode mode,
4838 reg_class_t from, reg_class_t to)
4839 {
4840 const struct cpu_regmove_cost *regmove_cost
4841 = aarch64_tune_params->regmove_cost;
4842
4843 if (from == GENERAL_REGS && to == GENERAL_REGS)
4844 return regmove_cost->GP2GP;
4845 else if (from == GENERAL_REGS)
4846 return regmove_cost->GP2FP;
4847 else if (to == GENERAL_REGS)
4848 return regmove_cost->FP2GP;
4849
4850 /* When AdvSIMD instructions are disabled it is not possible to move
4851 a 128-bit value directly between Q registers. This is handled in
4852 secondary reload. A general register is used as a scratch to move
4853 the upper DI value and the lower DI value is moved directly,
4854 hence the cost is the sum of three moves. */
4855
4856 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4857 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4858
4859 return regmove_cost->FP2FP;
4860 }
4861
4862 static int
4863 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4864 reg_class_t rclass ATTRIBUTE_UNUSED,
4865 bool in ATTRIBUTE_UNUSED)
4866 {
4867 return aarch64_tune_params->memmov_cost;
4868 }
4869
4870 /* Vectorizer cost model target hooks. */
4871
4872 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4873 static int
4874 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4875 tree vectype,
4876 int misalign ATTRIBUTE_UNUSED)
4877 {
4878 unsigned elements;
4879
4880 switch (type_of_cost)
4881 {
4882 case scalar_stmt:
4883 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4884
4885 case scalar_load:
4886 return aarch64_tune_params->vec_costs->scalar_load_cost;
4887
4888 case scalar_store:
4889 return aarch64_tune_params->vec_costs->scalar_store_cost;
4890
4891 case vector_stmt:
4892 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4893
4894 case vector_load:
4895 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4896
4897 case vector_store:
4898 return aarch64_tune_params->vec_costs->vec_store_cost;
4899
4900 case vec_to_scalar:
4901 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4902
4903 case scalar_to_vec:
4904 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4905
4906 case unaligned_load:
4907 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4908
4909 case unaligned_store:
4910 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4911
4912 case cond_branch_taken:
4913 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4914
4915 case cond_branch_not_taken:
4916 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4917
4918 case vec_perm:
4919 case vec_promote_demote:
4920 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4921
4922 case vec_construct:
4923 elements = TYPE_VECTOR_SUBPARTS (vectype);
4924 return elements / 2 + 1;
4925
4926 default:
4927 gcc_unreachable ();
4928 }
4929 }
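/* By way of example, most of the entries above simply forward to the
   per-CPU vector cost table, while vec_construct is synthesised from the
   vector width: constructing a V4SI vector gives TYPE_VECTOR_SUBPARTS == 4
   and hence a cost of 4 / 2 + 1 = 3.  */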
4930
4931 /* Implement targetm.vectorize.add_stmt_cost. */
4932 static unsigned
4933 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4934 struct _stmt_vec_info *stmt_info, int misalign,
4935 enum vect_cost_model_location where)
4936 {
4937 unsigned *cost = (unsigned *) data;
4938 unsigned retval = 0;
4939
4940 if (flag_vect_cost_model)
4941 {
4942 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4943 int stmt_cost =
4944 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4945
4946 /* Statements in an inner loop relative to the loop being
4947 vectorized are weighted more heavily. The value here is
4948 a function (linear for now) of the loop nest level. */
4949 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4950 {
4951 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4952 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4953 unsigned nest_level = loop_depth (loop);
4954
4955 count *= nest_level;
4956 }
4957
4958 retval = (unsigned) (count * stmt_cost);
4959 cost[where] += retval;
4960 }
4961
4962 return retval;
4963 }
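/* A small worked example of the weighting above, with illustrative numbers:
   if a vector statement whose table cost is 1 appears twice (COUNT == 2) in
   the body of a loop at depth 2 inside the loop being vectorized, COUNT
   becomes 2 * 2 = 4 and 4 * 1 = 4 is accumulated into cost[vect_body].
   Statements not in an inner loop keep their plain count * stmt_cost
   contribution.  */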
4964
4965 static void initialize_aarch64_code_model (void);
4966
4967 /* Parse the architecture extension string. */
4968
4969 static void
4970 aarch64_parse_extension (char *str)
4971 {
4972 /* The extension string is parsed left to right. */
4973 const struct aarch64_option_extension *opt = NULL;
4974
4975 /* Flag to say whether we are adding or removing an extension. */
4976 int adding_ext = -1;
4977
4978 while (str != NULL && *str != 0)
4979 {
4980 char *ext;
4981 size_t len;
4982
4983 str++;
4984 ext = strchr (str, '+');
4985
4986 if (ext != NULL)
4987 len = ext - str;
4988 else
4989 len = strlen (str);
4990
4991 if (len >= 2 && strncmp (str, "no", 2) == 0)
4992 {
4993 adding_ext = 0;
4994 len -= 2;
4995 str += 2;
4996 }
4997 else if (len > 0)
4998 adding_ext = 1;
4999
5000 if (len == 0)
5001 {
5002 error ("missing feature modifier after %qs", "+no");
5003 return;
5004 }
5005
5006 /* Scan over the extensions table trying to find an exact match. */
5007 for (opt = all_extensions; opt->name != NULL; opt++)
5008 {
5009 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5010 {
5011 /* Add or remove the extension. */
5012 if (adding_ext)
5013 aarch64_isa_flags |= opt->flags_on;
5014 else
5015 aarch64_isa_flags &= ~(opt->flags_off);
5016 break;
5017 }
5018 }
5019
5020 if (opt->name == NULL)
5021 {
5022 /* Extension not found in list. */
5023 error ("unknown feature modifier %qs", str);
5024 return;
5025 }
5026
5027 str = ext;
5028 }
5029
5030 return;
5031 }
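/* A worked example of the parse above, assuming "fp" and "simd" are entries
   in all_extensions: given the trailing string "+fp+nosimd" from an -march
   or -mcpu option, the first iteration skips the '+', matches "fp" and ORs
   in its flags_on bits; the second iteration skips the '+', strips the
   leading "no", matches "simd" and clears its flags_off bits.  An empty or
   unknown modifier is reported with error () and parsing stops.  */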
5032
5033 /* Parse the ARCH string. */
5034
5035 static void
5036 aarch64_parse_arch (void)
5037 {
5038 char *ext;
5039 const struct processor *arch;
5040 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5041 size_t len;
5042
5043 strcpy (str, aarch64_arch_string);
5044
5045 ext = strchr (str, '+');
5046
5047 if (ext != NULL)
5048 len = ext - str;
5049 else
5050 len = strlen (str);
5051
5052 if (len == 0)
5053 {
5054 error ("missing arch name in -march=%qs", str);
5055 return;
5056 }
5057
5058 /* Loop through the list of supported ARCHs to find a match. */
5059 for (arch = all_architectures; arch->name != NULL; arch++)
5060 {
5061 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5062 {
5063 selected_arch = arch;
5064 aarch64_isa_flags = selected_arch->flags;
5065 selected_cpu = &all_cores[selected_arch->core];
5066
5067 if (ext != NULL)
5068 {
5069 /* ARCH string contains at least one extension. */
5070 aarch64_parse_extension (ext);
5071 }
5072
5073 return;
5074 }
5075 }
5076
5077 /* ARCH name not found in list. */
5078 error ("unknown value %qs for -march", str);
5079 return;
5080 }
5081
5082 /* Parse the CPU string. */
5083
5084 static void
5085 aarch64_parse_cpu (void)
5086 {
5087 char *ext;
5088 const struct processor *cpu;
5089 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5090 size_t len;
5091
5092 strcpy (str, aarch64_cpu_string);
5093
5094 ext = strchr (str, '+');
5095
5096 if (ext != NULL)
5097 len = ext - str;
5098 else
5099 len = strlen (str);
5100
5101 if (len == 0)
5102 {
5103 error ("missing cpu name in -mcpu=%qs", str);
5104 return;
5105 }
5106
5107 /* Loop through the list of supported CPUs to find a match. */
5108 for (cpu = all_cores; cpu->name != NULL; cpu++)
5109 {
5110 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5111 {
5112 selected_cpu = cpu;
5113 aarch64_isa_flags = selected_cpu->flags;
5114
5115 if (ext != NULL)
5116 {
5117 /* CPU string contains at least one extension. */
5118 aarch64_parse_extension (ext);
5119 }
5120
5121 return;
5122 }
5123 }
5124
5125 /* CPU name not found in list. */
5126 error ("unknown value %qs for -mcpu", str);
5127 return;
5128 }
5129
5130 /* Parse the TUNE string. */
5131
5132 static void
5133 aarch64_parse_tune (void)
5134 {
5135 const struct processor *cpu;
5136 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5137 strcpy (str, aarch64_tune_string);
5138
5139 /* Loop through the list of supported CPUs to find a match. */
5140 for (cpu = all_cores; cpu->name != NULL; cpu++)
5141 {
5142 if (strcmp (cpu->name, str) == 0)
5143 {
5144 selected_tune = cpu;
5145 return;
5146 }
5147 }
5148
5149 /* CPU name not found in list. */
5150 error ("unknown value %qs for -mtune", str);
5151 return;
5152 }
5153
5154
5155 /* Implement TARGET_OPTION_OVERRIDE. */
5156
5157 static void
5158 aarch64_override_options (void)
5159 {
5160 /* -march wins over -mcpu, so when -march is defined the -mcpu setting is
5161 ignored and the CPU is derived from the architecture; otherwise -march
5162 remains undefined. -mtune can be used with either -march or -mcpu. */
5163
5164 if (aarch64_arch_string)
5165 {
5166 aarch64_parse_arch ();
5167 aarch64_cpu_string = NULL;
5168 }
5169
5170 if (aarch64_cpu_string)
5171 {
5172 aarch64_parse_cpu ();
5173 selected_arch = NULL;
5174 }
5175
5176 if (aarch64_tune_string)
5177 {
5178 aarch64_parse_tune ();
5179 }
5180
5181 initialize_aarch64_code_model ();
5182
5183 aarch64_build_bitmask_table ();
5184
5185 /* This target defaults to strict volatile bitfields. */
5186 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5187 flag_strict_volatile_bitfields = 1;
5188
5189 /* If the user did not specify a processor, choose the default
5190 one for them. This will be the CPU set during configuration using
5191 --with-cpu, otherwise it is "cortex-a53". */
5192 if (!selected_cpu)
5193 {
5194 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5195 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5196 }
5197
5198 gcc_assert (selected_cpu);
5199
5200 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5201 if (!selected_tune)
5202 selected_tune = &all_cores[selected_cpu->core];
5203
5204 aarch64_tune_flags = selected_tune->flags;
5205 aarch64_tune = selected_tune->core;
5206 aarch64_tune_params = selected_tune->tune;
5207
5208 aarch64_override_options_after_change ();
5209 }
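/* As an example of the precedence above, assuming "armv8-a", "cortex-a57"
   and "cortex-a53" all appear in the tables: with
   "-mcpu=cortex-a57 -march=armv8-a -mtune=cortex-a53" the -march string is
   parsed first and the -mcpu string is then discarded, so the CPU and ISA
   flags come from the architecture entry, while -mtune still selects the
   cortex-a53 tuning parameters.  */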
5210
5211 /* Implement targetm.override_options_after_change. */
5212
5213 static void
5214 aarch64_override_options_after_change (void)
5215 {
5216 faked_omit_frame_pointer = false;
5217
5218 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5219 that aarch64_frame_pointer_required will be called. We need to remember
5220 whether flag_omit_frame_pointer was turned on normally or just faked. */
5221
5222 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5223 {
5224 flag_omit_frame_pointer = true;
5225 faked_omit_frame_pointer = true;
5226 }
5227 }
5228
5229 static struct machine_function *
5230 aarch64_init_machine_status (void)
5231 {
5232 struct machine_function *machine;
5233 machine = ggc_alloc_cleared_machine_function ();
5234 return machine;
5235 }
5236
5237 void
5238 aarch64_init_expanders (void)
5239 {
5240 init_machine_status = aarch64_init_machine_status;
5241 }
5242
5243 /* Work out which code model to use, reconciling the requested model
with -fpic/-fPIC. */
5244 static void
5245 initialize_aarch64_code_model (void)
5246 {
5247 if (flag_pic)
5248 {
5249 switch (aarch64_cmodel_var)
5250 {
5251 case AARCH64_CMODEL_TINY:
5252 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5253 break;
5254 case AARCH64_CMODEL_SMALL:
5255 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5256 break;
5257 case AARCH64_CMODEL_LARGE:
5258 sorry ("code model %qs with -f%s", "large",
5259 flag_pic > 1 ? "PIC" : "pic");
5260 default:
5261 gcc_unreachable ();
5262 }
5263 }
5264 else
5265 aarch64_cmodel = aarch64_cmodel_var;
5266 }
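/* For example, "-mcmodel=small -fPIC" arrives here with aarch64_cmodel_var
   set to AARCH64_CMODEL_SMALL and flag_pic nonzero, and ends up selecting
   AARCH64_CMODEL_SMALL_PIC; without any -fpic/-fPIC option the requested
   model is used unchanged.  The large model is rejected with sorry () when
   PIC code is requested.  */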
5267
5268 /* Return true if SYMBOL_REF X binds locally. */
5269
5270 static bool
5271 aarch64_symbol_binds_local_p (const_rtx x)
5272 {
5273 return (SYMBOL_REF_DECL (x)
5274 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5275 : SYMBOL_REF_LOCAL_P (x));
5276 }
5277
5278 /* Return true if SYMBOL_REF X is thread-local. */
5279 static bool
5280 aarch64_tls_symbol_p (rtx x)
5281 {
5282 if (! TARGET_HAVE_TLS)
5283 return false;
5284
5285 if (GET_CODE (x) != SYMBOL_REF)
5286 return false;
5287
5288 return SYMBOL_REF_TLS_MODEL (x) != 0;
5289 }
5290
5291 /* Classify a TLS symbol into one of the TLS kinds. */
5292 enum aarch64_symbol_type
5293 aarch64_classify_tls_symbol (rtx x)
5294 {
5295 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5296
5297 switch (tls_kind)
5298 {
5299 case TLS_MODEL_GLOBAL_DYNAMIC:
5300 case TLS_MODEL_LOCAL_DYNAMIC:
5301 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5302
5303 case TLS_MODEL_INITIAL_EXEC:
5304 return SYMBOL_SMALL_GOTTPREL;
5305
5306 case TLS_MODEL_LOCAL_EXEC:
5307 return SYMBOL_SMALL_TPREL;
5308
5309 case TLS_MODEL_EMULATED:
5310 case TLS_MODEL_NONE:
5311 return SYMBOL_FORCE_TO_MEM;
5312
5313 default:
5314 gcc_unreachable ();
5315 }
5316 }
5317
5318 /* Return the method that should be used to access SYMBOL_REF or
5319 LABEL_REF X in context CONTEXT. */
5320
5321 enum aarch64_symbol_type
5322 aarch64_classify_symbol (rtx x,
5323 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5324 {
5325 if (GET_CODE (x) == LABEL_REF)
5326 {
5327 switch (aarch64_cmodel)
5328 {
5329 case AARCH64_CMODEL_LARGE:
5330 return SYMBOL_FORCE_TO_MEM;
5331
5332 case AARCH64_CMODEL_TINY_PIC:
5333 case AARCH64_CMODEL_TINY:
5334 return SYMBOL_TINY_ABSOLUTE;
5335
5336 case AARCH64_CMODEL_SMALL_PIC:
5337 case AARCH64_CMODEL_SMALL:
5338 return SYMBOL_SMALL_ABSOLUTE;
5339
5340 default:
5341 gcc_unreachable ();
5342 }
5343 }
5344
5345 if (GET_CODE (x) == SYMBOL_REF)
5346 {
5347 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5348 || CONSTANT_POOL_ADDRESS_P (x))
5349 return SYMBOL_FORCE_TO_MEM;
5350
5351 if (aarch64_tls_symbol_p (x))
5352 return aarch64_classify_tls_symbol (x);
5353
5354 switch (aarch64_cmodel)
5355 {
5356 case AARCH64_CMODEL_TINY:
5357 if (SYMBOL_REF_WEAK (x))
5358 return SYMBOL_FORCE_TO_MEM;
5359 return SYMBOL_TINY_ABSOLUTE;
5360
5361 case AARCH64_CMODEL_SMALL:
5362 if (SYMBOL_REF_WEAK (x))
5363 return SYMBOL_FORCE_TO_MEM;
5364 return SYMBOL_SMALL_ABSOLUTE;
5365
5366 case AARCH64_CMODEL_TINY_PIC:
5367 if (!aarch64_symbol_binds_local_p (x))
5368 return SYMBOL_TINY_GOT;
5369 return SYMBOL_TINY_ABSOLUTE;
5370
5371 case AARCH64_CMODEL_SMALL_PIC:
5372 if (!aarch64_symbol_binds_local_p (x))
5373 return SYMBOL_SMALL_GOT;
5374 return SYMBOL_SMALL_ABSOLUTE;
5375
5376 default:
5377 gcc_unreachable ();
5378 }
5379 }
5380
5381 /* By default push everything into the constant pool. */
5382 return SYMBOL_FORCE_TO_MEM;
5383 }
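/* Some representative classifications produced by the function above:
   under the small code model a non-weak SYMBOL_REF is SYMBOL_SMALL_ABSOLUTE
   while a weak one is forced to the literal pool (SYMBOL_FORCE_TO_MEM);
   under the small PIC model a symbol that does not bind locally becomes
   SYMBOL_SMALL_GOT; constant-pool addresses and everything under the large
   model are SYMBOL_FORCE_TO_MEM; TLS symbols are dispatched to
   aarch64_classify_tls_symbol above.  */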
5384
5385 bool
5386 aarch64_constant_address_p (rtx x)
5387 {
5388 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5389 }
5390
5391 bool
5392 aarch64_legitimate_pic_operand_p (rtx x)
5393 {
5394 if (GET_CODE (x) == SYMBOL_REF
5395 || (GET_CODE (x) == CONST
5396 && GET_CODE (XEXP (x, 0)) == PLUS
5397 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5398 return false;
5399
5400 return true;
5401 }
5402
5403 /* Return true if X holds either a quarter-precision or
5404 floating-point +0.0 constant. */
5405 static bool
5406 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5407 {
5408 if (!CONST_DOUBLE_P (x))
5409 return false;
5410
5411 /* TODO: We could handle moving 0.0 to a TFmode register,
5412 but first we would like to refactor the movtf_aarch64
5413 pattern to be more amenable to splitting moves properly and
5414 to gating correctly on TARGET_SIMD. For now, reject all
5415 constants that are not destined for SFmode or DFmode registers. */
5416 if (!(mode == SFmode || mode == DFmode))
5417 return false;
5418
5419 if (aarch64_float_const_zero_rtx_p (x))
5420 return true;
5421 return aarch64_float_const_representable_p (x);
5422 }
5423
5424 static bool
5425 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5426 {
5427 /* Do not allow vector struct mode constants. We could support
5428 0 and -1 easily, but they need support in aarch64-simd.md. */
5429 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5430 return false;
5431
5432 /* This could probably go away because
5433 we now decompose CONST_INTs according to expand_mov_immediate. */
5434 if ((GET_CODE (x) == CONST_VECTOR
5435 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5436 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5437 return !targetm.cannot_force_const_mem (mode, x);
5438
5439 if (GET_CODE (x) == HIGH
5440 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5441 return true;
5442
5443 return aarch64_constant_address_p (x);
5444 }
5445
5446 rtx
5447 aarch64_load_tp (rtx target)
5448 {
5449 if (!target
5450 || GET_MODE (target) != Pmode
5451 || !register_operand (target, Pmode))
5452 target = gen_reg_rtx (Pmode);
5453
5454 /* Can return in any reg. */
5455 emit_insn (gen_aarch64_load_tp_hard (target));
5456 return target;
5457 }
5458
5459 /* On AAPCS systems, this is the "struct __va_list". */
5460 static GTY(()) tree va_list_type;
5461
5462 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5463 Return the type to use as __builtin_va_list.
5464
5465 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5466
5467 struct __va_list
5468 {
5469 void *__stack;
5470 void *__gr_top;
5471 void *__vr_top;
5472 int __gr_offs;
5473 int __vr_offs;
5474 }; */
5475
5476 static tree
5477 aarch64_build_builtin_va_list (void)
5478 {
5479 tree va_list_name;
5480 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5481
5482 /* Create the type. */
5483 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5484 /* Give it the required name. */
5485 va_list_name = build_decl (BUILTINS_LOCATION,
5486 TYPE_DECL,
5487 get_identifier ("__va_list"),
5488 va_list_type);
5489 DECL_ARTIFICIAL (va_list_name) = 1;
5490 TYPE_NAME (va_list_type) = va_list_name;
5491 TYPE_STUB_DECL (va_list_type) = va_list_name;
5492
5493 /* Create the fields. */
5494 f_stack = build_decl (BUILTINS_LOCATION,
5495 FIELD_DECL, get_identifier ("__stack"),
5496 ptr_type_node);
5497 f_grtop = build_decl (BUILTINS_LOCATION,
5498 FIELD_DECL, get_identifier ("__gr_top"),
5499 ptr_type_node);
5500 f_vrtop = build_decl (BUILTINS_LOCATION,
5501 FIELD_DECL, get_identifier ("__vr_top"),
5502 ptr_type_node);
5503 f_groff = build_decl (BUILTINS_LOCATION,
5504 FIELD_DECL, get_identifier ("__gr_offs"),
5505 integer_type_node);
5506 f_vroff = build_decl (BUILTINS_LOCATION,
5507 FIELD_DECL, get_identifier ("__vr_offs"),
5508 integer_type_node);
5509
5510 DECL_ARTIFICIAL (f_stack) = 1;
5511 DECL_ARTIFICIAL (f_grtop) = 1;
5512 DECL_ARTIFICIAL (f_vrtop) = 1;
5513 DECL_ARTIFICIAL (f_groff) = 1;
5514 DECL_ARTIFICIAL (f_vroff) = 1;
5515
5516 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5517 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5518 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5519 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5520 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5521
5522 TYPE_FIELDS (va_list_type) = f_stack;
5523 DECL_CHAIN (f_stack) = f_grtop;
5524 DECL_CHAIN (f_grtop) = f_vrtop;
5525 DECL_CHAIN (f_vrtop) = f_groff;
5526 DECL_CHAIN (f_groff) = f_vroff;
5527
5528 /* Compute its layout. */
5529 layout_type (va_list_type);
5530
5531 return va_list_type;
5532 }
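/* With LP64 field sizes (8-byte pointers, 4-byte ints) the record built
   above is therefore 3 * 8 + 2 * 4 = 32 bytes with 8-byte alignment,
   matching the AAPCS64 definition of struct __va_list.  */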
5533
5534 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5535 static void
5536 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5537 {
5538 const CUMULATIVE_ARGS *cum;
5539 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5540 tree stack, grtop, vrtop, groff, vroff;
5541 tree t;
5542 int gr_save_area_size;
5543 int vr_save_area_size;
5544 int vr_offset;
5545
5546 cum = &crtl->args.info;
5547 gr_save_area_size
5548 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5549 vr_save_area_size
5550 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5551
5552 if (TARGET_GENERAL_REGS_ONLY)
5553 {
5554 if (cum->aapcs_nvrn > 0)
5555 sorry ("%qs and floating point or vector arguments",
5556 "-mgeneral-regs-only");
5557 vr_save_area_size = 0;
5558 }
5559
5560 f_stack = TYPE_FIELDS (va_list_type_node);
5561 f_grtop = DECL_CHAIN (f_stack);
5562 f_vrtop = DECL_CHAIN (f_grtop);
5563 f_groff = DECL_CHAIN (f_vrtop);
5564 f_vroff = DECL_CHAIN (f_groff);
5565
5566 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5567 NULL_TREE);
5568 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5569 NULL_TREE);
5570 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5571 NULL_TREE);
5572 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5573 NULL_TREE);
5574 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5575 NULL_TREE);
5576
5577 /* Emit code to initialize STACK, which points to the next varargs stack
5578 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5579 by named arguments. STACK is 8-byte aligned. */
5580 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5581 if (cum->aapcs_stack_size > 0)
5582 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5583 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5584 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5585
5586 /* Emit code to initialize GRTOP, the top of the GR save area.
5587 virtual_incoming_args_rtx should have been 16 byte aligned. */
5588 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5589 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5590 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5591
5592 /* Emit code to initialize VRTOP, the top of the VR save area.
5593 This address is gr_save_area_bytes below GRTOP, rounded
5594 down to the next 16-byte boundary. */
5595 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5596 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5597 STACK_BOUNDARY / BITS_PER_UNIT);
5598
5599 if (vr_offset)
5600 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5601 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5602 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5603
5604 /* Emit code to initialize GROFF, the offset from GRTOP of the
5605 next GPR argument. */
5606 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5607 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5608 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5609
5610 /* Likewise emit code to initialize VROFF, the offset from VRTOP
5611 of the next VR argument. */
5612 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5613 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5614 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5615 }
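/* A worked example of the initialization above: for a variadic callee such
   as "int f (int a, ...)" the single named argument consumes one core
   register and no vector registers, so gr_save_area_size = (8 - 1) * 8 = 56
   and vr_save_area_size = (8 - 0) * 16 = 128.  __stack then points at the
   first anonymous stack argument, __gr_top at the incoming-arguments
   pointer, __vr_top at __gr_top - ROUND_UP (56, 16) = __gr_top - 64, and
   the offsets are initialized to __gr_offs = -56 and __vr_offs = -128.  */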
5616
5617 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5618
5619 static tree
5620 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5621 gimple_seq *post_p ATTRIBUTE_UNUSED)
5622 {
5623 tree addr;
5624 bool indirect_p;
5625 bool is_ha; /* is HFA or HVA. */
5626 bool dw_align; /* double-word align. */
5627 enum machine_mode ag_mode = VOIDmode;
5628 int nregs;
5629 enum machine_mode mode;
5630
5631 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5632 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5633 HOST_WIDE_INT size, rsize, adjust, align;
5634 tree t, u, cond1, cond2;
5635
5636 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5637 if (indirect_p)
5638 type = build_pointer_type (type);
5639
5640 mode = TYPE_MODE (type);
5641
5642 f_stack = TYPE_FIELDS (va_list_type_node);
5643 f_grtop = DECL_CHAIN (f_stack);
5644 f_vrtop = DECL_CHAIN (f_grtop);
5645 f_groff = DECL_CHAIN (f_vrtop);
5646 f_vroff = DECL_CHAIN (f_groff);
5647
5648 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5649 f_stack, NULL_TREE);
5650 size = int_size_in_bytes (type);
5651 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5652
5653 dw_align = false;
5654 adjust = 0;
5655 if (aarch64_vfp_is_call_or_return_candidate (mode,
5656 type,
5657 &ag_mode,
5658 &nregs,
5659 &is_ha))
5660 {
5661 /* TYPE passed in fp/simd registers. */
5662 if (TARGET_GENERAL_REGS_ONLY)
5663 sorry ("%qs and floating point or vector arguments",
5664 "-mgeneral-regs-only");
5665
5666 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5667 unshare_expr (valist), f_vrtop, NULL_TREE);
5668 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5669 unshare_expr (valist), f_vroff, NULL_TREE);
5670
5671 rsize = nregs * UNITS_PER_VREG;
5672
5673 if (is_ha)
5674 {
5675 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5676 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5677 }
5678 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5679 && size < UNITS_PER_VREG)
5680 {
5681 adjust = UNITS_PER_VREG - size;
5682 }
5683 }
5684 else
5685 {
5686 /* TYPE passed in general registers. */
5687 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5688 unshare_expr (valist), f_grtop, NULL_TREE);
5689 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5690 unshare_expr (valist), f_groff, NULL_TREE);
5691 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5692 nregs = rsize / UNITS_PER_WORD;
5693
5694 if (align > 8)
5695 dw_align = true;
5696
5697 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5698 && size < UNITS_PER_WORD)
5699 {
5700 adjust = UNITS_PER_WORD - size;
5701 }
5702 }
5703
5704 /* Get a local temporary for the field value. */
5705 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5706
5707 /* Emit code to branch if off >= 0. */
5708 t = build2 (GE_EXPR, boolean_type_node, off,
5709 build_int_cst (TREE_TYPE (off), 0));
5710 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5711
5712 if (dw_align)
5713 {
5714 /* Emit: offs = (offs + 15) & -16. */
5715 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5716 build_int_cst (TREE_TYPE (off), 15));
5717 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5718 build_int_cst (TREE_TYPE (off), -16));
5719 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5720 }
5721 else
5722 roundup = NULL;
5723
5724 /* Update ap.__[g|v]r_offs */
5725 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5726 build_int_cst (TREE_TYPE (off), rsize));
5727 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5728
5729 /* String up. */
5730 if (roundup)
5731 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5732
5733 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5734 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5735 build_int_cst (TREE_TYPE (f_off), 0));
5736 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5737
5738 /* String up: make sure the assignment happens before the use. */
5739 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5740 COND_EXPR_ELSE (cond1) = t;
5741
5742 /* Prepare the trees handling the argument that is passed on the stack;
5743 the top level node will store in ON_STACK. */
5744 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5745 if (align > 8)
5746 {
5747 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5748 t = fold_convert (intDI_type_node, arg);
5749 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5750 build_int_cst (TREE_TYPE (t), 15));
5751 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5752 build_int_cst (TREE_TYPE (t), -16));
5753 t = fold_convert (TREE_TYPE (arg), t);
5754 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5755 }
5756 else
5757 roundup = NULL;
5758 /* Advance ap.__stack */
5759 t = fold_convert (intDI_type_node, arg);
5760 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5761 build_int_cst (TREE_TYPE (t), size + 7));
5762 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5763 build_int_cst (TREE_TYPE (t), -8));
5764 t = fold_convert (TREE_TYPE (arg), t);
5765 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5766 /* String up roundup and advance. */
5767 if (roundup)
5768 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5769 /* String up with arg */
5770 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5771 /* Big-endianness related address adjustment. */
5772 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5773 && size < UNITS_PER_WORD)
5774 {
5775 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5776 size_int (UNITS_PER_WORD - size));
5777 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5778 }
5779
5780 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5781 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5782
5783 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5784 t = off;
5785 if (adjust)
5786 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5787 build_int_cst (TREE_TYPE (off), adjust));
5788
5789 t = fold_convert (sizetype, t);
5790 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5791
5792 if (is_ha)
5793 {
5794 /* type ha; // treat as "struct {ftype field[n];}"
5795 ... [computing offs]
5796 for (i = 0; i <nregs; ++i, offs += 16)
5797 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5798 return ha; */
5799 int i;
5800 tree tmp_ha, field_t, field_ptr_t;
5801
5802 /* Declare a local variable. */
5803 tmp_ha = create_tmp_var_raw (type, "ha");
5804 gimple_add_tmp_var (tmp_ha);
5805
5806 /* Establish the base type. */
5807 switch (ag_mode)
5808 {
5809 case SFmode:
5810 field_t = float_type_node;
5811 field_ptr_t = float_ptr_type_node;
5812 break;
5813 case DFmode:
5814 field_t = double_type_node;
5815 field_ptr_t = double_ptr_type_node;
5816 break;
5817 case TFmode:
5818 field_t = long_double_type_node;
5819 field_ptr_t = long_double_ptr_type_node;
5820 break;
5821 /* The half precision and quad precision are not fully supported yet. Enable
5822 the following code after the support is complete. Need to find the correct
5823 type node for __fp16 *. */
5824 #if 0
5825 case HFmode:
5826 field_t = float_type_node;
5827 field_ptr_t = float_ptr_type_node;
5828 break;
5829 #endif
5830 case V2SImode:
5831 case V4SImode:
5832 {
5833 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5834 field_t = build_vector_type_for_mode (innertype, ag_mode);
5835 field_ptr_t = build_pointer_type (field_t);
5836 }
5837 break;
5838 default:
5839 gcc_assert (0);
5840 }
5841
5842 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area). */
5843 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5844 addr = t;
5845 t = fold_convert (field_ptr_t, addr);
5846 t = build2 (MODIFY_EXPR, field_t,
5847 build1 (INDIRECT_REF, field_t, tmp_ha),
5848 build1 (INDIRECT_REF, field_t, t));
5849
5850 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5851 for (i = 1; i < nregs; ++i)
5852 {
5853 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5854 u = fold_convert (field_ptr_t, addr);
5855 u = build2 (MODIFY_EXPR, field_t,
5856 build2 (MEM_REF, field_t, tmp_ha,
5857 build_int_cst (field_ptr_t,
5858 (i *
5859 int_size_in_bytes (field_t)))),
5860 build1 (INDIRECT_REF, field_t, u));
5861 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5862 }
5863
5864 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5865 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5866 }
5867
5868 COND_EXPR_ELSE (cond2) = t;
5869 addr = fold_convert (build_pointer_type (type), cond1);
5870 addr = build_va_arg_indirect_ref (addr);
5871
5872 if (indirect_p)
5873 addr = build_va_arg_indirect_ref (addr);
5874
5875 return addr;
5876 }
5877
5878 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5879
5880 static void
5881 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5882 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5883 int no_rtl)
5884 {
5885 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5886 CUMULATIVE_ARGS local_cum;
5887 int gr_saved, vr_saved;
5888
5889 /* The caller has advanced CUM up to, but not beyond, the last named
5890 argument. Advance a local copy of CUM past the last "real" named
5891 argument, to find out how many registers are left over. */
5892 local_cum = *cum;
5893 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
5894
5895 /* Find out how many registers we need to save. */
5896 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5897 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5898
5899 if (TARGET_GENERAL_REGS_ONLY)
5900 {
5901 if (local_cum.aapcs_nvrn > 0)
5902 sorry ("%qs and floating point or vector arguments",
5903 "-mgeneral-regs-only");
5904 vr_saved = 0;
5905 }
5906
5907 if (!no_rtl)
5908 {
5909 if (gr_saved > 0)
5910 {
5911 rtx ptr, mem;
5912
5913 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5914 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5915 - gr_saved * UNITS_PER_WORD);
5916 mem = gen_frame_mem (BLKmode, ptr);
5917 set_mem_alias_set (mem, get_varargs_alias_set ());
5918
5919 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5920 mem, gr_saved);
5921 }
5922 if (vr_saved > 0)
5923 {
5924 /* We can't use move_block_from_reg, because it will use
5925 the wrong mode, storing D regs only. */
5926 enum machine_mode mode = TImode;
5927 int off, i;
5928
5929 /* Set OFF to the offset from virtual_incoming_args_rtx of
5930 the first vector register. The VR save area lies below
5931 the GR one, and is aligned to 16 bytes. */
5932 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5933 STACK_BOUNDARY / BITS_PER_UNIT);
5934 off -= vr_saved * UNITS_PER_VREG;
5935
5936 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5937 {
5938 rtx ptr, mem;
5939
5940 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5941 mem = gen_frame_mem (mode, ptr);
5942 set_mem_alias_set (mem, get_varargs_alias_set ());
5943 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5944 off += UNITS_PER_VREG;
5945 }
5946 }
5947 }
5948
5949 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5950 any complication of having crtl->args.pretend_args_size changed. */
5951 cfun->machine->saved_varargs_size
5952 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5953 STACK_BOUNDARY / BITS_PER_UNIT)
5954 + vr_saved * UNITS_PER_VREG);
5955 }
5956
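/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  When the FP/SIMD unit is
   not available (!TARGET_FLOAT), mark every vector register as fixed so
   that the register allocator never uses it.  */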
5957 static void
5958 aarch64_conditional_register_usage (void)
5959 {
5960 int i;
5961 if (!TARGET_FLOAT)
5962 {
5963 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5964 {
5965 fixed_regs[i] = 1;
5966 call_used_regs[i] = 1;
5967 }
5968 }
5969 }
5970
5971 /* Walk down the type tree of TYPE counting consecutive base elements.
5972 If *MODEP is VOIDmode, then set it to the first valid floating point
5973 type. If a non-floating point type is found, or if a floating point
5974 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5975 otherwise return the count in the sub-tree. */
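/* For example, given
     struct hfa { double a, b, c; };
   the walk below finds three consecutive DFmode elements and returns 3,
   whereas a structure mixing double and float fields fails the *MODEP
   check and yields -1.  */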
5976 static int
5977 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5978 {
5979 enum machine_mode mode;
5980 HOST_WIDE_INT size;
5981
5982 switch (TREE_CODE (type))
5983 {
5984 case REAL_TYPE:
5985 mode = TYPE_MODE (type);
5986 if (mode != DFmode && mode != SFmode && mode != TFmode)
5987 return -1;
5988
5989 if (*modep == VOIDmode)
5990 *modep = mode;
5991
5992 if (*modep == mode)
5993 return 1;
5994
5995 break;
5996
5997 case COMPLEX_TYPE:
5998 mode = TYPE_MODE (TREE_TYPE (type));
5999 if (mode != DFmode && mode != SFmode && mode != TFmode)
6000 return -1;
6001
6002 if (*modep == VOIDmode)
6003 *modep = mode;
6004
6005 if (*modep == mode)
6006 return 2;
6007
6008 break;
6009
6010 case VECTOR_TYPE:
6011 /* Use V2SImode and V4SImode as representatives of all 64-bit
6012 and 128-bit vector types. */
6013 size = int_size_in_bytes (type);
6014 switch (size)
6015 {
6016 case 8:
6017 mode = V2SImode;
6018 break;
6019 case 16:
6020 mode = V4SImode;
6021 break;
6022 default:
6023 return -1;
6024 }
6025
6026 if (*modep == VOIDmode)
6027 *modep = mode;
6028
6029 /* Vector modes are considered to be opaque: two vectors are
6030 equivalent for the purposes of being homogeneous aggregates
6031 if they are the same size. */
6032 if (*modep == mode)
6033 return 1;
6034
6035 break;
6036
6037 case ARRAY_TYPE:
6038 {
6039 int count;
6040 tree index = TYPE_DOMAIN (type);
6041
6042 /* Can't handle incomplete types. */
6043 if (!COMPLETE_TYPE_P (type))
6044 return -1;
6045
6046 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6047 if (count == -1
6048 || !index
6049 || !TYPE_MAX_VALUE (index)
6050 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6051 || !TYPE_MIN_VALUE (index)
6052 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6053 || count < 0)
6054 return -1;
6055
6056 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6057 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6058
6059 /* There must be no padding. */
6060 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6061 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6062 != count * GET_MODE_BITSIZE (*modep)))
6063 return -1;
6064
6065 return count;
6066 }
6067
6068 case RECORD_TYPE:
6069 {
6070 int count = 0;
6071 int sub_count;
6072 tree field;
6073
6074 /* Can't handle incomplete types. */
6075 if (!COMPLETE_TYPE_P (type))
6076 return -1;
6077
6078 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6079 {
6080 if (TREE_CODE (field) != FIELD_DECL)
6081 continue;
6082
6083 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6084 if (sub_count < 0)
6085 return -1;
6086 count += sub_count;
6087 }
6088
6089 /* There must be no padding. */
6090 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6091 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6092 != count * GET_MODE_BITSIZE (*modep)))
6093 return -1;
6094
6095 return count;
6096 }
6097
6098 case UNION_TYPE:
6099 case QUAL_UNION_TYPE:
6100 {
6101 /* These aren't very interesting except in a degenerate case. */
6102 int count = 0;
6103 int sub_count;
6104 tree field;
6105
6106 /* Can't handle incomplete types. */
6107 if (!COMPLETE_TYPE_P (type))
6108 return -1;
6109
6110 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6111 {
6112 if (TREE_CODE (field) != FIELD_DECL)
6113 continue;
6114
6115 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6116 if (sub_count < 0)
6117 return -1;
6118 count = count > sub_count ? count : sub_count;
6119 }
6120
6121 /* There must be no padding. */
6122 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6123 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6124 != count * GET_MODE_BITSIZE (*modep)))
6125 return -1;
6126
6127 return count;
6128 }
6129
6130 default:
6131 break;
6132 }
6133
6134 return -1;
6135 }
6136
6137 /* Return true if we use LRA instead of reload pass. */
6138 static bool
6139 aarch64_lra_p (void)
6140 {
6141 return aarch64_lra_flag;
6142 }
6143
6144 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6145 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6146 array types. The C99 floating-point complex types are also considered
6147 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6148 types, which are GCC extensions and out of the scope of AAPCS64, are
6149 treated as composite types here as well.
6150
6151 Note that MODE itself is not sufficient in determining whether a type
6152 is such a composite type or not. This is because
6153 stor-layout.c:compute_record_mode may have already changed the MODE
6154 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6155 structure with only one field may have its MODE set to the mode of the
6156 field. Also an integer mode whose size matches the size of the
6157 RECORD_TYPE type may be used to substitute the original mode
6158 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6159 solely relied on. */
6160
6161 static bool
6162 aarch64_composite_type_p (const_tree type,
6163 enum machine_mode mode)
6164 {
6165 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6166 return true;
6167
6168 if (mode == BLKmode
6169 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6170 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6171 return true;
6172
6173 return false;
6174 }
6175
6176 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6177 type as described in AAPCS64 \S 4.1.2.
6178
6179 See the comment above aarch64_composite_type_p for the notes on MODE. */
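/* For instance, the 8-byte int32x2_t and 16-byte float32x4_t AdvSIMD types
   are short vectors in this sense; vector types of any other size are
   not.  */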
6180
6181 static bool
6182 aarch64_short_vector_p (const_tree type,
6183 enum machine_mode mode)
6184 {
6185 HOST_WIDE_INT size = -1;
6186
6187 if (type && TREE_CODE (type) == VECTOR_TYPE)
6188 size = int_size_in_bytes (type);
6189 else if (!aarch64_composite_type_p (type, mode)
6190 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6191 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6192 size = GET_MODE_SIZE (mode);
6193
6194 return size == 8 || size == 16;
6195 }
6196
6197 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6198 shall be passed or returned in simd/fp register(s) (providing these
6199 parameter passing registers are available).
6200
6201 Upon successful return, *COUNT returns the number of needed registers,
6202 *BASE_MODE returns the mode of the individual register and when IS_HA
6203 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6204 floating-point aggregate or a homogeneous short-vector aggregate. */
6205
6206 static bool
6207 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6208 const_tree type,
6209 enum machine_mode *base_mode,
6210 int *count,
6211 bool *is_ha)
6212 {
6213 enum machine_mode new_mode = VOIDmode;
6214 bool composite_p = aarch64_composite_type_p (type, mode);
6215
6216 if (is_ha != NULL) *is_ha = false;
6217
6218 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6219 || aarch64_short_vector_p (type, mode))
6220 {
6221 *count = 1;
6222 new_mode = mode;
6223 }
6224 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6225 {
6226 if (is_ha != NULL) *is_ha = true;
6227 *count = 2;
6228 new_mode = GET_MODE_INNER (mode);
6229 }
6230 else if (type && composite_p)
6231 {
6232 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6233
6234 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6235 {
6236 if (is_ha != NULL) *is_ha = true;
6237 *count = ag_count;
6238 }
6239 else
6240 return false;
6241 }
6242 else
6243 return false;
6244
6245 *base_mode = new_mode;
6246 return true;
6247 }
6248
6249 /* Implement TARGET_STRUCT_VALUE_RTX. */
6250
6251 static rtx
6252 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6253 int incoming ATTRIBUTE_UNUSED)
6254 {
6255 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6256 }
6257
6258 /* Implements target hook vector_mode_supported_p. */
6259 static bool
6260 aarch64_vector_mode_supported_p (enum machine_mode mode)
6261 {
6262 if (TARGET_SIMD
6263 && (mode == V4SImode || mode == V8HImode
6264 || mode == V16QImode || mode == V2DImode
6265 || mode == V2SImode || mode == V4HImode
6266 || mode == V8QImode || mode == V2SFmode
6267 || mode == V4SFmode || mode == V2DFmode))
6268 return true;
6269
6270 return false;
6271 }
6272
6273 /* Return appropriate SIMD container
6274 for MODE within a vector of WIDTH bits. */
6275 static enum machine_mode
6276 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6277 {
6278 gcc_assert (width == 64 || width == 128);
6279 if (TARGET_SIMD)
6280 {
6281 if (width == 128)
6282 switch (mode)
6283 {
6284 case DFmode:
6285 return V2DFmode;
6286 case SFmode:
6287 return V4SFmode;
6288 case SImode:
6289 return V4SImode;
6290 case HImode:
6291 return V8HImode;
6292 case QImode:
6293 return V16QImode;
6294 case DImode:
6295 return V2DImode;
6296 default:
6297 break;
6298 }
6299 else
6300 switch (mode)
6301 {
6302 case SFmode:
6303 return V2SFmode;
6304 case SImode:
6305 return V2SImode;
6306 case HImode:
6307 return V4HImode;
6308 case QImode:
6309 return V8QImode;
6310 default:
6311 break;
6312 }
6313 }
6314 return word_mode;
6315 }
6316
6317 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6318 static enum machine_mode
6319 aarch64_preferred_simd_mode (enum machine_mode mode)
6320 {
6321 return aarch64_simd_container_mode (mode, 128);
6322 }
6323
6324 /* Return the bitmask of possible vector sizes for the vectorizer
6325 to iterate over. */
6326 static unsigned int
6327 aarch64_autovectorize_vector_sizes (void)
6328 {
6329 return (16 | 8);
6330 }
6331
6332 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6333 vector types in order to conform to the AAPCS64 (see "Procedure
6334 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6335 qualify for emission with the mangled names defined in that document,
6336 a vector type must not only be of the correct mode but also be
6337 composed of AdvSIMD vector element types (e.g.
6338 __builtin_aarch64_simd_qi); these types are registered by
6339 aarch64_init_simd_builtins (). In other words, vector types defined
6340 in other ways e.g. via vector_size attribute will get default
6341 mangled names. */
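/* For example, per the table below, a 64-bit int8x8_t vector (V8QImode
   built on __builtin_aarch64_simd_qi) is emitted with the mangled name
   "10__Int8x8_t", so a C++ declaration such as void f (int8x8_t) mangles
   as _Z1f10__Int8x8_t rather than with the default vector mangling.  */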
6342 typedef struct
6343 {
6344 enum machine_mode mode;
6345 const char *element_type_name;
6346 const char *mangled_name;
6347 } aarch64_simd_mangle_map_entry;
6348
6349 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6350 /* 64-bit containerized types. */
6351 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6352 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6353 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6354 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6355 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6356 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6357 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6358 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6359 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6360 /* 128-bit containerized types. */
6361 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6362 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6363 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6364 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6365 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6366 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6367 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6368 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6369 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6370 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6371 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6372 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6373 { VOIDmode, NULL, NULL }
6374 };
6375
6376 /* Implement TARGET_MANGLE_TYPE. */
6377
6378 static const char *
6379 aarch64_mangle_type (const_tree type)
6380 {
6381 /* The AArch64 ABI documents say that "__va_list" has to be
6382 mangled as if it were in the "std" namespace. */
6383 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6384 return "St9__va_list";
6385
6386 /* Check the mode of the vector type, and the name of the vector
6387 element type, against the table. */
6388 if (TREE_CODE (type) == VECTOR_TYPE)
6389 {
6390 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6391
6392 while (pos->mode != VOIDmode)
6393 {
6394 tree elt_type = TREE_TYPE (type);
6395
6396 if (pos->mode == TYPE_MODE (type)
6397 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6398 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6399 pos->element_type_name))
6400 return pos->mangled_name;
6401
6402 pos++;
6403 }
6404 }
6405
6406 /* Use the default mangling. */
6407 return NULL;
6408 }
6409
6410 /* Return the equivalent letter for size. */
6411 static char
6412 sizetochar (int size)
6413 {
6414 switch (size)
6415 {
6416 case 64: return 'd';
6417 case 32: return 's';
6418 case 16: return 'h';
6419 case 8 : return 'b';
6420 default: gcc_unreachable ();
6421 }
6422 }
6423
6424 /* Return true iff X is a uniform vector of floating-point
6425 constants, and the constant can be represented in
6426 quarter-precision form. Note, as aarch64_float_const_representable_p
6427 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6428 static bool
6429 aarch64_vect_float_const_representable_p (rtx x)
6430 {
6431 int i = 0;
6432 REAL_VALUE_TYPE r0, ri;
6433 rtx x0, xi;
6434
6435 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6436 return false;
6437
6438 x0 = CONST_VECTOR_ELT (x, 0);
6439 if (!CONST_DOUBLE_P (x0))
6440 return false;
6441
6442 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6443
6444 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6445 {
6446 xi = CONST_VECTOR_ELT (x, i);
6447 if (!CONST_DOUBLE_P (xi))
6448 return false;
6449
6450 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6451 if (!REAL_VALUES_EQUAL (r0, ri))
6452 return false;
6453 }
6454
6455 return aarch64_float_const_representable_p (x0);
6456 }
6457
6458 /* Return true if OP is a valid AdvSIMD immediate for vector mode MODE
6458 (with its bytes inverted first if INVERSE), filling in *INFO with the
6458 encoding details when INFO is nonnull; return false otherwise. */
6459 bool
6460 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6461 struct simd_immediate_info *info)
6462 {
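/* The CHECK macro below evaluates TEST at every STRIDE-th byte position of
   the splatted byte vector; if TEST holds at every position it records the
   immediate class CLASS, the element size ELSIZE in bits, the left shift
   SHIFT and the MVN flag NEG in immtype/elsize/eshift/emvn, then breaks
   out of the enclosing do-while.  */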
6463 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6464 matches = 1; \
6465 for (i = 0; i < idx; i += (STRIDE)) \
6466 if (!(TEST)) \
6467 matches = 0; \
6468 if (matches) \
6469 { \
6470 immtype = (CLASS); \
6471 elsize = (ELSIZE); \
6472 eshift = (SHIFT); \
6473 emvn = (NEG); \
6474 break; \
6475 }
6476
6477 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6478 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6479 unsigned char bytes[16];
6480 int immtype = -1, matches;
6481 unsigned int invmask = inverse ? 0xff : 0;
6482 int eshift, emvn;
6483
6484 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6485 {
6486 if (! (aarch64_simd_imm_zero_p (op, mode)
6487 || aarch64_vect_float_const_representable_p (op)))
6488 return false;
6489
6490 if (info)
6491 {
6492 info->value = CONST_VECTOR_ELT (op, 0);
6493 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6494 info->mvn = false;
6495 info->shift = 0;
6496 }
6497
6498 return true;
6499 }
6500
6501 /* Splat vector constant out into a byte vector. */
6502 for (i = 0; i < n_elts; i++)
6503 {
6504 rtx el = CONST_VECTOR_ELT (op, i);
6505 unsigned HOST_WIDE_INT elpart;
6506 unsigned int part, parts;
6507
6508 if (GET_CODE (el) == CONST_INT)
6509 {
6510 elpart = INTVAL (el);
6511 parts = 1;
6512 }
6513 else if (GET_CODE (el) == CONST_DOUBLE)
6514 {
6515 elpart = CONST_DOUBLE_LOW (el);
6516 parts = 2;
6517 }
6518 else
6519 gcc_unreachable ();
6520
6521 for (part = 0; part < parts; part++)
6522 {
6523 unsigned int byte;
6524 for (byte = 0; byte < innersize; byte++)
6525 {
6526 bytes[idx++] = (elpart & 0xff) ^ invmask;
6527 elpart >>= BITS_PER_UNIT;
6528 }
6529 if (GET_CODE (el) == CONST_DOUBLE)
6530 elpart = CONST_DOUBLE_HIGH (el);
6531 }
6532 }
6533
6534 /* Sanity check. */
6535 gcc_assert (idx == GET_MODE_SIZE (mode));
6536
6537 do
6538 {
6539 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6540 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6541
6542 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6543 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6544
6545 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6546 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6547
6548 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6549 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6550
6551 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6552
6553 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6554
6555 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6556 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6557
6558 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6559 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6560
6561 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6562 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6563
6564 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6565 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6566
6567 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6568
6569 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6570
6571 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6572 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6573
6574 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6575 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6576
6577 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6578 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6579
6580 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6581 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6582
6583 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6584
6585 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6586 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6587 }
6588 while (0);
6589
6590 if (immtype == -1)
6591 return false;
6592
6593 if (info)
6594 {
6595 info->element_width = elsize;
6596 info->mvn = emvn != 0;
6597 info->shift = eshift;
6598
6599 unsigned HOST_WIDE_INT imm = 0;
6600
6601 if (immtype >= 12 && immtype <= 15)
6602 info->msl = true;
6603
6604 /* Un-invert bytes of recognized vector, if necessary. */
6605 if (invmask != 0)
6606 for (i = 0; i < idx; i++)
6607 bytes[i] ^= invmask;
6608
6609 if (immtype == 17)
6610 {
6611 /* FIXME: Broken on 32-bit H_W_I hosts. */
6612 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6613
6614 for (i = 0; i < 8; i++)
6615 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6616 << (i * BITS_PER_UNIT);
6617
6618
6619 info->value = GEN_INT (imm);
6620 }
6621 else
6622 {
6623 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6624 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6625
6626 /* Construct 'abcdefgh' because the assembler cannot handle
6627 generic constants. */
6628 if (info->mvn)
6629 imm = ~imm;
6630 imm = (imm >> info->shift) & 0xff;
6631 info->value = GEN_INT (imm);
6632 }
6633 }
6634
6635 return true;
6636 #undef CHECK
6637 }
6638
6639 static bool
6640 aarch64_const_vec_all_same_int_p (rtx x,
6641 HOST_WIDE_INT minval,
6642 HOST_WIDE_INT maxval)
6643 {
6644 HOST_WIDE_INT firstval;
6645 int count, i;
6646
6647 if (GET_CODE (x) != CONST_VECTOR
6648 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6649 return false;
6650
6651 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6652 if (firstval < minval || firstval > maxval)
6653 return false;
6654
6655 count = CONST_VECTOR_NUNITS (x);
6656 for (i = 1; i < count; i++)
6657 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6658 return false;
6659
6660 return true;
6661 }
6662
6663 /* Check that immediate shift constants are within range. */
6664 bool
6665 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6666 {
6667 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6668 if (left)
6669 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6670 else
6671 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6672 }
6673
6674 /* Return true if X is a uniform vector where all elements
6675 are either the floating-point constant 0.0 or the
6676 integer constant 0. */
6677 bool
6678 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6679 {
6680 return x == CONST0_RTX (mode);
6681 }
6682
6683 bool
6684 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6685 {
6686 HOST_WIDE_INT imm = INTVAL (x);
6687 int i;
6688
6689 for (i = 0; i < 8; i++)
6690 {
6691 unsigned int byte = imm & 0xff;
6692 if (byte != 0xff && byte != 0)
6693 return false;
6694 imm >>= 8;
6695 }
6696
6697 return true;
6698 }
6699
6700 bool
6701 aarch64_mov_operand_p (rtx x,
6702 enum aarch64_symbol_context context,
6703 enum machine_mode mode)
6704 {
6705 if (GET_CODE (x) == HIGH
6706 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6707 return true;
6708
6709 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6710 return true;
6711
6712 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6713 return true;
6714
6715 return aarch64_classify_symbolic_expression (x, context)
6716 == SYMBOL_TINY_ABSOLUTE;
6717 }
6718
6719 /* Return a const_int vector of VAL. */
6720 rtx
6721 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6722 {
6723 int nunits = GET_MODE_NUNITS (mode);
6724 rtvec v = rtvec_alloc (nunits);
6725 int i;
6726
6727 for (i = 0; i < nunits; i++)
6728 RTVEC_ELT (v, i) = GEN_INT (val);
6729
6730 return gen_rtx_CONST_VECTOR (mode, v);
6731 }
6732
6733 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6734
6735 bool
6736 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6737 {
6738 enum machine_mode vmode;
6739
6740 gcc_assert (!VECTOR_MODE_P (mode));
6741 vmode = aarch64_preferred_simd_mode (mode);
6742 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6743 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6744 }
6745
6746 /* Construct and return a PARALLEL RTX vector containing the lane
6746 indices of the HIGH (upper) or LOW (lower) half of vector mode MODE. */
6747 rtx
6748 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6749 {
6750 int nunits = GET_MODE_NUNITS (mode);
6751 rtvec v = rtvec_alloc (nunits / 2);
6752 int base = high ? nunits / 2 : 0;
6753 rtx t1;
6754 int i;
6755
6756 for (i = 0; i < nunits / 2; i++)
6757 RTVEC_ELT (v, i) = GEN_INT (base + i);
6758
6759 t1 = gen_rtx_PARALLEL (mode, v);
6760 return t1;
6761 }
6762
6763 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6764 HIGH (exclusive). */
6765 void
6766 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6767 {
6768 HOST_WIDE_INT lane;
6769 gcc_assert (GET_CODE (operand) == CONST_INT);
6770 lane = INTVAL (operand);
6771
6772 if (lane < low || lane >= high)
6773 error ("lane out of range");
6774 }
6775
6776 void
6777 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6778 {
6779 gcc_assert (GET_CODE (operand) == CONST_INT);
6780 HOST_WIDE_INT lane = INTVAL (operand);
6781
6782 if (lane < low || lane >= high)
6783 error ("constant out of range");
6784 }
6785
6786 /* Emit code to reinterpret one AdvSIMD type as another,
6787 without altering bits. */
6788 void
6789 aarch64_simd_reinterpret (rtx dest, rtx src)
6790 {
6791 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6792 }
6793
6794 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6795 registers). */
6796 void
6797 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6798 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6799 rtx op1)
6800 {
6801 rtx mem = gen_rtx_MEM (mode, destaddr);
6802 rtx tmp1 = gen_reg_rtx (mode);
6803 rtx tmp2 = gen_reg_rtx (mode);
6804
6805 emit_insn (intfn (tmp1, op1, tmp2));
6806
6807 emit_move_insn (mem, tmp1);
6808 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6809 emit_move_insn (mem, tmp2);
6810 }
6811
6812 /* Return TRUE if OP is a valid vector addressing mode. */
6813 bool
6814 aarch64_simd_mem_operand_p (rtx op)
6815 {
6816 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6817 || GET_CODE (XEXP (op, 0)) == REG);
6818 }
6819
6820 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6821 not to early-clobber SRC registers in the process.
6822
6823 We assume that the operands described by SRC and DEST represent a
6824 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6825 number of components into which the copy has been decomposed. */
6826 void
6827 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6828 rtx *src, unsigned int count)
6829 {
6830 unsigned int i;
6831
6832 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6833 || REGNO (operands[0]) < REGNO (operands[1]))
6834 {
6835 for (i = 0; i < count; i++)
6836 {
6837 operands[2 * i] = dest[i];
6838 operands[2 * i + 1] = src[i];
6839 }
6840 }
6841 else
6842 {
6843 for (i = 0; i < count; i++)
6844 {
6845 operands[2 * i] = dest[count - i - 1];
6846 operands[2 * i + 1] = src[count - i - 1];
6847 }
6848 }
6849 }
6850
6851 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6852 one of VSTRUCT modes: OI, CI or XI. */
6853 int
6854 aarch64_simd_attr_length_move (rtx insn)
6855 {
6856 enum machine_mode mode;
6857
6858 extract_insn_cached (insn);
6859
6860 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6861 {
6862 mode = GET_MODE (recog_data.operand[0]);
6863 switch (mode)
6864 {
6865 case OImode:
6866 return 8;
6867 case CImode:
6868 return 12;
6869 case XImode:
6870 return 16;
6871 default:
6872 gcc_unreachable ();
6873 }
6874 }
6875 return 4;
6876 }
6877
6878 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6879 alignment of a vector to 128 bits. */
6880 static HOST_WIDE_INT
6881 aarch64_simd_vector_alignment (const_tree type)
6882 {
6883 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
6884 return MIN (align, 128);
6885 }
6886
6887 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6888 static bool
6889 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6890 {
6891 if (is_packed)
6892 return false;
6893
6894 /* We guarantee alignment for vectors up to 128-bits. */
6895 if (tree_int_cst_compare (TYPE_SIZE (type),
6896 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6897 return false;
6898
6899 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6900 return true;
6901 }
6902
6903 /* If VALS is a vector constant that can be loaded into a register
6904 using DUP, generate instructions to do so and return an RTX to
6905 assign to the register. Otherwise return NULL_RTX. */
6906 static rtx
6907 aarch64_simd_dup_constant (rtx vals)
6908 {
6909 enum machine_mode mode = GET_MODE (vals);
6910 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6911 int n_elts = GET_MODE_NUNITS (mode);
6912 bool all_same = true;
6913 rtx x;
6914 int i;
6915
6916 if (GET_CODE (vals) != CONST_VECTOR)
6917 return NULL_RTX;
6918
6919 for (i = 1; i < n_elts; ++i)
6920 {
6921 x = CONST_VECTOR_ELT (vals, i);
6922 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6923 all_same = false;
6924 }
6925
6926 if (!all_same)
6927 return NULL_RTX;
6928
6929 /* We can load this constant by using DUP and a constant in a
6930 single general-purpose register. This will be cheaper than a vector
6931 load. */
6932 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6933 return gen_rtx_VEC_DUPLICATE (mode, x);
6934 }
6935
6936
6937 /* Generate code to load VALS, which is a PARALLEL containing only
6938 constants (for vec_init) or CONST_VECTOR, efficiently into a
6939 register. Returns an RTX to copy into the register, or NULL_RTX
6940 for a PARALLEL that can not be converted into a CONST_VECTOR. */
6941 static rtx
6942 aarch64_simd_make_constant (rtx vals)
6943 {
6944 enum machine_mode mode = GET_MODE (vals);
6945 rtx const_dup;
6946 rtx const_vec = NULL_RTX;
6947 int n_elts = GET_MODE_NUNITS (mode);
6948 int n_const = 0;
6949 int i;
6950
6951 if (GET_CODE (vals) == CONST_VECTOR)
6952 const_vec = vals;
6953 else if (GET_CODE (vals) == PARALLEL)
6954 {
6955 /* A CONST_VECTOR must contain only CONST_INTs and
6956 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6957 Only store valid constants in a CONST_VECTOR. */
6958 for (i = 0; i < n_elts; ++i)
6959 {
6960 rtx x = XVECEXP (vals, 0, i);
6961 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6962 n_const++;
6963 }
6964 if (n_const == n_elts)
6965 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6966 }
6967 else
6968 gcc_unreachable ();
6969
6970 if (const_vec != NULL_RTX
6971 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
6972 /* Load using MOVI/MVNI. */
6973 return const_vec;
6974 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6975 /* Loaded using DUP. */
6976 return const_dup;
6977 else if (const_vec != NULL_RTX)
6978 /* Load from constant pool. We can not take advantage of single-cycle
6979 LD1 because we need a PC-relative addressing mode. */
6980 return const_vec;
6981 else
6982 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6983 We can not construct an initializer. */
6984 return NULL_RTX;
6985 }
6986
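/* Expand the vector initializer VALS into register TARGET: use MOVI/MVNI,
   DUP or a literal-pool load when all elements are constant, splat with
   DUP when all elements are equal, load the constant part and insert the
   single varying lane when exactly one element is non-constant, and
   otherwise build the vector in memory one element at a time.  */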
6987 void
6988 aarch64_expand_vector_init (rtx target, rtx vals)
6989 {
6990 enum machine_mode mode = GET_MODE (target);
6991 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6992 int n_elts = GET_MODE_NUNITS (mode);
6993 int n_var = 0, one_var = -1;
6994 bool all_same = true;
6995 rtx x, mem;
6996 int i;
6997
6998 x = XVECEXP (vals, 0, 0);
6999 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7000 n_var = 1, one_var = 0;
7001
7002 for (i = 1; i < n_elts; ++i)
7003 {
7004 x = XVECEXP (vals, 0, i);
7005 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7006 ++n_var, one_var = i;
7007
7008 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7009 all_same = false;
7010 }
7011
7012 if (n_var == 0)
7013 {
7014 rtx constant = aarch64_simd_make_constant (vals);
7015 if (constant != NULL_RTX)
7016 {
7017 emit_move_insn (target, constant);
7018 return;
7019 }
7020 }
7021
7022 /* Splat a single non-constant element if we can. */
7023 if (all_same)
7024 {
7025 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7026 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7027 return;
7028 }
7029
7030 /* One field is non-constant. Load constant then overwrite varying
7031 field. This is more efficient than using the stack. */
7032 if (n_var == 1)
7033 {
7034 rtx copy = copy_rtx (vals);
7035 rtx index = GEN_INT (one_var);
7036 enum insn_code icode;
7037
7038 /* Load constant part of vector, substitute neighboring value for
7039 varying element. */
7040 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7041 aarch64_expand_vector_init (target, copy);
7042
7043 /* Insert variable. */
7044 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7045 icode = optab_handler (vec_set_optab, mode);
7046 gcc_assert (icode != CODE_FOR_nothing);
7047 emit_insn (GEN_FCN (icode) (target, x, index));
7048 return;
7049 }
7050
7051 /* Construct the vector in memory one field at a time
7052 and load the whole vector. */
7053 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7054 for (i = 0; i < n_elts; i++)
7055 emit_move_insn (adjust_address_nv (mem, inner_mode,
7056 i * GET_MODE_SIZE (inner_mode)),
7057 XVECEXP (vals, 0, i));
7058 emit_move_insn (target, mem);
7059
7060 }
7061
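/* Implement TARGET_SHIFT_TRUNCATION_MASK.  Scalar shift counts are
   truncated to the width of the mode, so return GET_MODE_BITSIZE (mode) - 1
   for scalar modes; vector and vector-struct shifts do not truncate, so
   return 0 for them.  */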
7062 static unsigned HOST_WIDE_INT
7063 aarch64_shift_truncation_mask (enum machine_mode mode)
7064 {
7065 return
7066 (aarch64_vector_mode_supported_p (mode)
7067 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7068 }
7069
7070 #ifndef TLS_SECTION_ASM_FLAG
7071 #define TLS_SECTION_ASM_FLAG 'T'
7072 #endif
7073
7074 void
7075 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7076 tree decl ATTRIBUTE_UNUSED)
7077 {
7078 char flagchars[10], *f = flagchars;
7079
7080 /* If we have already declared this section, we can use an
7081 abbreviated form to switch back to it -- unless this section is
7082 part of a COMDAT group, in which case GAS requires the full
7083 declaration every time. */
7084 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7085 && (flags & SECTION_DECLARED))
7086 {
7087 fprintf (asm_out_file, "\t.section\t%s\n", name);
7088 return;
7089 }
7090
7091 if (!(flags & SECTION_DEBUG))
7092 *f++ = 'a';
7093 if (flags & SECTION_WRITE)
7094 *f++ = 'w';
7095 if (flags & SECTION_CODE)
7096 *f++ = 'x';
7097 if (flags & SECTION_SMALL)
7098 *f++ = 's';
7099 if (flags & SECTION_MERGE)
7100 *f++ = 'M';
7101 if (flags & SECTION_STRINGS)
7102 *f++ = 'S';
7103 if (flags & SECTION_TLS)
7104 *f++ = TLS_SECTION_ASM_FLAG;
7105 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7106 *f++ = 'G';
7107 *f = '\0';
7108
7109 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7110
7111 if (!(flags & SECTION_NOTYPE))
7112 {
7113 const char *type;
7114 const char *format;
7115
7116 if (flags & SECTION_BSS)
7117 type = "nobits";
7118 else
7119 type = "progbits";
7120
7121 #ifdef TYPE_OPERAND_FMT
7122 format = "," TYPE_OPERAND_FMT;
7123 #else
7124 format = ",@%s";
7125 #endif
7126
7127 fprintf (asm_out_file, format, type);
7128
7129 if (flags & SECTION_ENTSIZE)
7130 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7131 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7132 {
7133 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7134 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7135 else
7136 fprintf (asm_out_file, ",%s,comdat",
7137 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7138 }
7139 }
7140
7141 putc ('\n', asm_out_file);
7142 }
7143
7144 /* Select a format to encode pointers in exception handling data. */
7145 int
7146 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7147 {
7148 int type;
7149 switch (aarch64_cmodel)
7150 {
7151 case AARCH64_CMODEL_TINY:
7152 case AARCH64_CMODEL_TINY_PIC:
7153 case AARCH64_CMODEL_SMALL:
7154 case AARCH64_CMODEL_SMALL_PIC:
7155 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7156 for everything. */
7157 type = DW_EH_PE_sdata4;
7158 break;
7159 default:
7160 /* No assumptions here. 8-byte relocs required. */
7161 type = DW_EH_PE_sdata8;
7162 break;
7163 }
7164 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7165 }
7166
7167 /* Emit load exclusive. */
7168
7169 static void
7170 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7171 rtx mem, rtx model_rtx)
7172 {
7173 rtx (*gen) (rtx, rtx, rtx);
7174
7175 switch (mode)
7176 {
7177 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7178 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7179 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7180 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7181 default:
7182 gcc_unreachable ();
7183 }
7184
7185 emit_insn (gen (rval, mem, model_rtx));
7186 }
7187
7188 /* Emit store exclusive. */
7189
7190 static void
7191 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7192 rtx rval, rtx mem, rtx model_rtx)
7193 {
7194 rtx (*gen) (rtx, rtx, rtx, rtx);
7195
7196 switch (mode)
7197 {
7198 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7199 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7200 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7201 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7202 default:
7203 gcc_unreachable ();
7204 }
7205
7206 emit_insn (gen (bval, rval, mem, model_rtx));
7207 }
7208
7209 /* Mark the previous jump instruction as unlikely. */
7210
7211 static void
7212 aarch64_emit_unlikely_jump (rtx insn)
7213 {
7214 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7215
7216 insn = emit_jump_insn (insn);
7217 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7218 }
7219
7220 /* Expand a compare and swap pattern. */
7221
7222 void
7223 aarch64_expand_compare_and_swap (rtx operands[])
7224 {
7225 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7226 enum machine_mode mode, cmp_mode;
7227 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7228
7229 bval = operands[0];
7230 rval = operands[1];
7231 mem = operands[2];
7232 oldval = operands[3];
7233 newval = operands[4];
7234 is_weak = operands[5];
7235 mod_s = operands[6];
7236 mod_f = operands[7];
7237 mode = GET_MODE (mem);
7238 cmp_mode = mode;
7239
7240 /* Normally the succ memory model must be stronger than fail, but in the
7241 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7242 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7243
7244 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7245 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7246 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7247
7248 switch (mode)
7249 {
7250 case QImode:
7251 case HImode:
7252 /* For short modes, we're going to perform the comparison in SImode,
7253 so do the zero-extension now. */
7254 cmp_mode = SImode;
7255 rval = gen_reg_rtx (SImode);
7256 oldval = convert_modes (SImode, mode, oldval, true);
7257 /* Fall through. */
7258
7259 case SImode:
7260 case DImode:
7261 /* Force the value into a register if needed. */
7262 if (!aarch64_plus_operand (oldval, mode))
7263 oldval = force_reg (cmp_mode, oldval);
7264 break;
7265
7266 default:
7267 gcc_unreachable ();
7268 }
7269
7270 switch (mode)
7271 {
7272 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7273 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7274 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7275 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7276 default:
7277 gcc_unreachable ();
7278 }
7279
7280 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7281
7282 if (mode == QImode || mode == HImode)
7283 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7284
7285 x = gen_rtx_REG (CCmode, CC_REGNUM);
7286 x = gen_rtx_EQ (SImode, x, const0_rtx);
7287 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7288 }
7289
7290 /* Split a compare and swap pattern. */
7291
7292 void
7293 aarch64_split_compare_and_swap (rtx operands[])
7294 {
7295 rtx rval, mem, oldval, newval, scratch;
7296 enum machine_mode mode;
7297 bool is_weak;
7298 rtx label1, label2, x, cond;
7299
7300 rval = operands[0];
7301 mem = operands[1];
7302 oldval = operands[2];
7303 newval = operands[3];
7304 is_weak = (operands[4] != const0_rtx);
7305 scratch = operands[7];
7306 mode = GET_MODE (mem);
7307
7308 label1 = NULL_RTX;
7309 if (!is_weak)
7310 {
7311 label1 = gen_label_rtx ();
7312 emit_label (label1);
7313 }
7314 label2 = gen_label_rtx ();
7315
7316 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7317
7318 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7319 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7320 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7321 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7322 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7323
7324 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7325
7326 if (!is_weak)
7327 {
7328 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7329 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7330 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7331 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7332 }
7333 else
7334 {
7335 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7336 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7337 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7338 }
7339
7340 emit_label (label2);
7341 }
7342
7343 /* Split an atomic read-modify-write operation into a
7343 load-exclusive / store-exclusive retry loop. */
7344
7345 void
7346 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7347 rtx value, rtx model_rtx, rtx cond)
7348 {
7349 enum machine_mode mode = GET_MODE (mem);
7350 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7351 rtx label, x;
7352
7353 label = gen_label_rtx ();
7354 emit_label (label);
7355
7356 if (new_out)
7357 new_out = gen_lowpart (wmode, new_out);
7358 if (old_out)
7359 old_out = gen_lowpart (wmode, old_out);
7360 else
7361 old_out = new_out;
7362 value = simplify_gen_subreg (wmode, value, mode, 0);
7363
7364 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7365
7366 switch (code)
7367 {
7368 case SET:
7369 new_out = value;
7370 break;
7371
7372 case NOT:
7373 x = gen_rtx_AND (wmode, old_out, value);
7374 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7375 x = gen_rtx_NOT (wmode, new_out);
7376 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7377 break;
7378
7379 case MINUS:
7380 if (CONST_INT_P (value))
7381 {
7382 value = GEN_INT (-INTVAL (value));
7383 code = PLUS;
7384 }
7385 /* Fall through. */
7386
7387 default:
7388 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7389 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7390 break;
7391 }
7392
7393 aarch64_emit_store_exclusive (mode, cond, mem,
7394 gen_lowpart (mode, new_out), model_rtx);
7395
7396 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7397 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7398 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7399 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7400 }
7401
7402 static void
7403 aarch64_print_extension (void)
7404 {
7405 const struct aarch64_option_extension *opt = NULL;
7406
7407 for (opt = all_extensions; opt->name != NULL; opt++)
7408 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7409 asm_fprintf (asm_out_file, "+%s", opt->name);
7410
7411 asm_fprintf (asm_out_file, "\n");
7412 }
7413
7414 static void
7415 aarch64_start_file (void)
7416 {
7417 if (selected_arch)
7418 {
7419 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7420 aarch64_print_extension ();
7421 }
7422 else if (selected_cpu)
7423 {
7424 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7425 aarch64_print_extension ();
7426 }
7427 default_file_start ();
7428 }
7429
7430 /* Target hook for c_mode_for_suffix. */
7431 static enum machine_mode
7432 aarch64_c_mode_for_suffix (char suffix)
7433 {
7434 if (suffix == 'q')
7435 return TFmode;
7436
7437 return VOIDmode;
7438 }
7439
7440 /* We can only represent floating point constants which will fit in
7441 "quarter-precision" values. These values are characterised by
7442 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
7443 by:
7444
7445 (-1)^s * (n/16) * 2^r
7446
7447 Where:
7448 's' is the sign bit.
7449 'n' is an integer in the range 16 <= n <= 31.
7450 'r' is an integer in the range -3 <= r <= 4. */
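/* For example, with s = 0, n = 16 and r = 0 the value is 1.0; the smallest
   positive representable value is 16/16 * 2^-3 = 0.125 and the largest is
   31/16 * 2^4 = 31.0.  */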
7451
7452 /* Return true iff X can be represented by a quarter-precision
7453 floating point immediate operand. Note, we cannot represent 0.0. */
7454 bool
7455 aarch64_float_const_representable_p (rtx x)
7456 {
7457 /* This represents our current view of how many bits
7458 make up the mantissa. */
7459 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7460 int exponent;
7461 unsigned HOST_WIDE_INT mantissa, mask;
7462 HOST_WIDE_INT m1, m2;
7463 REAL_VALUE_TYPE r, m;
7464
7465 if (!CONST_DOUBLE_P (x))
7466 return false;
7467
7468 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7469
7470 /* We cannot represent infinities, NaNs or +/-zero. We won't
7471 know if we have +zero until we analyse the mantissa, but we
7472 can reject the other invalid values. */
7473 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7474 || REAL_VALUE_MINUS_ZERO (r))
7475 return false;
7476
7477 /* Extract exponent. */
7478 r = real_value_abs (&r);
7479 exponent = REAL_EXP (&r);
7480
7481 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7482 highest (sign) bit, with a fixed binary point at bit point_pos.
7483 m1 holds the low part of the mantissa, m2 the high part.
7484 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7485 bits for the mantissa, this can fail (low bits will be lost). */
7486 real_ldexp (&m, &r, point_pos - exponent);
7487 REAL_VALUE_TO_INT (&m1, &m2, m);
7488
7489 /* If the low part of the mantissa has bits set we cannot represent
7490 the value. */
7491 if (m1 != 0)
7492 return false;
7493 /* We have rejected the lower HOST_WIDE_INT, so update our
7494 understanding of how many bits lie in the mantissa and
7495 look only at the high HOST_WIDE_INT. */
7496 mantissa = m2;
7497 point_pos -= HOST_BITS_PER_WIDE_INT;
7498
7499 /* We can only represent values with a mantissa of the form 1.xxxx. */
7500 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7501 if ((mantissa & mask) != 0)
7502 return false;
7503
7504 /* Having filtered unrepresentable values, we may now remove all
7505 but the highest 5 bits. */
7506 mantissa >>= point_pos - 5;
7507
7508 /* We cannot represent the value 0.0, so reject it. This is handled
7509 elsewhere. */
7510 if (mantissa == 0)
7511 return false;
7512
7513 /* Then, as bit 4 is always set, we can mask it off, leaving
7514 the mantissa in the range [0, 15]. */
7515 mantissa &= ~(1 << 4);
7516 gcc_assert (mantissa <= 15);
7517
7518 /* GCC internally does not use IEEE754-like encoding (where normalized
7519 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7520 Our mantissa values are shifted 4 places to the left relative to
7521 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7522 by 5 places to correct for GCC's representation. */
7523 exponent = 5 - exponent;
7524
7525 return (exponent >= 0 && exponent <= 7);
7526 }
7527
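/* Output the assembly template for moving the immediate CONST_VECTOR, in
   vector mode MODE with a register width of WIDTH bits, using the
   MOVI/MVNI/FMOV encodings validated by aarch64_simd_valid_immediate.
   For example, a V4SI vector with every lane equal to 0x100 yields a
   template of the form "movi\t%0.4s, 0x1, lsl 8".  */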
7528 char*
7529 aarch64_output_simd_mov_immediate (rtx const_vector,
7530 enum machine_mode mode,
7531 unsigned width)
7532 {
7533 bool is_valid;
7534 static char templ[40];
7535 const char *mnemonic;
7536 const char *shift_op;
7537 unsigned int lane_count = 0;
7538 char element_char;
7539
7540 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7541
7542 /* This will return true to show const_vector is legal for use as either
7543 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
7544 also update INFO to show how the immediate should be generated. */
7545 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7546 gcc_assert (is_valid);
7547
7548 element_char = sizetochar (info.element_width);
7549 lane_count = width / info.element_width;
7550
7551 mode = GET_MODE_INNER (mode);
7552 if (mode == SFmode || mode == DFmode)
7553 {
7554 gcc_assert (info.shift == 0 && ! info.mvn);
7555 if (aarch64_float_const_zero_rtx_p (info.value))
7556 info.value = GEN_INT (0);
7557 else
7558 {
7559 #define buf_size 20
7560 REAL_VALUE_TYPE r;
7561 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7562 char float_buf[buf_size] = {'\0'};
7563 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7564 #undef buf_size
7565
7566 if (lane_count == 1)
7567 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7568 else
7569 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
7570 lane_count, element_char, float_buf);
7571 return templ;
7572 }
7573 }
7574
7575 mnemonic = info.mvn ? "mvni" : "movi";
7576 shift_op = info.msl ? "msl" : "lsl";
7577
7578 if (lane_count == 1)
7579 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7580 mnemonic, UINTVAL (info.value));
7581 else if (info.shift)
7582 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7583 ", %s %d", mnemonic, lane_count, element_char,
7584 UINTVAL (info.value), shift_op, info.shift);
7585 else
7586 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7587 mnemonic, lane_count, element_char, UINTVAL (info.value));
7588 return templ;
7589 }
7590
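/* As above, but for a scalar IMMEDIATE in integer mode MODE: the value is
   broadcast into a 64-bit vector constant and printed with the vector
   MOVI template.  */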
7591 char*
7592 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7593 enum machine_mode mode)
7594 {
7595 enum machine_mode vmode;
7596
7597 gcc_assert (!VECTOR_MODE_P (mode));
7598 vmode = aarch64_simd_container_mode (mode, 64);
7599 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7600 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7601 }
7602
7603 /* Split operands into moves from op[1] + op[2] into op[0]. */
7604
7605 void
7606 aarch64_split_combinev16qi (rtx operands[3])
7607 {
7608 unsigned int dest = REGNO (operands[0]);
7609 unsigned int src1 = REGNO (operands[1]);
7610 unsigned int src2 = REGNO (operands[2]);
7611 enum machine_mode halfmode = GET_MODE (operands[1]);
7612 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7613 rtx destlo, desthi;
7614
7615 gcc_assert (halfmode == V16QImode);
7616
7617 if (src1 == dest && src2 == dest + halfregs)
7618 {
7619 /* No-op move. Can't split to nothing; emit something. */
7620 emit_note (NOTE_INSN_DELETED);
7621 return;
7622 }
7623
7624 /* Preserve register attributes for variable tracking. */
7625 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7626 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7627 GET_MODE_SIZE (halfmode));
7628
7629 /* Special case of reversed high/low parts. */
7630 if (reg_overlap_mentioned_p (operands[2], destlo)
7631 && reg_overlap_mentioned_p (operands[1], desthi))
7632 {
7633 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7634 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7635 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7636 }
7637 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7638 {
7639 /* Try to avoid unnecessary moves if part of the result
7640 is in the right place already. */
7641 if (src1 != dest)
7642 emit_move_insn (destlo, operands[1]);
7643 if (src2 != dest + halfregs)
7644 emit_move_insn (desthi, operands[2]);
7645 }
7646 else
7647 {
7648 if (src2 != dest + halfregs)
7649 emit_move_insn (desthi, operands[2]);
7650 if (src1 != dest)
7651 emit_move_insn (destlo, operands[1]);
7652 }
7653 }
7654
7655 /* vec_perm support. */
7656
7657 #define MAX_VECT_LEN 16
7658
7659 struct expand_vec_perm_d
7660 {
7661 rtx target, op0, op1;
7662 unsigned char perm[MAX_VECT_LEN];
7663 enum machine_mode vmode;
7664 unsigned char nelt;
7665 bool one_vector_p;
7666 bool testing_p;
7667 };
7668
7669 /* Generate a variable permutation. */
7670
7671 static void
7672 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7673 {
7674 enum machine_mode vmode = GET_MODE (target);
7675 bool one_vector_p = rtx_equal_p (op0, op1);
7676
7677 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7678 gcc_checking_assert (GET_MODE (op0) == vmode);
7679 gcc_checking_assert (GET_MODE (op1) == vmode);
7680 gcc_checking_assert (GET_MODE (sel) == vmode);
7681 gcc_checking_assert (TARGET_SIMD);
7682
7683 if (one_vector_p)
7684 {
7685 if (vmode == V8QImode)
7686 {
7687 /* Expand the argument to a V16QI mode by duplicating it. */
7688 rtx pair = gen_reg_rtx (V16QImode);
7689 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7690 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7691 }
7692 else
7693 {
7694 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7695 }
7696 }
7697 else
7698 {
7699 rtx pair;
7700
7701 if (vmode == V8QImode)
7702 {
7703 pair = gen_reg_rtx (V16QImode);
7704 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7705 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7706 }
7707 else
7708 {
7709 pair = gen_reg_rtx (OImode);
7710 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7711 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7712 }
7713 }
7714 }
7715
7716 void
7717 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7718 {
7719 enum machine_mode vmode = GET_MODE (target);
7720 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7721 bool one_vector_p = rtx_equal_p (op0, op1);
7722 rtx rmask[MAX_VECT_LEN], mask;
7723
7724 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7725
7726 /* The TBL instruction does not use a modulo index, so we must take care
7727 of that ourselves. */
7728 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7729 for (i = 0; i < nelt; ++i)
7730 rmask[i] = mask;
7731 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7732 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7733
7734 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7735 }
7736
7737 /* Recognize patterns suitable for the TRN instructions. */
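/* For instance, with V4SImode operands a TRN1 permutation corresponds to
   the selector { 0, 4, 2, 6 } and TRN2 to { 1, 5, 3, 7 } in the
   little-endian numbering checked below.  */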
7738 static bool
7739 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7740 {
7741 unsigned int i, odd, mask, nelt = d->nelt;
7742 rtx out, in0, in1, x;
7743 rtx (*gen) (rtx, rtx, rtx);
7744 enum machine_mode vmode = d->vmode;
7745
7746 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7747 return false;
7748
7749 /* Note that these are little-endian tests.
7750 We correct for big-endian later. */
7751 if (d->perm[0] == 0)
7752 odd = 0;
7753 else if (d->perm[0] == 1)
7754 odd = 1;
7755 else
7756 return false;
7757 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7758
7759 for (i = 0; i < nelt; i += 2)
7760 {
7761 if (d->perm[i] != i + odd)
7762 return false;
7763 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7764 return false;
7765 }
7766
7767 /* Success! */
7768 if (d->testing_p)
7769 return true;
7770
7771 in0 = d->op0;
7772 in1 = d->op1;
7773 if (BYTES_BIG_ENDIAN)
7774 {
7775 x = in0, in0 = in1, in1 = x;
7776 odd = !odd;
7777 }
7778 out = d->target;
7779
7780 if (odd)
7781 {
7782 switch (vmode)
7783 {
7784 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7785 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7786 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7787 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7788 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7789 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7790 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7791 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7792 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7793 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7794 default:
7795 return false;
7796 }
7797 }
7798 else
7799 {
7800 switch (vmode)
7801 {
7802 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7803 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7804 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7805 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7806 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7807 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7808 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7809 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7810 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7811 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7812 default:
7813 return false;
7814 }
7815 }
7816
7817 emit_insn (gen (out, in0, in1));
7818 return true;
7819 }
7820
7821 /* Recognize patterns suitable for the UZP instructions. */
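/* UZP1/UZP2 concatenate the even- or odd-numbered elements of the two
   inputs, e.g. for V4SImode UZP1 matches the selector { 0, 2, 4, 6 } and
   UZP2 matches { 1, 3, 5, 7 }. */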
7822 static bool
7823 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7824 {
7825 unsigned int i, odd, mask, nelt = d->nelt;
7826 rtx out, in0, in1, x;
7827 rtx (*gen) (rtx, rtx, rtx);
7828 enum machine_mode vmode = d->vmode;
7829
7830 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7831 return false;
7832
7833 /* Note that these are little-endian tests.
7834 We correct for big-endian later. */
7835 if (d->perm[0] == 0)
7836 odd = 0;
7837 else if (d->perm[0] == 1)
7838 odd = 1;
7839 else
7840 return false;
7841 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7842
7843 for (i = 0; i < nelt; i++)
7844 {
7845 unsigned elt = (i * 2 + odd) & mask;
7846 if (d->perm[i] != elt)
7847 return false;
7848 }
7849
7850 /* Success! */
7851 if (d->testing_p)
7852 return true;
7853
7854 in0 = d->op0;
7855 in1 = d->op1;
7856 if (BYTES_BIG_ENDIAN)
7857 {
7858 x = in0, in0 = in1, in1 = x;
7859 odd = !odd;
7860 }
7861 out = d->target;
7862
7863 if (odd)
7864 {
7865 switch (vmode)
7866 {
7867 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7868 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7869 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7870 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7871 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7872 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7873 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7874 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7875 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7876 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7877 default:
7878 return false;
7879 }
7880 }
7881 else
7882 {
7883 switch (vmode)
7884 {
7885 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7886 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7887 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7888 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7889 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7890 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7891 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7892 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7893 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7894 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7895 default:
7896 return false;
7897 }
7898 }
7899
7900 emit_insn (gen (out, in0, in1));
7901 return true;
7902 }
7903
7904 /* Recognize patterns suitable for the ZIP instructions. */
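/* ZIP1/ZIP2 interleave the low or high halves of the two inputs, e.g. for
   V4SImode ZIP1 matches the selector { 0, 4, 1, 5 } and ZIP2 matches
   { 2, 6, 3, 7 }. */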
7905 static bool
7906 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7907 {
7908 unsigned int i, high, mask, nelt = d->nelt;
7909 rtx out, in0, in1, x;
7910 rtx (*gen) (rtx, rtx, rtx);
7911 enum machine_mode vmode = d->vmode;
7912
7913 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7914 return false;
7915
7916 /* Note that these are little-endian tests.
7917 We correct for big-endian later. */
7918 high = nelt / 2;
7919 if (d->perm[0] == high)
7920 /* Do Nothing. */
7921 ;
7922 else if (d->perm[0] == 0)
7923 high = 0;
7924 else
7925 return false;
7926 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7927
7928 for (i = 0; i < nelt / 2; i++)
7929 {
7930 unsigned elt = (i + high) & mask;
7931 if (d->perm[i * 2] != elt)
7932 return false;
7933 elt = (elt + nelt) & mask;
7934 if (d->perm[i * 2 + 1] != elt)
7935 return false;
7936 }
7937
7938 /* Success! */
7939 if (d->testing_p)
7940 return true;
7941
7942 in0 = d->op0;
7943 in1 = d->op1;
7944 if (BYTES_BIG_ENDIAN)
7945 {
7946 x = in0, in0 = in1, in1 = x;
7947 high = !high;
7948 }
7949 out = d->target;
7950
7951 if (high)
7952 {
7953 switch (vmode)
7954 {
7955 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7956 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7957 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7958 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7959 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7960 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7961 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7962 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7963 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7964 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7965 default:
7966 return false;
7967 }
7968 }
7969 else
7970 {
7971 switch (vmode)
7972 {
7973 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7974 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7975 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7976 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7977 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7978 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7979 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7980 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7981 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7982 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7983 default:
7984 return false;
7985 }
7986 }
7987
7988 emit_insn (gen (out, in0, in1));
7989 return true;
7990 }
7991
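/* Recognize a broadcast of a single element, e.g. the selector
   { 2, 2, 2, 2 } for V4SImode, which can be implemented with a DUP
   (element) instruction. */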
7992 static bool
7993 aarch64_evpc_dup (struct expand_vec_perm_d *d)
7994 {
7995 rtx (*gen) (rtx, rtx, rtx);
7996 rtx out = d->target;
7997 rtx in0;
7998 enum machine_mode vmode = d->vmode;
7999 unsigned int i, elt, nelt = d->nelt;
8000 rtx lane;
8001
8002 /* TODO: This may not be big-endian safe. */
8003 if (BYTES_BIG_ENDIAN)
8004 return false;
8005
8006 elt = d->perm[0];
8007 for (i = 1; i < nelt; i++)
8008 {
8009 if (elt != d->perm[i])
8010 return false;
8011 }
8012
8013 /* The generic preparation in aarch64_expand_vec_perm_const_1
8014 swaps the operand order and the permute indices if it finds
8015 d->perm[0] to be in the second operand. Thus, we can always
8016 use d->op0 and need not do any extra arithmetic to get the
8017 correct lane number. */
8018 in0 = d->op0;
8019 lane = GEN_INT (elt);
8020
8021 switch (vmode)
8022 {
8023 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8024 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8025 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8026 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8027 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8028 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8029 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8030 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8031 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8032 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8033 default:
8034 return false;
8035 }
8036
8037 emit_insn (gen (out, in0, lane));
8038 return true;
8039 }
8040
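/* Fall back to a general TBL-based permutation: force the constant
   selector into a register and reuse aarch64_expand_vec_perm_1. */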
8041 static bool
8042 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8043 {
8044 rtx rperm[MAX_VECT_LEN], sel;
8045 enum machine_mode vmode = d->vmode;
8046 unsigned int i, nelt = d->nelt;
8047
8048 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8049 numbering of elements for big-endian, we must reverse the order. */
8050 if (BYTES_BIG_ENDIAN)
8051 return false;
8052
8053 if (d->testing_p)
8054 return true;
8055
8056 /* Generic code will try constant permutation twice. Once with the
8057 original mode and again with the elements lowered to QImode.
8058 So wait and don't do the selector expansion ourselves. */
8059 if (vmode != V8QImode && vmode != V16QImode)
8060 return false;
8061
8062 for (i = 0; i < nelt; ++i)
8063 rperm[i] = GEN_INT (d->perm[i]);
8064 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8065 sel = force_reg (vmode, sel);
8066
8067 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8068 return true;
8069 }
8070
8071 static bool
8072 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8073 {
8074 /* The pattern matching functions above are written to look for a small
8075 number to begin the sequence (0, 1, N/2). If we begin with an index
8076 from the second operand, we can swap the operands. */
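/* For example, for V4SImode the selector { 4, 0, 5, 1 } becomes
   { 0, 4, 1, 5 } with the operands swapped, which is then matched as
   a ZIP1. */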
8077 if (d->perm[0] >= d->nelt)
8078 {
8079 unsigned i, nelt = d->nelt;
8080 rtx x;
8081
8082 for (i = 0; i < nelt; ++i)
8083 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8084
8085 x = d->op0;
8086 d->op0 = d->op1;
8087 d->op1 = x;
8088 }
8089
8090 if (TARGET_SIMD)
8091 {
8092 if (aarch64_evpc_zip (d))
8093 return true;
8094 else if (aarch64_evpc_uzp (d))
8095 return true;
8096 else if (aarch64_evpc_trn (d))
8097 return true;
8098 else if (aarch64_evpc_dup (d))
8099 return true;
8100 return aarch64_evpc_tbl (d);
8101 }
8102 return false;
8103 }
8104
8105 /* Expand a vec_perm_const pattern. */
8106
8107 bool
8108 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8109 {
8110 struct expand_vec_perm_d d;
8111 int i, nelt, which;
8112
8113 d.target = target;
8114 d.op0 = op0;
8115 d.op1 = op1;
8116
8117 d.vmode = GET_MODE (target);
8118 gcc_assert (VECTOR_MODE_P (d.vmode));
8119 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8120 d.testing_p = false;
8121
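/* Record in WHICH whether any selector element refers to the first
   operand (bit 0) and/or the second (bit 1), reducing each index modulo
   2 * NELT, since the selector is interpreted modulo the total number of
   input elements. */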
8122 for (i = which = 0; i < nelt; ++i)
8123 {
8124 rtx e = XVECEXP (sel, 0, i);
8125 int ei = INTVAL (e) & (2 * nelt - 1);
8126 which |= (ei < nelt ? 1 : 2);
8127 d.perm[i] = ei;
8128 }
8129
8130 switch (which)
8131 {
8132 default:
8133 gcc_unreachable ();
8134
8135 case 3:
8136 d.one_vector_p = false;
8137 if (!rtx_equal_p (op0, op1))
8138 break;
8139
8140 /* The elements of PERM do not suggest that only the first operand
8141 is used, but both operands are identical. Make the permutation
8142 easier to match by folding it onto the
8143 single input vector. */
8144 /* Fall Through. */
8145 case 2:
8146 for (i = 0; i < nelt; ++i)
8147 d.perm[i] &= nelt - 1;
8148 d.op0 = op1;
8149 d.one_vector_p = true;
8150 break;
8151
8152 case 1:
8153 d.op1 = op0;
8154 d.one_vector_p = true;
8155 break;
8156 }
8157
8158 return aarch64_expand_vec_perm_const_1 (&d);
8159 }
8160
8161 static bool
8162 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8163 const unsigned char *sel)
8164 {
8165 struct expand_vec_perm_d d;
8166 unsigned int i, nelt, which;
8167 bool ret;
8168
8169 d.vmode = vmode;
8170 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8171 d.testing_p = true;
8172 memcpy (d.perm, sel, nelt);
8173
8174 /* Calculate whether all elements are in one vector. */
8175 for (i = which = 0; i < nelt; ++i)
8176 {
8177 unsigned char e = d.perm[i];
8178 gcc_assert (e < 2 * nelt);
8179 which |= (e < nelt ? 1 : 2);
8180 }
8181
8182 /* If all elements are from the second vector, reindex as if from the
8183 first vector. */
8184 if (which == 2)
8185 for (i = 0; i < nelt; ++i)
8186 d.perm[i] -= nelt;
8187
8188 /* Check whether the mask can be applied to a single vector. */
8189 d.one_vector_p = (which != 3);
8190
8191 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8192 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8193 if (!d.one_vector_p)
8194 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8195
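/* Expand inside a throw-away insn sequence: with TESTING_P set most of
   the recognizers return before emitting, and anything that does get
   emitted is discarded along with the sequence, so this only reports
   whether the permutation is supported. */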
8196 start_sequence ();
8197 ret = aarch64_expand_vec_perm_const_1 (&d);
8198 end_sequence ();
8199
8200 return ret;
8201 }
8202
8203 #undef TARGET_ADDRESS_COST
8204 #define TARGET_ADDRESS_COST aarch64_address_cost
8205
8206 /* This hook determines whether unnamed bitfields affect the alignment
8207 of the containing structure. The hook returns true if the structure
8208 should inherit the alignment requirements of an unnamed bitfield's
8209 type. */
8210 #undef TARGET_ALIGN_ANON_BITFIELD
8211 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8212
8213 #undef TARGET_ASM_ALIGNED_DI_OP
8214 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8215
8216 #undef TARGET_ASM_ALIGNED_HI_OP
8217 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8218
8219 #undef TARGET_ASM_ALIGNED_SI_OP
8220 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8221
8222 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8223 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8224 hook_bool_const_tree_hwi_hwi_const_tree_true
8225
8226 #undef TARGET_ASM_FILE_START
8227 #define TARGET_ASM_FILE_START aarch64_start_file
8228
8229 #undef TARGET_ASM_OUTPUT_MI_THUNK
8230 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8231
8232 #undef TARGET_ASM_SELECT_RTX_SECTION
8233 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8234
8235 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8236 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8237
8238 #undef TARGET_BUILD_BUILTIN_VA_LIST
8239 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8240
8241 #undef TARGET_CALLEE_COPIES
8242 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8243
8244 #undef TARGET_CAN_ELIMINATE
8245 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8246
8247 #undef TARGET_CANNOT_FORCE_CONST_MEM
8248 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8249
8250 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8251 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8252
8253 /* Only the least significant bit is used for initialization guard
8254 variables. */
8255 #undef TARGET_CXX_GUARD_MASK_BIT
8256 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8257
8258 #undef TARGET_C_MODE_FOR_SUFFIX
8259 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8260
8261 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8262 #undef TARGET_DEFAULT_TARGET_FLAGS
8263 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8264 #endif
8265
8266 #undef TARGET_CLASS_MAX_NREGS
8267 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8268
8269 #undef TARGET_BUILTIN_DECL
8270 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8271
8272 #undef TARGET_EXPAND_BUILTIN
8273 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8274
8275 #undef TARGET_EXPAND_BUILTIN_VA_START
8276 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8277
8278 #undef TARGET_FOLD_BUILTIN
8279 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8280
8281 #undef TARGET_FUNCTION_ARG
8282 #define TARGET_FUNCTION_ARG aarch64_function_arg
8283
8284 #undef TARGET_FUNCTION_ARG_ADVANCE
8285 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8286
8287 #undef TARGET_FUNCTION_ARG_BOUNDARY
8288 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8289
8290 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8291 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8292
8293 #undef TARGET_FUNCTION_VALUE
8294 #define TARGET_FUNCTION_VALUE aarch64_function_value
8295
8296 #undef TARGET_FUNCTION_VALUE_REGNO_P
8297 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8298
8299 #undef TARGET_FRAME_POINTER_REQUIRED
8300 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8301
8302 #undef TARGET_GIMPLE_FOLD_BUILTIN
8303 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8304
8305 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8306 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8307
8308 #undef TARGET_INIT_BUILTINS
8309 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8310
8311 #undef TARGET_LEGITIMATE_ADDRESS_P
8312 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8313
8314 #undef TARGET_LEGITIMATE_CONSTANT_P
8315 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8316
8317 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8318 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8319
8320 #undef TARGET_LRA_P
8321 #define TARGET_LRA_P aarch64_lra_p
8322
8323 #undef TARGET_MANGLE_TYPE
8324 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8325
8326 #undef TARGET_MEMORY_MOVE_COST
8327 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8328
8329 #undef TARGET_MUST_PASS_IN_STACK
8330 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8331
8332 /* This target hook should return true if accesses to volatile bitfields
8333 should use the narrowest mode possible. It should return false if these
8334 accesses should use the bitfield container type. */
8335 #undef TARGET_NARROW_VOLATILE_BITFIELD
8336 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8337
8338 #undef TARGET_OPTION_OVERRIDE
8339 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8340
8341 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8342 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8343 aarch64_override_options_after_change
8344
8345 #undef TARGET_PASS_BY_REFERENCE
8346 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8347
8348 #undef TARGET_PREFERRED_RELOAD_CLASS
8349 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8350
8351 #undef TARGET_SECONDARY_RELOAD
8352 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8353
8354 #undef TARGET_SHIFT_TRUNCATION_MASK
8355 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8356
8357 #undef TARGET_SETUP_INCOMING_VARARGS
8358 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8359
8360 #undef TARGET_STRUCT_VALUE_RTX
8361 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8362
8363 #undef TARGET_REGISTER_MOVE_COST
8364 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8365
8366 #undef TARGET_RETURN_IN_MEMORY
8367 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8368
8369 #undef TARGET_RETURN_IN_MSB
8370 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8371
8372 #undef TARGET_RTX_COSTS
8373 #define TARGET_RTX_COSTS aarch64_rtx_costs
8374
8375 #undef TARGET_TRAMPOLINE_INIT
8376 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8377
8378 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8379 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8380
8381 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8382 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8383
8384 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8385 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8386
8387 #undef TARGET_VECTORIZE_ADD_STMT_COST
8388 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8389
8390 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8391 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8392 aarch64_builtin_vectorization_cost
8393
8394 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8395 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8396
8397 #undef TARGET_VECTORIZE_BUILTINS
8398 #define TARGET_VECTORIZE_BUILTINS
8399
8400 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8401 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8402 aarch64_builtin_vectorized_function
8403
8404 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8405 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8406 aarch64_autovectorize_vector_sizes
8407
8408 /* Section anchor support. */
8409
8410 #undef TARGET_MIN_ANCHOR_OFFSET
8411 #define TARGET_MIN_ANCHOR_OFFSET -256
8412
8413 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8414 byte offset; we can do much more for larger data types, but have no way
8415 to determine the size of the access. We assume accesses are aligned. */
8416 #undef TARGET_MAX_ANCHOR_OFFSET
8417 #define TARGET_MAX_ANCHOR_OFFSET 4095
8418
8419 #undef TARGET_VECTOR_ALIGNMENT
8420 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8421
8422 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8423 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8424 aarch64_simd_vector_alignment_reachable
8425
8426 /* vec_perm support. */
8427
8428 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8429 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8430 aarch64_vectorize_vec_perm_const_ok
8431
8432
8433 #undef TARGET_FIXED_CONDITION_CODE_REGS
8434 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8435
8436 struct gcc_target targetm = TARGET_INITIALIZER;
8437
8438 #include "gt-aarch64.h"