/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2013 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "hard-reg-set.h"
#include "target-def.h"
#include "targhooks.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
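/* For example, with the default LP64 ABI POINTER_SIZE is 64 and
   BITS_PER_UNIT is 8, so POINTER_BYTES evaluates to 8; under ILP32
   it evaluates to 4.  */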
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};
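/* As a rough illustration, these classes correspond to assembly
   addresses such as:
     ADDRESS_REG_IMM    [x0, #16]
     ADDRESS_REG_WB     [x0, #16]!  or  [x0], #16
     ADDRESS_REG_REG    [x0, x1, lsl #3]
     ADDRESS_REG_UXTW   [x0, w1, uxtw #2]
     ADDRESS_REG_SXTW   [x0, w1, sxtw #2]
     ADDRESS_LO_SUM     [x0, #:lo12:foo]
     ADDRESS_SYMBOLIC   a pc-relative literal-pool reference.  */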
struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};
struct simd_immediate_info
/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
static bool aarch64_lra_p (void);
static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
						     const_tree,
						     enum machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
					      HOST_WIDE_INT, HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
						 const unsigned char *sel);
/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = generic;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;
/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif
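/* For instance, NAMED_PARAM (memory_load, COSTS_N_INSNS (1)) expands to
   ".memory_load = COSTS_N_INSNS (1)" when designated initializers are
   available, and to the plain positional "COSTS_N_INSNS (1)" otherwise,
   so the tables below must list their fields in declaration order.  */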
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_rtx_cost_table generic_rtx_cost_table =
{
  NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
  NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
  NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
  NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
  NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
  NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
  NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
  NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  /* We currently do not provide direct support for TFmode Q->Q move.
     Therefore we need to raise the cost above 2 in order to have
     reload handle the situation.  */
  NAMED_PARAM (FP2FP, 4)
};
/* Generic costs for vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost generic_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 1),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 1),
  NAMED_PARAM (vec_to_scalar_cost, 1),
  NAMED_PARAM (scalar_to_vec_cost, 1),
  NAMED_PARAM (vec_align_load_cost, 1),
  NAMED_PARAM (vec_unalign_load_cost, 1),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 3),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &generic_rtx_cost_table,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4)
};
/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8,
   &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};
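/* For illustration only: a hypothetical aarch64-cores.def entry such as
     AARCH64_CORE("example-core", examplecore, 8, AARCH64_FL_FPSIMD, generic)
   would expand in this table to
     {"example-core", examplecore, "8",
      AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},  */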
/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Target specification.  These are populated as commandline arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};
/* Used to track the size of an address when generating a pre/post
   increment address.  */
static enum machine_mode aarch64_memory_reference_mode;

/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
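/* For example, 0x00ff00ff00ff00ff is representable as a bitmask immediate:
   it is the 16-bit element 0x00ff (a rotated run of contiguous set bits)
   replicated across all 64 bits, which is exactly the form that AND, ORR
   and EOR can encode directly.  */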
/* Did we set flag_omit_frame_pointer just so
   aarch64_frame_pointer_required would be called?  */
static bool faked_omit_frame_pointer;
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
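/* The encoding pairs each condition with its inverse in adjacent values,
   so flipping bit 0 inverts the test: for example
   AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ) is AARCH64_NE (0 ^ 1 == 1)
   and AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) is AARCH64_LT.  */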
/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
   if (GP_REGNUM_P (regno))
     return AARCH64_DWARF_R0 + regno - R0_REGNUM;
   else if (regno == SP_REGNUM)
     return AARCH64_DWARF_SP;
   else if (FP_REGNUM_P (regno))
     return AARCH64_DWARF_V0 + regno - V0_REGNUM;

   /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
      equivalent DWARF register.  */
   return DWARF_FRAME_REGISTERS;
}
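/* For example, x0 maps to DWARF register 0, x29 to 29, sp to 31 and v0
   to 64, following the AArch64 DWARF register numbering; anything else
   (such as the condition flags) reports "no DWARF equivalent".  */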
/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}
/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (enum machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}
/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
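/* For example, TImode (16 bytes) occupies two X registers but only one
   128-bit V register, while OImode (32 bytes) needs two V registers.  */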
/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}
/* Return true if calls to DECL should be treated as
   long-calls (ie called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (ie called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
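/* For instance, with MODE == DImode, EXTRACT_IMM == 35 and MULT_IMM == 8
   the test succeeds: 35 & ~7 == 32 is a power of two (the extend is from
   32 bits) and 35 & 7 == 3 matches the multiplier 1 << 3, i.e. the RTL
   describes an operand along the lines of "add x0, x1, w2, uxtw #3".  */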
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as :
   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                     bl   __tls_get_addr
                                     nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp
   mrs  tp, tpidr_el0
   add  dest, dest, tp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm
   add  t0, #:tprel_lo12_nc:imm
*/
548 aarch64_load_symref_appropriately (rtx dest
, rtx imm
,
549 enum aarch64_symbol_type type
)
553 case SYMBOL_SMALL_ABSOLUTE
:
555 /* In ILP32, the mode of dest can be either SImode or DImode. */
557 enum machine_mode mode
= GET_MODE (dest
);
559 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
561 if (can_create_pseudo_p ())
562 tmp_reg
= gen_reg_rtx (mode
);
564 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
565 emit_insn (gen_add_losym (dest
, tmp_reg
, imm
));
569 case SYMBOL_TINY_ABSOLUTE
:
570 emit_insn (gen_rtx_SET (Pmode
, dest
, imm
));
573 case SYMBOL_SMALL_GOT
:
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */
583 enum machine_mode mode
= GET_MODE (dest
);
585 if (can_create_pseudo_p ())
586 tmp_reg
= gen_reg_rtx (mode
);
588 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
589 if (mode
== ptr_mode
)
592 emit_insn (gen_ldr_got_small_di (dest
, tmp_reg
, imm
));
594 emit_insn (gen_ldr_got_small_si (dest
, tmp_reg
, imm
));
598 gcc_assert (mode
== Pmode
);
599 emit_insn (gen_ldr_got_small_sidi (dest
, tmp_reg
, imm
));
605 case SYMBOL_SMALL_TLSGD
:
608 rtx result
= gen_rtx_REG (Pmode
, R0_REGNUM
);
611 emit_call_insn (gen_tlsgd_small (result
, imm
));
612 insns
= get_insns ();
615 RTL_CONST_CALL_P (insns
) = 1;
616 emit_libcall_block (insns
, dest
, result
, imm
);
620 case SYMBOL_SMALL_TLSDESC
:
622 rtx x0
= gen_rtx_REG (Pmode
, R0_REGNUM
);
625 emit_insn (gen_tlsdesc_small (imm
));
626 tp
= aarch64_load_tp (NULL
);
627 emit_insn (gen_rtx_SET (Pmode
, dest
, gen_rtx_PLUS (Pmode
, tp
, x0
)));
628 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
632 case SYMBOL_SMALL_GOTTPREL
:
634 rtx tmp_reg
= gen_reg_rtx (Pmode
);
635 rtx tp
= aarch64_load_tp (NULL
);
636 emit_insn (gen_tlsie_small (tmp_reg
, imm
));
637 emit_insn (gen_rtx_SET (Pmode
, dest
, gen_rtx_PLUS (Pmode
, tp
, tmp_reg
)));
638 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
642 case SYMBOL_SMALL_TPREL
:
644 rtx tp
= aarch64_load_tp (NULL
);
645 emit_insn (gen_tlsle_small (dest
, tp
, imm
));
646 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
650 case SYMBOL_TINY_GOT
:
651 emit_insn (gen_ldr_got_tiny (dest
, imm
));
/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}
673 aarch64_split_128bit_move (rtx dst
, rtx src
)
677 enum machine_mode src_mode
= GET_MODE (src
);
678 enum machine_mode dst_mode
= GET_MODE (dst
);
679 int src_regno
= REGNO (src
);
680 int dst_regno
= REGNO (dst
);
682 gcc_assert (dst_mode
== TImode
|| dst_mode
== TFmode
);
684 if (REG_P (dst
) && REG_P (src
))
686 gcc_assert (src_mode
== TImode
|| src_mode
== TFmode
);
688 /* Handle r -> w, w -> r. */
689 if (FP_REGNUM_P (dst_regno
) && GP_REGNUM_P (src_regno
))
694 (gen_aarch64_movtilow_di (dst
, gen_lowpart (word_mode
, src
)));
696 (gen_aarch64_movtihigh_di (dst
, gen_highpart (word_mode
, src
)));
700 (gen_aarch64_movtflow_di (dst
, gen_lowpart (word_mode
, src
)));
702 (gen_aarch64_movtfhigh_di (dst
, gen_highpart (word_mode
, src
)));
708 else if (GP_REGNUM_P (dst_regno
) && FP_REGNUM_P (src_regno
))
713 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode
, dst
), src
));
715 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode
, dst
), src
));
719 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode
, dst
), src
));
721 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode
, dst
), src
));
727 /* Fall through to r -> r cases. */
732 low_dst
= gen_lowpart (word_mode
, dst
);
734 && reg_overlap_mentioned_p (low_dst
, src
))
736 aarch64_emit_move (gen_highpart (word_mode
, dst
),
737 gen_highpart_mode (word_mode
, TImode
, src
));
738 aarch64_emit_move (low_dst
, gen_lowpart (word_mode
, src
));
742 aarch64_emit_move (low_dst
, gen_lowpart (word_mode
, src
));
743 aarch64_emit_move (gen_highpart (word_mode
, dst
),
744 gen_highpart_mode (word_mode
, TImode
, src
));
748 emit_move_insn (gen_rtx_REG (DFmode
, dst_regno
),
749 gen_rtx_REG (DFmode
, src_regno
));
750 emit_move_insn (gen_rtx_REG (DFmode
, dst_regno
+ 1),
751 gen_rtx_REG (DFmode
, src_regno
+ 1));
759 aarch64_split_128bit_move_p (rtx dst
, rtx src
)
761 return (! REG_P (src
)
762 || ! (FP_REGNUM_P (REGNO (dst
)) && FP_REGNUM_P (REGNO (src
))));
765 /* Split a complex SIMD combine. */
768 aarch64_split_simd_combine (rtx dst
, rtx src1
, rtx src2
)
770 enum machine_mode src_mode
= GET_MODE (src1
);
771 enum machine_mode dst_mode
= GET_MODE (dst
);
773 gcc_assert (VECTOR_MODE_P (dst_mode
));
775 if (REG_P (dst
) && REG_P (src1
) && REG_P (src2
))
777 rtx (*gen
) (rtx
, rtx
, rtx
);
782 gen
= gen_aarch64_simd_combinev8qi
;
785 gen
= gen_aarch64_simd_combinev4hi
;
788 gen
= gen_aarch64_simd_combinev2si
;
791 gen
= gen_aarch64_simd_combinev2sf
;
794 gen
= gen_aarch64_simd_combinedi
;
797 gen
= gen_aarch64_simd_combinedf
;
803 emit_insn (gen (dst
, src1
, src2
));
808 /* Split a complex SIMD move. */
811 aarch64_split_simd_move (rtx dst
, rtx src
)
813 enum machine_mode src_mode
= GET_MODE (src
);
814 enum machine_mode dst_mode
= GET_MODE (dst
);
816 gcc_assert (VECTOR_MODE_P (dst_mode
));
818 if (REG_P (dst
) && REG_P (src
))
820 rtx (*gen
) (rtx
, rtx
);
822 gcc_assert (VECTOR_MODE_P (src_mode
));
827 gen
= gen_aarch64_split_simd_movv16qi
;
830 gen
= gen_aarch64_split_simd_movv8hi
;
833 gen
= gen_aarch64_split_simd_movv4si
;
836 gen
= gen_aarch64_split_simd_movv2di
;
839 gen
= gen_aarch64_split_simd_movv4sf
;
842 gen
= gen_aarch64_split_simd_movv2df
;
848 emit_insn (gen (dst
, src
));
854 aarch64_force_temporary (enum machine_mode mode
, rtx x
, rtx value
)
856 if (can_create_pseudo_p ())
857 return force_reg (mode
, value
);
860 x
= aarch64_emit_move (x
, value
);
867 aarch64_add_offset (enum machine_mode mode
, rtx temp
, rtx reg
, HOST_WIDE_INT offset
)
869 if (!aarch64_plus_immediate (GEN_INT (offset
), mode
))
872 /* Load the full offset into a register. This
873 might be improvable in the future. */
874 high
= GEN_INT (offset
);
876 high
= aarch64_force_temporary (mode
, temp
, high
);
877 reg
= aarch64_force_temporary (mode
, temp
,
878 gen_rtx_PLUS (mode
, high
, reg
));
880 return plus_constant (mode
, reg
, offset
);
884 aarch64_expand_mov_immediate (rtx dest
, rtx imm
)
886 enum machine_mode mode
= GET_MODE (dest
);
887 unsigned HOST_WIDE_INT mask
;
890 unsigned HOST_WIDE_INT val
;
893 int one_match
, zero_match
;
895 gcc_assert (mode
== SImode
|| mode
== DImode
);
897 /* Check on what type of symbol it is. */
898 if (GET_CODE (imm
) == SYMBOL_REF
899 || GET_CODE (imm
) == LABEL_REF
900 || GET_CODE (imm
) == CONST
)
902 rtx mem
, base
, offset
;
903 enum aarch64_symbol_type sty
;
905 /* If we have (const (plus symbol offset)), separate out the offset
906 before we start classifying the symbol. */
907 split_const (imm
, &base
, &offset
);
909 sty
= aarch64_classify_symbol (base
, SYMBOL_CONTEXT_ADR
);
912 case SYMBOL_FORCE_TO_MEM
:
913 if (offset
!= const0_rtx
914 && targetm
.cannot_force_const_mem (mode
, imm
))
916 gcc_assert(can_create_pseudo_p ());
917 base
= aarch64_force_temporary (mode
, dest
, base
);
918 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
919 aarch64_emit_move (dest
, base
);
922 mem
= force_const_mem (ptr_mode
, imm
);
924 if (mode
!= ptr_mode
)
925 mem
= gen_rtx_ZERO_EXTEND (mode
, mem
);
926 emit_insn (gen_rtx_SET (VOIDmode
, dest
, mem
));
929 case SYMBOL_SMALL_TLSGD
:
930 case SYMBOL_SMALL_TLSDESC
:
931 case SYMBOL_SMALL_GOTTPREL
:
932 case SYMBOL_SMALL_GOT
:
933 case SYMBOL_TINY_GOT
:
934 if (offset
!= const0_rtx
)
936 gcc_assert(can_create_pseudo_p ());
937 base
= aarch64_force_temporary (mode
, dest
, base
);
938 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
939 aarch64_emit_move (dest
, base
);
944 case SYMBOL_SMALL_TPREL
:
945 case SYMBOL_SMALL_ABSOLUTE
:
946 case SYMBOL_TINY_ABSOLUTE
:
947 aarch64_load_symref_appropriately (dest
, imm
, sty
);
955 if (CONST_INT_P (imm
) && aarch64_move_imm (INTVAL (imm
), mode
))
957 emit_insn (gen_rtx_SET (VOIDmode
, dest
, imm
));
961 if (!CONST_INT_P (imm
))
963 if (GET_CODE (imm
) == HIGH
)
964 emit_insn (gen_rtx_SET (VOIDmode
, dest
, imm
));
967 rtx mem
= force_const_mem (mode
, imm
);
969 emit_insn (gen_rtx_SET (VOIDmode
, dest
, mem
));
977 /* We know we can't do this in 1 insn, and we must be able to do it
978 in two; so don't mess around looking for sequences that don't buy
980 emit_insn (gen_rtx_SET (VOIDmode
, dest
, GEN_INT (INTVAL (imm
) & 0xffff)));
981 emit_insn (gen_insv_immsi (dest
, GEN_INT (16),
982 GEN_INT ((INTVAL (imm
) >> 16) & 0xffff)));
986 /* Remaining cases are all for DImode. */
989 subtargets
= optimize
&& can_create_pseudo_p ();
995 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
997 if ((val
& mask
) == 0)
999 else if ((val
& mask
) == mask
)
1006 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1008 if ((val
& mask
) != mask
)
1010 emit_insn (gen_rtx_SET (VOIDmode
, dest
, GEN_INT (val
| mask
)));
1011 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1012 GEN_INT ((val
>> i
) & 0xffff)));
1019 if (zero_match
== 2)
1020 goto simple_sequence
;
1022 mask
= 0x0ffff0000UL
;
1023 for (i
= 16; i
< 64; i
+= 16, mask
<<= 16)
1025 HOST_WIDE_INT comp
= mask
& ~(mask
- 1);
1027 if (aarch64_uimm12_shift (val
- (val
& mask
)))
1029 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1031 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
, GEN_INT (val
& mask
)));
1032 emit_insn (gen_adddi3 (dest
, subtarget
,
1033 GEN_INT (val
- (val
& mask
))));
1036 else if (aarch64_uimm12_shift (-(val
- ((val
+ comp
) & mask
))))
1038 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1040 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1041 GEN_INT ((val
+ comp
) & mask
)));
1042 emit_insn (gen_adddi3 (dest
, subtarget
,
1043 GEN_INT (val
- ((val
+ comp
) & mask
))));
1046 else if (aarch64_uimm12_shift (val
- ((val
- comp
) | ~mask
)))
1048 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1050 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1051 GEN_INT ((val
- comp
) | ~mask
)));
1052 emit_insn (gen_adddi3 (dest
, subtarget
,
1053 GEN_INT (val
- ((val
- comp
) | ~mask
))));
1056 else if (aarch64_uimm12_shift (-(val
- (val
| ~mask
))))
1058 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1060 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1061 GEN_INT (val
| ~mask
)));
1062 emit_insn (gen_adddi3 (dest
, subtarget
,
1063 GEN_INT (val
- (val
| ~mask
))));
1068 /* See if we can do it by arithmetically combining two
1070 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1075 if (aarch64_uimm12_shift (val
- aarch64_bitmasks
[i
])
1076 || aarch64_uimm12_shift (-val
+ aarch64_bitmasks
[i
]))
1078 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1079 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1080 GEN_INT (aarch64_bitmasks
[i
])));
1081 emit_insn (gen_adddi3 (dest
, subtarget
,
1082 GEN_INT (val
- aarch64_bitmasks
[i
])));
1086 for (j
= 0; j
< 64; j
+= 16, mask
<<= 16)
1088 if ((aarch64_bitmasks
[i
] & ~mask
) == (val
& ~mask
))
1090 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1091 GEN_INT (aarch64_bitmasks
[i
])));
1092 emit_insn (gen_insv_immdi (dest
, GEN_INT (j
),
1093 GEN_INT ((val
>> j
) & 0xffff)));
1099 /* See if we can do it by logically combining two immediates. */
1100 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1102 if ((aarch64_bitmasks
[i
] & val
) == aarch64_bitmasks
[i
])
1106 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1107 if (val
== (aarch64_bitmasks
[i
] | aarch64_bitmasks
[j
]))
1109 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1110 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1111 GEN_INT (aarch64_bitmasks
[i
])));
1112 emit_insn (gen_iordi3 (dest
, subtarget
,
1113 GEN_INT (aarch64_bitmasks
[j
])));
1117 else if ((val
& aarch64_bitmasks
[i
]) == val
)
1121 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1122 if (val
== (aarch64_bitmasks
[j
] & aarch64_bitmasks
[i
]))
1125 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1126 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1127 GEN_INT (aarch64_bitmasks
[j
])));
1128 emit_insn (gen_anddi3 (dest
, subtarget
,
1129 GEN_INT (aarch64_bitmasks
[i
])));
1138 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1140 if ((val
& mask
) != 0)
1144 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1145 GEN_INT (val
& mask
)));
1149 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1150 GEN_INT ((val
>> i
) & 0xffff)));
1156 aarch64_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
1158 /* Indirect calls are not currently supported. */
  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
1165 if (aarch64_decl_is_long_call_p (decl
))
1171 /* Implement TARGET_PASS_BY_REFERENCE. */
1174 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED
,
1175 enum machine_mode mode
,
1177 bool named ATTRIBUTE_UNUSED
)
1180 enum machine_mode dummymode
;
1183 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1184 size
= (mode
== BLKmode
&& type
)
1185 ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1189 /* Arrays always passed by reference. */
1190 if (TREE_CODE (type
) == ARRAY_TYPE
)
1192 /* Other aggregates based on their size. */
1193 if (AGGREGATE_TYPE_P (type
))
1194 size
= int_size_in_bytes (type
);
1197 /* Variable sized arguments are always returned by reference. */
1201 /* Can this be a candidate to be passed in fp/simd register(s)? */
1202 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
1207 /* Arguments which are variable sized or larger than 2 registers are
1208 passed by reference unless they are a homogenous floating point
1210 return size
> 2 * UNITS_PER_WORD
;
1213 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1215 aarch64_return_in_msb (const_tree valtype
)
1217 enum machine_mode dummy_mode
;
1220 /* Never happens in little-endian mode. */
1221 if (!BYTES_BIG_ENDIAN
)
1224 /* Only composite types smaller than or equal to 16 bytes can
1225 be potentially returned in registers. */
1226 if (!aarch64_composite_type_p (valtype
, TYPE_MODE (valtype
))
1227 || int_size_in_bytes (valtype
) <= 0
1228 || int_size_in_bytes (valtype
) > 16)
1231 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1232 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1233 is always passed/returned in the least significant bits of fp/simd
1235 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype
), valtype
,
1236 &dummy_mode
, &dummy_int
, NULL
))
1242 /* Implement TARGET_FUNCTION_VALUE.
1243 Define how to find the value returned by a function. */
1246 aarch64_function_value (const_tree type
, const_tree func
,
1247 bool outgoing ATTRIBUTE_UNUSED
)
1249 enum machine_mode mode
;
1252 enum machine_mode ag_mode
;
1254 mode
= TYPE_MODE (type
);
1255 if (INTEGRAL_TYPE_P (type
))
1256 mode
= promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
1258 if (aarch64_return_in_msb (type
))
1260 HOST_WIDE_INT size
= int_size_in_bytes (type
);
1262 if (size
% UNITS_PER_WORD
!= 0)
1264 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
1265 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
1269 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
1270 &ag_mode
, &count
, NULL
))
1272 if (!aarch64_composite_type_p (type
, mode
))
1274 gcc_assert (count
== 1 && mode
== ag_mode
);
1275 return gen_rtx_REG (mode
, V0_REGNUM
);
1282 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
1283 for (i
= 0; i
< count
; i
++)
1285 rtx tmp
= gen_rtx_REG (ag_mode
, V0_REGNUM
+ i
);
1286 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1287 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
1288 XVECEXP (par
, 0, i
) = tmp
;
1294 return gen_rtx_REG (mode
, R0_REGNUM
);
1297 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1298 Return true if REGNO is the number of a hard register in which the values
1299 of called function may come back. */
1302 aarch64_function_value_regno_p (const unsigned int regno
)
1304 /* Maximum of 16 bytes can be returned in the general registers. Examples
1305 of 16-byte return values are: 128-bit integers and 16-byte small
1306 structures (excluding homogeneous floating-point aggregates). */
1307 if (regno
== R0_REGNUM
|| regno
== R1_REGNUM
)
1310 /* Up to four fp/simd registers can return a function value, e.g. a
1311 homogeneous floating-point aggregate having four members. */
1312 if (regno
>= V0_REGNUM
&& regno
< V0_REGNUM
+ HA_MAX_NUM_FLDS
)
1313 return !TARGET_GENERAL_REGS_ONLY
;
1318 /* Implement TARGET_RETURN_IN_MEMORY.
1320 If the type T of the result of a function is such that
1322 would require that arg be passed as a value in a register (or set of
1323 registers) according to the parameter passing rules, then the result
1324 is returned in the same registers as would be used for such an
1328 aarch64_return_in_memory (const_tree type
, const_tree fndecl ATTRIBUTE_UNUSED
)
1331 enum machine_mode ag_mode
;
1334 if (!AGGREGATE_TYPE_P (type
)
1335 && TREE_CODE (type
) != COMPLEX_TYPE
1336 && TREE_CODE (type
) != VECTOR_TYPE
)
1337 /* Simple scalar types always returned in registers. */
1340 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type
),
1347 /* Types larger than 2 registers returned in memory. */
1348 size
= int_size_in_bytes (type
);
1349 return (size
< 0 || size
> 2 * UNITS_PER_WORD
);
1353 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v
, enum machine_mode mode
,
1354 const_tree type
, int *nregs
)
1356 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1357 return aarch64_vfp_is_call_or_return_candidate (mode
,
1359 &pcum
->aapcs_vfp_rmode
,
1364 /* Given MODE and TYPE of a function argument, return the alignment in
1365 bits. The idea is to suppress any stronger alignment requested by
1366 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1367 This is a helper function for local use only. */
1370 aarch64_function_arg_alignment (enum machine_mode mode
, const_tree type
)
1372 unsigned int alignment
;
1376 if (!integer_zerop (TYPE_SIZE (type
)))
1378 if (TYPE_MODE (type
) == mode
)
1379 alignment
= TYPE_ALIGN (type
);
1381 alignment
= GET_MODE_ALIGNMENT (mode
);
1387 alignment
= GET_MODE_ALIGNMENT (mode
);
1392 /* Layout a function argument according to the AAPCS64 rules. The rule
1393 numbers refer to the rule numbers in the AAPCS64. */
1396 aarch64_layout_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
1398 bool named ATTRIBUTE_UNUSED
)
1400 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1401 int ncrn
, nvrn
, nregs
;
1402 bool allocate_ncrn
, allocate_nvrn
;
1404 /* We need to do this once per argument. */
1405 if (pcum
->aapcs_arg_processed
)
1408 pcum
->aapcs_arg_processed
= true;
1410 allocate_ncrn
= (type
) ? !(FLOAT_TYPE_P (type
)) : !FLOAT_MODE_P (mode
);
1411 allocate_nvrn
= aarch64_vfp_is_call_candidate (pcum_v
,
1416 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1417 The following code thus handles passing by SIMD/FP registers first. */
1419 nvrn
= pcum
->aapcs_nvrn
;
1421 /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1422 and homogenous short-vector aggregates (HVA). */
1425 if (nvrn
+ nregs
<= NUM_FP_ARG_REGS
)
1427 pcum
->aapcs_nextnvrn
= nvrn
+ nregs
;
1428 if (!aarch64_composite_type_p (type
, mode
))
1430 gcc_assert (nregs
== 1);
1431 pcum
->aapcs_reg
= gen_rtx_REG (mode
, V0_REGNUM
+ nvrn
);
1437 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1438 for (i
= 0; i
< nregs
; i
++)
1440 rtx tmp
= gen_rtx_REG (pcum
->aapcs_vfp_rmode
,
1441 V0_REGNUM
+ nvrn
+ i
);
1442 tmp
= gen_rtx_EXPR_LIST
1444 GEN_INT (i
* GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
)));
1445 XVECEXP (par
, 0, i
) = tmp
;
1447 pcum
->aapcs_reg
= par
;
1453 /* C.3 NSRN is set to 8. */
1454 pcum
->aapcs_nextnvrn
= NUM_FP_ARG_REGS
;
1459 ncrn
= pcum
->aapcs_ncrn
;
1460 nregs
= ((type
? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
))
1461 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1464 /* C6 - C9. though the sign and zero extension semantics are
1465 handled elsewhere. This is the case where the argument fits
1466 entirely general registers. */
1467 if (allocate_ncrn
&& (ncrn
+ nregs
<= NUM_ARG_REGS
))
1469 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
1471 gcc_assert (nregs
== 0 || nregs
== 1 || nregs
== 2);
1473 /* C.8 if the argument has an alignment of 16 then the NGRN is
1474 rounded up to the next even number. */
1475 if (nregs
== 2 && alignment
== 16 * BITS_PER_UNIT
&& ncrn
% 2)
1478 gcc_assert (ncrn
+ nregs
<= NUM_ARG_REGS
);
1480 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1481 A reg is still generated for it, but the caller should be smart
1482 enough not to use it. */
1483 if (nregs
== 0 || nregs
== 1 || GET_MODE_CLASS (mode
) == MODE_INT
)
1485 pcum
->aapcs_reg
= gen_rtx_REG (mode
, R0_REGNUM
+ ncrn
);
1492 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1493 for (i
= 0; i
< nregs
; i
++)
1495 rtx tmp
= gen_rtx_REG (word_mode
, R0_REGNUM
+ ncrn
+ i
);
1496 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1497 GEN_INT (i
* UNITS_PER_WORD
));
1498 XVECEXP (par
, 0, i
) = tmp
;
1500 pcum
->aapcs_reg
= par
;
1503 pcum
->aapcs_nextncrn
= ncrn
+ nregs
;
1508 pcum
->aapcs_nextncrn
= NUM_ARG_REGS
;
1510 /* The argument is passed on stack; record the needed number of words for
1511 this argument (we can re-use NREGS) and align the total size if
1514 pcum
->aapcs_stack_words
= nregs
;
1515 if (aarch64_function_arg_alignment (mode
, type
) == 16 * BITS_PER_UNIT
)
1516 pcum
->aapcs_stack_size
= AARCH64_ROUND_UP (pcum
->aapcs_stack_size
,
1517 16 / UNITS_PER_WORD
) + 1;
1521 /* Implement TARGET_FUNCTION_ARG. */
1524 aarch64_function_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
1525 const_tree type
, bool named
)
1527 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1528 gcc_assert (pcum
->pcs_variant
== ARM_PCS_AAPCS64
);
1530 if (mode
== VOIDmode
)
1533 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
1534 return pcum
->aapcs_reg
;
1538 aarch64_init_cumulative_args (CUMULATIVE_ARGS
*pcum
,
1539 const_tree fntype ATTRIBUTE_UNUSED
,
1540 rtx libname ATTRIBUTE_UNUSED
,
1541 const_tree fndecl ATTRIBUTE_UNUSED
,
1542 unsigned n_named ATTRIBUTE_UNUSED
)
1544 pcum
->aapcs_ncrn
= 0;
1545 pcum
->aapcs_nvrn
= 0;
1546 pcum
->aapcs_nextncrn
= 0;
1547 pcum
->aapcs_nextnvrn
= 0;
1548 pcum
->pcs_variant
= ARM_PCS_AAPCS64
;
1549 pcum
->aapcs_reg
= NULL_RTX
;
1550 pcum
->aapcs_arg_processed
= false;
1551 pcum
->aapcs_stack_words
= 0;
1552 pcum
->aapcs_stack_size
= 0;
1558 aarch64_function_arg_advance (cumulative_args_t pcum_v
,
1559 enum machine_mode mode
,
1563 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1564 if (pcum
->pcs_variant
== ARM_PCS_AAPCS64
)
1566 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
1567 gcc_assert ((pcum
->aapcs_reg
!= NULL_RTX
)
1568 != (pcum
->aapcs_stack_words
!= 0));
1569 pcum
->aapcs_arg_processed
= false;
1570 pcum
->aapcs_ncrn
= pcum
->aapcs_nextncrn
;
1571 pcum
->aapcs_nvrn
= pcum
->aapcs_nextnvrn
;
1572 pcum
->aapcs_stack_size
+= pcum
->aapcs_stack_words
;
1573 pcum
->aapcs_stack_words
= 0;
1574 pcum
->aapcs_reg
= NULL_RTX
;
1579 aarch64_function_arg_regno_p (unsigned regno
)
1581 return ((GP_REGNUM_P (regno
) && regno
< R0_REGNUM
+ NUM_ARG_REGS
)
1582 || (FP_REGNUM_P (regno
) && regno
< V0_REGNUM
+ NUM_FP_ARG_REGS
));
1585 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1586 PARM_BOUNDARY bits of alignment, but will be given anything up
1587 to STACK_BOUNDARY bits if the type requires it. This makes sure
1588 that both before and after the layout of each argument, the Next
1589 Stacked Argument Address (NSAA) will have a minimum alignment of
1593 aarch64_function_arg_boundary (enum machine_mode mode
, const_tree type
)
1595 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
1597 if (alignment
< PARM_BOUNDARY
)
1598 alignment
= PARM_BOUNDARY
;
1599 if (alignment
> STACK_BOUNDARY
)
1600 alignment
= STACK_BOUNDARY
;
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */
1614 aarch64_pad_arg_upward (enum machine_mode mode
, const_tree type
)
1616 /* On little-endian targets, the least significant byte of every stack
1617 argument is passed at the lowest byte address of the stack slot. */
1618 if (!BYTES_BIG_ENDIAN
)
1621 /* Otherwise, integral, floating-point and pointer types are padded downward:
1622 the least significant byte of a stack argument is passed at the highest
1623 byte address of the stack slot. */
1625 ? (INTEGRAL_TYPE_P (type
) || SCALAR_FLOAT_TYPE_P (type
)
1626 || POINTER_TYPE_P (type
))
1627 : (SCALAR_INT_MODE_P (mode
) || SCALAR_FLOAT_MODE_P (mode
)))
1630 /* Everything else padded upward, i.e. data in first byte of stack slot. */
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
1657 aarch64_pad_reg_upward (enum machine_mode mode
, const_tree type
,
1658 bool first ATTRIBUTE_UNUSED
)
1661 /* Small composite types are always padded upward. */
1662 if (BYTES_BIG_ENDIAN
&& aarch64_composite_type_p (type
, mode
))
1664 HOST_WIDE_INT size
= (type
? int_size_in_bytes (type
)
1665 : GET_MODE_SIZE (mode
));
1666 if (size
< 2 * UNITS_PER_WORD
)
1670 /* Otherwise, use the default padding. */
1671 return !BYTES_BIG_ENDIAN
;
1674 static enum machine_mode
1675 aarch64_libgcc_cmp_return_mode (void)
1681 aarch64_frame_pointer_required (void)
1683 /* If the function contains dynamic stack allocations, we need to
1684 use the frame pointer to access the static parts of the frame. */
1685 if (cfun
->calls_alloca
)
1688 /* We may have turned flag_omit_frame_pointer on in order to have this
1689 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1690 and we'll check it here.
1691 If we really did set flag_omit_frame_pointer normally, then we return false
1692 (no frame pointer required) in all cases. */
1694 if (flag_omit_frame_pointer
&& !faked_omit_frame_pointer
)
1696 else if (flag_omit_leaf_frame_pointer
)
1697 return !crtl
->is_leaf
;
1701 /* Mark the registers that need to be saved by the callee and calculate
1702 the size of the callee-saved registers area and frame record (both FP
1703 and LR may be omitted). */
1705 aarch64_layout_frame (void)
1707 HOST_WIDE_INT offset
= 0;
1710 if (reload_completed
&& cfun
->machine
->frame
.laid_out
)
1713 cfun
->machine
->frame
.fp_lr_offset
= 0;
1715 /* First mark all the registers that really need to be saved... */
1716 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
1717 cfun
->machine
->frame
.reg_offset
[regno
] = -1;
1719 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1720 cfun
->machine
->frame
.reg_offset
[regno
] = -1;
1722 /* ... that includes the eh data registers (if needed)... */
1723 if (crtl
->calls_eh_return
)
1724 for (regno
= 0; EH_RETURN_DATA_REGNO (regno
) != INVALID_REGNUM
; regno
++)
1725 cfun
->machine
->frame
.reg_offset
[EH_RETURN_DATA_REGNO (regno
)] = 0;
1727 /* ... and any callee saved register that dataflow says is live. */
1728 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
1729 if (df_regs_ever_live_p (regno
)
1730 && !call_used_regs
[regno
])
1731 cfun
->machine
->frame
.reg_offset
[regno
] = 0;
1733 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1734 if (df_regs_ever_live_p (regno
)
1735 && !call_used_regs
[regno
])
1736 cfun
->machine
->frame
.reg_offset
[regno
] = 0;
1738 if (frame_pointer_needed
)
1740 cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] = 0;
1741 cfun
->machine
->frame
.reg_offset
[R29_REGNUM
] = 0;
1742 cfun
->machine
->frame
.hardfp_offset
= 2 * UNITS_PER_WORD
;
1745 /* Now assign stack slots for them. */
1746 for (regno
= R0_REGNUM
; regno
<= R28_REGNUM
; regno
++)
1747 if (cfun
->machine
->frame
.reg_offset
[regno
] != -1)
1749 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
1750 offset
+= UNITS_PER_WORD
;
1753 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1754 if (cfun
->machine
->frame
.reg_offset
[regno
] != -1)
1756 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
1757 offset
+= UNITS_PER_WORD
;
1760 if (frame_pointer_needed
)
1762 cfun
->machine
->frame
.reg_offset
[R29_REGNUM
] = offset
;
1763 offset
+= UNITS_PER_WORD
;
1764 cfun
->machine
->frame
.fp_lr_offset
= UNITS_PER_WORD
;
1767 if (cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] != -1)
1769 cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] = offset
;
1770 offset
+= UNITS_PER_WORD
;
1771 cfun
->machine
->frame
.fp_lr_offset
+= UNITS_PER_WORD
;
1774 cfun
->machine
->frame
.padding0
=
1775 (AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
) - offset
);
1776 offset
= AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
1778 cfun
->machine
->frame
.saved_regs_size
= offset
;
1779 cfun
->machine
->frame
.laid_out
= true;
1782 /* Make the last instruction frame-related and note that it performs
1783 the operation described by FRAME_PATTERN. */
1786 aarch64_set_frame_expr (rtx frame_pattern
)
1790 insn
= get_last_insn ();
1791 RTX_FRAME_RELATED_P (insn
) = 1;
1792 RTX_FRAME_RELATED_P (frame_pattern
) = 1;
1793 REG_NOTES (insn
) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
1799 aarch64_register_saved_on_entry (int regno
)
1801 return cfun
->machine
->frame
.reg_offset
[regno
] != -1;
1806 aarch64_save_or_restore_fprs (int start_offset
, int increment
,
1807 bool restore
, rtx base_rtx
)
1813 rtx (*gen_mem_ref
)(enum machine_mode
, rtx
) = (frame_pointer_needed
)? gen_frame_mem
: gen_rtx_MEM
;
1816 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1818 if (aarch64_register_saved_on_entry (regno
))
1821 mem
= gen_mem_ref (DFmode
,
1822 plus_constant (Pmode
,
1826 for (regno2
= regno
+ 1;
1827 regno2
<= V31_REGNUM
1828 && !aarch64_register_saved_on_entry (regno2
);
1833 if (regno2
<= V31_REGNUM
&&
1834 aarch64_register_saved_on_entry (regno2
))
1837 /* Next highest register to be saved. */
1838 mem2
= gen_mem_ref (DFmode
,
1842 start_offset
+ increment
));
1843 if (restore
== false)
1846 ( gen_store_pairdf (mem
, gen_rtx_REG (DFmode
, regno
),
1847 mem2
, gen_rtx_REG (DFmode
, regno2
)));
1853 ( gen_load_pairdf (gen_rtx_REG (DFmode
, regno
), mem
,
1854 gen_rtx_REG (DFmode
, regno2
), mem2
));
1856 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DFmode
, regno
));
1857 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DFmode
, regno2
));
	  /* The first part of a frame-related parallel insn
	     is always assumed to be relevant to the frame
	     calculations; subsequent parts are only
	     frame-related if explicitly marked.  */
1864 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0,
1867 start_offset
+= increment
* 2;
1871 if (restore
== false)
1872 insn
= emit_move_insn (mem
, gen_rtx_REG (DFmode
, regno
));
1875 insn
= emit_move_insn (gen_rtx_REG (DFmode
, regno
), mem
);
1876 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, regno
));
1878 start_offset
+= increment
;
1880 RTX_FRAME_RELATED_P (insn
) = 1;
/* Offset from the stack pointer of where the saves and
   restores have to happen.  */
1890 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset
,
1894 rtx base_rtx
= stack_pointer_rtx
;
1895 HOST_WIDE_INT start_offset
= offset
;
1896 HOST_WIDE_INT increment
= UNITS_PER_WORD
;
1897 rtx (*gen_mem_ref
)(enum machine_mode
, rtx
) = (frame_pointer_needed
)? gen_frame_mem
: gen_rtx_MEM
;
1898 unsigned limit
= (frame_pointer_needed
)? R28_REGNUM
: R30_REGNUM
;
1902 for (regno
= R0_REGNUM
; regno
<= limit
; regno
++)
1904 if (aarch64_register_saved_on_entry (regno
))
1907 mem
= gen_mem_ref (Pmode
,
1908 plus_constant (Pmode
,
1912 for (regno2
= regno
+ 1;
1914 && !aarch64_register_saved_on_entry (regno2
);
1919 if (regno2
<= limit
&&
1920 aarch64_register_saved_on_entry (regno2
))
1923 /* Next highest register to be saved. */
1924 mem2
= gen_mem_ref (Pmode
,
1928 start_offset
+ increment
));
1929 if (restore
== false)
1932 ( gen_store_pairdi (mem
, gen_rtx_REG (DImode
, regno
),
1933 mem2
, gen_rtx_REG (DImode
, regno2
)));
1939 ( gen_load_pairdi (gen_rtx_REG (DImode
, regno
), mem
,
1940 gen_rtx_REG (DImode
, regno2
), mem2
));
1942 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, regno
));
1943 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, regno2
));
	  /* The first part of a frame-related parallel insn
	     is always assumed to be relevant to the frame
	     calculations; subsequent parts are only
	     frame-related if explicitly marked.  */
1950 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0,
1953 start_offset
+= increment
* 2;
1957 if (restore
== false)
1958 insn
= emit_move_insn (mem
, gen_rtx_REG (DImode
, regno
));
1961 insn
= emit_move_insn (gen_rtx_REG (DImode
, regno
), mem
);
1962 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, regno
));
1964 start_offset
+= increment
;
1966 RTX_FRAME_RELATED_P (insn
) = 1;
1970 aarch64_save_or_restore_fprs (start_offset
, increment
, restore
, base_rtx
);
/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+ <-- arg_pointer_rtx
	|                               |
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+
	|                               |
	|  local variables              |
	|                               |
	+-------------------------------+ <-- frame_pointer_rtx
	|                               |
	|  callee-saved registers       |
	|                               |
	+-------------------------------+
	|  LR'                          |
	+-------------------------------+
	|  FP'                          |
      P +-------------------------------+ <-- hard_frame_pointer_rtx
	|  dynamic allocation           |
	+-------------------------------+
	|                               |
	|  outgoing stack arguments     |
	|                               |
	+-------------------------------+ <-- stack_pointer_rtx

   Dynamic stack allocations such as alloca insert data at point P.
   They decrease stack_pointer_rtx but leave frame_pointer_rtx and
   hard_frame_pointer_rtx unchanged.  */
/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */
2016 aarch64_expand_prologue (void)
2018 /* sub sp, sp, #<frame_size>
2019 stp {fp, lr}, [sp, #<frame_size> - 16]
2020 add fp, sp, #<frame_size> - hardfp_offset
2021 stp {cs_reg}, [fp, #-16] etc.
2023 sub sp, sp, <final_adjustment_if_any>
2025 HOST_WIDE_INT original_frame_size
; /* local variables + vararg save */
2026 HOST_WIDE_INT frame_size
, offset
;
2027 HOST_WIDE_INT fp_offset
; /* FP offset from SP */
2030 aarch64_layout_frame ();
2031 original_frame_size
= get_frame_size () + cfun
->machine
->saved_varargs_size
;
2032 gcc_assert ((!cfun
->machine
->saved_varargs_size
|| cfun
->stdarg
)
2033 && (cfun
->stdarg
|| !cfun
->machine
->saved_varargs_size
));
2034 frame_size
= (original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
2035 + crtl
->outgoing_args_size
);
2036 offset
= frame_size
= AARCH64_ROUND_UP (frame_size
,
2037 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2039 if (flag_stack_usage_info
)
2040 current_function_static_stack_size
= frame_size
;
2043 - original_frame_size
2044 - cfun
->machine
->frame
.saved_regs_size
);
  /* Store pairs and load pairs have a range of only -512 to 504.  */
2049 /* When the frame has a large size, an initial decrease is done on
2050 the stack pointer to jump over the callee-allocated save area for
2051 register varargs, the local variable area and/or the callee-saved
2052 register area. This will allow the pre-index write-back
2053 store pair instructions to be used for setting up the stack frame
2055 offset
= original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
;
2057 offset
= cfun
->machine
->frame
.saved_regs_size
;
2059 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2062 if (frame_size
>= 0x1000000)
2064 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2065 emit_move_insn (op0
, GEN_INT (-frame_size
));
2066 emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2067 aarch64_set_frame_expr (gen_rtx_SET
2068 (Pmode
, stack_pointer_rtx
,
2069 plus_constant (Pmode
,
2073 else if (frame_size
> 0)
2075 if ((frame_size
& 0xfff) != frame_size
)
2077 insn
= emit_insn (gen_add2_insn
2079 GEN_INT (-(frame_size
2080 & ~(HOST_WIDE_INT
)0xfff))));
2081 RTX_FRAME_RELATED_P (insn
) = 1;
2083 if ((frame_size
& 0xfff) != 0)
2085 insn
= emit_insn (gen_add2_insn
2087 GEN_INT (-(frame_size
2088 & (HOST_WIDE_INT
)0xfff))));
2089 RTX_FRAME_RELATED_P (insn
) = 1;
2098 /* Save the frame pointer and lr if the frame pointer is needed
2099 first. Make the frame pointer point to the location of the
2100 old frame pointer on the stack. */
2101 if (frame_pointer_needed
)
2107 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2108 GEN_INT (-offset
)));
2109 RTX_FRAME_RELATED_P (insn
) = 1;
2110 aarch64_set_frame_expr (gen_rtx_SET
2111 (Pmode
, stack_pointer_rtx
,
2112 gen_rtx_MINUS (Pmode
,
2114 GEN_INT (offset
))));
2115 mem_fp
= gen_frame_mem (DImode
,
2116 plus_constant (Pmode
,
2119 mem_lr
= gen_frame_mem (DImode
,
2120 plus_constant (Pmode
,
2124 insn
= emit_insn (gen_store_pairdi (mem_fp
,
2125 hard_frame_pointer_rtx
,
2127 gen_rtx_REG (DImode
,
2132 insn
= emit_insn (gen_storewb_pairdi_di
2133 (stack_pointer_rtx
, stack_pointer_rtx
,
2134 hard_frame_pointer_rtx
,
2135 gen_rtx_REG (DImode
, LR_REGNUM
),
2137 GEN_INT (GET_MODE_SIZE (DImode
) - offset
)));
2138 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 2)) = 1;
	  /* The first part of a frame-related parallel insn is always
	     assumed to be relevant to the frame calculations;
	     subsequent parts are only frame-related if explicitly
	     marked.  */
2145 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2146 RTX_FRAME_RELATED_P (insn
) = 1;
2148 /* Set up frame pointer to point to the location of the
2149 previous frame pointer on the stack. */
2150 insn
= emit_insn (gen_add3_insn (hard_frame_pointer_rtx
,
2152 GEN_INT (fp_offset
)));
2153 aarch64_set_frame_expr (gen_rtx_SET
2154 (Pmode
, hard_frame_pointer_rtx
,
2155 plus_constant (Pmode
,
2158 RTX_FRAME_RELATED_P (insn
) = 1;
2159 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
2160 hard_frame_pointer_rtx
));
2164 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2165 GEN_INT (-offset
)));
2166 RTX_FRAME_RELATED_P (insn
) = 1;
2169 aarch64_save_or_restore_callee_save_registers
2170 (fp_offset
+ cfun
->machine
->frame
.hardfp_offset
, 0);
2173 /* when offset >= 512,
2174 sub sp, sp, #<outgoing_args_size> */
2175 if (frame_size
> -1)
2177 if (crtl
->outgoing_args_size
> 0)
2179 insn
= emit_insn (gen_add2_insn
2181 GEN_INT (- crtl
->outgoing_args_size
)));
2182 RTX_FRAME_RELATED_P (insn
) = 1;
2187 /* Generate the epilogue instructions for returning from a function. */
2189 aarch64_expand_epilogue (bool for_sibcall
)
2191 HOST_WIDE_INT original_frame_size
, frame_size
, offset
;
2192 HOST_WIDE_INT fp_offset
;
2196 aarch64_layout_frame ();
2197 original_frame_size
= get_frame_size () + cfun
->machine
->saved_varargs_size
;
2198 frame_size
= (original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
2199 + crtl
->outgoing_args_size
);
2200 offset
= frame_size
= AARCH64_ROUND_UP (frame_size
,
2201 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2204 - original_frame_size
2205 - cfun
->machine
->frame
.saved_regs_size
);
2207 cfa_reg
= frame_pointer_needed
? hard_frame_pointer_rtx
: stack_pointer_rtx
;
  /* Store pairs and load pairs have a range of only -512 to 504.  */
2212 offset
= original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
;
2214 offset
= cfun
->machine
->frame
.saved_regs_size
;
2216 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2218 if (!frame_pointer_needed
&& crtl
->outgoing_args_size
> 0)
2220 insn
= emit_insn (gen_add2_insn
2222 GEN_INT (crtl
->outgoing_args_size
)));
2223 RTX_FRAME_RELATED_P (insn
) = 1;
2229 /* If there were outgoing arguments or we've done dynamic stack
2230 allocation, then restore the stack pointer from the frame
2231 pointer. This is at most one insn and more efficient than using
2232 GCC's internal mechanism. */
2233 if (frame_pointer_needed
2234 && (crtl
->outgoing_args_size
|| cfun
->calls_alloca
))
2236 insn
= emit_insn (gen_add3_insn (stack_pointer_rtx
,
2237 hard_frame_pointer_rtx
,
2238 GEN_INT (- fp_offset
)));
2239 RTX_FRAME_RELATED_P (insn
) = 1;
2240 /* As SP is set to (FP - fp_offset), according to the rules in
2241 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2242 from the value of SP from now on. */
2243 cfa_reg
= stack_pointer_rtx
;
2246 aarch64_save_or_restore_callee_save_registers
2247 (fp_offset
+ cfun
->machine
->frame
.hardfp_offset
, 1);
2249 /* Restore the frame pointer and lr if the frame pointer is needed. */
2252 if (frame_pointer_needed
)
2258 mem_fp
= gen_frame_mem (DImode
,
2259 plus_constant (Pmode
,
2262 mem_lr
= gen_frame_mem (DImode
,
2263 plus_constant (Pmode
,
2267 insn
= emit_insn (gen_load_pairdi (hard_frame_pointer_rtx
,
2269 gen_rtx_REG (DImode
,
2275 insn
= emit_insn (gen_loadwb_pairdi_di
2278 hard_frame_pointer_rtx
,
2279 gen_rtx_REG (DImode
, LR_REGNUM
),
2281 GEN_INT (GET_MODE_SIZE (DImode
) + offset
)));
2282 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 2)) = 1;
2283 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
2284 (gen_rtx_SET (Pmode
, stack_pointer_rtx
,
2285 plus_constant (Pmode
, cfa_reg
,
/* The first part of a frame-related parallel insn
   is always assumed to be relevant to the frame
   calculations; subsequent parts are only
   frame-related if explicitly marked.  */
2293 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2294 RTX_FRAME_RELATED_P (insn
) = 1;
2295 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
2296 add_reg_note (insn
, REG_CFA_RESTORE
,
2297 gen_rtx_REG (DImode
, LR_REGNUM
));
2301 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2303 RTX_FRAME_RELATED_P (insn
) = 1;
2308 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2310 RTX_FRAME_RELATED_P (insn
) = 1;
2314 /* Stack adjustment for exception handler. */
2315 if (crtl
->calls_eh_return
)
2317 /* We need to unwind the stack by the offset computed by
2318 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2319 based on SP. Ideally we would update the SP and define the
2320 CFA along the lines of:
2322 SP = SP + EH_RETURN_STACKADJ_RTX
2323 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
However the dwarf emitter only understands a constant
register offset.
2328 The solution chosen here is to use the otherwise unused IP0
2329 as a temporary register to hold the current SP value. The
2330 CFA is described using IP0 then SP is modified. */
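/* Illustrative sketch (not part of the emitted RTL): the code below
   amounts to

       mov  ip0, sp                      ; REG_CFA_DEF_CFA: CFA = ip0
       add  sp, sp, <EH_RETURN_STACKADJ_RTX>

   so the CFA stays well-defined via IP0 while SP is adjusted by a
   run-time amount.  */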
2332 rtx ip0
= gen_rtx_REG (DImode
, IP0_REGNUM
);
2334 insn
= emit_move_insn (ip0
, stack_pointer_rtx
);
2335 add_reg_note (insn
, REG_CFA_DEF_CFA
, ip0
);
2336 RTX_FRAME_RELATED_P (insn
) = 1;
2338 emit_insn (gen_add2_insn (stack_pointer_rtx
, EH_RETURN_STACKADJ_RTX
));
2340 /* Ensure the assignment to IP0 does not get optimized away. */
2344 if (frame_size
> -1)
2346 if (frame_size
>= 0x1000000)
2348 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2349 emit_move_insn (op0
, GEN_INT (frame_size
));
2350 emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2351 aarch64_set_frame_expr (gen_rtx_SET
2352 (Pmode
, stack_pointer_rtx
,
2353 plus_constant (Pmode
,
2357 else if (frame_size
> 0)
2359 if ((frame_size
& 0xfff) != 0)
2361 insn
= emit_insn (gen_add2_insn
2363 GEN_INT ((frame_size
2364 & (HOST_WIDE_INT
) 0xfff))));
2365 RTX_FRAME_RELATED_P (insn
) = 1;
2367 if ((frame_size
& 0xfff) != frame_size
)
2369 insn
= emit_insn (gen_add2_insn
2371 GEN_INT ((frame_size
2372 & ~ (HOST_WIDE_INT
) 0xfff))));
2373 RTX_FRAME_RELATED_P (insn
) = 1;
2377 aarch64_set_frame_expr (gen_rtx_SET (Pmode
, stack_pointer_rtx
,
2378 plus_constant (Pmode
,
2383 emit_use (gen_rtx_REG (DImode
, LR_REGNUM
));
2385 emit_jump_insn (ret_rtx
);
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
2392 aarch64_final_eh_return_addr (void)
2394 HOST_WIDE_INT original_frame_size
, frame_size
, offset
, fp_offset
;
2395 aarch64_layout_frame ();
2396 original_frame_size
= get_frame_size () + cfun
->machine
->saved_varargs_size
;
2397 frame_size
= (original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
2398 + crtl
->outgoing_args_size
);
2399 offset
= frame_size
= AARCH64_ROUND_UP (frame_size
,
2400 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2402 - original_frame_size
2403 - cfun
->machine
->frame
.saved_regs_size
;
2405 if (cfun
->machine
->frame
.reg_offset
[LR_REGNUM
] < 0)
2406 return gen_rtx_REG (DImode
, LR_REGNUM
);
2408 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2409 result in a store to save LR introduced by builtin_eh_return () being
2410 incorrectly deleted because the alias is not detected.
2411 So in the calculation of the address to copy the exception unwinding
2412 return address to, we note 2 cases.
2413 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2414 we return a SP-relative location since all the addresses are SP-relative
2415 in this case. This prevents the store from being optimized away.
2416 If the fp_offset is not 0, then the addresses will be FP-relative and
2417 therefore we return a FP-relative location. */
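/* Worked example (illustrative): with a frame pointer and fp_offset == 0,
   SP == FP here, so the slot below is returned as an SP-relative address
   ([sp, #UNITS_PER_WORD]); with a non-zero fp_offset it is returned
   FP-relative ([fp, #UNITS_PER_WORD]) to match the other FP-relative
   addresses in the frame.  */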
2419 if (frame_pointer_needed
)
2422 return gen_frame_mem (DImode
,
2423 plus_constant (Pmode
, hard_frame_pointer_rtx
, UNITS_PER_WORD
));
2425 return gen_frame_mem (DImode
,
2426 plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
));
2429 /* If FP is not needed, we calculate the location of LR, which would be
2430 at the top of the saved registers block. */
2432 return gen_frame_mem (DImode
,
2433 plus_constant (Pmode
,
2436 + cfun
->machine
->frame
.saved_regs_size
2437 - 2 * UNITS_PER_WORD
));
2440 /* Output code to build up a constant in a register. */
2442 aarch64_build_constant (int regnum
, HOST_WIDE_INT val
)
2444 if (aarch64_bitmask_imm (val
, DImode
))
2445 emit_move_insn (gen_rtx_REG (Pmode
, regnum
), GEN_INT (val
));
2451 HOST_WIDE_INT valp
= val
>> 16;
2455 for (i
= 16; i
< 64; i
+= 16)
2457 valm
= (valp
& 0xffff);
/* zcount contains the number of additional MOVK instructions
   required if the constant is built up with an initial MOVZ instruction,
   while ncount is the number of MOVK instructions required if starting
   with a MOVN instruction.  Choose the sequence that yields the fewest
   instructions, preferring MOVZ when both counts are the same.  */
2474 if (ncount
< zcount
)
2476 emit_move_insn (gen_rtx_REG (Pmode
, regnum
),
2477 GEN_INT ((~val
) & 0xffff));
2482 emit_move_insn (gen_rtx_REG (Pmode
, regnum
),
2483 GEN_INT (val
& 0xffff));
2489 for (i
= 16; i
< 64; i
+= 16)
2491 if ((val
& 0xffff) != tval
)
2492 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode
, regnum
),
2493 GEN_INT (i
), GEN_INT (val
& 0xffff)));
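/* Worked example (illustrative): for val = 0xffffffffffff1234 the three
   upper 16-bit chunks are all 0xffff, so ncount (0) < zcount (3) and a
   single MOVN of the low chunk suffices; for val = 0x21234 only one
   upper chunk is non-zero, so the MOVZ path is taken and one MOVK
   follows.  */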
static void
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
{
  HOST_WIDE_INT mdelta = delta;
  rtx this_rtx = gen_rtx_REG (Pmode, regnum);
  rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);

  if (mdelta < 0)
    mdelta = -mdelta;

  if (mdelta >= 4096 * 4096)
    {
      aarch64_build_constant (scratchreg, delta);
      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
    }
  else if (mdelta > 0)
    {
      if (mdelta >= 4096)
        {
          emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
          rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
          if (delta < 0)
            emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                    gen_rtx_MINUS (Pmode, this_rtx, shift)));
          else
            emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                    gen_rtx_PLUS (Pmode, this_rtx, shift)));
        }
      if (mdelta % 4096 != 0)
        {
          scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
          emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                  gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
        }
    }
}
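/* Worked example (illustrative): for delta = 0x5008 the code above emits
   "mov scratch, #5", adds (scratch << 12) to the register, and then adds
   the remaining #8 directly; deltas of 4096 * 4096 (0x1000000) or more
   fall back to aarch64_build_constant plus a single register add.  */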
2536 /* Output code to add DELTA to the first argument, and then jump
2537 to FUNCTION. Used for C++ multiple inheritance. */
2539 aarch64_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
2540 HOST_WIDE_INT delta
,
2541 HOST_WIDE_INT vcall_offset
,
2544 /* The this pointer is always in x0. Note that this differs from
2545 Arm where the this pointer maybe bumped to r1 if r0 is required
2546 to return a pointer to an aggregate. On AArch64 a result value
2547 pointer will be in x8. */
2548 int this_regno
= R0_REGNUM
;
2549 rtx this_rtx
, temp0
, temp1
, addr
, insn
, funexp
;
2551 reload_completed
= 1;
2552 emit_note (NOTE_INSN_PROLOGUE_END
);
2554 if (vcall_offset
== 0)
2555 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
2558 gcc_assert ((vcall_offset
& (POINTER_BYTES
- 1)) == 0);
2560 this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
2561 temp0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2562 temp1
= gen_rtx_REG (Pmode
, IP1_REGNUM
);
2567 if (delta
>= -256 && delta
< 256)
2568 addr
= gen_rtx_PRE_MODIFY (Pmode
, this_rtx
,
2569 plus_constant (Pmode
, this_rtx
, delta
));
2571 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
2574 if (Pmode
== ptr_mode
)
2575 aarch64_emit_move (temp0
, gen_rtx_MEM (ptr_mode
, addr
));
2577 aarch64_emit_move (temp0
,
2578 gen_rtx_ZERO_EXTEND (Pmode
,
2579 gen_rtx_MEM (ptr_mode
, addr
)));
2581 if (vcall_offset
>= -256 && vcall_offset
< 4096 * POINTER_BYTES
)
2582 addr
= plus_constant (Pmode
, temp0
, vcall_offset
);
2585 aarch64_build_constant (IP1_REGNUM
, vcall_offset
);
2586 addr
= gen_rtx_PLUS (Pmode
, temp0
, temp1
);
2589 if (Pmode
== ptr_mode
)
2590 aarch64_emit_move (temp1
, gen_rtx_MEM (ptr_mode
,addr
));
2592 aarch64_emit_move (temp1
,
2593 gen_rtx_SIGN_EXTEND (Pmode
,
2594 gen_rtx_MEM (ptr_mode
, addr
)));
2596 emit_insn (gen_add2_insn (this_rtx
, temp1
));
2599 /* Generate a tail call to the target function. */
2600 if (!TREE_USED (function
))
2602 assemble_external (function
);
2603 TREE_USED (function
) = 1;
2605 funexp
= XEXP (DECL_RTL (function
), 0);
2606 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
2607 insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
2608 SIBLING_CALL_P (insn
) = 1;
2610 insn
= get_insns ();
2611 shorten_branches (insn
);
2612 final_start_function (insn
, file
, 1);
2613 final (insn
, file
, 1);
2614 final_end_function ();
2616 /* Stop pretending to be a post-reload pass. */
2617 reload_completed
= 0;
static int
aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}

/* Return true if X contains a reference to a thread-local symbol.  */

static bool
aarch64_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
}
/* Comparison function for qsort/bsearch over the bitmask-immediate table.  */

static int
aarch64_bitmasks_cmp (const void *i1, const void *i2)
{
  const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
  const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;

  if (*imm1 < *imm2)
    return -1;
  if (*imm1 > *imm2)
    return +1;
  return 0;
}
/* Build the table of all valid bitmask immediates.  */

static void
aarch64_build_bitmask_table (void)
{
  unsigned HOST_WIDE_INT mask, imm;
  unsigned int log_e, e, s, r;
  unsigned int nimms = 0;

  for (log_e = 1; log_e <= 6; log_e++)
    {
      e = 1 << log_e;
      if (e == 64)
        mask = ~(HOST_WIDE_INT) 0;
      else
        mask = ((HOST_WIDE_INT) 1 << e) - 1;
      for (s = 1; s < e; s++)
        {
          for (r = 0; r < e; r++)
            {
              /* set s consecutive bits to 1 (s < 64) */
              imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
              /* rotate right by r */
              if (r != 0)
                imm = ((imm >> r) | (imm << (e - r))) & mask;
              /* replicate the constant depending on SIMD size */
              switch (log_e) {
              case 1: imm |= (imm <<  2);
              case 2: imm |= (imm <<  4);
              case 3: imm |= (imm <<  8);
              case 4: imm |= (imm << 16);
              case 5: imm |= (imm << 32);
              case 6:
                break;
              default:
                gcc_unreachable ();
              }
              gcc_assert (nimms < AARCH64_NUM_BITMASKS);
              aarch64_bitmasks[nimms++] = imm;
            }
        }
    }

  gcc_assert (nimms == AARCH64_NUM_BITMASKS);
  qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
         aarch64_bitmasks_cmp);
}
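/* Worked example (illustrative): with e == 8, s == 3, r == 1 the loops
   above form 0x07, rotate it within the byte to 0x83, and the switch
   fall-through replicates it to 0x8383838383838383, which becomes one
   entry of the sorted table searched by aarch64_bitmask_imm.  */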
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
}

/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
        return 1;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}
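/* Worked examples (illustrative): 0xabc and 0xabc000 both satisfy
   aarch64_uimm12_shift (shift 0 and shift 12), while 0xabc00 does not;
   0x12340000 is accepted by aarch64_movw_imm for SImode (a MOVZ with a
   16-bit shift), but 0x12340001 is not.  */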
/* Return true if val is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) < 8)
    {
      /* Replicate bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
      val |= val << 32;
    }
  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
                  sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
}

/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return 1;
  return aarch64_bitmask_imm (val, mode);
}
static bool
aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
    {
      if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
          != SYMBOL_FORCE_TO_MEM)
        return true;
      else
        /* Avoid generating a 64-bit relocation in ILP32; leave
           to aarch64_expand_mov_immediate to handle it properly.  */
        return mode != ptr_mode;
    }

  return aarch64_tls_referenced_p (x);
}
/* Return true if register REGNO is a valid index register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_index_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }
  return GP_REGNUM_P (regno);
}

/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_base_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }

  /* The fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  return (GP_REGNUM_P (regno)
          || regno == SP_REGNUM
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}

/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_base_register_rtx_p (rtx x, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
}
2843 /* Return true if address offset is a valid index. If it is, fill in INFO
2844 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2847 aarch64_classify_index (struct aarch64_address_info
*info
, rtx x
,
2848 enum machine_mode mode
, bool strict_p
)
2850 enum aarch64_address_type type
;
2855 if ((REG_P (x
) || GET_CODE (x
) == SUBREG
)
2856 && GET_MODE (x
) == Pmode
)
2858 type
= ADDRESS_REG_REG
;
2862 /* (sign_extend:DI (reg:SI)) */
2863 else if ((GET_CODE (x
) == SIGN_EXTEND
2864 || GET_CODE (x
) == ZERO_EXTEND
)
2865 && GET_MODE (x
) == DImode
2866 && GET_MODE (XEXP (x
, 0)) == SImode
)
2868 type
= (GET_CODE (x
) == SIGN_EXTEND
)
2869 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2870 index
= XEXP (x
, 0);
2873 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2874 else if (GET_CODE (x
) == MULT
2875 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
2876 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
2877 && GET_MODE (XEXP (x
, 0)) == DImode
2878 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
2879 && CONST_INT_P (XEXP (x
, 1)))
2881 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
2882 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2883 index
= XEXP (XEXP (x
, 0), 0);
2884 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
2886 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2887 else if (GET_CODE (x
) == ASHIFT
2888 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
2889 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
2890 && GET_MODE (XEXP (x
, 0)) == DImode
2891 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
2892 && CONST_INT_P (XEXP (x
, 1)))
2894 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
2895 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2896 index
= XEXP (XEXP (x
, 0), 0);
2897 shift
= INTVAL (XEXP (x
, 1));
2899 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2900 else if ((GET_CODE (x
) == SIGN_EXTRACT
2901 || GET_CODE (x
) == ZERO_EXTRACT
)
2902 && GET_MODE (x
) == DImode
2903 && GET_CODE (XEXP (x
, 0)) == MULT
2904 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2905 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
2907 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
2908 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2909 index
= XEXP (XEXP (x
, 0), 0);
2910 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
2911 if (INTVAL (XEXP (x
, 1)) != 32 + shift
2912 || INTVAL (XEXP (x
, 2)) != 0)
2915 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2916 (const_int 0xffffffff<<shift)) */
2917 else if (GET_CODE (x
) == AND
2918 && GET_MODE (x
) == DImode
2919 && GET_CODE (XEXP (x
, 0)) == MULT
2920 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2921 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
2922 && CONST_INT_P (XEXP (x
, 1)))
2924 type
= ADDRESS_REG_UXTW
;
2925 index
= XEXP (XEXP (x
, 0), 0);
2926 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
2927 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
2930 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2931 else if ((GET_CODE (x
) == SIGN_EXTRACT
2932 || GET_CODE (x
) == ZERO_EXTRACT
)
2933 && GET_MODE (x
) == DImode
2934 && GET_CODE (XEXP (x
, 0)) == ASHIFT
2935 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2936 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
2938 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
2939 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2940 index
= XEXP (XEXP (x
, 0), 0);
2941 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
2942 if (INTVAL (XEXP (x
, 1)) != 32 + shift
2943 || INTVAL (XEXP (x
, 2)) != 0)
2946 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2947 (const_int 0xffffffff<<shift)) */
2948 else if (GET_CODE (x
) == AND
2949 && GET_MODE (x
) == DImode
2950 && GET_CODE (XEXP (x
, 0)) == ASHIFT
2951 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2952 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
2953 && CONST_INT_P (XEXP (x
, 1)))
2955 type
= ADDRESS_REG_UXTW
;
2956 index
= XEXP (XEXP (x
, 0), 0);
2957 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
2958 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
2961 /* (mult:P (reg:P) (const_int scale)) */
2962 else if (GET_CODE (x
) == MULT
2963 && GET_MODE (x
) == Pmode
2964 && GET_MODE (XEXP (x
, 0)) == Pmode
2965 && CONST_INT_P (XEXP (x
, 1)))
2967 type
= ADDRESS_REG_REG
;
2968 index
= XEXP (x
, 0);
2969 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
2971 /* (ashift:P (reg:P) (const_int shift)) */
2972 else if (GET_CODE (x
) == ASHIFT
2973 && GET_MODE (x
) == Pmode
2974 && GET_MODE (XEXP (x
, 0)) == Pmode
2975 && CONST_INT_P (XEXP (x
, 1)))
2977 type
= ADDRESS_REG_REG
;
2978 index
= XEXP (x
, 0);
2979 shift
= INTVAL (XEXP (x
, 1));
2984 if (GET_CODE (index
) == SUBREG
)
2985 index
= SUBREG_REG (index
);
2988 (shift
> 0 && shift
<= 3
2989 && (1 << shift
) == GET_MODE_SIZE (mode
)))
2991 && aarch64_regno_ok_for_index_p (REGNO (index
), strict_p
))
2994 info
->offset
= index
;
2995 info
->shift
= shift
;
static inline bool
offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
          && offset < 64 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}

static inline bool
offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static inline bool
offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
          && offset < 4096 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}
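/* Worked example (illustrative): for DImode (8 bytes) the predicates
   above accept, respectively, multiples of 8 in [-512, 504], any offset
   in [-256, 255], and multiples of 8 in [0, 32760].  */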
3025 /* Return true if X is a valid address for machine mode MODE. If it is,
3026 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3027 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3030 aarch64_classify_address (struct aarch64_address_info
*info
,
3031 rtx x
, enum machine_mode mode
,
3032 RTX_CODE outer_code
, bool strict_p
)
3034 enum rtx_code code
= GET_CODE (x
);
3036 bool allow_reg_index_p
=
3037 outer_code
!= PARALLEL
&& GET_MODE_SIZE(mode
) != 16;
/* Don't support anything other than POST_INC or REG addressing for
   AdvSIMD modes.  */
3041 if (aarch64_vector_mode_p (mode
)
3042 && (code
!= POST_INC
&& code
!= REG
))
3049 info
->type
= ADDRESS_REG_IMM
;
3051 info
->offset
= const0_rtx
;
3052 return aarch64_base_register_rtx_p (x
, strict_p
);
3057 if (GET_MODE_SIZE (mode
) != 0
3058 && CONST_INT_P (op1
)
3059 && aarch64_base_register_rtx_p (op0
, strict_p
))
3061 HOST_WIDE_INT offset
= INTVAL (op1
);
3063 info
->type
= ADDRESS_REG_IMM
;
3067 /* TImode and TFmode values are allowed in both pairs of X
3068 registers and individual Q registers. The available
3070 X,X: 7-bit signed scaled offset
3071 Q: 9-bit signed offset
3072 We conservatively require an offset representable in either mode.
3074 if (mode
== TImode
|| mode
== TFmode
)
3075 return (offset_7bit_signed_scaled_p (mode
, offset
)
3076 && offset_9bit_signed_unscaled_p (mode
, offset
));
3078 if (outer_code
== PARALLEL
)
3079 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3080 && offset_7bit_signed_scaled_p (mode
, offset
));
3082 return (offset_9bit_signed_unscaled_p (mode
, offset
)
3083 || offset_12bit_unsigned_scaled_p (mode
, offset
));
3086 if (allow_reg_index_p
)
3088 /* Look for base + (scaled/extended) index register. */
3089 if (aarch64_base_register_rtx_p (op0
, strict_p
)
3090 && aarch64_classify_index (info
, op1
, mode
, strict_p
))
3095 if (aarch64_base_register_rtx_p (op1
, strict_p
)
3096 && aarch64_classify_index (info
, op0
, mode
, strict_p
))
3109 info
->type
= ADDRESS_REG_WB
;
3110 info
->base
= XEXP (x
, 0);
3111 info
->offset
= NULL_RTX
;
3112 return aarch64_base_register_rtx_p (info
->base
, strict_p
);
3116 info
->type
= ADDRESS_REG_WB
;
3117 info
->base
= XEXP (x
, 0);
3118 if (GET_CODE (XEXP (x
, 1)) == PLUS
3119 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
3120 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), info
->base
)
3121 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3123 HOST_WIDE_INT offset
;
3124 info
->offset
= XEXP (XEXP (x
, 1), 1);
3125 offset
= INTVAL (info
->offset
);
3127 /* TImode and TFmode values are allowed in both pairs of X
3128 registers and individual Q registers. The available
3130 X,X: 7-bit signed scaled offset
3131 Q: 9-bit signed offset
3132 We conservatively require an offset representable in either mode.
3134 if (mode
== TImode
|| mode
== TFmode
)
3135 return (offset_7bit_signed_scaled_p (mode
, offset
)
3136 && offset_9bit_signed_unscaled_p (mode
, offset
));
3138 if (outer_code
== PARALLEL
)
3139 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3140 && offset_7bit_signed_scaled_p (mode
, offset
));
3142 return offset_9bit_signed_unscaled_p (mode
, offset
);
3149 /* load literal: pc-relative constant pool entry. Only supported
3150 for SI mode or larger. */
3151 info
->type
= ADDRESS_SYMBOLIC
;
3152 if (outer_code
!= PARALLEL
&& GET_MODE_SIZE (mode
) >= 4)
3156 split_const (x
, &sym
, &addend
);
3157 return (GET_CODE (sym
) == LABEL_REF
3158 || (GET_CODE (sym
) == SYMBOL_REF
3159 && CONSTANT_POOL_ADDRESS_P (sym
)));
3164 info
->type
= ADDRESS_LO_SUM
;
3165 info
->base
= XEXP (x
, 0);
3166 info
->offset
= XEXP (x
, 1);
3167 if (allow_reg_index_p
3168 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3171 split_const (info
->offset
, &sym
, &offs
);
3172 if (GET_CODE (sym
) == SYMBOL_REF
3173 && (aarch64_classify_symbol (sym
, SYMBOL_CONTEXT_MEM
)
3174 == SYMBOL_SMALL_ABSOLUTE
))
3176 /* The symbol and offset must be aligned to the access size. */
3178 unsigned int ref_size
;
3180 if (CONSTANT_POOL_ADDRESS_P (sym
))
3181 align
= GET_MODE_ALIGNMENT (get_pool_mode (sym
));
3182 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym
))
3184 tree exp
= SYMBOL_REF_DECL (sym
);
3185 align
= TYPE_ALIGN (TREE_TYPE (exp
));
3186 align
= CONSTANT_ALIGNMENT (exp
, align
);
3188 else if (SYMBOL_REF_DECL (sym
))
3189 align
= DECL_ALIGN (SYMBOL_REF_DECL (sym
));
3191 align
= BITS_PER_UNIT
;
3193 ref_size
= GET_MODE_SIZE (mode
);
3195 ref_size
= GET_MODE_SIZE (DImode
);
3197 return ((INTVAL (offs
) & (ref_size
- 1)) == 0
3198 && ((align
/ BITS_PER_UNIT
) & (ref_size
- 1)) == 0);
/* Return true if X is a symbolic constant address.  */
static bool
aarch64_symbolic_address_p (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
}

/* Classify the base of symbolic expression X, given that X appears in
   context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbolic_expression (rtx x,
                                      enum aarch64_symbol_context context)
{
  rtx offset;

  split_const (x, &x, &offset);
  return aarch64_classify_symbol (x, context);
}
/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  */
static bool
aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
   pair operation.  */
bool
aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
                              RTX_CODE outer_code, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
}
/* Return TRUE if rtx X is immediate constant 0.0  */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
  REAL_VALUE_TYPE r;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return !HONOR_SIGNED_ZEROS (GET_MODE (x));
  return REAL_VALUES_EQUAL (r, dconst0);
}

/* Return the fixed registers used for condition codes.  */

static bool
aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}
3279 aarch64_select_cc_mode (RTX_CODE code
, rtx x
, rtx y
)
3281 /* All floating point compares return CCFP if it is an equality
3282 comparison, and CCFPE otherwise. */
3283 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
3310 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3312 && (code
== EQ
|| code
== NE
|| code
== LT
|| code
== GE
)
3313 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
|| GET_CODE (x
) == AND
3314 || GET_CODE (x
) == NEG
))
3317 /* A compare with a shifted or negated operand. Because of canonicalization,
3318 the comparison will have to be swapped when we emit the assembly
3320 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3321 && (GET_CODE (y
) == REG
|| GET_CODE (y
) == SUBREG
)
3322 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
3323 || GET_CODE (x
) == LSHIFTRT
3324 || GET_CODE (x
) == ZERO_EXTEND
|| GET_CODE (x
) == SIGN_EXTEND
3325 || GET_CODE (x
) == NEG
))
3328 /* A compare of a mode narrower than SI mode against zero can be done
3329 by extending the value in the comparison. */
3330 if ((GET_MODE (x
) == QImode
|| GET_MODE (x
) == HImode
)
3332 /* Only use sign-extension if we really need it. */
3333 return ((code
== GT
|| code
== GE
|| code
== LE
|| code
== LT
)
3334 ? CC_SESWPmode
: CC_ZESWPmode
);
3336 /* For everything else, return CCmode. */
3341 aarch64_get_condition_code (rtx x
)
3343 enum machine_mode mode
= GET_MODE (XEXP (x
, 0));
3344 enum rtx_code comp_code
= GET_CODE (x
);
3346 if (GET_MODE_CLASS (mode
) != MODE_CC
)
3347 mode
= SELECT_CC_MODE (comp_code
, XEXP (x
, 0), XEXP (x
, 1));
3355 case GE
: return AARCH64_GE
;
3356 case GT
: return AARCH64_GT
;
3357 case LE
: return AARCH64_LS
;
3358 case LT
: return AARCH64_MI
;
3359 case NE
: return AARCH64_NE
;
3360 case EQ
: return AARCH64_EQ
;
3361 case ORDERED
: return AARCH64_VC
;
3362 case UNORDERED
: return AARCH64_VS
;
3363 case UNLT
: return AARCH64_LT
;
3364 case UNLE
: return AARCH64_LE
;
3365 case UNGT
: return AARCH64_HI
;
3366 case UNGE
: return AARCH64_PL
;
3367 default: gcc_unreachable ();
3374 case NE
: return AARCH64_NE
;
3375 case EQ
: return AARCH64_EQ
;
3376 case GE
: return AARCH64_GE
;
3377 case GT
: return AARCH64_GT
;
3378 case LE
: return AARCH64_LE
;
3379 case LT
: return AARCH64_LT
;
3380 case GEU
: return AARCH64_CS
;
3381 case GTU
: return AARCH64_HI
;
3382 case LEU
: return AARCH64_LS
;
3383 case LTU
: return AARCH64_CC
;
3384 default: gcc_unreachable ();
3393 case NE
: return AARCH64_NE
;
3394 case EQ
: return AARCH64_EQ
;
3395 case GE
: return AARCH64_LE
;
3396 case GT
: return AARCH64_LT
;
3397 case LE
: return AARCH64_GE
;
3398 case LT
: return AARCH64_GT
;
3399 case GEU
: return AARCH64_LS
;
3400 case GTU
: return AARCH64_CC
;
3401 case LEU
: return AARCH64_CS
;
3402 case LTU
: return AARCH64_HI
;
3403 default: gcc_unreachable ();
3410 case NE
: return AARCH64_NE
;
3411 case EQ
: return AARCH64_EQ
;
3412 case GE
: return AARCH64_PL
;
3413 case LT
: return AARCH64_MI
;
3414 default: gcc_unreachable ();
3425 bit_count (unsigned HOST_WIDE_INT value
)
3439 aarch64_print_operand (FILE *f
, rtx x
, char code
)
3443 /* An integer or symbol address without a preceding # sign. */
3445 switch (GET_CODE (x
))
3448 fprintf (f
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
3452 output_addr_const (f
, x
);
3456 if (GET_CODE (XEXP (x
, 0)) == PLUS
3457 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
3459 output_addr_const (f
, x
);
3465 output_operand_lossage ("Unsupported operand for code '%c'", code
);
3470 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3474 if (GET_CODE (x
) != CONST_INT
3475 || (n
= exact_log2 (INTVAL (x
) & ~7)) <= 0)
3477 output_operand_lossage ("invalid operand for '%%%c'", code
);
3493 output_operand_lossage ("invalid operand for '%%%c'", code
);
3503 /* Print N such that 2^N == X. */
3504 if (GET_CODE (x
) != CONST_INT
|| (n
= exact_log2 (INTVAL (x
))) < 0)
3506 output_operand_lossage ("invalid operand for '%%%c'", code
);
3510 asm_fprintf (f
, "%d", n
);
3515 /* Print the number of non-zero bits in X (a const_int). */
3516 if (GET_CODE (x
) != CONST_INT
)
3518 output_operand_lossage ("invalid operand for '%%%c'", code
);
3522 asm_fprintf (f
, "%u", bit_count (INTVAL (x
)));
3526 /* Print the higher numbered register of a pair (TImode) of regs. */
3527 if (GET_CODE (x
) != REG
|| !GP_REGNUM_P (REGNO (x
) + 1))
3529 output_operand_lossage ("invalid operand for '%%%c'", code
);
3533 asm_fprintf (f
, "%s", reg_names
[REGNO (x
) + 1]);
3537 /* Print a condition (eq, ne, etc). */
3539 /* CONST_TRUE_RTX means always -- that's the default. */
3540 if (x
== const_true_rtx
)
3543 if (!COMPARISON_P (x
))
3545 output_operand_lossage ("invalid operand for '%%%c'", code
);
3549 fputs (aarch64_condition_codes
[aarch64_get_condition_code (x
)], f
);
3553 /* Print the inverse of a condition (eq <-> ne, etc). */
3555 /* CONST_TRUE_RTX means never -- that's the default. */
3556 if (x
== const_true_rtx
)
3562 if (!COMPARISON_P (x
))
3564 output_operand_lossage ("invalid operand for '%%%c'", code
);
3568 fputs (aarch64_condition_codes
[AARCH64_INVERSE_CONDITION_CODE
3569 (aarch64_get_condition_code (x
))], f
);
3577 /* Print a scalar FP/SIMD register name. */
3578 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3580 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3583 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - V0_REGNUM
);
3590 /* Print the first FP/SIMD register name in a list. */
3591 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3593 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3596 asm_fprintf (f
, "v%d", REGNO (x
) - V0_REGNUM
+ (code
- 'S'));
3600 /* Print bottom 16 bits of integer constant in hex. */
3601 if (GET_CODE (x
) != CONST_INT
)
3603 output_operand_lossage ("invalid operand for '%%%c'", code
);
3606 asm_fprintf (f
, "0x%wx", UINTVAL (x
) & 0xffff);
3611 /* Print a general register name or the zero register (32-bit or
3614 || (CONST_DOUBLE_P (x
) && aarch64_float_const_zero_rtx_p (x
)))
3616 asm_fprintf (f
, "%czr", code
);
3620 if (REG_P (x
) && GP_REGNUM_P (REGNO (x
)))
3622 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - R0_REGNUM
);
3626 if (REG_P (x
) && REGNO (x
) == SP_REGNUM
)
3628 asm_fprintf (f
, "%ssp", code
== 'w' ? "w" : "");
3635 /* Print a normal operand, if it's a general register, then we
3639 output_operand_lossage ("missing operand");
3643 switch (GET_CODE (x
))
3646 asm_fprintf (f
, "%s", reg_names
[REGNO (x
)]);
3650 aarch64_memory_reference_mode
= GET_MODE (x
);
3651 output_address (XEXP (x
, 0));
3656 output_addr_const (asm_out_file
, x
);
3660 asm_fprintf (f
, "%wd", INTVAL (x
));
3664 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_INT
)
3666 gcc_assert (aarch64_const_vec_all_same_int_p (x
,
3668 HOST_WIDE_INT_MAX
));
3669 asm_fprintf (f
, "%wd", INTVAL (CONST_VECTOR_ELT (x
, 0)));
3671 else if (aarch64_simd_imm_zero_p (x
, GET_MODE (x
)))
3680 /* CONST_DOUBLE can represent a double-width integer.
3681 In this case, the mode of x is VOIDmode. */
3682 if (GET_MODE (x
) == VOIDmode
)
3684 else if (aarch64_float_const_zero_rtx_p (x
))
3689 else if (aarch64_float_const_representable_p (x
))
3692 char float_buf
[buf_size
] = {'\0'};
3694 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3695 real_to_decimal_for_mode (float_buf
, &r
,
3698 asm_fprintf (asm_out_file
, "%s", float_buf
);
3702 output_operand_lossage ("invalid constant");
3705 output_operand_lossage ("invalid operand");
3711 if (GET_CODE (x
) == HIGH
)
3714 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3716 case SYMBOL_SMALL_GOT
:
3717 asm_fprintf (asm_out_file
, ":got:");
3720 case SYMBOL_SMALL_TLSGD
:
3721 asm_fprintf (asm_out_file
, ":tlsgd:");
3724 case SYMBOL_SMALL_TLSDESC
:
3725 asm_fprintf (asm_out_file
, ":tlsdesc:");
3728 case SYMBOL_SMALL_GOTTPREL
:
3729 asm_fprintf (asm_out_file
, ":gottprel:");
3732 case SYMBOL_SMALL_TPREL
:
3733 asm_fprintf (asm_out_file
, ":tprel:");
3736 case SYMBOL_TINY_GOT
:
3743 output_addr_const (asm_out_file
, x
);
3747 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3749 case SYMBOL_SMALL_GOT
:
3750 asm_fprintf (asm_out_file
, ":lo12:");
3753 case SYMBOL_SMALL_TLSGD
:
3754 asm_fprintf (asm_out_file
, ":tlsgd_lo12:");
3757 case SYMBOL_SMALL_TLSDESC
:
3758 asm_fprintf (asm_out_file
, ":tlsdesc_lo12:");
3761 case SYMBOL_SMALL_GOTTPREL
:
3762 asm_fprintf (asm_out_file
, ":gottprel_lo12:");
3765 case SYMBOL_SMALL_TPREL
:
3766 asm_fprintf (asm_out_file
, ":tprel_lo12_nc:");
3769 case SYMBOL_TINY_GOT
:
3770 asm_fprintf (asm_out_file
, ":got:");
3776 output_addr_const (asm_out_file
, x
);
3781 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3783 case SYMBOL_SMALL_TPREL
:
3784 asm_fprintf (asm_out_file
, ":tprel_hi12:");
3789 output_addr_const (asm_out_file
, x
);
3793 output_operand_lossage ("invalid operand prefix '%%%c'", code
);
3799 aarch64_print_operand_address (FILE *f
, rtx x
)
3801 struct aarch64_address_info addr
;
3803 if (aarch64_classify_address (&addr
, x
, aarch64_memory_reference_mode
,
3807 case ADDRESS_REG_IMM
:
3808 if (addr
.offset
== const0_rtx
)
3809 asm_fprintf (f
, "[%s]", reg_names
[REGNO (addr
.base
)]);
3811 asm_fprintf (f
, "[%s,%wd]", reg_names
[REGNO (addr
.base
)],
3812 INTVAL (addr
.offset
));
3815 case ADDRESS_REG_REG
:
3816 if (addr
.shift
== 0)
3817 asm_fprintf (f
, "[%s,%s]", reg_names
[REGNO (addr
.base
)],
3818 reg_names
[REGNO (addr
.offset
)]);
3820 asm_fprintf (f
, "[%s,%s,lsl %u]", reg_names
[REGNO (addr
.base
)],
3821 reg_names
[REGNO (addr
.offset
)], addr
.shift
);
3824 case ADDRESS_REG_UXTW
:
3825 if (addr
.shift
== 0)
3826 asm_fprintf (f
, "[%s,w%d,uxtw]", reg_names
[REGNO (addr
.base
)],
3827 REGNO (addr
.offset
) - R0_REGNUM
);
3829 asm_fprintf (f
, "[%s,w%d,uxtw %u]", reg_names
[REGNO (addr
.base
)],
3830 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
3833 case ADDRESS_REG_SXTW
:
3834 if (addr
.shift
== 0)
3835 asm_fprintf (f
, "[%s,w%d,sxtw]", reg_names
[REGNO (addr
.base
)],
3836 REGNO (addr
.offset
) - R0_REGNUM
);
3838 asm_fprintf (f
, "[%s,w%d,sxtw %u]", reg_names
[REGNO (addr
.base
)],
3839 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
3842 case ADDRESS_REG_WB
:
3843 switch (GET_CODE (x
))
3846 asm_fprintf (f
, "[%s,%d]!", reg_names
[REGNO (addr
.base
)],
3847 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3850 asm_fprintf (f
, "[%s],%d", reg_names
[REGNO (addr
.base
)],
3851 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3854 asm_fprintf (f
, "[%s,-%d]!", reg_names
[REGNO (addr
.base
)],
3855 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3858 asm_fprintf (f
, "[%s],-%d", reg_names
[REGNO (addr
.base
)],
3859 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3862 asm_fprintf (f
, "[%s,%wd]!", reg_names
[REGNO (addr
.base
)],
3863 INTVAL (addr
.offset
));
3866 asm_fprintf (f
, "[%s],%wd", reg_names
[REGNO (addr
.base
)],
3867 INTVAL (addr
.offset
));
3874 case ADDRESS_LO_SUM
:
3875 asm_fprintf (f
, "[%s,#:lo12:", reg_names
[REGNO (addr
.base
)]);
3876 output_addr_const (f
, addr
.offset
);
3877 asm_fprintf (f
, "]");
3880 case ADDRESS_SYMBOLIC
:
3884 output_addr_const (f
, x
);
3888 aarch64_label_mentioned_p (rtx x
)
3893 if (GET_CODE (x
) == LABEL_REF
)
3896 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3897 referencing instruction, but they are constant offsets, not
3899 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
3902 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
3903 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
3909 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
3910 if (aarch64_label_mentioned_p (XVECEXP (x
, i
, j
)))
3913 else if (fmt
[i
] == 'e' && aarch64_label_mentioned_p (XEXP (x
, i
)))
3920 /* Implement REGNO_REG_CLASS. */
3923 aarch64_regno_regclass (unsigned regno
)
3925 if (GP_REGNUM_P (regno
))
3928 if (regno
== SP_REGNUM
)
3931 if (regno
== FRAME_POINTER_REGNUM
3932 || regno
== ARG_POINTER_REGNUM
)
3933 return POINTER_REGS
;
3935 if (FP_REGNUM_P (regno
))
3936 return FP_LO_REGNUM_P (regno
) ? FP_LO_REGS
: FP_REGS
;
3941 /* Try a machine-dependent way of reloading an illegitimate address
3942 operand. If we find one, push the reload and return the new rtx. */
3945 aarch64_legitimize_reload_address (rtx
*x_p
,
3946 enum machine_mode mode
,
3947 int opnum
, int type
,
3948 int ind_levels ATTRIBUTE_UNUSED
)
3952 /* Do not allow mem (plus (reg, const)) if vector mode. */
3953 if (aarch64_vector_mode_p (mode
)
3954 && GET_CODE (x
) == PLUS
3955 && REG_P (XEXP (x
, 0))
3956 && CONST_INT_P (XEXP (x
, 1)))
3960 push_reload (orig_rtx
, NULL_RTX
, x_p
, NULL
,
3961 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
3962 opnum
, (enum reload_type
) type
);
3966 /* We must recognize output that we have already generated ourselves. */
3967 if (GET_CODE (x
) == PLUS
3968 && GET_CODE (XEXP (x
, 0)) == PLUS
3969 && REG_P (XEXP (XEXP (x
, 0), 0))
3970 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3971 && CONST_INT_P (XEXP (x
, 1)))
3973 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
3974 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
3975 opnum
, (enum reload_type
) type
);
3979 /* We wish to handle large displacements off a base register by splitting
3980 the addend across an add and the mem insn. This can cut the number of
3981 extra insns needed from 3 to 1. It is only useful for load/store of a
3982 single register with 12 bit offset field. */
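/* Worked example (illustrative): for an SImode access at base + 0x12344
   the addend is split into high = 0x12000 and low = 0x344; the high part
   is reloaded into the base register with a single add and the low part
   remains in the 12-bit offset field of the load/store itself.  */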
3983 if (GET_CODE (x
) == PLUS
3984 && REG_P (XEXP (x
, 0))
3985 && CONST_INT_P (XEXP (x
, 1))
3986 && HARD_REGISTER_P (XEXP (x
, 0))
3989 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x
, 0)), true))
3991 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
3992 HOST_WIDE_INT low
= val
& 0xfff;
3993 HOST_WIDE_INT high
= val
- low
;
3996 enum machine_mode xmode
= GET_MODE (x
);
3998 /* In ILP32, xmode can be either DImode or SImode. */
3999 gcc_assert (xmode
== DImode
|| xmode
== SImode
);
4001 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4002 BLKmode alignment. */
4003 if (GET_MODE_SIZE (mode
) == 0)
4006 offs
= low
% GET_MODE_SIZE (mode
);
4008 /* Align misaligned offset by adjusting high part to compensate. */
4011 if (aarch64_uimm12_shift (high
+ offs
))
4020 offs
= GET_MODE_SIZE (mode
) - offs
;
4022 high
= high
+ (low
& 0x1000) - offs
;
4027 /* Check for overflow. */
4028 if (high
+ low
!= val
)
4031 cst
= GEN_INT (high
);
4032 if (!aarch64_uimm12_shift (high
))
4033 cst
= force_const_mem (xmode
, cst
);
4035 /* Reload high part into base reg, leaving the low part
4036 in the mem instruction.
4037 Note that replacing this gen_rtx_PLUS with plus_constant is
4038 wrong in this case because we rely on the
4039 (plus (plus reg c1) c2) structure being preserved so that
4040 XEXP (*p, 0) in push_reload below uses the correct term. */
4041 x
= gen_rtx_PLUS (xmode
,
4042 gen_rtx_PLUS (xmode
, XEXP (x
, 0), cst
),
4045 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4046 BASE_REG_CLASS
, xmode
, VOIDmode
, 0, 0,
4047 opnum
, (enum reload_type
) type
);
4056 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED
, rtx x
,
4058 enum machine_mode mode
,
4059 secondary_reload_info
*sri
)
4061 /* Without the TARGET_SIMD instructions we cannot move a Q register
4062 to a Q register directly. We need a scratch. */
4063 if (REG_P (x
) && (mode
== TFmode
|| mode
== TImode
) && mode
== GET_MODE (x
)
4064 && FP_REGNUM_P (REGNO (x
)) && !TARGET_SIMD
4065 && reg_class_subset_p (rclass
, FP_REGS
))
4068 sri
->icode
= CODE_FOR_aarch64_reload_movtf
;
4069 else if (mode
== TImode
)
4070 sri
->icode
= CODE_FOR_aarch64_reload_movti
;
4074 /* A TFmode or TImode memory access should be handled via an FP_REGS
4075 because AArch64 has richer addressing modes for LDR/STR instructions
4076 than LDP/STP instructions. */
4077 if (!TARGET_GENERAL_REGS_ONLY
&& rclass
== CORE_REGS
4078 && GET_MODE_SIZE (mode
) == 16 && MEM_P (x
))
4081 if (rclass
== FP_REGS
&& (mode
== TImode
|| mode
== TFmode
) && CONSTANT_P(x
))
4088 aarch64_can_eliminate (const int from
, const int to
)
4090 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4091 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4093 if (frame_pointer_needed
)
4095 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4097 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
4099 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
4100 && !cfun
->calls_alloca
)
4102 if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4108 /* If we decided that we didn't need a leaf frame pointer but then used
4109 LR in the function, then we'll want a frame pointer after all, so
4110 prevent this elimination to ensure a frame pointer is used.
4112 NOTE: the original value of flag_omit_frame_pointer gets trashed
4113 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4114 of faked_omit_frame_pointer here (which is true when we always
4115 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4116 pointers when LR is clobbered). */
4117 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
4118 && df_regs_ever_live_p (LR_REGNUM
)
4119 && faked_omit_frame_pointer
)
4127 aarch64_initial_elimination_offset (unsigned from
, unsigned to
)
4129 HOST_WIDE_INT frame_size
;
4130 HOST_WIDE_INT offset
;
4132 aarch64_layout_frame ();
4133 frame_size
= (get_frame_size () + cfun
->machine
->frame
.saved_regs_size
4134 + crtl
->outgoing_args_size
4135 + cfun
->machine
->saved_varargs_size
);
4137 frame_size
= AARCH64_ROUND_UP (frame_size
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
4138 offset
= frame_size
;
4140 if (to
== HARD_FRAME_POINTER_REGNUM
)
4142 if (from
== ARG_POINTER_REGNUM
)
4143 return offset
- crtl
->outgoing_args_size
;
4145 if (from
== FRAME_POINTER_REGNUM
)
4146 return cfun
->machine
->frame
.saved_regs_size
;
4149 if (to
== STACK_POINTER_REGNUM
)
4151 if (from
== FRAME_POINTER_REGNUM
)
4153 HOST_WIDE_INT elim
= crtl
->outgoing_args_size
4154 + cfun
->machine
->frame
.saved_regs_size
4155 - cfun
->machine
->frame
.fp_lr_offset
;
4156 elim
= AARCH64_ROUND_UP (elim
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
4165 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4169 aarch64_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
4173 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
4178 aarch64_asm_trampoline_template (FILE *f
)
4182 asm_fprintf (f
, "\tldr\tw%d, .+16\n", IP1_REGNUM
- R0_REGNUM
);
4183 asm_fprintf (f
, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM
- R0_REGNUM
);
4187 asm_fprintf (f
, "\tldr\t%s, .+16\n", reg_names
[IP1_REGNUM
]);
4188 asm_fprintf (f
, "\tldr\t%s, .+20\n", reg_names
[STATIC_CHAIN_REGNUM
]);
4190 asm_fprintf (f
, "\tbr\t%s\n", reg_names
[IP1_REGNUM
]);
4191 assemble_aligned_integer (4, const0_rtx
);
4192 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4193 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4197 aarch64_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
4199 rtx fnaddr
, mem
, a_tramp
;
4200 const int tramp_code_sz
= 16;
4202 /* Don't need to copy the trailing D-words, we fill those in below. */
4203 emit_block_move (m_tramp
, assemble_trampoline_template (),
4204 GEN_INT (tramp_code_sz
), BLOCK_OP_NORMAL
);
4205 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
);
4206 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
4207 if (GET_MODE (fnaddr
) != ptr_mode
)
4208 fnaddr
= convert_memory_address (ptr_mode
, fnaddr
);
4209 emit_move_insn (mem
, fnaddr
);
4211 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
+ POINTER_BYTES
);
4212 emit_move_insn (mem
, chain_value
);
4214 /* XXX We should really define a "clear_cache" pattern and use
4215 gen_clear_cache(). */
4216 a_tramp
= XEXP (m_tramp
, 0);
4217 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
4218 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, ptr_mode
,
4219 plus_constant (ptr_mode
, a_tramp
, TRAMPOLINE_SIZE
),
4223 static unsigned char
4224 aarch64_class_max_nregs (reg_class_t regclass
, enum machine_mode mode
)
4235 aarch64_vector_mode_p (mode
) ? (GET_MODE_SIZE (mode
) + 15) / 16 :
4236 (GET_MODE_SIZE (mode
) + 7) / 8;
4250 aarch64_preferred_reload_class (rtx x
, reg_class_t regclass
)
4252 if (regclass
== POINTER_REGS
)
4253 return GENERAL_REGS
;
4255 if (regclass
== STACK_REG
)
4258 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x
)), POINTER_REGS
))
4264 /* If it's an integer immediate that MOVI can't handle, then
4265 FP_REGS is not an option, so we return NO_REGS instead. */
4266 if (CONST_INT_P (x
) && reg_class_subset_p (regclass
, FP_REGS
)
4267 && !aarch64_simd_imm_scalar_p (x
, GET_MODE (x
)))
/* Register elimination can result in a request for
   SP+constant->FP_REGS.  We cannot support such operations which
   use SP as source and an FP_REG as destination, so reject out
   of hand in that case.  */
4274 if (! reg_class_subset_p (regclass
, GENERAL_REGS
) && GET_CODE (x
) == PLUS
)
4276 rtx lhs
= XEXP (x
, 0);
4278 /* Look through a possible SUBREG introduced by ILP32. */
4279 if (GET_CODE (lhs
) == SUBREG
)
4280 lhs
= SUBREG_REG (lhs
);
4282 gcc_assert (REG_P (lhs
));
4283 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs
)),
4292 aarch64_asm_output_labelref (FILE* f
, const char *name
)
4294 asm_fprintf (f
, "%U%s", name
);
4298 aarch64_elf_asm_constructor (rtx symbol
, int priority
)
4300 if (priority
== DEFAULT_INIT_PRIORITY
)
4301 default_ctor_section_asm_out_constructor (symbol
, priority
);
4306 snprintf (buf
, sizeof (buf
), ".init_array.%.5u", priority
);
4307 s
= get_section (buf
, SECTION_WRITE
, NULL
);
4308 switch_to_section (s
);
4309 assemble_align (POINTER_SIZE
);
4310 assemble_aligned_integer (POINTER_BYTES
, symbol
);
4315 aarch64_elf_asm_destructor (rtx symbol
, int priority
)
4317 if (priority
== DEFAULT_INIT_PRIORITY
)
4318 default_dtor_section_asm_out_destructor (symbol
, priority
);
4323 snprintf (buf
, sizeof (buf
), ".fini_array.%.5u", priority
);
4324 s
= get_section (buf
, SECTION_WRITE
, NULL
);
4325 switch_to_section (s
);
4326 assemble_align (POINTER_SIZE
);
4327 assemble_aligned_integer (POINTER_BYTES
, symbol
);
4332 aarch64_output_casesi (rtx
*operands
)
4336 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[2]));
4338 static const char *const patterns
[4][2] =
4341 "ldrb\t%w3, [%0,%w1,uxtw]",
4342 "add\t%3, %4, %w3, sxtb #2"
4345 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4346 "add\t%3, %4, %w3, sxth #2"
4349 "ldr\t%w3, [%0,%w1,uxtw #2]",
4350 "add\t%3, %4, %w3, sxtw #2"
4352 /* We assume that DImode is only generated when not optimizing and
4353 that we don't really need 64-bit address offsets. That would
4354 imply an object file with 8GB of code in a single function! */
4356 "ldr\t%w3, [%0,%w1,uxtw #2]",
4357 "add\t%3, %4, %w3, sxtw #2"
4361 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
4363 index
= exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec
)));
4365 gcc_assert (index
>= 0 && index
<= 3);
/* Need to implement table size reduction, by changing the code below.  */
4368 output_asm_insn (patterns
[index
][0], operands
);
4369 ASM_GENERATE_INTERNAL_LABEL (label
, "Lrtx", CODE_LABEL_NUMBER (operands
[2]));
4370 snprintf (buf
, sizeof (buf
),
4371 "adr\t%%4, %s", targetm
.strip_name_encoding (label
));
4372 output_asm_insn (buf
, operands
);
4373 output_asm_insn (patterns
[index
][1], operands
);
4374 output_asm_insn ("br\t%3", operands
);
4375 assemble_label (asm_out_file
, label
);
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */

int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
        {
          HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
          if (mask == bits << shift)
            return size;
        }
    }
  return 0;
}
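/* Worked example (illustrative): aarch64_uxt_size (2, 0x3fc) returns 8,
   since 0xff << 2 == 0x3fc, i.e. a byte-sized operand scaled by 4, which
   maps onto UXTB with a left shift of 2.  */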
static bool
aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                                   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
                            rtx x ATTRIBUTE_UNUSED,
                            unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}
/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}
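/* Worked examples (illustrative): (ashift:DI (reg:DI x1) (const_int 3))
   and (mult:DI (reg:DI x1) (const_int 8)) both strip to (reg:DI x1); a
   shift by a non-constant amount is returned unchanged.  */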
4443 /* Helper function for rtx cost calculation. Strip a shift or extend
4444 expression from X. Returns the inner operand if successful, or the
4445 original expression on failure. We deal with a number of possible
4446 canonicalization variations here. */
4448 aarch64_strip_shift_or_extend (rtx x
)
4452 /* Zero and sign extraction of a widened value. */
4453 if ((GET_CODE (op
) == ZERO_EXTRACT
|| GET_CODE (op
) == SIGN_EXTRACT
)
4454 && XEXP (op
, 2) == const0_rtx
4455 && aarch64_is_extend_from_extract (GET_MODE (op
), XEXP (XEXP (op
, 0), 1),
4457 return XEXP (XEXP (op
, 0), 0);
4459 /* It can also be represented (for zero-extend) as an AND with an
4461 if (GET_CODE (op
) == AND
4462 && GET_CODE (XEXP (op
, 0)) == MULT
4463 && CONST_INT_P (XEXP (XEXP (op
, 0), 1))
4464 && CONST_INT_P (XEXP (op
, 1))
4465 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op
, 0), 1))),
4466 INTVAL (XEXP (op
, 1))) != 0)
4467 return XEXP (XEXP (op
, 0), 0);
4469 /* Now handle extended register, as this may also have an optional
4470 left shift by 1..4. */
4471 if (GET_CODE (op
) == ASHIFT
4472 && CONST_INT_P (XEXP (op
, 1))
4473 && ((unsigned HOST_WIDE_INT
) INTVAL (XEXP (op
, 1))) <= 4)
4476 if (GET_CODE (op
) == ZERO_EXTEND
4477 || GET_CODE (op
) == SIGN_EXTEND
)
4483 return aarch64_strip_shift (x
);
4486 /* Calculate the cost of calculating X, storing it in *COST. Result
4487 is true if the total cost of the operation has now been calculated. */
4489 aarch64_rtx_costs (rtx x
, int code
, int outer ATTRIBUTE_UNUSED
,
4490 int param ATTRIBUTE_UNUSED
, int *cost
, bool speed
)
4493 const struct cpu_rtx_cost_table
*extra_cost
4494 = aarch64_tune_params
->insn_extra_cost
;
4502 switch (GET_CODE (op0
))
4506 *cost
+= extra_cost
->memory_store
;
4508 if (op1
!= const0_rtx
)
4509 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
4513 if (! REG_P (SUBREG_REG (op0
)))
4514 *cost
+= rtx_cost (SUBREG_REG (op0
), SET
, 0, speed
);
4517 /* Cost is just the cost of the RHS of the set. */
4518 *cost
+= rtx_cost (op1
, SET
, 1, true);
4521 case ZERO_EXTRACT
: /* Bit-field insertion. */
4523 /* Strip any redundant widening of the RHS to meet the width of
4525 if (GET_CODE (op1
) == SUBREG
)
4526 op1
= SUBREG_REG (op1
);
4527 if ((GET_CODE (op1
) == ZERO_EXTEND
4528 || GET_CODE (op1
) == SIGN_EXTEND
)
4529 && GET_CODE (XEXP (op0
, 1)) == CONST_INT
4530 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1
, 0)))
4531 >= INTVAL (XEXP (op0
, 1))))
4532 op1
= XEXP (op1
, 0);
4533 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
4543 *cost
+= extra_cost
->memory_load
;
4548 op0
= CONST0_RTX (GET_MODE (x
));
4556 if (op1
== const0_rtx
4557 && GET_CODE (op0
) == AND
)
4563 /* Comparisons can work if the order is swapped.
4564 Canonicalization puts the more complex operation first, but
4565 we want it in op1. */
4567 || (GET_CODE (op0
) == SUBREG
&& REG_P (SUBREG_REG (op0
)))))
4579 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
4580 || (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
4581 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
))
4583 if (op0
!= const0_rtx
)
4584 *cost
+= rtx_cost (op0
, MINUS
, 0, speed
);
4586 if (CONST_INT_P (op1
))
4588 if (!aarch64_uimm12_shift (INTVAL (op1
)))
4589 *cost
+= rtx_cost (op1
, MINUS
, 1, speed
);
4593 op1
= aarch64_strip_shift_or_extend (op1
);
4594 *cost
+= rtx_cost (op1
, MINUS
, 1, speed
);
4605 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
4607 if (CONST_INT_P (op1
) && aarch64_uimm12_shift (INTVAL (op1
)))
4609 *cost
+= rtx_cost (op0
, PLUS
, 0, speed
);
4613 rtx new_op0
= aarch64_strip_shift_or_extend (op0
);
4616 && GET_CODE (op0
) == MULT
)
4618 if ((GET_CODE (XEXP (op0
, 0)) == ZERO_EXTEND
4619 && GET_CODE (XEXP (op0
, 1)) == ZERO_EXTEND
)
4620 || (GET_CODE (XEXP (op0
, 0)) == SIGN_EXTEND
4621 && GET_CODE (XEXP (op0
, 1)) == SIGN_EXTEND
))
4623 *cost
+= (rtx_cost (XEXP (XEXP (op0
, 0), 0), MULT
, 0,
4625 + rtx_cost (XEXP (XEXP (op0
, 1), 0), MULT
, 1,
4627 + rtx_cost (op1
, PLUS
, 1, speed
));
4629 *cost
+= extra_cost
->int_multiply_extend_add
;
4632 *cost
+= (rtx_cost (XEXP (op0
, 0), MULT
, 0, speed
)
4633 + rtx_cost (XEXP (op0
, 1), MULT
, 1, speed
)
4634 + rtx_cost (op1
, PLUS
, 1, speed
));
4637 *cost
+= extra_cost
->int_multiply_add
;
4640 *cost
+= (rtx_cost (new_op0
, PLUS
, 0, speed
)
4641 + rtx_cost (op1
, PLUS
, 1, speed
));
4655 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
4657 if (CONST_INT_P (op1
)
4658 && aarch64_bitmask_imm (INTVAL (op1
), GET_MODE (x
)))
4660 *cost
+= rtx_cost (op0
, AND
, 0, speed
);
4664 if (GET_CODE (op0
) == NOT
)
4665 op0
= XEXP (op0
, 0);
4666 op0
= aarch64_strip_shift (op0
);
4667 *cost
+= (rtx_cost (op0
, AND
, 0, speed
)
4668 + rtx_cost (op1
, AND
, 1, speed
));
4675 if ((GET_MODE (x
) == DImode
4676 && GET_MODE (XEXP (x
, 0)) == SImode
)
4677 || GET_CODE (XEXP (x
, 0)) == MEM
)
4679 *cost
+= rtx_cost (XEXP (x
, 0), ZERO_EXTEND
, 0, speed
);
4685 if (GET_CODE (XEXP (x
, 0)) == MEM
)
4687 *cost
+= rtx_cost (XEXP (x
, 0), SIGN_EXTEND
, 0, speed
);
4693 if (!CONST_INT_P (XEXP (x
, 1)))
4694 *cost
+= COSTS_N_INSNS (2);
4701 /* Shifting by a register often takes an extra cycle. */
4702 if (speed
&& !CONST_INT_P (XEXP (x
, 1)))
4703 *cost
+= extra_cost
->register_shift
;
4705 *cost
+= rtx_cost (XEXP (x
, 0), ASHIFT
, 0, speed
);
4709 if (!CONSTANT_P (XEXP (x
, 0)))
4710 *cost
+= rtx_cost (XEXP (x
, 0), HIGH
, 0, speed
);
4714 if (!CONSTANT_P (XEXP (x
, 1)))
4715 *cost
+= rtx_cost (XEXP (x
, 1), LO_SUM
, 1, speed
);
4716 *cost
+= rtx_cost (XEXP (x
, 0), LO_SUM
, 0, speed
);
4721 *cost
+= rtx_cost (XEXP (x
, 0), ZERO_EXTRACT
, 0, speed
);
4728 *cost
= COSTS_N_INSNS (1);
4729 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
4731 if (CONST_INT_P (op1
)
4732 && exact_log2 (INTVAL (op1
)) > 0)
4734 *cost
+= rtx_cost (op0
, ASHIFT
, 0, speed
);
4738 if ((GET_CODE (op0
) == ZERO_EXTEND
4739 && GET_CODE (op1
) == ZERO_EXTEND
)
4740 || (GET_CODE (op0
) == SIGN_EXTEND
4741 && GET_CODE (op1
) == SIGN_EXTEND
))
4743 *cost
+= (rtx_cost (XEXP (op0
, 0), MULT
, 0, speed
)
4744 + rtx_cost (XEXP (op1
, 0), MULT
, 1, speed
));
4746 *cost
+= extra_cost
->int_multiply_extend
;
4751 *cost
+= extra_cost
->int_multiply
;
4755 if (GET_MODE (x
) == DFmode
)
4756 *cost
+= extra_cost
->double_multiply
;
4757 else if (GET_MODE (x
) == SFmode
)
4758 *cost
+= extra_cost
->float_multiply
;
4761 return false; /* All arguments need to be in registers. */
4765 *cost
= COSTS_N_INSNS (2);
4768 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
4769 *cost
+= (extra_cost
->int_multiply_add
4770 + extra_cost
->int_divide
);
4771 else if (GET_MODE (x
) == DFmode
)
4772 *cost
+= (extra_cost
->double_multiply
4773 + extra_cost
->double_divide
);
4774 else if (GET_MODE (x
) == SFmode
)
4775 *cost
+= (extra_cost
->float_multiply
4776 + extra_cost
->float_divide
);
4778 return false; /* All arguments need to be in registers. */
4782 *cost
= COSTS_N_INSNS (1);
4785 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
4786 *cost
+= extra_cost
->int_divide
;
4787 else if (GET_MODE (x
) == DFmode
)
4788 *cost
+= extra_cost
->double_divide
;
4789 else if (GET_MODE (x
) == SFmode
)
4790 *cost
+= extra_cost
->float_divide
;
4792 return false; /* All arguments need to be in registers. */
static int
aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
		      enum machine_mode mode ATTRIBUTE_UNUSED,
		      addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;

  if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
    return addr_cost->pre_modify;

  if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
    return addr_cost->post_modify;

  if (c == PLUS)
    {
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	return addr_cost->imm_offset;
      else if (GET_CODE (XEXP (x, 0)) == MULT
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	       || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	return addr_cost->register_extend;

      return addr_cost->register_offset;
    }
  else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return addr_cost->imm_offset;

  return 0;
}
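/* Rough mapping from AArch64 addressing forms to the cost fields used
   above (illustrative only, not exhaustive):

     [x0, #16]          imm_offset
     [x0, x1]           register_offset
     [x0, x1, lsl #3]   register_extend (a scaled index appears as MULT)
     [x0, w1, sxtw]     register_extend
     [x0, #16]!         pre_modify
     [x0], #16          post_modify  */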
static int
aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			    reg_class_t from, reg_class_t to)
{
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params->regmove_cost;

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  /* When AdvSIMD instructions are disabled it is not possible to move
     a 128-bit value directly between Q registers.  This is handled in
     secondary reload.  A general register is used as a scratch to move
     the upper DI value and the lower DI value is moved directly,
     hence the cost is the sum of three moves.  */

  if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
    return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

  return regmove_cost->FP2FP;
}
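/* Worked example of the fallback above: with purely illustrative costs
   FP2FP = 2, FP2GP = 4 and GP2FP = 4, a 128-bit copy without AdvSIMD is
   costed as 4 + 4 + 2 = 10, i.e. the low half moved directly between FP
   registers plus the high half bounced through a general register.  */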
static int
aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			  reg_class_t rclass ATTRIBUTE_UNUSED,
			  bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params->memmov_cost;
}
/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				    tree vectype,
				    int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
      case scalar_stmt:
	return aarch64_tune_params->vec_costs->scalar_stmt_cost;

      case scalar_load:
	return aarch64_tune_params->vec_costs->scalar_load_cost;

      case scalar_store:
	return aarch64_tune_params->vec_costs->scalar_store_cost;

      case vector_stmt:
	return aarch64_tune_params->vec_costs->vec_stmt_cost;

      case vector_load:
	return aarch64_tune_params->vec_costs->vec_align_load_cost;

      case vector_store:
	return aarch64_tune_params->vec_costs->vec_store_cost;

      case vec_to_scalar:
	return aarch64_tune_params->vec_costs->vec_to_scalar_cost;

      case scalar_to_vec:
	return aarch64_tune_params->vec_costs->scalar_to_vec_cost;

      case unaligned_load:
	return aarch64_tune_params->vec_costs->vec_unalign_load_cost;

      case unaligned_store:
	return aarch64_tune_params->vec_costs->vec_unalign_store_cost;

      case cond_branch_taken:
	return aarch64_tune_params->vec_costs->cond_taken_branch_cost;

      case cond_branch_not_taken:
	return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;

      case vec_promote_demote:
	return aarch64_tune_params->vec_costs->vec_stmt_cost;

      case vec_construct:
	elements = TYPE_VECTOR_SUBPARTS (vectype);
	return elements / 2 + 1;

      default:
	gcc_unreachable ();
    }
}
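/* Example: building a V4SImode vector from scalars has
   TYPE_VECTOR_SUBPARTS == 4, so the vec_construct cost above is
   4 / 2 + 1 = 3.  */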
/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		       struct _stmt_vec_info *stmt_info, int misalign,
		       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
	    aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 a function (linear for now) of the loop nest level.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	{
	  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
	  unsigned nest_level = loop_depth (loop);

	  count *= nest_level;
	}

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
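/* Example of the weighting above: a statement of cost 1, counted once
   (count == 1) in the body of a loop at nest level 2, is recorded as
   1 * 2 * 1 = 2 in the vect_body bucket.  */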
4960 static void initialize_aarch64_code_model (void);
4962 /* Parse the architecture extension string. */
4965 aarch64_parse_extension (char *str
)
4967 /* The extension string is parsed left to right. */
4968 const struct aarch64_option_extension
*opt
= NULL
;
4970 /* Flag to say whether we are adding or removing an extension. */
4971 int adding_ext
= -1;
4973 while (str
!= NULL
&& *str
!= 0)
4979 ext
= strchr (str
, '+');
4986 if (len
>= 2 && strncmp (str
, "no", 2) == 0)
4997 error ("missing feature modifier after %qs", "+no");
5001 /* Scan over the extensions table trying to find an exact match. */
5002 for (opt
= all_extensions
; opt
->name
!= NULL
; opt
++)
5004 if (strlen (opt
->name
) == len
&& strncmp (opt
->name
, str
, len
) == 0)
5006 /* Add or remove the extension. */
5008 aarch64_isa_flags
|= opt
->flags_on
;
5010 aarch64_isa_flags
&= ~(opt
->flags_off
);
5015 if (opt
->name
== NULL
)
5017 /* Extension not found in list. */
5018 error ("unknown feature modifier %qs", str
);
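/* Example: with "-march=armv8-a+crypto+nofp" the extension string is
   "+crypto+nofp".  The first token is looked up in all_extensions and its
   flags_on bits are added to aarch64_isa_flags; the second token starts
   with "no", so the flags_off bits of the "fp" entry are cleared instead.  */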
5028 /* Parse the ARCH string. */
5031 aarch64_parse_arch (void)
5034 const struct processor
*arch
;
5035 char *str
= (char *) alloca (strlen (aarch64_arch_string
) + 1);
5038 strcpy (str
, aarch64_arch_string
);
5040 ext
= strchr (str
, '+');
5049 error ("missing arch name in -march=%qs", str
);
5053 /* Loop through the list of supported ARCHs to find a match. */
5054 for (arch
= all_architectures
; arch
->name
!= NULL
; arch
++)
5056 if (strlen (arch
->name
) == len
&& strncmp (arch
->name
, str
, len
) == 0)
5058 selected_arch
= arch
;
5059 aarch64_isa_flags
= selected_arch
->flags
;
5060 selected_cpu
= &all_cores
[selected_arch
->core
];
5064 /* ARCH string contains at least one extension. */
5065 aarch64_parse_extension (ext
);
5072 /* ARCH name not found in list. */
5073 error ("unknown value %qs for -march", str
);
5077 /* Parse the CPU string. */
5080 aarch64_parse_cpu (void)
5083 const struct processor
*cpu
;
5084 char *str
= (char *) alloca (strlen (aarch64_cpu_string
) + 1);
5087 strcpy (str
, aarch64_cpu_string
);
5089 ext
= strchr (str
, '+');
5098 error ("missing cpu name in -mcpu=%qs", str
);
5102 /* Loop through the list of supported CPUs to find a match. */
5103 for (cpu
= all_cores
; cpu
->name
!= NULL
; cpu
++)
5105 if (strlen (cpu
->name
) == len
&& strncmp (cpu
->name
, str
, len
) == 0)
5108 aarch64_isa_flags
= selected_cpu
->flags
;
5112 /* CPU string contains at least one extension. */
5113 aarch64_parse_extension (ext
);
5120 /* CPU name not found in list. */
5121 error ("unknown value %qs for -mcpu", str
);
/* Parse the TUNE string.  */

static void
aarch64_parse_tune (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
  strcpy (str, aarch64_tune_string);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
	{
	  selected_tune = cpu;
	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mtune", str);
  return;
}
/* Implement TARGET_OPTION_OVERRIDE.  */

static void
aarch64_override_options (void)
{
  /* march wins over mcpu, so when march is defined, mcpu takes the same value,
     otherwise march remains undefined.  mtune can be used with either march
     or mcpu.  */

  if (aarch64_arch_string)
    {
      aarch64_parse_arch ();
      aarch64_cpu_string = NULL;
    }

  if (aarch64_cpu_string)
    {
      aarch64_parse_cpu ();
      selected_arch = NULL;
    }

  if (aarch64_tune_string)
    aarch64_parse_tune ();

  initialize_aarch64_code_model ();

  aarch64_build_bitmask_table ();

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "generic".  */
  if (!selected_cpu)
    {
      selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
      aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
    }

  gcc_assert (selected_cpu);

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!selected_tune)
    selected_tune = &all_cores[selected_cpu->core];

  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->core;
  aarch64_tune_params = selected_tune->tune;

  aarch64_override_options_after_change ();
}
/* Implement targetm.override_options_after_change.  */

static void
aarch64_override_options_after_change (void)
{
  faked_omit_frame_pointer = false;

  /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
     that aarch64_frame_pointer_required will be called.  We need to remember
     whether flag_omit_frame_pointer was turned on normally or just faked.  */

  if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
    {
      flag_omit_frame_pointer = true;
      faked_omit_frame_pointer = true;
    }
}
static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
/* A checking mechanism for the implementation of the various code models.  */

static void
initialize_aarch64_code_model (void)
{
  if (flag_pic)
    {
      switch (aarch64_cmodel_var)
	{
	case AARCH64_CMODEL_TINY:
	  aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
	  break;
	case AARCH64_CMODEL_SMALL:
	  aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
	  break;
	case AARCH64_CMODEL_LARGE:
	  sorry ("code model %qs with -f%s", "large",
		 flag_pic > 1 ? "PIC" : "pic");
	default:
	  gcc_unreachable ();
	}
    }
  else
    aarch64_cmodel = aarch64_cmodel_var;
}
/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	  : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local.  */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_SMALL_TPREL;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X in context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x,
			 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_LARGE:
	  return SYMBOL_FORCE_TO_MEM;

	case AARCH64_CMODEL_TINY_PIC:
	case AARCH64_CMODEL_TINY:
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	case AARCH64_CMODEL_SMALL:
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE
	  || CONSTANT_POOL_ADDRESS_P (x))
	return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
	return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_SMALL_ABSOLUTE;

	case AARCH64_CMODEL_TINY_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_TINY_GOT;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_SMALL_GOT;
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
5381 aarch64_constant_address_p (rtx x
)
5383 return (CONSTANT_P (x
) && memory_address_p (DImode
, x
));
5387 aarch64_legitimate_pic_operand_p (rtx x
)
5389 if (GET_CODE (x
) == SYMBOL_REF
5390 || (GET_CODE (x
) == CONST
5391 && GET_CODE (XEXP (x
, 0)) == PLUS
5392 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
5398 /* Return true if X holds either a quarter-precision or
5399 floating-point +0.0 constant. */
5401 aarch64_valid_floating_const (enum machine_mode mode
, rtx x
)
5403 if (!CONST_DOUBLE_P (x
))
5406 /* TODO: We could handle moving 0.0 to a TFmode register,
5407 but first we would like to refactor the movtf_aarch64
5408 to be more amicable to split moves properly and
5409 correctly gate on TARGET_SIMD. For now - reject all
5410 constants which are not to SFmode or DFmode registers. */
5411 if (!(mode
== SFmode
|| mode
== DFmode
))
5414 if (aarch64_float_const_zero_rtx_p (x
))
5416 return aarch64_float_const_representable_p (x
);
5420 aarch64_legitimate_constant_p (enum machine_mode mode
, rtx x
)
5422 /* Do not allow vector struct mode constants. We could support
5423 0 and -1 easily, but they need support in aarch64-simd.md. */
5424 if (TARGET_SIMD
&& aarch64_vect_struct_mode_p (mode
))
5427 /* This could probably go away because
5428 we now decompose CONST_INTs according to expand_mov_immediate. */
5429 if ((GET_CODE (x
) == CONST_VECTOR
5430 && aarch64_simd_valid_immediate (x
, mode
, false, NULL
))
5431 || CONST_INT_P (x
) || aarch64_valid_floating_const (mode
, x
))
5432 return !targetm
.cannot_force_const_mem (mode
, x
);
5434 if (GET_CODE (x
) == HIGH
5435 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
5438 return aarch64_constant_address_p (x
);
5442 aarch64_load_tp (rtx target
)
5445 || GET_MODE (target
) != Pmode
5446 || !register_operand (target
, Pmode
))
5447 target
= gen_reg_rtx (Pmode
);
5449 /* Can return in any reg. */
5450 emit_insn (gen_aarch64_load_tp_hard (target
));
5454 /* On AAPCS systems, this is the "struct __va_list". */
5455 static GTY(()) tree va_list_type
;
/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
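   (layout per AAPCS64; the field names match the FIELD_DECLs created below)

     struct __va_list
     {
       void *__stack;		-- next stacked argument
       void *__gr_top;		-- end of GP argument register save area
       void *__vr_top;		-- end of FP/SIMD argument register save area
       int   __gr_offs;		-- offset from __gr_top to next GP argument
       int   __vr_offs;		-- offset from __vr_top to next FP/SIMD argument
     };  */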
5472 aarch64_build_builtin_va_list (void)
5475 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
5477 /* Create the type. */
5478 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
5479 /* Give it the required name. */
5480 va_list_name
= build_decl (BUILTINS_LOCATION
,
5482 get_identifier ("__va_list"),
5484 DECL_ARTIFICIAL (va_list_name
) = 1;
5485 TYPE_NAME (va_list_type
) = va_list_name
;
5486 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
5488 /* Create the fields. */
5489 f_stack
= build_decl (BUILTINS_LOCATION
,
5490 FIELD_DECL
, get_identifier ("__stack"),
5492 f_grtop
= build_decl (BUILTINS_LOCATION
,
5493 FIELD_DECL
, get_identifier ("__gr_top"),
5495 f_vrtop
= build_decl (BUILTINS_LOCATION
,
5496 FIELD_DECL
, get_identifier ("__vr_top"),
5498 f_groff
= build_decl (BUILTINS_LOCATION
,
5499 FIELD_DECL
, get_identifier ("__gr_offs"),
5501 f_vroff
= build_decl (BUILTINS_LOCATION
,
5502 FIELD_DECL
, get_identifier ("__vr_offs"),
5505 DECL_ARTIFICIAL (f_stack
) = 1;
5506 DECL_ARTIFICIAL (f_grtop
) = 1;
5507 DECL_ARTIFICIAL (f_vrtop
) = 1;
5508 DECL_ARTIFICIAL (f_groff
) = 1;
5509 DECL_ARTIFICIAL (f_vroff
) = 1;
5511 DECL_FIELD_CONTEXT (f_stack
) = va_list_type
;
5512 DECL_FIELD_CONTEXT (f_grtop
) = va_list_type
;
5513 DECL_FIELD_CONTEXT (f_vrtop
) = va_list_type
;
5514 DECL_FIELD_CONTEXT (f_groff
) = va_list_type
;
5515 DECL_FIELD_CONTEXT (f_vroff
) = va_list_type
;
5517 TYPE_FIELDS (va_list_type
) = f_stack
;
5518 DECL_CHAIN (f_stack
) = f_grtop
;
5519 DECL_CHAIN (f_grtop
) = f_vrtop
;
5520 DECL_CHAIN (f_vrtop
) = f_groff
;
5521 DECL_CHAIN (f_groff
) = f_vroff
;
5523 /* Compute its layout. */
5524 layout_type (va_list_type
);
5526 return va_list_type
;
5529 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5531 aarch64_expand_builtin_va_start (tree valist
, rtx nextarg ATTRIBUTE_UNUSED
)
5533 const CUMULATIVE_ARGS
*cum
;
5534 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
5535 tree stack
, grtop
, vrtop
, groff
, vroff
;
5537 int gr_save_area_size
;
5538 int vr_save_area_size
;
5541 cum
= &crtl
->args
.info
;
5543 = (NUM_ARG_REGS
- cum
->aapcs_ncrn
) * UNITS_PER_WORD
;
5545 = (NUM_FP_ARG_REGS
- cum
->aapcs_nvrn
) * UNITS_PER_VREG
;
5547 if (TARGET_GENERAL_REGS_ONLY
)
5549 if (cum
->aapcs_nvrn
> 0)
5550 sorry ("%qs and floating point or vector arguments",
5551 "-mgeneral-regs-only");
5552 vr_save_area_size
= 0;
5555 f_stack
= TYPE_FIELDS (va_list_type_node
);
5556 f_grtop
= DECL_CHAIN (f_stack
);
5557 f_vrtop
= DECL_CHAIN (f_grtop
);
5558 f_groff
= DECL_CHAIN (f_vrtop
);
5559 f_vroff
= DECL_CHAIN (f_groff
);
5561 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), valist
, f_stack
,
5563 grtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
), valist
, f_grtop
,
5565 vrtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
), valist
, f_vrtop
,
5567 groff
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
), valist
, f_groff
,
5569 vroff
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
), valist
, f_vroff
,
5572 /* Emit code to initialize STACK, which points to the next varargs stack
5573 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5574 by named arguments. STACK is 8-byte aligned. */
5575 t
= make_tree (TREE_TYPE (stack
), virtual_incoming_args_rtx
);
5576 if (cum
->aapcs_stack_size
> 0)
5577 t
= fold_build_pointer_plus_hwi (t
, cum
->aapcs_stack_size
* UNITS_PER_WORD
);
5578 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), stack
, t
);
5579 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5581 /* Emit code to initialize GRTOP, the top of the GR save area.
5582 virtual_incoming_args_rtx should have been 16 byte aligned. */
5583 t
= make_tree (TREE_TYPE (grtop
), virtual_incoming_args_rtx
);
5584 t
= build2 (MODIFY_EXPR
, TREE_TYPE (grtop
), grtop
, t
);
5585 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5587 /* Emit code to initialize VRTOP, the top of the VR save area.
5588 This address is gr_save_area_bytes below GRTOP, rounded
5589 down to the next 16-byte boundary. */
5590 t
= make_tree (TREE_TYPE (vrtop
), virtual_incoming_args_rtx
);
5591 vr_offset
= AARCH64_ROUND_UP (gr_save_area_size
,
5592 STACK_BOUNDARY
/ BITS_PER_UNIT
);
5595 t
= fold_build_pointer_plus_hwi (t
, -vr_offset
);
5596 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vrtop
), vrtop
, t
);
5597 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5599 /* Emit code to initialize GROFF, the offset from GRTOP of the
5600 next GPR argument. */
5601 t
= build2 (MODIFY_EXPR
, TREE_TYPE (groff
), groff
,
5602 build_int_cst (TREE_TYPE (groff
), -gr_save_area_size
));
5603 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5605 /* Likewise emit code to initialize VROFF, the offset from FTOP
5606 of the next VR argument. */
5607 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vroff
), vroff
,
5608 build_int_cst (TREE_TYPE (vroff
), -vr_save_area_size
));
5609 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
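/* Illustrative sketch only (not part of the build): the RTL emitted above
   initializes the va_list roughly as the C below would, where the STACK
   parameter stands for virtual_incoming_args_rtx and the helper name is
   made up for this sketch.  */
#if 0
static void
aarch64_va_start_sketch (struct __va_list *ap, char *stack,
			 int named_stack_bytes,
			 int gr_save_area_size, int vr_save_area_size)
{
  ap->__stack = stack + named_stack_bytes;
  ap->__gr_top = stack;
  ap->__vr_top = stack - AARCH64_ROUND_UP (gr_save_area_size, 16);
  ap->__gr_offs = -gr_save_area_size;
  ap->__vr_offs = -vr_save_area_size;
}
#endif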
5612 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5615 aarch64_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
5616 gimple_seq
*post_p ATTRIBUTE_UNUSED
)
5620 bool is_ha
; /* is HFA or HVA. */
5621 bool dw_align
; /* double-word align. */
5622 enum machine_mode ag_mode
= VOIDmode
;
5624 enum machine_mode mode
;
5626 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
5627 tree stack
, f_top
, f_off
, off
, arg
, roundup
, on_stack
;
5628 HOST_WIDE_INT size
, rsize
, adjust
, align
;
5629 tree t
, u
, cond1
, cond2
;
5631 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
5633 type
= build_pointer_type (type
);
5635 mode
= TYPE_MODE (type
);
5637 f_stack
= TYPE_FIELDS (va_list_type_node
);
5638 f_grtop
= DECL_CHAIN (f_stack
);
5639 f_vrtop
= DECL_CHAIN (f_grtop
);
5640 f_groff
= DECL_CHAIN (f_vrtop
);
5641 f_vroff
= DECL_CHAIN (f_groff
);
5643 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), unshare_expr (valist
),
5644 f_stack
, NULL_TREE
);
5645 size
= int_size_in_bytes (type
);
5646 align
= aarch64_function_arg_alignment (mode
, type
) / BITS_PER_UNIT
;
5650 if (aarch64_vfp_is_call_or_return_candidate (mode
,
5656 /* TYPE passed in fp/simd registers. */
5657 if (TARGET_GENERAL_REGS_ONLY
)
5658 sorry ("%qs and floating point or vector arguments",
5659 "-mgeneral-regs-only");
5661 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
),
5662 unshare_expr (valist
), f_vrtop
, NULL_TREE
);
5663 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
),
5664 unshare_expr (valist
), f_vroff
, NULL_TREE
);
5666 rsize
= nregs
* UNITS_PER_VREG
;
5670 if (BYTES_BIG_ENDIAN
&& GET_MODE_SIZE (ag_mode
) < UNITS_PER_VREG
)
5671 adjust
= UNITS_PER_VREG
- GET_MODE_SIZE (ag_mode
);
5673 else if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
5674 && size
< UNITS_PER_VREG
)
5676 adjust
= UNITS_PER_VREG
- size
;
5681 /* TYPE passed in general registers. */
5682 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
),
5683 unshare_expr (valist
), f_grtop
, NULL_TREE
);
5684 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
),
5685 unshare_expr (valist
), f_groff
, NULL_TREE
);
5686 rsize
= (size
+ UNITS_PER_WORD
- 1) & -UNITS_PER_WORD
;
5687 nregs
= rsize
/ UNITS_PER_WORD
;
5692 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
5693 && size
< UNITS_PER_WORD
)
5695 adjust
= UNITS_PER_WORD
- size
;
5699 /* Get a local temporary for the field value. */
5700 off
= get_initialized_tmp_var (f_off
, pre_p
, NULL
);
5702 /* Emit code to branch if off >= 0. */
5703 t
= build2 (GE_EXPR
, boolean_type_node
, off
,
5704 build_int_cst (TREE_TYPE (off
), 0));
5705 cond1
= build3 (COND_EXPR
, ptr_type_node
, t
, NULL_TREE
, NULL_TREE
);
5709 /* Emit: offs = (offs + 15) & -16. */
5710 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
5711 build_int_cst (TREE_TYPE (off
), 15));
5712 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (off
), t
,
5713 build_int_cst (TREE_TYPE (off
), -16));
5714 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (off
), off
, t
);
5719 /* Update ap.__[g|v]r_offs */
5720 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
5721 build_int_cst (TREE_TYPE (off
), rsize
));
5722 t
= build2 (MODIFY_EXPR
, TREE_TYPE (f_off
), unshare_expr (f_off
), t
);
5726 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
5728 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5729 u
= build2 (GT_EXPR
, boolean_type_node
, unshare_expr (f_off
),
5730 build_int_cst (TREE_TYPE (f_off
), 0));
5731 cond2
= build3 (COND_EXPR
, ptr_type_node
, u
, NULL_TREE
, NULL_TREE
);
5733 /* String up: make sure the assignment happens before the use. */
5734 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (cond2
), t
, cond2
);
5735 COND_EXPR_ELSE (cond1
) = t
;
5737 /* Prepare the trees handling the argument that is passed on the stack;
5738 the top level node will store in ON_STACK. */
5739 arg
= get_initialized_tmp_var (stack
, pre_p
, NULL
);
5742 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5743 t
= fold_convert (intDI_type_node
, arg
);
5744 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
5745 build_int_cst (TREE_TYPE (t
), 15));
5746 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
5747 build_int_cst (TREE_TYPE (t
), -16));
5748 t
= fold_convert (TREE_TYPE (arg
), t
);
5749 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (arg
), arg
, t
);
5753 /* Advance ap.__stack */
5754 t
= fold_convert (intDI_type_node
, arg
);
5755 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
5756 build_int_cst (TREE_TYPE (t
), size
+ 7));
5757 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
5758 build_int_cst (TREE_TYPE (t
), -8));
5759 t
= fold_convert (TREE_TYPE (arg
), t
);
5760 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), unshare_expr (stack
), t
);
5761 /* String up roundup and advance. */
5763 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
5764 /* String up with arg */
5765 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), t
, arg
);
5766 /* Big-endianness related address adjustment. */
5767 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
5768 && size
< UNITS_PER_WORD
)
5770 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (arg
), arg
,
5771 size_int (UNITS_PER_WORD
- size
));
5772 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), on_stack
, t
);
5775 COND_EXPR_THEN (cond1
) = unshare_expr (on_stack
);
5776 COND_EXPR_THEN (cond2
) = unshare_expr (on_stack
);
5778 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5781 t
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (off
), off
,
5782 build_int_cst (TREE_TYPE (off
), adjust
));
5784 t
= fold_convert (sizetype
, t
);
5785 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (f_top
), f_top
, t
);
5789 /* type ha; // treat as "struct {ftype field[n];}"
5790 ... [computing offs]
5791 for (i = 0; i <nregs; ++i, offs += 16)
5792 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5795 tree tmp_ha
, field_t
, field_ptr_t
;
5797 /* Declare a local variable. */
5798 tmp_ha
= create_tmp_var_raw (type
, "ha");
5799 gimple_add_tmp_var (tmp_ha
);
5801 /* Establish the base type. */
5805 field_t
= float_type_node
;
5806 field_ptr_t
= float_ptr_type_node
;
5809 field_t
= double_type_node
;
5810 field_ptr_t
= double_ptr_type_node
;
5813 field_t
= long_double_type_node
;
5814 field_ptr_t
= long_double_ptr_type_node
;
5816 /* The half precision and quad precision are not fully supported yet. Enable
5817 the following code after the support is complete. Need to find the correct
5818 type node for __fp16 *. */
5821 field_t
= float_type_node
;
5822 field_ptr_t
= float_ptr_type_node
;
5828 tree innertype
= make_signed_type (GET_MODE_PRECISION (SImode
));
5829 field_t
= build_vector_type_for_mode (innertype
, ag_mode
);
5830 field_ptr_t
= build_pointer_type (field_t
);
5837 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
5838 tmp_ha
= build1 (ADDR_EXPR
, field_ptr_t
, tmp_ha
);
5840 t
= fold_convert (field_ptr_t
, addr
);
5841 t
= build2 (MODIFY_EXPR
, field_t
,
5842 build1 (INDIRECT_REF
, field_t
, tmp_ha
),
5843 build1 (INDIRECT_REF
, field_t
, t
));
5845 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5846 for (i
= 1; i
< nregs
; ++i
)
5848 addr
= fold_build_pointer_plus_hwi (addr
, UNITS_PER_VREG
);
5849 u
= fold_convert (field_ptr_t
, addr
);
5850 u
= build2 (MODIFY_EXPR
, field_t
,
5851 build2 (MEM_REF
, field_t
, tmp_ha
,
5852 build_int_cst (field_ptr_t
,
5854 int_size_in_bytes (field_t
)))),
5855 build1 (INDIRECT_REF
, field_t
, u
));
5856 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), t
, u
);
5859 u
= fold_convert (TREE_TYPE (f_top
), tmp_ha
);
5860 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (f_top
), t
, u
);
5863 COND_EXPR_ELSE (cond2
) = t
;
5864 addr
= fold_convert (build_pointer_type (type
), cond1
);
5865 addr
= build_va_arg_indirect_ref (addr
);
5868 addr
= build_va_arg_indirect_ref (addr
);
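/* Illustrative sketch only (not part of the build): for a small integer
   argument, the trees built above implement roughly the AAPCS64 va_arg
   algorithm below.  The FP/SIMD path is analogous but uses __vr_top,
   __vr_offs and 16-byte register units; over-aligned arguments, big-endian
   padding and HFA handling are omitted, and the helper name is made up for
   this sketch.  */
#if 0
static void *
aarch64_va_arg_gr_sketch (struct __va_list *ap, int size)
{
  int off = ap->__gr_offs;

  if (off < 0)
    {
      /* Try the GP register save area first.  */
      ap->__gr_offs = off + ((size + 7) & -8);
      if (ap->__gr_offs <= 0)
	return (char *) ap->__gr_top + off;
    }

  /* Otherwise the argument lives on the stack; advance __stack past it.  */
  {
    void *addr = ap->__stack;
    ap->__stack = (char *) ap->__stack + ((size + 7) & -8);
    return addr;
  }
}
#endif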
5873 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5876 aarch64_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
5877 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
5880 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
5881 CUMULATIVE_ARGS local_cum
;
5882 int gr_saved
, vr_saved
;
5884 /* The caller has advanced CUM up to, but not beyond, the last named
5885 argument. Advance a local copy of CUM past the last "real" named
5886 argument, to find out how many registers are left over. */
5888 aarch64_function_arg_advance (pack_cumulative_args(&local_cum
), mode
, type
, true);
5890 /* Found out how many registers we need to save. */
5891 gr_saved
= NUM_ARG_REGS
- local_cum
.aapcs_ncrn
;
5892 vr_saved
= NUM_FP_ARG_REGS
- local_cum
.aapcs_nvrn
;
5894 if (TARGET_GENERAL_REGS_ONLY
)
5896 if (local_cum
.aapcs_nvrn
> 0)
5897 sorry ("%qs and floating point or vector arguments",
5898 "-mgeneral-regs-only");
5908 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5909 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
,
5910 - gr_saved
* UNITS_PER_WORD
);
5911 mem
= gen_frame_mem (BLKmode
, ptr
);
5912 set_mem_alias_set (mem
, get_varargs_alias_set ());
5914 move_block_from_reg (local_cum
.aapcs_ncrn
+ R0_REGNUM
,
5919 /* We can't use move_block_from_reg, because it will use
5920 the wrong mode, storing D regs only. */
5921 enum machine_mode mode
= TImode
;
5924 /* Set OFF to the offset from virtual_incoming_args_rtx of
5925 the first vector register. The VR save area lies below
5926 the GR one, and is aligned to 16 bytes. */
5927 off
= -AARCH64_ROUND_UP (gr_saved
* UNITS_PER_WORD
,
5928 STACK_BOUNDARY
/ BITS_PER_UNIT
);
5929 off
-= vr_saved
* UNITS_PER_VREG
;
5931 for (i
= local_cum
.aapcs_nvrn
; i
< NUM_FP_ARG_REGS
; ++i
)
5935 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
, off
);
5936 mem
= gen_frame_mem (mode
, ptr
);
5937 set_mem_alias_set (mem
, get_varargs_alias_set ());
5938 aarch64_emit_move (mem
, gen_rtx_REG (mode
, V0_REGNUM
+ i
));
5939 off
+= UNITS_PER_VREG
;
5944 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5945 any complication of having crtl->args.pretend_args_size changed. */
5946 cfun
->machine
->saved_varargs_size
5947 = (AARCH64_ROUND_UP (gr_saved
* UNITS_PER_WORD
,
5948 STACK_BOUNDARY
/ BITS_PER_UNIT
)
5949 + vr_saved
* UNITS_PER_VREG
);
5953 aarch64_conditional_register_usage (void)
5958 for (i
= V0_REGNUM
; i
<= V31_REGNUM
; i
++)
5961 call_used_regs
[i
] = 1;
5966 /* Walk down the type tree of TYPE counting consecutive base elements.
5967 If *MODEP is VOIDmode, then set it to the first valid floating point
5968 type. If a non-floating point type is found, or if a floating point
5969 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5970 otherwise return the count in the sub-tree. */
5972 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
5974 enum machine_mode mode
;
5977 switch (TREE_CODE (type
))
5980 mode
= TYPE_MODE (type
);
5981 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
5984 if (*modep
== VOIDmode
)
5993 mode
= TYPE_MODE (TREE_TYPE (type
));
5994 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
5997 if (*modep
== VOIDmode
)
6006 /* Use V2SImode and V4SImode as representatives of all 64-bit
6007 and 128-bit vector types. */
6008 size
= int_size_in_bytes (type
);
6021 if (*modep
== VOIDmode
)
6024 /* Vector modes are considered to be opaque: two vectors are
6025 equivalent for the purposes of being homogeneous aggregates
6026 if they are the same size. */
6035 tree index
= TYPE_DOMAIN (type
);
6037 /* Can't handle incomplete types. */
6038 if (!COMPLETE_TYPE_P (type
))
6041 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
6044 || !TYPE_MAX_VALUE (index
)
6045 || !host_integerp (TYPE_MAX_VALUE (index
), 1)
6046 || !TYPE_MIN_VALUE (index
)
6047 || !host_integerp (TYPE_MIN_VALUE (index
), 1)
6051 count
*= (1 + tree_low_cst (TYPE_MAX_VALUE (index
), 1)
6052 - tree_low_cst (TYPE_MIN_VALUE (index
), 1));
6054 /* There must be no padding. */
6055 if (!host_integerp (TYPE_SIZE (type
), 1)
6056 || (tree_low_cst (TYPE_SIZE (type
), 1)
6057 != count
* GET_MODE_BITSIZE (*modep
)))
6069 /* Can't handle incomplete types. */
6070 if (!COMPLETE_TYPE_P (type
))
6073 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
6075 if (TREE_CODE (field
) != FIELD_DECL
)
6078 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
6084 /* There must be no padding. */
6085 if (!host_integerp (TYPE_SIZE (type
), 1)
6086 || (tree_low_cst (TYPE_SIZE (type
), 1)
6087 != count
* GET_MODE_BITSIZE (*modep
)))
6094 case QUAL_UNION_TYPE
:
6096 /* These aren't very interesting except in a degenerate case. */
6101 /* Can't handle incomplete types. */
6102 if (!COMPLETE_TYPE_P (type
))
6105 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
6107 if (TREE_CODE (field
) != FIELD_DECL
)
6110 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
6113 count
= count
> sub_count
? count
: sub_count
;
6116 /* There must be no padding. */
6117 if (!host_integerp (TYPE_SIZE (type
), 1)
6118 || (tree_low_cst (TYPE_SIZE (type
), 1)
6119 != count
* GET_MODE_BITSIZE (*modep
)))
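/* Examples of the walk above: struct { float x, y, z; } gives a count of 3
   with *MODEP == SFmode (a homogeneous floating-point aggregate);
   double d[4] gives 4 with *MODEP == DFmode; struct { float f; int i; }
   returns -1 because the integer field does not match the floating-point
   base mode.  */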
/* Return true if we use LRA instead of reload pass.  */
static bool
aarch64_lra_p (void)
{
  return aarch64_lra_flag;
}
/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
   solely relied on.  */

static bool
aarch64_composite_type_p (const_tree type,
			  enum machine_mode mode)
{
  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
    return true;

  if (mode == BLKmode
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return true;

  return false;
}
/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */

static bool
aarch64_short_vector_p (const_tree type,
			enum machine_mode mode)
{
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (!aarch64_composite_type_p (type, mode)
	   && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16) ? true : false;
}
6192 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6193 shall be passed or returned in simd/fp register(s) (providing these
6194 parameter passing registers are available).
6196 Upon successful return, *COUNT returns the number of needed registers,
6197 *BASE_MODE returns the mode of the individual register and when IS_HAF
6198 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6199 floating-point aggregate or a homogeneous short-vector aggregate. */
6202 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode
,
6204 enum machine_mode
*base_mode
,
6208 enum machine_mode new_mode
= VOIDmode
;
6209 bool composite_p
= aarch64_composite_type_p (type
, mode
);
6211 if (is_ha
!= NULL
) *is_ha
= false;
6213 if ((!composite_p
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6214 || aarch64_short_vector_p (type
, mode
))
6219 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
6221 if (is_ha
!= NULL
) *is_ha
= true;
6223 new_mode
= GET_MODE_INNER (mode
);
6225 else if (type
&& composite_p
)
6227 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
6229 if (ag_count
> 0 && ag_count
<= HA_MAX_NUM_FLDS
)
6231 if (is_ha
!= NULL
) *is_ha
= true;
6240 *base_mode
= new_mode
;
6244 /* Implement TARGET_STRUCT_VALUE_RTX. */
6247 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED
,
6248 int incoming ATTRIBUTE_UNUSED
)
6250 return gen_rtx_REG (Pmode
, AARCH64_STRUCT_VALUE_REGNUM
);
6253 /* Implements target hook vector_mode_supported_p. */
6255 aarch64_vector_mode_supported_p (enum machine_mode mode
)
6258 && (mode
== V4SImode
|| mode
== V8HImode
6259 || mode
== V16QImode
|| mode
== V2DImode
6260 || mode
== V2SImode
|| mode
== V4HImode
6261 || mode
== V8QImode
|| mode
== V2SFmode
6262 || mode
== V4SFmode
|| mode
== V2DFmode
))
6268 /* Return appropriate SIMD container
6269 for MODE within a vector of WIDTH bits. */
6270 static enum machine_mode
6271 aarch64_simd_container_mode (enum machine_mode mode
, unsigned width
)
6273 gcc_assert (width
== 64 || width
== 128);
6312 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6313 static enum machine_mode
6314 aarch64_preferred_simd_mode (enum machine_mode mode
)
6316 return aarch64_simd_container_mode (mode
, 128);
6319 /* Return the bitmask of possible vector sizes for the vectorizer
6322 aarch64_autovectorize_vector_sizes (void)
6327 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6328 vector types in order to conform to the AAPCS64 (see "Procedure
6329 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6330 qualify for emission with the mangled names defined in that document,
6331 a vector type must not only be of the correct mode but also be
6332 composed of AdvSIMD vector element types (e.g.
6333 _builtin_aarch64_simd_qi); these types are registered by
6334 aarch64_init_simd_builtins (). In other words, vector types defined
6335 in other ways e.g. via vector_size attribute will get default
6339 enum machine_mode mode
;
6340 const char *element_type_name
;
6341 const char *mangled_name
;
6342 } aarch64_simd_mangle_map_entry
;
6344 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map
[] = {
6345 /* 64-bit containerized types. */
6346 { V8QImode
, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6347 { V8QImode
, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6348 { V4HImode
, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6349 { V4HImode
, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6350 { V2SImode
, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6351 { V2SImode
, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6352 { V2SFmode
, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6353 { V8QImode
, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6354 { V4HImode
, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6355 /* 128-bit containerized types. */
6356 { V16QImode
, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6357 { V16QImode
, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6358 { V8HImode
, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6359 { V8HImode
, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6360 { V4SImode
, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6361 { V4SImode
, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6362 { V2DImode
, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6363 { V2DImode
, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6364 { V4SFmode
, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6365 { V2DFmode
, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6366 { V16QImode
, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6367 { V8HImode
, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6368 { VOIDmode
, NULL
, NULL
}
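/* Example: the arm_neon.h type int8x8_t has mode V8QImode and element type
   __builtin_aarch64_simd_qi, so it matches the first entry above; a
   declaration such as "void f (int8x8_t);" therefore mangles as
   _Z1f10__Int8x8_t instead of using the default vector mangling.  */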
6371 /* Implement TARGET_MANGLE_TYPE. */
6374 aarch64_mangle_type (const_tree type
)
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
6378 if (lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
6379 return "St9__va_list";
6381 /* Check the mode of the vector type, and the name of the vector
6382 element type, against the table. */
6383 if (TREE_CODE (type
) == VECTOR_TYPE
)
6385 aarch64_simd_mangle_map_entry
*pos
= aarch64_simd_mangle_map
;
6387 while (pos
->mode
!= VOIDmode
)
6389 tree elt_type
= TREE_TYPE (type
);
6391 if (pos
->mode
== TYPE_MODE (type
)
6392 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
6393 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
6394 pos
->element_type_name
))
6395 return pos
->mangled_name
;
6401 /* Use the default mangling. */
/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}
6419 /* Return true iff x is a uniform vector of floating-point
6420 constants, and the constant can be represented in
6421 quarter-precision form. Note, as aarch64_float_const_representable
6422 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6424 aarch64_vect_float_const_representable_p (rtx x
)
6427 REAL_VALUE_TYPE r0
, ri
;
6430 if (GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_FLOAT
)
6433 x0
= CONST_VECTOR_ELT (x
, 0);
6434 if (!CONST_DOUBLE_P (x0
))
6437 REAL_VALUE_FROM_CONST_DOUBLE (r0
, x0
);
6439 for (i
= 1; i
< CONST_VECTOR_NUNITS (x
); i
++)
6441 xi
= CONST_VECTOR_ELT (x
, i
);
6442 if (!CONST_DOUBLE_P (xi
))
6445 REAL_VALUE_FROM_CONST_DOUBLE (ri
, xi
);
6446 if (!REAL_VALUES_EQUAL (r0
, ri
))
6450 return aarch64_float_const_representable_p (x0
);
6453 /* Return true for valid and false for invalid. */
6455 aarch64_simd_valid_immediate (rtx op
, enum machine_mode mode
, bool inverse
,
6456 struct simd_immediate_info
*info
)
6458 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6460 for (i = 0; i < idx; i += (STRIDE)) \
6465 immtype = (CLASS); \
6466 elsize = (ELSIZE); \
6472 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
6473 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
6474 unsigned char bytes
[16];
6475 int immtype
= -1, matches
;
6476 unsigned int invmask
= inverse
? 0xff : 0;
6479 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
6481 if (! (aarch64_simd_imm_zero_p (op
, mode
)
6482 || aarch64_vect_float_const_representable_p (op
)))
6487 info
->value
= CONST_VECTOR_ELT (op
, 0);
6488 info
->element_width
= GET_MODE_BITSIZE (GET_MODE (info
->value
));
6496 /* Splat vector constant out into a byte vector. */
6497 for (i
= 0; i
< n_elts
; i
++)
6499 rtx el
= CONST_VECTOR_ELT (op
, i
);
6500 unsigned HOST_WIDE_INT elpart
;
6501 unsigned int part
, parts
;
6503 if (GET_CODE (el
) == CONST_INT
)
6505 elpart
= INTVAL (el
);
6508 else if (GET_CODE (el
) == CONST_DOUBLE
)
6510 elpart
= CONST_DOUBLE_LOW (el
);
6516 for (part
= 0; part
< parts
; part
++)
6519 for (byte
= 0; byte
< innersize
; byte
++)
6521 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
6522 elpart
>>= BITS_PER_UNIT
;
6524 if (GET_CODE (el
) == CONST_DOUBLE
)
6525 elpart
= CONST_DOUBLE_HIGH (el
);
6530 gcc_assert (idx
== GET_MODE_SIZE (mode
));
6534 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
6535 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 0, 0);
6537 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
6538 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
6540 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
6541 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
6543 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
6544 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3], 24, 0);
6546 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0, 0, 0);
6548 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1], 8, 0);
6550 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
6551 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 0, 1);
6553 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
6554 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
6556 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
6557 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
6559 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
6560 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3], 24, 1);
6562 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff, 0, 1);
6564 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1], 8, 1);
6566 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
6567 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
6569 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
6570 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
6572 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
6573 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
6575 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
6576 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
6578 CHECK (1, 8, 16, bytes
[i
] == bytes
[0], 0, 0);
6580 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
6581 && bytes
[i
] == bytes
[(i
+ 8) % idx
], 0, 0);
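/* Example: a V4SImode constant with every element equal to 0x47 splats to
   the byte pattern 47 00 00 00 repeated four times, so the first CHECK
   above (stride 4, element size 32, shift 0) matches and the constant can
   be generated with a single MOVI Vd.4S, #0x47.  */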
6590 info
->element_width
= elsize
;
6591 info
->mvn
= emvn
!= 0;
6592 info
->shift
= eshift
;
6594 unsigned HOST_WIDE_INT imm
= 0;
6596 if (immtype
>= 12 && immtype
<= 15)
6599 /* Un-invert bytes of recognized vector, if necessary. */
6601 for (i
= 0; i
< idx
; i
++)
6602 bytes
[i
] ^= invmask
;
6606 /* FIXME: Broken on 32-bit H_W_I hosts. */
6607 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
6609 for (i
= 0; i
< 8; i
++)
6610 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
6611 << (i
* BITS_PER_UNIT
);
6614 info
->value
= GEN_INT (imm
);
6618 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
6619 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
6621 /* Construct 'abcdefgh' because the assembler cannot handle
6622 generic constants. */
6625 imm
= (imm
>> info
->shift
) & 0xff;
6626 info
->value
= GEN_INT (imm
);
6635 aarch64_const_vec_all_same_int_p (rtx x
,
6636 HOST_WIDE_INT minval
,
6637 HOST_WIDE_INT maxval
)
6639 HOST_WIDE_INT firstval
;
6642 if (GET_CODE (x
) != CONST_VECTOR
6643 || GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_INT
)
6646 firstval
= INTVAL (CONST_VECTOR_ELT (x
, 0));
6647 if (firstval
< minval
|| firstval
> maxval
)
6650 count
= CONST_VECTOR_NUNITS (x
);
6651 for (i
= 1; i
< count
; i
++)
6652 if (INTVAL (CONST_VECTOR_ELT (x
, i
)) != firstval
)
6658 /* Check of immediate shift constants are within range. */
6660 aarch64_simd_shift_imm_p (rtx x
, enum machine_mode mode
, bool left
)
6662 int bit_width
= GET_MODE_UNIT_SIZE (mode
) * BITS_PER_UNIT
;
6664 return aarch64_const_vec_all_same_int_p (x
, 0, bit_width
- 1);
6666 return aarch64_const_vec_all_same_int_p (x
, 1, bit_width
);
6669 /* Return true if X is a uniform vector where all elements
6670 are either the floating-point constant 0.0 or the
6671 integer constant 0. */
6673 aarch64_simd_imm_zero_p (rtx x
, enum machine_mode mode
)
6675 return x
== CONST0_RTX (mode
);
bool
aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT imm = INTVAL (x);
  int i;

  for (i = 0; i < 8; i++)
    {
      unsigned int byte = imm & 0xff;
      if (byte != 0xff && byte != 0)
	return false;
      imm >>= 8;
    }

  return true;
}
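/* Example: 0xff00ff0000ff00ff consists solely of 0x00 and 0xff bytes, so it
   is accepted (it can be generated with a 64-bit MOVI byte-mask immediate),
   whereas 0x0123456789abcdef is rejected.  */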
6696 aarch64_mov_operand_p (rtx x
,
6697 enum aarch64_symbol_context context
,
6698 enum machine_mode mode
)
6700 if (GET_CODE (x
) == HIGH
6701 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
6704 if (CONST_INT_P (x
) && aarch64_move_imm (INTVAL (x
), mode
))
6707 if (GET_CODE (x
) == SYMBOL_REF
&& mode
== DImode
&& CONSTANT_ADDRESS_P (x
))
6710 return aarch64_classify_symbolic_expression (x
, context
)
6711 == SYMBOL_TINY_ABSOLUTE
;
6714 /* Return a const_int vector of VAL. */
6716 aarch64_simd_gen_const_vector_dup (enum machine_mode mode
, int val
)
6718 int nunits
= GET_MODE_NUNITS (mode
);
6719 rtvec v
= rtvec_alloc (nunits
);
6722 for (i
=0; i
< nunits
; i
++)
6723 RTVEC_ELT (v
, i
) = GEN_INT (val
);
6725 return gen_rtx_CONST_VECTOR (mode
, v
);
6728 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6731 aarch64_simd_scalar_immediate_valid_for_move (rtx op
, enum machine_mode mode
)
6733 enum machine_mode vmode
;
6735 gcc_assert (!VECTOR_MODE_P (mode
));
6736 vmode
= aarch64_preferred_simd_mode (mode
);
6737 rtx op_v
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (op
));
6738 return aarch64_simd_valid_immediate (op_v
, vmode
, false, NULL
);
6741 /* Construct and return a PARALLEL RTX vector. */
6743 aarch64_simd_vect_par_cnst_half (enum machine_mode mode
, bool high
)
6745 int nunits
= GET_MODE_NUNITS (mode
);
6746 rtvec v
= rtvec_alloc (nunits
/ 2);
6747 int base
= high
? nunits
/ 2 : 0;
6751 for (i
=0; i
< nunits
/ 2; i
++)
6752 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
6754 t1
= gen_rtx_PARALLEL (mode
, v
);
6758 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6759 HIGH (exclusive). */
6761 aarch64_simd_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
6764 gcc_assert (GET_CODE (operand
) == CONST_INT
);
6765 lane
= INTVAL (operand
);
6767 if (lane
< low
|| lane
>= high
)
6768 error ("lane out of range");
6772 aarch64_simd_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
6774 gcc_assert (GET_CODE (operand
) == CONST_INT
);
6775 HOST_WIDE_INT lane
= INTVAL (operand
);
6777 if (lane
< low
|| lane
>= high
)
6778 error ("constant out of range");
6781 /* Emit code to reinterpret one AdvSIMD type as another,
6782 without altering bits. */
6784 aarch64_simd_reinterpret (rtx dest
, rtx src
)
6786 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
6789 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
6792 aarch64_simd_emit_pair_result_insn (enum machine_mode mode
,
6793 rtx (*intfn
) (rtx
, rtx
, rtx
), rtx destaddr
,
6796 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
6797 rtx tmp1
= gen_reg_rtx (mode
);
6798 rtx tmp2
= gen_reg_rtx (mode
);
6800 emit_insn (intfn (tmp1
, op1
, tmp2
));
6802 emit_move_insn (mem
, tmp1
);
6803 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
6804 emit_move_insn (mem
, tmp2
);
6807 /* Return TRUE if OP is a valid vector addressing mode. */
6809 aarch64_simd_mem_operand_p (rtx op
)
6811 return MEM_P (op
) && (GET_CODE (XEXP (op
, 0)) == POST_INC
6812 || GET_CODE (XEXP (op
, 0)) == REG
);
6815 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6816 not to early-clobber SRC registers in the process.
6818 We assume that the operands described by SRC and DEST represent a
6819 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6820 number of components into which the copy has been decomposed. */
6822 aarch64_simd_disambiguate_copy (rtx
*operands
, rtx
*dest
,
6823 rtx
*src
, unsigned int count
)
6827 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
6828 || REGNO (operands
[0]) < REGNO (operands
[1]))
6830 for (i
= 0; i
< count
; i
++)
6832 operands
[2 * i
] = dest
[i
];
6833 operands
[2 * i
+ 1] = src
[i
];
6838 for (i
= 0; i
< count
; i
++)
6840 operands
[2 * i
] = dest
[count
- i
- 1];
6841 operands
[2 * i
+ 1] = src
[count
- i
- 1];
6846 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6847 one of VSTRUCT modes: OI, CI or XI. */
6849 aarch64_simd_attr_length_move (rtx insn
)
6851 enum machine_mode mode
;
6853 extract_insn_cached (insn
);
6855 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
6857 mode
= GET_MODE (recog_data
.operand
[0]);
6873 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6874 alignment of a vector to 128 bits. */
6875 static HOST_WIDE_INT
6876 aarch64_simd_vector_alignment (const_tree type
)
6878 HOST_WIDE_INT align
= tree_low_cst (TYPE_SIZE (type
), 0);
6879 return MIN (align
, 128);
6882 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6884 aarch64_simd_vector_alignment_reachable (const_tree type
, bool is_packed
)
6889 /* We guarantee alignment for vectors up to 128-bits. */
6890 if (tree_int_cst_compare (TYPE_SIZE (type
),
6891 bitsize_int (BIGGEST_ALIGNMENT
)) > 0)
6894 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6898 /* If VALS is a vector constant that can be loaded into a register
6899 using DUP, generate instructions to do so and return an RTX to
6900 assign to the register. Otherwise return NULL_RTX. */
6902 aarch64_simd_dup_constant (rtx vals
)
6904 enum machine_mode mode
= GET_MODE (vals
);
6905 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
6906 int n_elts
= GET_MODE_NUNITS (mode
);
6907 bool all_same
= true;
6911 if (GET_CODE (vals
) != CONST_VECTOR
)
6914 for (i
= 1; i
< n_elts
; ++i
)
6916 x
= CONST_VECTOR_ELT (vals
, i
);
6917 if (!rtx_equal_p (x
, CONST_VECTOR_ELT (vals
, 0)))
6924 /* We can load this constant by using DUP and a constant in a
6925 single ARM register. This will be cheaper than a vector
6927 x
= copy_to_mode_reg (inner_mode
, CONST_VECTOR_ELT (vals
, 0));
6928 return gen_rtx_VEC_DUPLICATE (mode
, x
);
6932 /* Generate code to load VALS, which is a PARALLEL containing only
6933 constants (for vec_init) or CONST_VECTOR, efficiently into a
6934 register. Returns an RTX to copy into the register, or NULL_RTX
6935 for a PARALLEL that can not be converted into a CONST_VECTOR. */
6937 aarch64_simd_make_constant (rtx vals
)
6939 enum machine_mode mode
= GET_MODE (vals
);
6941 rtx const_vec
= NULL_RTX
;
6942 int n_elts
= GET_MODE_NUNITS (mode
);
6946 if (GET_CODE (vals
) == CONST_VECTOR
)
6948 else if (GET_CODE (vals
) == PARALLEL
)
6950 /* A CONST_VECTOR must contain only CONST_INTs and
6951 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6952 Only store valid constants in a CONST_VECTOR. */
6953 for (i
= 0; i
< n_elts
; ++i
)
6955 rtx x
= XVECEXP (vals
, 0, i
);
6956 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
6959 if (n_const
== n_elts
)
6960 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
6965 if (const_vec
!= NULL_RTX
6966 && aarch64_simd_valid_immediate (const_vec
, mode
, false, NULL
))
6967 /* Load using MOVI/MVNI. */
6969 else if ((const_dup
= aarch64_simd_dup_constant (vals
)) != NULL_RTX
)
6970 /* Loaded using DUP. */
6972 else if (const_vec
!= NULL_RTX
)
6973 /* Load from constant pool. We can not take advantage of single-cycle
6974 LD1 because we need a PC-relative addressing mode. */
6977 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6978 We can not construct an initializer. */
void
aarch64_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  x = XVECEXP (vals, 0, 0);
  if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
    n_var = 1, one_var = 0;

  for (i = 1; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
        ++n_var, one_var = i;

      if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = aarch64_simd_make_constant (vals);
      if (constant != NULL_RTX)
        {
          emit_move_insn (target, constant);
          return;
        }
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);
      enum insn_code icode;

      /* Load constant part of vector, substitute neighboring value for
         varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
      aarch64_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      icode = optab_handler (vec_set_optab, mode);
      gcc_assert (icode != CODE_FOR_nothing);
      emit_insn (GEN_FCN (icode) (target, x, index));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
                                       i * GET_MODE_SIZE (inner_mode)),
                    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
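/* Worked example (illustrative, not from the original source): for a
   V4SImode initializer {1, 2, x, 4} with one variable element, the code
   above first builds the constant vector {1, 2, 4, 4} (the neighbouring
   constant 4 substitutes for lane 2) and then uses the vec_set pattern
   to insert x into lane 2, avoiding a trip through the stack.  */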
static unsigned HOST_WIDE_INT
aarch64_shift_truncation_mask (enum machine_mode mode)
{
  return
    (aarch64_vector_mode_supported_p (mode)
     || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
}
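/* For example (illustrative): for scalar SImode this hook returns 31, so
   the middle end may omit an explicit "& 31" applied to a shift count;
   for Advanced SIMD vector and structure modes it returns 0, meaning no
   truncation of the shift amount may be assumed.  */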
#ifndef TLS_SECTION_ASM_FLAG
#define TLS_SECTION_ASM_FLAG 'T'
#endif

void
aarch64_elf_asm_named_section (const char *name, unsigned int flags,
                               tree decl ATTRIBUTE_UNUSED)
{
  char flagchars[10], *f = flagchars;

  /* If we have already declared this section, we can use an
     abbreviated form to switch back to it -- unless this section is
     part of a COMDAT group, in which case GAS requires the full
     declaration every time.  */
  if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
      && (flags & SECTION_DECLARED))
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  if (!(flags & SECTION_DEBUG))
    *f++ = 'a';
  if (flags & SECTION_WRITE)
    *f++ = 'w';
  if (flags & SECTION_CODE)
    *f++ = 'x';
  if (flags & SECTION_SMALL)
    *f++ = 's';
  if (flags & SECTION_MERGE)
    *f++ = 'M';
  if (flags & SECTION_STRINGS)
    *f++ = 'S';
  if (flags & SECTION_TLS)
    *f++ = TLS_SECTION_ASM_FLAG;
  if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
    *f++ = 'G';
  *f = '\0';

  fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);

  if (!(flags & SECTION_NOTYPE))
    {
      const char *type;
      const char *format;

      if (flags & SECTION_BSS)
        type = "nobits";
      else
        type = "progbits";

#ifdef TYPE_OPERAND_FMT
      format = "," TYPE_OPERAND_FMT;
#else
      format = ",@%s";
#endif

      fprintf (asm_out_file, format, type);

      if (flags & SECTION_ENTSIZE)
        fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
        {
          if (TREE_CODE (decl) == IDENTIFIER_NODE)
            fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
          else
            fprintf (asm_out_file, ",%s,comdat",
                     IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
        }
    }

  putc ('\n', asm_out_file);
}
/* Select a format to encode pointers in exception handling data.  */
int
aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
{
  int type;

  switch (aarch64_cmodel)
    {
    case AARCH64_CMODEL_TINY:
    case AARCH64_CMODEL_TINY_PIC:
    case AARCH64_CMODEL_SMALL:
    case AARCH64_CMODEL_SMALL_PIC:
      /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
         for everything.  */
      type = DW_EH_PE_sdata4;
      break;
    default:
      /* No assumptions here.  8-byte relocs required.  */
      type = DW_EH_PE_sdata8;
      break;
    }
  return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}
/* Emit load exclusive.  */

static void
aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
                             rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_load_exclusiveqi; break;
    case HImode: gen = gen_aarch64_load_exclusivehi; break;
    case SImode: gen = gen_aarch64_load_exclusivesi; break;
    case DImode: gen = gen_aarch64_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, model_rtx));
}
/* Emit store exclusive.  */

static void
aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
                              rtx rval, rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_store_exclusiveqi; break;
    case HImode: gen = gen_aarch64_store_exclusivehi; break;
    case SImode: gen = gen_aarch64_store_exclusivesi; break;
    case DImode: gen = gen_aarch64_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem, model_rtx));
}
/* Mark the previous jump instruction as unlikely.  */

static void
aarch64_emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
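/* REG_BR_PROB_BASE is 10000, so VERY_UNLIKELY above is 99, i.e. the note
   records roughly a 1% probability of the branch being taken; the retry
   and failure edges of the exclusive-access loops below therefore stay
   out of the hot path.  */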
/* Expand a compare and swap pattern.  */

void
aarch64_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  enum machine_mode mode, cmp_mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);
  cmp_mode = mode;

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
      && INTVAL (mod_s) == MEMMODEL_RELEASE)
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For short modes, we're going to perform the comparison in SImode,
         so do the zero-extension now.  */
      cmp_mode = SImode;
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* Fall through.  */

    case SImode:
    case DImode:
      /* Force the value into a register if needed.  */
      if (!aarch64_plus_operand (oldval, mode))
        oldval = force_reg (cmp_mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
/* Split a compare and swap pattern.  */

void
aarch64_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  enum machine_mode mode;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  scratch = operands[7];
  mode = GET_MODE (mem);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);

  cond = aarch64_gen_compare_reg (NE, rval, oldval);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                                gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
    }

  emit_label (label2);
}
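/* Roughly, a strong SImode compare-and-swap split this way yields a loop
   of the following shape (register names illustrative; the exact load and
   store mnemonics depend on the memory model operands):

     retry:  ldxr    w0, [x1]        // load exclusive
             cmp     w0, w2          // compare with expected value
             bne     done            // mismatch: fail
             stxr    w3, w4, [x1]    // try to store the new value
             cbnz    w3, retry       // reservation lost: retry
     done:                                                          */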
/* Split an atomic operation.  */

void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
                         rtx value, rtx model_rtx, rtx cond)
{
  enum machine_mode mode = GET_MODE (mem);
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx label, x;

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
        {
          value = GEN_INT (-INTVAL (value));
          code = PLUS;
        }
      /* Fall through.  */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  aarch64_emit_store_exclusive (mode, cond, mem,
                                gen_lowpart (mode, new_out), model_rtx);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
}
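/* As an illustration (not from the original source), an SImode atomic
   fetch-and-add splits into a loop of roughly this shape:

     retry:  ldxr    w0, [x2]        // old value
             add     w1, w0, w3      // apply the operation
             stxr    w4, w1, [x2]    // attempt the store exclusive
             cbnz    w4, retry       // lost the reservation: retry  */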
static void
aarch64_print_extension (void)
{
  const struct aarch64_option_extension *opt = NULL;

  for (opt = all_extensions; opt->name != NULL; opt++)
    if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
      asm_fprintf (asm_out_file, "+%s", opt->name);

  asm_fprintf (asm_out_file, "\n");
}
static void
aarch64_start_file (void)
{
  if (selected_arch)
    {
      asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
      aarch64_print_extension ();
    }
  else if (selected_cpu)
    {
      asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
      aarch64_print_extension ();
    }
  default_file_start();
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
aarch64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  return VOIDmode;
}
/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
   by:

   (-1)^s * (n/16) * 2^r

   Where:
     's' is the sign bit.
     'n' is an integer in the range 16 <= n <= 31.
     'r' is an integer in the range -3 <= r <= 4.  */

/* Return true iff X can be represented as a quarter-precision
   floating point immediate operand.  Note, we cannot represent 0.0.  */
bool
aarch64_float_const_representable_p (rtx x)
{
  /* This represents our current view of how many bits
     make up the mantissa.  */
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  int exponent;
  unsigned HOST_WIDE_INT mantissa, mask;
  HOST_WIDE_INT m1, m2;
  REAL_VALUE_TYPE r, m;

  if (!CONST_DOUBLE_P (x))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We cannot represent infinities, NaNs or +/-zero.  We won't
     know if we have +zero until we analyse the mantissa, but we
     can reject the other invalid values.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
      || REAL_VALUE_MINUS_ZERO (r))
    return false;

  /* Extract exponent.  */
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     m1 holds the low part of the mantissa, m2 the high part.
     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
     bits for the mantissa, this can fail (low bits will be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  REAL_VALUE_TO_INT (&m1, &m2, m);

  /* If the low part of the mantissa has bits set we cannot represent
     the value.  */
  if (m1 != 0)
    return false;
  /* We have rejected the lower HOST_WIDE_INT, so update our
     understanding of how many bits lie in the mantissa and
     look only at the high HOST_WIDE_INT.  */
  mantissa = m2;
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can only represent values with a mantissa of the form 1.xxxx.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return false;

  /* Having filtered unrepresentable values, we may now remove all
     but the highest 5 bits.  */
  mantissa >>= point_pos - 5;

  /* We cannot represent the value 0.0, so reject it.  This is handled
     elsewhere.  */
  if (mantissa == 0)
    return false;

  /* Then, as bit 4 is always set, we can mask it off, leaving
     the mantissa in the range [0, 15].  */
  mantissa &= ~(1 << 4);
  gcc_assert (mantissa <= 15);

  /* GCC internally does not use IEEE754-like encoding (where normalized
     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
     Our mantissa values are shifted 4 places to the left relative to
     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
     by 5 places to correct for GCC's representation.  */
  exponent = 5 - exponent;

  return (exponent >= 0 && exponent <= 7);
}
char*
aarch64_output_simd_mov_immediate (rtx const_vector,
                                   enum machine_mode mode,
                                   unsigned width)
{
  bool is_valid;
  static char templ[40];
  const char *mnemonic;
  const char *shift_op;
  unsigned int lane_count = 0;
  char element_char;

  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };

  /* This will return true to show const_vector is legal for use as either
     a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
  gcc_assert (is_valid);

  element_char = sizetochar (info.element_width);
  lane_count = width / info.element_width;

  mode = GET_MODE_INNER (mode);
  if (mode == SFmode || mode == DFmode)
    {
      gcc_assert (info.shift == 0 && ! info.mvn);
      if (aarch64_float_const_zero_rtx_p (info.value))
        info.value = GEN_INT (0);
      else
        {
#define buf_size 20
          REAL_VALUE_TYPE r;
          REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
          char float_buf[buf_size] = {'\0'};
          real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
#undef buf_size

          if (lane_count == 1)
            snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
          else
            snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
                      lane_count, element_char, float_buf);
          return templ;
        }
    }

  mnemonic = info.mvn ? "mvni" : "movi";
  shift_op = info.msl ? "msl" : "lsl";

  if (lane_count == 1)
    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
              mnemonic, UINTVAL (info.value));
  else if (info.shift)
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
              ", %s %d", mnemonic, lane_count, element_char,
              UINTVAL (info.value), shift_op, info.shift);
  else
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
              mnemonic, lane_count, element_char, UINTVAL (info.value));
  return templ;
}
char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate,
                                          enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_simd_container_mode (mode, 64);
  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
}
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  enum machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
                               GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
         is in the right place already.  */
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
    }
}
/* vec_perm support.  */

#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_SIMD);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
        {
          /* Expand the argument to a V16QI mode by duplicating it.  */
          rtx pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
          emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
        }
      else
        {
          emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
        }
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
        {
          pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
          emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
        }
      else
        {
          pair = gen_reg_rtx (OImode);
          emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
          emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
        }
    }
}
void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
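/* Example of the masking above (illustrative): when OP0 and OP1 are the
   same V8QI register, MASK is a vector of 7s, so a selector lane holding
   9 is reduced to 1 before the TBL; without the AND, TBL would return 0
   for the out-of-range index instead of wrapping modulo the lane count.  */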
/* Recognize patterns suitable for the TRN instructions.  */
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
        return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
        return false;
    }

  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_trn2v16qi; break;
        case V8QImode: gen = gen_aarch64_trn2v8qi; break;
        case V8HImode: gen = gen_aarch64_trn2v8hi; break;
        case V4HImode: gen = gen_aarch64_trn2v4hi; break;
        case V4SImode: gen = gen_aarch64_trn2v4si; break;
        case V2SImode: gen = gen_aarch64_trn2v2si; break;
        case V2DImode: gen = gen_aarch64_trn2v2di; break;
        case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
        case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
        case V2DFmode: gen = gen_aarch64_trn2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_trn1v16qi; break;
        case V8QImode: gen = gen_aarch64_trn1v8qi; break;
        case V8HImode: gen = gen_aarch64_trn1v8hi; break;
        case V4HImode: gen = gen_aarch64_trn1v4hi; break;
        case V4SImode: gen = gen_aarch64_trn1v4si; break;
        case V2SImode: gen = gen_aarch64_trn1v2si; break;
        case V2DImode: gen = gen_aarch64_trn1v2di; break;
        case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
        case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
        case V2DFmode: gen = gen_aarch64_trn1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
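/* For V4SImode (illustrative) this routine accepts the selector
   {0, 4, 2, 6}, which maps to TRN1, and {1, 5, 3, 7}, which maps to TRN2;
   any other leading index makes it return false so the remaining handlers
   can be tried.  */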
/* Recognize patterns suitable for the UZP instructions.  */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
        return false;
    }

  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
        case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
        case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
        case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
        case V4SImode: gen = gen_aarch64_uzp2v4si; break;
        case V2SImode: gen = gen_aarch64_uzp2v2si; break;
        case V2DImode: gen = gen_aarch64_uzp2v2di; break;
        case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
        case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
        case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
        case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
        case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
        case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
        case V4SImode: gen = gen_aarch64_uzp1v4si; break;
        case V2SImode: gen = gen_aarch64_uzp1v2si; break;
        case V2DImode: gen = gen_aarch64_uzp1v2di; break;
        case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
        case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
        case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
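/* For V4SImode (illustrative) the accepted selectors are {0, 2, 4, 6}
   (even lanes of the concatenated inputs, UZP1) and {1, 3, 5, 7}
   (odd lanes, UZP2).  */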
/* Recognize patterns suitable for the ZIP instructions.  */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
        return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
        return false;
    }

  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_zip2v16qi; break;
        case V8QImode: gen = gen_aarch64_zip2v8qi; break;
        case V8HImode: gen = gen_aarch64_zip2v8hi; break;
        case V4HImode: gen = gen_aarch64_zip2v4hi; break;
        case V4SImode: gen = gen_aarch64_zip2v4si; break;
        case V2SImode: gen = gen_aarch64_zip2v2si; break;
        case V2DImode: gen = gen_aarch64_zip2v2di; break;
        case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
        case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
        case V2DFmode: gen = gen_aarch64_zip2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_zip1v16qi; break;
        case V8QImode: gen = gen_aarch64_zip1v8qi; break;
        case V8HImode: gen = gen_aarch64_zip1v8hi; break;
        case V4HImode: gen = gen_aarch64_zip1v4hi; break;
        case V4SImode: gen = gen_aarch64_zip1v4si; break;
        case V2SImode: gen = gen_aarch64_zip1v2si; break;
        case V2DImode: gen = gen_aarch64_zip1v2di; break;
        case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
        case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
        case V2DFmode: gen = gen_aarch64_zip1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
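/* For V4SImode (illustrative) the accepted selectors are {0, 4, 1, 5},
   which interleaves the low halves of the two inputs (ZIP1), and
   {2, 6, 3, 7}, which interleaves the high halves (ZIP2).  */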
static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
  rtx (*gen) (rtx, rtx, rtx);
  rtx out = d->target;
  rtx in0;
  enum machine_mode vmode = d->vmode;
  unsigned int i, elt, nelt = d->nelt;
  rtx lane;

  /* TODO: This may not be big-endian safe.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; i++)
    {
      if (elt != d->perm[i])
        return false;
    }

  /* The generic preparation in aarch64_expand_vec_perm_const_1
     swaps the operand order and the permute indices if it finds
     d->perm[0] to be in the second operand.  Thus, we can always
     use d->op0 and need not do any extra arithmetic to get the
     correct lane number.  */
  in0 = d->op0;
  lane = GEN_INT (elt);

  switch (vmode)
    {
    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
    default:
      return false;
    }

  emit_insn (gen (out, in0, lane));
  return true;
}
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
        d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_zip (d))
        return true;
      else if (aarch64_evpc_uzp (d))
        return true;
      else if (aarch64_evpc_trn (d))
        return true;
      else if (aarch64_evpc_dup (d))
        return true;
      return aarch64_evpc_tbl (d);
    }

  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* Fall through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}
static bool
aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P aarch64_lra_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-aarch64.h"