gcc/config/aarch64/aarch64.c

   1 /* Machine description for AArch64 architecture.
   2    Copyright (C) 2009-2014 Free Software Foundation, Inc.
   3    Contributed by ARM Ltd.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    GCC is distributed in the hope that it will be useful, but
  13    WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with GCC; see the file COPYING3.  If not see
  19    <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "insn-codes.h"
  26 #include "rtl.h"
  27 #include "insn-attr.h"
  28 #include "tree.h"
  29 #include "stringpool.h"
  30 #include "stor-layout.h"
  31 #include "calls.h"
  32 #include "varasm.h"
  33 #include "regs.h"
  34 #include "df.h"
  35 #include "hard-reg-set.h"
  36 #include "output.h"
  37 #include "expr.h"
  38 #include "reload.h"
  39 #include "toplev.h"
  40 #include "target.h"
  41 #include "target-def.h"
  42 #include "targhooks.h"
  43 #include "ggc.h"
  44 #include "function.h"
  45 #include "tm_p.h"
  46 #include "recog.h"
  47 #include "langhooks.h"
  48 #include "diagnostic-core.h"
  49 #include "pointer-set.h"
  50 #include "hash-table.h"
  51 #include "vec.h"
  52 #include "basic-block.h"
  53 #include "tree-ssa-alias.h"
  54 #include "internal-fn.h"
  55 #include "gimple-fold.h"
  56 #include "tree-eh.h"
  57 #include "gimple-expr.h"
  58 #include "is-a.h"
  59 #include "gimple.h"
  60 #include "gimplify.h"
  61 #include "optabs.h"
  62 #include "dwarf2.h"
  63 #include "cfgloop.h"
  64 #include "tree-vectorizer.h"
  65 #include "config/arm/aarch-cost-tables.h"
  66 #include "dumpfile.h"
  67 #include "builtins.h"
  68
  69 /* Defined for convenience.  */
  70 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
  71
  72 /* Classifies an address.
  73
  74    ADDRESS_REG_IMM
  75        A simple base register plus immediate offset.
  76
  77    ADDRESS_REG_WB
  78        A base register indexed by immediate offset with writeback.
  79
  80    ADDRESS_REG_REG
  81        A base register indexed by (optionally scaled) register.
  82
  83    ADDRESS_REG_UXTW
  84        A base register indexed by (optionally scaled) zero-extended register.
  85
  86    ADDRESS_REG_SXTW
  87        A base register indexed by (optionally scaled) sign-extended register.
  88
  89    ADDRESS_LO_SUM
  90        A LO_SUM rtx with a base register and "LO12" symbol relocation.
  91
  92    ADDRESS_SYMBOLIC:
  93        A constant symbolic address, in pc-relative literal pool.  */
  94
  95 enum aarch64_address_type {
  96   ADDRESS_REG_IMM,
  97   ADDRESS_REG_WB,
  98   ADDRESS_REG_REG,
  99   ADDRESS_REG_UXTW,
 100   ADDRESS_REG_SXTW,
 101   ADDRESS_LO_SUM,
 102   ADDRESS_SYMBOLIC
 103 };
 104
 105 struct aarch64_address_info {
 106   enum aarch64_address_type type;
 107   rtx base;
 108   rtx offset;
 109   int shift;
 110   enum aarch64_symbol_type symbol_type;
 111 };
 112
 113 struct simd_immediate_info
 114 {
 115   rtx value;
 116   int shift;
 117   int element_width;
 118   bool mvn;
 119   bool msl;
 120 };
 121
 122 /* The current code model.  */
 123 enum aarch64_code_model aarch64_cmodel;
 124
 125 #ifdef HAVE_AS_TLS
 126 #undef TARGET_HAVE_TLS
 127 #define TARGET_HAVE_TLS 1
 128 #endif
 129
 130 static bool aarch64_lra_p (void);
 131 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
 132 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
 133                                                      const_tree,
 134                                                      enum machine_mode *, int *,
 135                                                      bool *);
 136 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
 137 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
 138 static void aarch64_override_options_after_change (void);
 139 static bool aarch64_vector_mode_supported_p (enum machine_mode);
 140 static unsigned bit_count (unsigned HOST_WIDE_INT);
 141 static bool aarch64_const_vec_all_same_int_p (rtx,
 142                                               HOST_WIDE_INT, HOST_WIDE_INT);
 143
 144 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 145                                                  const unsigned char *sel);
 146 static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
 147
 148 /* The processor for which instructions should be scheduled.  */
 149 enum aarch64_processor aarch64_tune = cortexa53;
 150
 151 /* The current tuning set.  */
 152 const struct tune_params *aarch64_tune_params;
 153
 154 /* Mask to specify which instructions we are allowed to generate.  */
 155 unsigned long aarch64_isa_flags = 0;
 156
 157 /* Mask to specify which instruction scheduling options should be used.  */
 158 unsigned long aarch64_tune_flags = 0;
 159
 160 /* Tuning parameters.  */
 161
 162 #if HAVE_DESIGNATED_INITIALIZERS
 163 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
 164 #else
 165 #define NAMED_PARAM(NAME, VAL) (VAL)
 166 #endif
 167
 168 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 169 __extension__
 170 #endif
 171
 172 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 173 __extension__
 174 #endif
 175 static const struct cpu_addrcost_table generic_addrcost_table =
 176 {
 177 #if HAVE_DESIGNATED_INITIALIZERS
 178   .addr_scale_costs =
 179 #endif
 180     {
 181       NAMED_PARAM (qi, 0),
 182       NAMED_PARAM (hi, 0),
 183       NAMED_PARAM (si, 0),
 184       NAMED_PARAM (ti, 0),
 185     },
 186   NAMED_PARAM (pre_modify, 0),
 187   NAMED_PARAM (post_modify, 0),
 188   NAMED_PARAM (register_offset, 0),
 189   NAMED_PARAM (register_extend, 0),
 190   NAMED_PARAM (imm_offset, 0)
 191 };
 192
 193 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 194 __extension__
 195 #endif
 196 static const struct cpu_addrcost_table cortexa57_addrcost_table =
 197 {
 198 #if HAVE_DESIGNATED_INITIALIZERS
 199   .addr_scale_costs =
 200 #endif
 201     {
 202       NAMED_PARAM (qi, 0),
 203       NAMED_PARAM (hi, 1),
 204       NAMED_PARAM (si, 0),
 205       NAMED_PARAM (ti, 1),
 206     },
 207   NAMED_PARAM (pre_modify, 0),
 208   NAMED_PARAM (post_modify, 0),
 209   NAMED_PARAM (register_offset, 0),
 210   NAMED_PARAM (register_extend, 0),
 211   NAMED_PARAM (imm_offset, 0),
 212 };
 213
 214 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 215 __extension__
 216 #endif
 217 static const struct cpu_regmove_cost generic_regmove_cost =
 218 {
 219   NAMED_PARAM (GP2GP, 1),
 220   NAMED_PARAM (GP2FP, 2),
 221   NAMED_PARAM (FP2GP, 2),
 222   /* We currently do not provide direct support for TFmode Q->Q move.
 223      Therefore we need to raise the cost above 2 in order to have
 224      reload handle the situation.  */
 225   NAMED_PARAM (FP2FP, 4)
 226 };
 227
 228 /* Generic costs for vector insn classes.  */
 229 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 230 __extension__
 231 #endif
 232 static const struct cpu_vector_cost generic_vector_cost =
 233 {
 234   NAMED_PARAM (scalar_stmt_cost, 1),
 235   NAMED_PARAM (scalar_load_cost, 1),
 236   NAMED_PARAM (scalar_store_cost, 1),
 237   NAMED_PARAM (vec_stmt_cost, 1),
 238   NAMED_PARAM (vec_to_scalar_cost, 1),
 239   NAMED_PARAM (scalar_to_vec_cost, 1),
 240   NAMED_PARAM (vec_align_load_cost, 1),
 241   NAMED_PARAM (vec_unalign_load_cost, 1),
 242   NAMED_PARAM (vec_unalign_store_cost, 1),
 243   NAMED_PARAM (vec_store_cost, 1),
 244   NAMED_PARAM (cond_taken_branch_cost, 3),
 245   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 246 };
 247
 248 /* Generic costs for vector insn classes.  */
 249 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 250 __extension__
 251 #endif
 252 static const struct cpu_vector_cost cortexa57_vector_cost =
 253 {
 254   NAMED_PARAM (scalar_stmt_cost, 1),
 255   NAMED_PARAM (scalar_load_cost, 4),
 256   NAMED_PARAM (scalar_store_cost, 1),
 257   NAMED_PARAM (vec_stmt_cost, 3),
 258   NAMED_PARAM (vec_to_scalar_cost, 8),
 259   NAMED_PARAM (scalar_to_vec_cost, 8),
 260   NAMED_PARAM (vec_align_load_cost, 5),
 261   NAMED_PARAM (vec_unalign_load_cost, 5),
 262   NAMED_PARAM (vec_unalign_store_cost, 1),
 263   NAMED_PARAM (vec_store_cost, 1),
 264   NAMED_PARAM (cond_taken_branch_cost, 1),
 265   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 266 };
 267
 268 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 269 __extension__
 270 #endif
 271 static const struct tune_params generic_tunings =
 272 {
 273   &cortexa57_extra_costs,
 274   &generic_addrcost_table,
 275   &generic_regmove_cost,
 276   &generic_vector_cost,
 277   NAMED_PARAM (memmov_cost, 4),
 278   NAMED_PARAM (issue_rate, 2)
 279 };
 280
 281 static const struct tune_params cortexa53_tunings =
 282 {
 283   &cortexa53_extra_costs,
 284   &generic_addrcost_table,
 285   &generic_regmove_cost,
 286   &generic_vector_cost,
 287   NAMED_PARAM (memmov_cost, 4),
 288   NAMED_PARAM (issue_rate, 2)
 289 };
 290
 291 static const struct tune_params cortexa57_tunings =
 292 {
 293   &cortexa57_extra_costs,
 294   &cortexa57_addrcost_table,
 295   &generic_regmove_cost,
 296   &cortexa57_vector_cost,
 297   NAMED_PARAM (memmov_cost, 4),
 298   NAMED_PARAM (issue_rate, 3)
 299 };
 300
 301 /* A processor implementing AArch64.  */
 302 struct processor
 303 {
 304   const char *const name;
 305   enum aarch64_processor core;
 306   const char *arch;
 307   const unsigned long flags;
 308   const struct tune_params *const tune;
 309 };
 310
 311 /* Processor cores implementing AArch64.  */
 312 static const struct processor all_cores[] =
 313 {
 314 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
 315   {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
 316 #include "aarch64-cores.def"
 317 #undef AARCH64_CORE
 318   {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
 319   {NULL, aarch64_none, NULL, 0, NULL}
 320 };
 321
 322 /* Architectures implementing AArch64.  */
 323 static const struct processor all_architectures[] =
 324 {
 325 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
 326   {NAME, CORE, #ARCH, FLAGS, NULL},
 327 #include "aarch64-arches.def"
 328 #undef AARCH64_ARCH
 329   {NULL, aarch64_none, NULL, 0, NULL}
 330 };
 331
 332 /* Target specification.  These are populated as commandline arguments
 333    are processed, or NULL if not specified.  */
 334 static const struct processor *selected_arch;
 335 static const struct processor *selected_cpu;
 336 static const struct processor *selected_tune;
 337
 338 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
 339
 340 /* An ISA extension in the co-processor and main instruction set space.  */
 341 struct aarch64_option_extension
 342 {
 343   const char *const name;
 344   const unsigned long flags_on;
 345   const unsigned long flags_off;
 346 };
 347
 348 /* ISA extensions in AArch64.  */
 349 static const struct aarch64_option_extension all_extensions[] =
 350 {
 351 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
 352   {NAME, FLAGS_ON, FLAGS_OFF},
 353 #include "aarch64-option-extensions.def"
 354 #undef AARCH64_OPT_EXTENSION
 355   {NULL, 0, 0}
 356 };
 357
 358 /* Used to track the size of an address when generating a pre/post
 359    increment address.  */
 360 static enum machine_mode aarch64_memory_reference_mode;
 361
 362 /* Used to force GTY into this file.  */
 363 static GTY(()) int gty_dummy;
 364
 365 /* A table of valid AArch64 "bitmask immediate" values for
 366    logical instructions.  */
 367
 368 #define AARCH64_NUM_BITMASKS  5334
 369 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
 370
 371 typedef enum aarch64_cond_code
 372 {
 373   AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
 374   AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
 375   AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
 376 }
 377 aarch64_cc;
 378
 379 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
 380
 381 /* The condition codes of the processor, and the inverse function.  */
 382 static const char * const aarch64_condition_codes[] =
 383 {
 384   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
 385   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 386 };
 387
 388 /* Provide a mapping from gcc register numbers to dwarf register numbers.  */
 389 unsigned
 390 aarch64_dbx_register_number (unsigned regno)
 391 {
 392    if (GP_REGNUM_P (regno))
 393      return AARCH64_DWARF_R0 + regno - R0_REGNUM;
 394    else if (regno == SP_REGNUM)
 395      return AARCH64_DWARF_SP;
 396    else if (FP_REGNUM_P (regno))
 397      return AARCH64_DWARF_V0 + regno - V0_REGNUM;
 398
 399    /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
 400       equivalent DWARF register.  */
 401    return DWARF_FRAME_REGISTERS;
 402 }
 403
 404 /* Return TRUE if MODE is any of the large INT modes.  */
 405 static bool
 406 aarch64_vect_struct_mode_p (enum machine_mode mode)
 407 {
 408   return mode == OImode || mode == CImode || mode == XImode;
 409 }
 410
 411 /* Return TRUE if MODE is any of the vector modes.  */
 412 static bool
 413 aarch64_vector_mode_p (enum machine_mode mode)
 414 {
 415   return aarch64_vector_mode_supported_p (mode)
 416          || aarch64_vect_struct_mode_p (mode);
 417 }
 418
 419 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
 420 static bool
 421 aarch64_array_mode_supported_p (enum machine_mode mode,
 422                                 unsigned HOST_WIDE_INT nelems)
 423 {
 424   if (TARGET_SIMD
 425       && AARCH64_VALID_SIMD_QREG_MODE (mode)
 426       && (nelems >= 2 && nelems <= 4))
 427     return true;
 428
 429   return false;
 430 }
 431
 432 /* Implement HARD_REGNO_NREGS.  */
 433
 434 int
 435 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
 436 {
 437   switch (aarch64_regno_regclass (regno))
 438     {
 439     case FP_REGS:
 440     case FP_LO_REGS:
 441       return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
 442     default:
 443       return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 444     }
 445   gcc_unreachable ();
 446 }
 447
 448 /* Implement HARD_REGNO_MODE_OK.  */
 449
 450 int
 451 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
 452 {
 453   if (GET_MODE_CLASS (mode) == MODE_CC)
 454     return regno == CC_REGNUM;
 455
 456   if (regno == SP_REGNUM)
 457     /* The purpose of comparing with ptr_mode is to support the
 458        global register variable associated with the stack pointer
 459        register via the syntax of asm ("wsp") in ILP32.  */
 460     return mode == Pmode || mode == ptr_mode;
 461
 462   if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
 463     return mode == Pmode;
 464
 465   if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
 466     return 1;
 467
 468   if (FP_REGNUM_P (regno))
 469     {
 470       if (aarch64_vect_struct_mode_p (mode))
 471         return
 472           (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
 473       else
 474         return 1;
 475     }
 476
 477   return 0;
 478 }
 479
 480 /* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
 481 enum machine_mode
 482 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
 483                                      enum machine_mode mode)
 484 {
 485   /* Handle modes that fit within single registers.  */
 486   if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
 487     {
 488       if (GET_MODE_SIZE (mode) >= 4)
 489         return mode;
 490       else
 491         return SImode;
 492     }
 493   /* Fall back to generic for multi-reg and very large modes.  */
 494   else
 495     return choose_hard_reg_mode (regno, nregs, false);
 496 }
 497
 498 /* Return true if calls to DECL should be treated as
 499    long-calls (ie called via a register).  */
 500 static bool
 501 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
 502 {
 503   return false;
 504 }
 505
 506 /* Return true if calls to symbol-ref SYM should be treated as
 507    long-calls (ie called via a register).  */
 508 bool
 509 aarch64_is_long_call_p (rtx sym)
 510 {
 511   return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
 512 }
 513
 514 /* Return true if the offsets to a zero/sign-extract operation
 515    represent an expression that matches an extend operation.  The
 516    operands represent the paramters from
 517
 518    (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
 519 bool
 520 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
 521                                 rtx extract_imm)
 522 {
 523   HOST_WIDE_INT mult_val, extract_val;
 524
 525   if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
 526     return false;
 527
 528   mult_val = INTVAL (mult_imm);
 529   extract_val = INTVAL (extract_imm);
 530
 531   if (extract_val > 8
 532       && extract_val < GET_MODE_BITSIZE (mode)
 533       && exact_log2 (extract_val & ~7) > 0
 534       && (extract_val & 7) <= 4
 535       && mult_val == (1 << (extract_val & 7)))
 536     return true;
 537
 538   return false;
 539 }
 540
 541 /* Emit an insn that's a simple single-set.  Both the operands must be
 542    known to be valid.  */
 543 inline static rtx
 544 emit_set_insn (rtx x, rtx y)
 545 {
 546   return emit_insn (gen_rtx_SET (VOIDmode, x, y));
 547 }
 548
 549 /* X and Y are two things to compare using CODE.  Emit the compare insn and
 550    return the rtx for register 0 in the proper mode.  */
 551 rtx
 552 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 553 {
 554   enum machine_mode mode = SELECT_CC_MODE (code, x, y);
 555   rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
 556
 557   emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
 558   return cc_reg;
 559 }
 560
 561 /* Build the SYMBOL_REF for __tls_get_addr.  */
 562
 563 static GTY(()) rtx tls_get_addr_libfunc;
 564
 565 rtx
 566 aarch64_tls_get_addr (void)
 567 {
 568   if (!tls_get_addr_libfunc)
 569     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
 570   return tls_get_addr_libfunc;
 571 }
 572
 573 /* Return the TLS model to use for ADDR.  */
 574
 575 static enum tls_model
 576 tls_symbolic_operand_type (rtx addr)
 577 {
 578   enum tls_model tls_kind = TLS_MODEL_NONE;
 579   rtx sym, addend;
 580
 581   if (GET_CODE (addr) == CONST)
 582     {
 583       split_const (addr, &sym, &addend);
 584       if (GET_CODE (sym) == SYMBOL_REF)
 585         tls_kind = SYMBOL_REF_TLS_MODEL (sym);
 586     }
 587   else if (GET_CODE (addr) == SYMBOL_REF)
 588     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
 589
 590   return tls_kind;
 591 }
 592
 593 /* We'll allow lo_sum's in addresses in our legitimate addresses
 594    so that combine would take care of combining addresses where
 595    necessary, but for generation purposes, we'll generate the address
 596    as :
 597    RTL                               Absolute
 598    tmp = hi (symbol_ref);            adrp  x1, foo
 599    dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo
 600                                      nop
 601
 602    PIC                               TLS
 603    adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
 604    ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
 605                                      bl   __tls_get_addr
 606                                      nop
 607
 608    Load TLS symbol, depending on TLS mechanism and TLS access model.
 609
 610    Global Dynamic - Traditional TLS:
 611    adrp tmp, :tlsgd:imm
 612    add  dest, tmp, #:tlsgd_lo12:imm
 613    bl   __tls_get_addr
 614
 615    Global Dynamic - TLS Descriptors:
 616    adrp dest, :tlsdesc:imm
 617    ldr  tmp, [dest, #:tlsdesc_lo12:imm]
 618    add  dest, dest, #:tlsdesc_lo12:imm
 619    blr  tmp
 620    mrs  tp, tpidr_el0
 621    add  dest, dest, tp
 622
 623    Initial Exec:
 624    mrs  tp, tpidr_el0
 625    adrp tmp, :gottprel:imm
 626    ldr  dest, [tmp, #:gottprel_lo12:imm]
 627    add  dest, dest, tp
 628
 629    Local Exec:
 630    mrs  tp, tpidr_el0
 631    add  t0, tp, #:tprel_hi12:imm
 632    add  t0, #:tprel_lo12_nc:imm
 633 */
 634
 635 static void
 636 aarch64_load_symref_appropriately (rtx dest, rtx imm,
 637                                    enum aarch64_symbol_type type)
 638 {
 639   switch (type)
 640     {
 641     case SYMBOL_SMALL_ABSOLUTE:
 642       {
 643         /* In ILP32, the mode of dest can be either SImode or DImode.  */
 644         rtx tmp_reg = dest;
 645         enum machine_mode mode = GET_MODE (dest);
 646
 647         gcc_assert (mode == Pmode || mode == ptr_mode);
 648
 649         if (can_create_pseudo_p ())
 650           tmp_reg = gen_reg_rtx (mode);
 651
 652         emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
 653         emit_insn (gen_add_losym (dest, tmp_reg, imm));
 654         return;
 655       }
 656
 657     case SYMBOL_TINY_ABSOLUTE:
 658       emit_insn (gen_rtx_SET (Pmode, dest, imm));
 659       return;
 660
 661     case SYMBOL_SMALL_GOT:
 662       {
 663         /* In ILP32, the mode of dest can be either SImode or DImode,
 664            while the got entry is always of SImode size.  The mode of
 665            dest depends on how dest is used: if dest is assigned to a
 666            pointer (e.g. in the memory), it has SImode; it may have
 667            DImode if dest is dereferenced to access the memeory.
 668            This is why we have to handle three different ldr_got_small
 669            patterns here (two patterns for ILP32).  */
 670         rtx tmp_reg = dest;
 671         enum machine_mode mode = GET_MODE (dest);
 672
 673         if (can_create_pseudo_p ())
 674           tmp_reg = gen_reg_rtx (mode);
 675
 676         emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
 677         if (mode == ptr_mode)
 678           {
 679             if (mode == DImode)
 680               emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
 681             else
 682               emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
 683           }
 684         else
 685           {
 686             gcc_assert (mode == Pmode);
 687             emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
 688           }
 689
 690         return;
 691       }
 692
 693     case SYMBOL_SMALL_TLSGD:
 694       {
 695         rtx insns;
 696         rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
 697
 698         start_sequence ();
 699         emit_call_insn (gen_tlsgd_small (result, imm));
 700         insns = get_insns ();
 701         end_sequence ();
 702
 703         RTL_CONST_CALL_P (insns) = 1;
 704         emit_libcall_block (insns, dest, result, imm);
 705         return;
 706       }
 707
 708     case SYMBOL_SMALL_TLSDESC:
 709       {
 710         enum machine_mode mode = GET_MODE (dest);
 711         rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
 712         rtx tp;
 713
 714         gcc_assert (mode == Pmode || mode == ptr_mode);
 715
 716         /* In ILP32, the got entry is always of SImode size.  Unlike
 717            small GOT, the dest is fixed at reg 0.  */
 718         if (TARGET_ILP32)
 719           emit_insn (gen_tlsdesc_small_si (imm));
 720         else
 721           emit_insn (gen_tlsdesc_small_di (imm));
 722         tp = aarch64_load_tp (NULL);
 723
 724         if (mode != Pmode)
 725           tp = gen_lowpart (mode, tp);
 726
 727         emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
 728         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 729         return;
 730       }
 731
 732     case SYMBOL_SMALL_GOTTPREL:
 733       {
 734         /* In ILP32, the mode of dest can be either SImode or DImode,
 735            while the got entry is always of SImode size.  The mode of
 736            dest depends on how dest is used: if dest is assigned to a
 737            pointer (e.g. in the memory), it has SImode; it may have
 738            DImode if dest is dereferenced to access the memeory.
 739            This is why we have to handle three different tlsie_small
 740            patterns here (two patterns for ILP32).  */
 741         enum machine_mode mode = GET_MODE (dest);
 742         rtx tmp_reg = gen_reg_rtx (mode);
 743         rtx tp = aarch64_load_tp (NULL);
 744
 745         if (mode == ptr_mode)
 746           {
 747             if (mode == DImode)
 748               emit_insn (gen_tlsie_small_di (tmp_reg, imm));
 749             else
 750               {
 751                 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
 752                 tp = gen_lowpart (mode, tp);
 753               }
 754           }
 755         else
 756           {
 757             gcc_assert (mode == Pmode);
 758             emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
 759           }
 760
 761         emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
 762         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 763         return;
 764       }
 765
 766     case SYMBOL_SMALL_TPREL:
 767       {
 768         rtx tp = aarch64_load_tp (NULL);
 769         emit_insn (gen_tlsle_small (dest, tp, imm));
 770         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 771         return;
 772       }
 773
 774     case SYMBOL_TINY_GOT:
 775       emit_insn (gen_ldr_got_tiny (dest, imm));
 776       return;
 777
 778     default:
 779       gcc_unreachable ();
 780     }
 781 }
 782
 783 /* Emit a move from SRC to DEST.  Assume that the move expanders can
 784    handle all moves if !can_create_pseudo_p ().  The distinction is
 785    important because, unlike emit_move_insn, the move expanders know
 786    how to force Pmode objects into the constant pool even when the
 787    constant pool address is not itself legitimate.  */
 788 static rtx
 789 aarch64_emit_move (rtx dest, rtx src)
 790 {
 791   return (can_create_pseudo_p ()
 792           ? emit_move_insn (dest, src)
 793           : emit_move_insn_1 (dest, src));
 794 }
 795
 796 /* Split a 128-bit move operation into two 64-bit move operations,
 797    taking care to handle partial overlap of register to register
 798    copies.  Special cases are needed when moving between GP regs and
 799    FP regs.  SRC can be a register, constant or memory; DST a register
 800    or memory.  If either operand is memory it must not have any side
 801    effects.  */
 802 void
 803 aarch64_split_128bit_move (rtx dst, rtx src)
 804 {
 805   rtx dst_lo, dst_hi;
 806   rtx src_lo, src_hi;
 807
 808   enum machine_mode mode = GET_MODE (dst);
 809
 810   gcc_assert (mode == TImode || mode == TFmode);
 811   gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
 812   gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
 813
 814   if (REG_P (dst) && REG_P (src))
 815     {
 816       int src_regno = REGNO (src);
 817       int dst_regno = REGNO (dst);
 818
 819       /* Handle FP <-> GP regs.  */
 820       if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
 821         {
 822           src_lo = gen_lowpart (word_mode, src);
 823           src_hi = gen_highpart (word_mode, src);
 824
 825           if (mode == TImode)
 826             {
 827               emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
 828               emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
 829             }
 830           else
 831             {
 832               emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
 833               emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
 834             }
 835           return;
 836         }
 837       else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
 838         {
 839           dst_lo = gen_lowpart (word_mode, dst);
 840           dst_hi = gen_highpart (word_mode, dst);
 841
 842           if (mode == TImode)
 843             {
 844               emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
 845               emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
 846             }
 847           else
 848             {
 849               emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
 850               emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
 851             }
 852           return;
 853         }
 854     }
 855
 856   dst_lo = gen_lowpart (word_mode, dst);
 857   dst_hi = gen_highpart (word_mode, dst);
 858   src_lo = gen_lowpart (word_mode, src);
 859   src_hi = gen_highpart_mode (word_mode, mode, src);
 860
 861   /* At most one pairing may overlap.  */
 862   if (reg_overlap_mentioned_p (dst_lo, src_hi))
 863     {
 864       aarch64_emit_move (dst_hi, src_hi);
 865       aarch64_emit_move (dst_lo, src_lo);
 866     }
 867   else
 868     {
 869       aarch64_emit_move (dst_lo, src_lo);
 870       aarch64_emit_move (dst_hi, src_hi);
 871     }
 872 }
 873
 874 bool
 875 aarch64_split_128bit_move_p (rtx dst, rtx src)
 876 {
 877   return (! REG_P (src)
 878           || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
 879 }
 880
 881 /* Split a complex SIMD combine.  */
 882
 883 void
 884 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
 885 {
 886   enum machine_mode src_mode = GET_MODE (src1);
 887   enum machine_mode dst_mode = GET_MODE (dst);
 888
 889   gcc_assert (VECTOR_MODE_P (dst_mode));
 890
 891   if (REG_P (dst) && REG_P (src1) && REG_P (src2))
 892     {
 893       rtx (*gen) (rtx, rtx, rtx);
 894
 895       switch (src_mode)
 896         {
 897         case V8QImode:
 898           gen = gen_aarch64_simd_combinev8qi;
 899           break;
 900         case V4HImode:
 901           gen = gen_aarch64_simd_combinev4hi;
 902           break;
 903         case V2SImode:
 904           gen = gen_aarch64_simd_combinev2si;
 905           break;
 906         case V2SFmode:
 907           gen = gen_aarch64_simd_combinev2sf;
 908           break;
 909         case DImode:
 910           gen = gen_aarch64_simd_combinedi;
 911           break;
 912         case DFmode:
 913           gen = gen_aarch64_simd_combinedf;
 914           break;
 915         default:
 916           gcc_unreachable ();
 917         }
 918
 919       emit_insn (gen (dst, src1, src2));
 920       return;
 921     }
 922 }
 923
 924 /* Split a complex SIMD move.  */
 925
 926 void
 927 aarch64_split_simd_move (rtx dst, rtx src)
 928 {
 929   enum machine_mode src_mode = GET_MODE (src);
 930   enum machine_mode dst_mode = GET_MODE (dst);
 931
 932   gcc_assert (VECTOR_MODE_P (dst_mode));
 933
 934   if (REG_P (dst) && REG_P (src))
 935     {
 936       rtx (*gen) (rtx, rtx);
 937
 938       gcc_assert (VECTOR_MODE_P (src_mode));
 939
 940       switch (src_mode)
 941         {
 942         case V16QImode:
 943           gen = gen_aarch64_split_simd_movv16qi;
 944           break;
 945         case V8HImode:
 946           gen = gen_aarch64_split_simd_movv8hi;
 947           break;
 948         case V4SImode:
 949           gen = gen_aarch64_split_simd_movv4si;
 950           break;
 951         case V2DImode:
 952           gen = gen_aarch64_split_simd_movv2di;
 953           break;
 954         case V4SFmode:
 955           gen = gen_aarch64_split_simd_movv4sf;
 956           break;
 957         case V2DFmode:
 958           gen = gen_aarch64_split_simd_movv2df;
 959           break;
 960         default:
 961           gcc_unreachable ();
 962         }
 963
 964       emit_insn (gen (dst, src));
 965       return;
 966     }
 967 }
 968
 969 static rtx
 970 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
 971 {
 972   if (can_create_pseudo_p ())
 973     return force_reg (mode, value);
 974   else
 975     {
 976       x = aarch64_emit_move (x, value);
 977       return x;
 978     }
 979 }
 980
 981
 982 static rtx
 983 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
 984 {
 985   if (!aarch64_plus_immediate (GEN_INT (offset), mode))
 986     {
 987       rtx high;
 988       /* Load the full offset into a register.  This
 989          might be improvable in the future.  */
 990       high = GEN_INT (offset);
 991       offset = 0;
 992       high = aarch64_force_temporary (mode, temp, high);
 993       reg = aarch64_force_temporary (mode, temp,
 994                                      gen_rtx_PLUS (mode, high, reg));
 995     }
 996   return plus_constant (mode, reg, offset);
 997 }
 998
 999 void
1000 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1001 {
1002   enum machine_mode mode = GET_MODE (dest);
1003   unsigned HOST_WIDE_INT mask;
1004   int i;
1005   bool first;
1006   unsigned HOST_WIDE_INT val;
1007   bool subtargets;
1008   rtx subtarget;
1009   int one_match, zero_match;
1010
1011   gcc_assert (mode == SImode || mode == DImode);
1012
1013   /* Check on what type of symbol it is.  */
1014   if (GET_CODE (imm) == SYMBOL_REF
1015       || GET_CODE (imm) == LABEL_REF
1016       || GET_CODE (imm) == CONST)
1017     {
1018       rtx mem, base, offset;
1019       enum aarch64_symbol_type sty;
1020
1021       /* If we have (const (plus symbol offset)), separate out the offset
1022          before we start classifying the symbol.  */
1023       split_const (imm, &base, &offset);
1024
1025       sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1026       switch (sty)
1027         {
1028         case SYMBOL_FORCE_TO_MEM:
1029           if (offset != const0_rtx
1030               && targetm.cannot_force_const_mem (mode, imm))
1031             {
1032               gcc_assert (can_create_pseudo_p ());
1033               base = aarch64_force_temporary (mode, dest, base);
1034               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1035               aarch64_emit_move (dest, base);
1036               return;
1037             }
1038           mem = force_const_mem (ptr_mode, imm);
1039           gcc_assert (mem);
1040           if (mode != ptr_mode)
1041             mem = gen_rtx_ZERO_EXTEND (mode, mem);
1042           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1043           return;
1044
1045         case SYMBOL_SMALL_TLSGD:
1046         case SYMBOL_SMALL_TLSDESC:
1047         case SYMBOL_SMALL_GOTTPREL:
1048         case SYMBOL_SMALL_GOT:
1049         case SYMBOL_TINY_GOT:
1050           if (offset != const0_rtx)
1051             {
1052               gcc_assert(can_create_pseudo_p ());
1053               base = aarch64_force_temporary (mode, dest, base);
1054               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1055               aarch64_emit_move (dest, base);
1056               return;
1057             }
1058           /* FALLTHRU */
1059
1060         case SYMBOL_SMALL_TPREL:
1061         case SYMBOL_SMALL_ABSOLUTE:
1062         case SYMBOL_TINY_ABSOLUTE:
1063           aarch64_load_symref_appropriately (dest, imm, sty);
1064           return;
1065
1066         default:
1067           gcc_unreachable ();
1068         }
1069     }
1070
1071   if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1072     {
1073       emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1074       return;
1075     }
1076
1077   if (!CONST_INT_P (imm))
1078     {
1079       if (GET_CODE (imm) == HIGH)
1080         emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1081       else
1082         {
1083           rtx mem = force_const_mem (mode, imm);
1084           gcc_assert (mem);
1085           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1086         }
1087
1088       return;
1089     }
1090
1091   if (mode == SImode)
1092     {
1093       /* We know we can't do this in 1 insn, and we must be able to do it
1094          in two; so don't mess around looking for sequences that don't buy
1095          us anything.  */
1096       emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1097       emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1098                                  GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1099       return;
1100     }
1101
1102   /* Remaining cases are all for DImode.  */
1103
1104   val = INTVAL (imm);
1105   subtargets = optimize && can_create_pseudo_p ();
1106
1107   one_match = 0;
1108   zero_match = 0;
1109   mask = 0xffff;
1110
1111   for (i = 0; i < 64; i += 16, mask <<= 16)
1112     {
1113       if ((val & mask) == 0)
1114         zero_match++;
1115       else if ((val & mask) == mask)
1116         one_match++;
1117     }
1118
1119   if (one_match == 2)
1120     {
1121       mask = 0xffff;
1122       for (i = 0; i < 64; i += 16, mask <<= 16)
1123         {
1124           if ((val & mask) != mask)
1125             {
1126               emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1127               emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1128                                          GEN_INT ((val >> i) & 0xffff)));
1129               return;
1130             }
1131         }
1132       gcc_unreachable ();
1133     }
1134
1135   if (zero_match == 2)
1136     goto simple_sequence;
1137
1138   mask = 0x0ffff0000UL;
1139   for (i = 16; i < 64; i += 16, mask <<= 16)
1140     {
1141       HOST_WIDE_INT comp = mask & ~(mask - 1);
1142
1143       if (aarch64_uimm12_shift (val - (val & mask)))
1144         {
1145           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1146
1147           emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1148           emit_insn (gen_adddi3 (dest, subtarget,
1149                                  GEN_INT (val - (val & mask))));
1150           return;
1151         }
1152       else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1153         {
1154           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1155
1156           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1157                                   GEN_INT ((val + comp) & mask)));
1158           emit_insn (gen_adddi3 (dest, subtarget,
1159                                  GEN_INT (val - ((val + comp) & mask))));
1160           return;
1161         }
1162       else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1163         {
1164           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1165
1166           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1167                                   GEN_INT ((val - comp) | ~mask)));
1168           emit_insn (gen_adddi3 (dest, subtarget,
1169                                  GEN_INT (val - ((val - comp) | ~mask))));
1170           return;
1171         }
1172       else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1173         {
1174           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1175
1176           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1177                                   GEN_INT (val | ~mask)));
1178           emit_insn (gen_adddi3 (dest, subtarget,
1179                                  GEN_INT (val - (val | ~mask))));
1180           return;
1181         }
1182     }
1183
1184   /* See if we can do it by arithmetically combining two
1185      immediates.  */
1186   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1187     {
1188       int j;
1189       mask = 0xffff;
1190
1191       if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1192           || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1193         {
1194           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1195           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1196                                   GEN_INT (aarch64_bitmasks[i])));
1197           emit_insn (gen_adddi3 (dest, subtarget,
1198                                  GEN_INT (val - aarch64_bitmasks[i])));
1199           return;
1200         }
1201
1202       for (j = 0; j < 64; j += 16, mask <<= 16)
1203         {
1204           if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1205             {
1206               emit_insn (gen_rtx_SET (VOIDmode, dest,
1207                                       GEN_INT (aarch64_bitmasks[i])));
1208               emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1209                                          GEN_INT ((val >> j) & 0xffff)));
1210               return;
1211             }
1212         }
1213     }
1214
1215   /* See if we can do it by logically combining two immediates.  */
1216   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1217     {
1218       if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1219         {
1220           int j;
1221
1222           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1223             if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1224               {
1225                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1226                 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1227                                         GEN_INT (aarch64_bitmasks[i])));
1228                 emit_insn (gen_iordi3 (dest, subtarget,
1229                                        GEN_INT (aarch64_bitmasks[j])));
1230                 return;
1231               }
1232         }
1233       else if ((val & aarch64_bitmasks[i]) == val)
1234         {
1235           int j;
1236
1237           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1238             if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1239               {
1240
1241                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1242                 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1243                                         GEN_INT (aarch64_bitmasks[j])));
1244                 emit_insn (gen_anddi3 (dest, subtarget,
1245                                        GEN_INT (aarch64_bitmasks[i])));
1246                 return;
1247               }
1248         }
1249     }
1250
1251  simple_sequence:
1252   first = true;
1253   mask = 0xffff;
1254   for (i = 0; i < 64; i += 16, mask <<= 16)
1255     {
1256       if ((val & mask) != 0)
1257         {
1258           if (first)
1259             {
1260               emit_insn (gen_rtx_SET (VOIDmode, dest,
1261                                       GEN_INT (val & mask)));
1262               first = false;
1263             }
1264           else
1265             emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1266                                        GEN_INT ((val >> i) & 0xffff)));
1267         }
1268     }
1269 }
1270
1271 static bool
1272 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1273                                  tree exp ATTRIBUTE_UNUSED)
1274 {
1275   /* Currently, always true.  */
1276   return true;
1277 }
1278
1279 /* Implement TARGET_PASS_BY_REFERENCE.  */
1280
1281 static bool
1282 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1283                            enum machine_mode mode,
1284                            const_tree type,
1285                            bool named ATTRIBUTE_UNUSED)
1286 {
1287   HOST_WIDE_INT size;
1288   enum machine_mode dummymode;
1289   int nregs;
1290
1291   /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
1292   size = (mode == BLKmode && type)
1293     ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1294
1295   /* Aggregates are passed by reference based on their size.  */
1296   if (type && AGGREGATE_TYPE_P (type))
1297     {
1298       size = int_size_in_bytes (type);
1299     }
1300
1301   /* Variable sized arguments are always returned by reference.  */
1302   if (size < 0)
1303     return true;
1304
1305   /* Can this be a candidate to be passed in fp/simd register(s)?  */
1306   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1307                                                &dummymode, &nregs,
1308                                                NULL))
1309     return false;
1310
1311   /* Arguments which are variable sized or larger than 2 registers are
1312      passed by reference unless they are a homogenous floating point
1313      aggregate.  */
1314   return size > 2 * UNITS_PER_WORD;
1315 }
1316
1317 /* Return TRUE if VALTYPE is padded to its least significant bits.  */
1318 static bool
1319 aarch64_return_in_msb (const_tree valtype)
1320 {
1321   enum machine_mode dummy_mode;
1322   int dummy_int;
1323
1324   /* Never happens in little-endian mode.  */
1325   if (!BYTES_BIG_ENDIAN)
1326     return false;
1327
1328   /* Only composite types smaller than or equal to 16 bytes can
1329      be potentially returned in registers.  */
1330   if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1331       || int_size_in_bytes (valtype) <= 0
1332       || int_size_in_bytes (valtype) > 16)
1333     return false;
1334
1335   /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1336      or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1337      is always passed/returned in the least significant bits of fp/simd
1338      register(s).  */
1339   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1340                                                &dummy_mode, &dummy_int, NULL))
1341     return false;
1342
1343   return true;
1344 }
1345
1346 /* Implement TARGET_FUNCTION_VALUE.
1347    Define how to find the value returned by a function.  */
1348
1349 static rtx
1350 aarch64_function_value (const_tree type, const_tree func,
1351                         bool outgoing ATTRIBUTE_UNUSED)
1352 {
1353   enum machine_mode mode;
1354   int unsignedp;
1355   int count;
1356   enum machine_mode ag_mode;
1357
1358   mode = TYPE_MODE (type);
1359   if (INTEGRAL_TYPE_P (type))
1360     mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1361
1362   if (aarch64_return_in_msb (type))
1363     {
1364       HOST_WIDE_INT size = int_size_in_bytes (type);
1365
1366       if (size % UNITS_PER_WORD != 0)
1367         {
1368           size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1369           mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1370         }
1371     }
1372
1373   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1374                                                &ag_mode, &count, NULL))
1375     {
1376       if (!aarch64_composite_type_p (type, mode))
1377         {
1378           gcc_assert (count == 1 && mode == ag_mode);
1379           return gen_rtx_REG (mode, V0_REGNUM);
1380         }
1381       else
1382         {
1383           int i;
1384           rtx par;
1385
1386           par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1387           for (i = 0; i < count; i++)
1388             {
1389               rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1390               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1391                                        GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1392               XVECEXP (par, 0, i) = tmp;
1393             }
1394           return par;
1395         }
1396     }
1397   else
1398     return gen_rtx_REG (mode, R0_REGNUM);
1399 }
1400
1401 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1402    Return true if REGNO is the number of a hard register in which the values
1403    of called function may come back.  */
1404
1405 static bool
1406 aarch64_function_value_regno_p (const unsigned int regno)
1407 {
1408   /* Maximum of 16 bytes can be returned in the general registers.  Examples
1409      of 16-byte return values are: 128-bit integers and 16-byte small
1410      structures (excluding homogeneous floating-point aggregates).  */
1411   if (regno == R0_REGNUM || regno == R1_REGNUM)
1412     return true;
1413
1414   /* Up to four fp/simd registers can return a function value, e.g. a
1415      homogeneous floating-point aggregate having four members.  */
1416   if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1417     return !TARGET_GENERAL_REGS_ONLY;
1418
1419   return false;
1420 }
1421
1422 /* Implement TARGET_RETURN_IN_MEMORY.
1423
1424    If the type T of the result of a function is such that
1425      void func (T arg)
1426    would require that arg be passed as a value in a register (or set of
1427    registers) according to the parameter passing rules, then the result
1428    is returned in the same registers as would be used for such an
1429    argument.  */
1430
1431 static bool
1432 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1433 {
1434   HOST_WIDE_INT size;
1435   enum machine_mode ag_mode;
1436   int count;
1437
1438   if (!AGGREGATE_TYPE_P (type)
1439       && TREE_CODE (type) != COMPLEX_TYPE
1440       && TREE_CODE (type) != VECTOR_TYPE)
1441     /* Simple scalar types always returned in registers.  */
1442     return false;
1443
1444   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1445                                                type,
1446                                                &ag_mode,
1447                                                &count,
1448                                                NULL))
1449     return false;
1450
1451   /* Types larger than 2 registers returned in memory.  */
1452   size = int_size_in_bytes (type);
1453   return (size < 0 || size > 2 * UNITS_PER_WORD);
1454 }
1455
1456 static bool
1457 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1458                                const_tree type, int *nregs)
1459 {
1460   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1461   return aarch64_vfp_is_call_or_return_candidate (mode,
1462                                                   type,
1463                                                   &pcum->aapcs_vfp_rmode,
1464                                                   nregs,
1465                                                   NULL);
1466 }
1467
1468 /* Given MODE and TYPE of a function argument, return the alignment in
1469    bits.  The idea is to suppress any stronger alignment requested by
1470    the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1471    This is a helper function for local use only.  */
1472
1473 static unsigned int
1474 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1475 {
1476   unsigned int alignment;
1477
1478   if (type)
1479     {
1480       if (!integer_zerop (TYPE_SIZE (type)))
1481         {
1482           if (TYPE_MODE (type) == mode)
1483             alignment = TYPE_ALIGN (type);
1484           else
1485             alignment = GET_MODE_ALIGNMENT (mode);
1486         }
1487       else
1488         alignment = 0;
1489     }
1490   else
1491     alignment = GET_MODE_ALIGNMENT (mode);
1492
1493   return alignment;
1494 }
1495
1496 /* Layout a function argument according to the AAPCS64 rules.  The rule
1497    numbers refer to the rule numbers in the AAPCS64.  */
1498
1499 static void
1500 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1501                     const_tree type,
1502                     bool named ATTRIBUTE_UNUSED)
1503 {
1504   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1505   int ncrn, nvrn, nregs;
1506   bool allocate_ncrn, allocate_nvrn;
1507
1508   /* We need to do this once per argument.  */
1509   if (pcum->aapcs_arg_processed)
1510     return;
1511
1512   pcum->aapcs_arg_processed = true;
1513
1514   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1515   allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1516                                                  mode,
1517                                                  type,
1518                                                  &nregs);
1519
1520   /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1521      The following code thus handles passing by SIMD/FP registers first.  */
1522
1523   nvrn = pcum->aapcs_nvrn;
1524
1525   /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1526      and homogenous short-vector aggregates (HVA).  */
1527   if (allocate_nvrn)
1528     {
1529       if (nvrn + nregs <= NUM_FP_ARG_REGS)
1530         {
1531           pcum->aapcs_nextnvrn = nvrn + nregs;
1532           if (!aarch64_composite_type_p (type, mode))
1533             {
1534               gcc_assert (nregs == 1);
1535               pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1536             }
1537           else
1538             {
1539               rtx par;
1540               int i;
1541               par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1542               for (i = 0; i < nregs; i++)
1543                 {
1544                   rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1545                                          V0_REGNUM + nvrn + i);
1546                   tmp = gen_rtx_EXPR_LIST
1547                     (VOIDmode, tmp,
1548                      GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1549                   XVECEXP (par, 0, i) = tmp;
1550                 }
1551               pcum->aapcs_reg = par;
1552             }
1553           return;
1554         }
1555       else
1556         {
1557           /* C.3 NSRN is set to 8.  */
1558           pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1559           goto on_stack;
1560         }
1561     }
1562
1563   ncrn = pcum->aapcs_ncrn;
1564   nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1565            + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1566
1567
1568   /* C6 - C9.  though the sign and zero extension semantics are
1569      handled elsewhere.  This is the case where the argument fits
1570      entirely general registers.  */
1571   if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1572     {
1573       unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1574
1575       gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1576
1577       /* C.8 if the argument has an alignment of 16 then the NGRN is
1578          rounded up to the next even number.  */
1579       if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1580         {
1581           ++ncrn;
1582           gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1583         }
1584       /* NREGS can be 0 when e.g. an empty structure is to be passed.
1585          A reg is still generated for it, but the caller should be smart
1586          enough not to use it.  */
1587       if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1588         {
1589           pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1590         }
1591       else
1592         {
1593           rtx par;
1594           int i;
1595
1596           par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1597           for (i = 0; i < nregs; i++)
1598             {
1599               rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1600               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1601                                        GEN_INT (i * UNITS_PER_WORD));
1602               XVECEXP (par, 0, i) = tmp;
1603             }
1604           pcum->aapcs_reg = par;
1605         }
1606
1607       pcum->aapcs_nextncrn = ncrn + nregs;
1608       return;
1609     }
1610
1611   /* C.11  */
1612   pcum->aapcs_nextncrn = NUM_ARG_REGS;
1613
1614   /* The argument is passed on stack; record the needed number of words for
1615      this argument (we can re-use NREGS) and align the total size if
1616      necessary.  */
1617 on_stack:
1618   pcum->aapcs_stack_words = nregs;
1619   if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1620     pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1621                                                16 / UNITS_PER_WORD) + 1;
1622   return;
1623 }
1624
1625 /* Implement TARGET_FUNCTION_ARG.  */
1626
1627 static rtx
1628 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1629                       const_tree type, bool named)
1630 {
1631   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1632   gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1633
1634   if (mode == VOIDmode)
1635     return NULL_RTX;
1636
1637   aarch64_layout_arg (pcum_v, mode, type, named);
1638   return pcum->aapcs_reg;
1639 }
1640
1641 void
1642 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1643                            const_tree fntype ATTRIBUTE_UNUSED,
1644                            rtx libname ATTRIBUTE_UNUSED,
1645                            const_tree fndecl ATTRIBUTE_UNUSED,
1646                            unsigned n_named ATTRIBUTE_UNUSED)
1647 {
1648   pcum->aapcs_ncrn = 0;
1649   pcum->aapcs_nvrn = 0;
1650   pcum->aapcs_nextncrn = 0;
1651   pcum->aapcs_nextnvrn = 0;
1652   pcum->pcs_variant = ARM_PCS_AAPCS64;
1653   pcum->aapcs_reg = NULL_RTX;
1654   pcum->aapcs_arg_processed = false;
1655   pcum->aapcs_stack_words = 0;
1656   pcum->aapcs_stack_size = 0;
1657
1658   return;
1659 }
1660
1661 static void
1662 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1663                               enum machine_mode mode,
1664                               const_tree type,
1665                               bool named)
1666 {
1667   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1668   if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1669     {
1670       aarch64_layout_arg (pcum_v, mode, type, named);
1671       gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1672                   != (pcum->aapcs_stack_words != 0));
1673       pcum->aapcs_arg_processed = false;
1674       pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1675       pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1676       pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1677       pcum->aapcs_stack_words = 0;
1678       pcum->aapcs_reg = NULL_RTX;
1679     }
1680 }
1681
1682 bool
1683 aarch64_function_arg_regno_p (unsigned regno)
1684 {
1685   return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1686           || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1687 }
1688
1689 /* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
1690    PARM_BOUNDARY bits of alignment, but will be given anything up
1691    to STACK_BOUNDARY bits if the type requires it.  This makes sure
1692    that both before and after the layout of each argument, the Next
1693    Stacked Argument Address (NSAA) will have a minimum alignment of
1694    8 bytes.  */
1695
1696 static unsigned int
1697 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1698 {
1699   unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1700
1701   if (alignment < PARM_BOUNDARY)
1702     alignment = PARM_BOUNDARY;
1703   if (alignment > STACK_BOUNDARY)
1704     alignment = STACK_BOUNDARY;
1705   return alignment;
1706 }
1707
1708 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1709
1710    Return true if an argument passed on the stack should be padded upwards,
1711    i.e. if the least-significant byte of the stack slot has useful data.
1712
1713    Small aggregate types are placed in the lowest memory address.
1714
1715    The related parameter passing rules are B.4, C.3, C.5 and C.14.  */
1716
1717 bool
1718 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1719 {
1720   /* On little-endian targets, the least significant byte of every stack
1721      argument is passed at the lowest byte address of the stack slot.  */
1722   if (!BYTES_BIG_ENDIAN)
1723     return true;
1724
1725   /* Otherwise, integral, floating-point and pointer types are padded downward:
1726      the least significant byte of a stack argument is passed at the highest
1727      byte address of the stack slot.  */
1728   if (type
1729       ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1730          || POINTER_TYPE_P (type))
1731       : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1732     return false;
1733
1734   /* Everything else padded upward, i.e. data in first byte of stack slot.  */
1735   return true;
1736 }
1737
1738 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1739
1740    It specifies padding for the last (may also be the only)
1741    element of a block move between registers and memory.  If
1742    assuming the block is in the memory, padding upward means that
1743    the last element is padded after its highest significant byte,
1744    while in downward padding, the last element is padded at the
1745    its least significant byte side.
1746
1747    Small aggregates and small complex types are always padded
1748    upwards.
1749
1750    We don't need to worry about homogeneous floating-point or
1751    short-vector aggregates; their move is not affected by the
1752    padding direction determined here.  Regardless of endianness,
1753    each element of such an aggregate is put in the least
1754    significant bits of a fp/simd register.
1755
1756    Return !BYTES_BIG_ENDIAN if the least significant byte of the
1757    register has useful data, and return the opposite if the most
1758    significant byte does.  */
1759
1760 bool
1761 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1762                      bool first ATTRIBUTE_UNUSED)
1763 {
1764
1765   /* Small composite types are always padded upward.  */
1766   if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1767     {
1768       HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1769                             : GET_MODE_SIZE (mode));
1770       if (size < 2 * UNITS_PER_WORD)
1771         return true;
1772     }
1773
1774   /* Otherwise, use the default padding.  */
1775   return !BYTES_BIG_ENDIAN;
1776 }
1777
1778 static enum machine_mode
1779 aarch64_libgcc_cmp_return_mode (void)
1780 {
1781   return SImode;
1782 }
1783
1784 static bool
1785 aarch64_frame_pointer_required (void)
1786 {
1787   /* If the function contains dynamic stack allocations, we need to
1788      use the frame pointer to access the static parts of the frame.  */
1789   if (cfun->calls_alloca)
1790     return true;
1791
1792   /* In aarch64_override_options_after_change
1793      flag_omit_leaf_frame_pointer turns off the frame pointer by
1794      default.  Turn it back on now if we've not got a leaf
1795      function.  */
1796   if (flag_omit_leaf_frame_pointer
1797       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1798     return true;
1799
1800   return false;
1801 }
1802
1803 /* Mark the registers that need to be saved by the callee and calculate
1804    the size of the callee-saved registers area and frame record (both FP
1805    and LR may be omitted).  */
1806 static void
1807 aarch64_layout_frame (void)
1808 {
1809   HOST_WIDE_INT offset = 0;
1810   int regno;
1811
1812   if (reload_completed && cfun->machine->frame.laid_out)
1813     return;
1814
1815   /* First mark all the registers that really need to be saved...  */
1816   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1817     cfun->machine->frame.reg_offset[regno] = -1;
1818
1819   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1820     cfun->machine->frame.reg_offset[regno] = -1;
1821
1822   /* ... that includes the eh data registers (if needed)...  */
1823   if (crtl->calls_eh_return)
1824     for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1825       cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1826
1827   /* ... and any callee saved register that dataflow says is live.  */
1828   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1829     if (df_regs_ever_live_p (regno)
1830         && !call_used_regs[regno])
1831       cfun->machine->frame.reg_offset[regno] = 0;
1832
1833   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1834     if (df_regs_ever_live_p (regno)
1835         && !call_used_regs[regno])
1836       cfun->machine->frame.reg_offset[regno] = 0;
1837
1838   if (frame_pointer_needed)
1839     {
1840       cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1841       cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1842       cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1843     }
1844
1845   /* Now assign stack slots for them.  */
1846   for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1847     if (cfun->machine->frame.reg_offset[regno] != -1)
1848       {
1849         cfun->machine->frame.reg_offset[regno] = offset;
1850         offset += UNITS_PER_WORD;
1851       }
1852
1853   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1854     if (cfun->machine->frame.reg_offset[regno] != -1)
1855       {
1856         cfun->machine->frame.reg_offset[regno] = offset;
1857         offset += UNITS_PER_WORD;
1858       }
1859
1860   if (frame_pointer_needed)
1861     {
1862       cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1863       offset += UNITS_PER_WORD;
1864     }
1865
1866   if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1867     {
1868       cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1869       offset += UNITS_PER_WORD;
1870     }
1871
1872   cfun->machine->frame.padding0 =
1873     (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1874   offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1875
1876   cfun->machine->frame.saved_regs_size = offset;
1877   cfun->machine->frame.laid_out = true;
1878 }
1879
1880 /* Make the last instruction frame-related and note that it performs
1881    the operation described by FRAME_PATTERN.  */
1882
1883 static void
1884 aarch64_set_frame_expr (rtx frame_pattern)
1885 {
1886   rtx insn;
1887
1888   insn = get_last_insn ();
1889   RTX_FRAME_RELATED_P (insn) = 1;
1890   RTX_FRAME_RELATED_P (frame_pattern) = 1;
1891   REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1892                                       frame_pattern,
1893                                       REG_NOTES (insn));
1894 }
1895
1896 static bool
1897 aarch64_register_saved_on_entry (int regno)
1898 {
1899   return cfun->machine->frame.reg_offset[regno] != -1;
1900 }
1901
1902
1903 static void
1904 aarch64_save_or_restore_fprs (int start_offset, int increment,
1905                               bool restore, rtx base_rtx)
1906
1907 {
1908   unsigned regno;
1909   unsigned regno2;
1910   rtx insn;
1911   rtx (*gen_mem_ref)(enum machine_mode, rtx)
1912     = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1913
1914
1915   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1916     {
1917       if (aarch64_register_saved_on_entry (regno))
1918         {
1919           rtx mem;
1920           mem = gen_mem_ref (DFmode,
1921                              plus_constant (Pmode,
1922                                             base_rtx,
1923                                             start_offset));
1924
1925           for (regno2 = regno + 1;
1926                regno2 <= V31_REGNUM
1927                  && !aarch64_register_saved_on_entry (regno2);
1928                regno2++)
1929             {
1930               /* Empty loop.  */
1931             }
1932           if (regno2 <= V31_REGNUM &&
1933               aarch64_register_saved_on_entry (regno2))
1934             {
1935               rtx mem2;
1936               /* Next highest register to be saved.  */
1937               mem2 = gen_mem_ref (DFmode,
1938                                   plus_constant
1939                                   (Pmode,
1940                                    base_rtx,
1941                                    start_offset + increment));
1942               if (restore == false)
1943                 {
1944                   insn = emit_insn
1945                     ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1946                                         mem2, gen_rtx_REG (DFmode, regno2)));
1947
1948                 }
1949               else
1950                 {
1951                   insn = emit_insn
1952                     ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1953                                        gen_rtx_REG (DFmode, regno2), mem2));
1954
1955                   add_reg_note (insn, REG_CFA_RESTORE,
1956                                 gen_rtx_REG (DFmode, regno));
1957                   add_reg_note (insn, REG_CFA_RESTORE,
1958                                 gen_rtx_REG (DFmode, regno2));
1959                 }
1960
1961                   /* The first part of a frame-related parallel insn
1962                      is always assumed to be relevant to the frame
1963                      calculations; subsequent parts, are only
1964                      frame-related if explicitly marked.  */
1965               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1966               regno = regno2;
1967               start_offset += increment * 2;
1968             }
1969           else
1970             {
1971               if (restore == false)
1972                 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1973               else
1974                 {
1975                   insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1976                   add_reg_note (insn, REG_CFA_RESTORE,
1977                                 gen_rtx_REG (DImode, regno));
1978                 }
1979               start_offset += increment;
1980             }
1981           RTX_FRAME_RELATED_P (insn) = 1;
1982         }
1983     }
1984
1985 }
1986
1987
1988 /* offset from the stack pointer of where the saves and
1989    restore's have to happen.  */
1990 static void
1991 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1992                                             bool restore)
1993 {
1994   rtx insn;
1995   rtx base_rtx = stack_pointer_rtx;
1996   HOST_WIDE_INT start_offset = offset;
1997   HOST_WIDE_INT increment = UNITS_PER_WORD;
1998   rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1999   unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
2000   unsigned regno;
2001   unsigned regno2;
2002
2003   for (regno = R0_REGNUM; regno <= limit; regno++)
2004     {
2005       if (aarch64_register_saved_on_entry (regno))
2006         {
2007           rtx mem;
2008           mem = gen_mem_ref (Pmode,
2009                              plus_constant (Pmode,
2010                                             base_rtx,
2011                                             start_offset));
2012
2013           for (regno2 = regno + 1;
2014                regno2 <= limit
2015                  && !aarch64_register_saved_on_entry (regno2);
2016                regno2++)
2017             {
2018               /* Empty loop.  */
2019             }
2020           if (regno2 <= limit &&
2021               aarch64_register_saved_on_entry (regno2))
2022             {
2023               rtx mem2;
2024               /* Next highest register to be saved.  */
2025               mem2 = gen_mem_ref (Pmode,
2026                                   plus_constant
2027                                   (Pmode,
2028                                    base_rtx,
2029                                    start_offset + increment));
2030               if (restore == false)
2031                 {
2032                   insn = emit_insn
2033                     ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
2034                                         mem2, gen_rtx_REG (DImode, regno2)));
2035
2036                 }
2037               else
2038                 {
2039                   insn = emit_insn
2040                     ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
2041                                      gen_rtx_REG (DImode, regno2), mem2));
2042
2043                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2044                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
2045                 }
2046
2047                   /* The first part of a frame-related parallel insn
2048                      is always assumed to be relevant to the frame
2049                      calculations; subsequent parts, are only
2050                      frame-related if explicitly marked.  */
2051               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
2052                                             1)) = 1;
2053               regno = regno2;
2054               start_offset += increment * 2;
2055             }
2056           else
2057             {
2058               if (restore == false)
2059                 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
2060               else
2061                 {
2062                   insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
2063                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2064                 }
2065               start_offset += increment;
2066             }
2067           RTX_FRAME_RELATED_P (insn) = 1;
2068         }
2069     }
2070
2071   aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
2072
2073 }
2074
2075 /* AArch64 stack frames generated by this compiler look like:
2076
2077         +-------------------------------+
2078         |                               |
2079         |  incoming stack arguments     |
2080         |                               |
2081         +-------------------------------+ <-- arg_pointer_rtx
2082         |                               |
2083         |  callee-allocated save area   |
2084         |  for register varargs         |
2085         |                               |
2086         +-------------------------------+ <-- frame_pointer_rtx
2087         |                               |
2088         |  local variables              |
2089         |                               |
2090         +-------------------------------+
2091         |  padding0                     | \
2092         +-------------------------------+  |
2093         |                               |  |
2094         |                               |  |
2095         |  callee-saved registers       |  | frame.saved_regs_size
2096         |                               |  |
2097         +-------------------------------+  |
2098         |  LR'                          |  |
2099         +-------------------------------+  |
2100         |  FP'                          | /
2101       P +-------------------------------+ <-- hard_frame_pointer_rtx
2102         |  dynamic allocation           |
2103         +-------------------------------+
2104         |                               |
2105         |  outgoing stack arguments     |
2106         |                               |
2107         +-------------------------------+ <-- stack_pointer_rtx
2108
2109    Dynamic stack allocations such as alloca insert data at point P.
2110    They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2111    hard_frame_pointer_rtx unchanged.  */
2112
2113 /* Generate the prologue instructions for entry into a function.
2114    Establish the stack frame by decreasing the stack pointer with a
2115    properly calculated size and, if necessary, create a frame record
2116    filled with the values of LR and previous frame pointer.  The
2117    current FP is also set up if it is in use.  */
2118
2119 void
2120 aarch64_expand_prologue (void)
2121 {
2122   /* sub sp, sp, #<frame_size>
2123      stp {fp, lr}, [sp, #<frame_size> - 16]
2124      add fp, sp, #<frame_size> - hardfp_offset
2125      stp {cs_reg}, [fp, #-16] etc.
2126
2127      sub sp, sp, <final_adjustment_if_any>
2128   */
2129   HOST_WIDE_INT original_frame_size;    /* local variables + vararg save */
2130   HOST_WIDE_INT frame_size, offset;
2131   HOST_WIDE_INT fp_offset;              /* FP offset from SP */
2132   rtx insn;
2133
2134   aarch64_layout_frame ();
2135   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2136   gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2137               && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2138   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2139                 + crtl->outgoing_args_size);
2140   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2141                                           STACK_BOUNDARY / BITS_PER_UNIT);
2142
2143   if (flag_stack_usage_info)
2144     current_function_static_stack_size = frame_size;
2145
2146   fp_offset = (offset
2147                - original_frame_size
2148                - cfun->machine->frame.saved_regs_size);
2149
2150   /* Store pairs and load pairs have a range only -512 to 504.  */
2151   if (offset >= 512)
2152     {
2153       /* When the frame has a large size, an initial decrease is done on
2154          the stack pointer to jump over the callee-allocated save area for
2155          register varargs, the local variable area and/or the callee-saved
2156          register area.  This will allow the pre-index write-back
2157          store pair instructions to be used for setting up the stack frame
2158          efficiently.  */
2159       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2160       if (offset >= 512)
2161         offset = cfun->machine->frame.saved_regs_size;
2162
2163       frame_size -= (offset + crtl->outgoing_args_size);
2164       fp_offset = 0;
2165
2166       if (frame_size >= 0x1000000)
2167         {
2168           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2169           emit_move_insn (op0, GEN_INT (-frame_size));
2170           emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2171           aarch64_set_frame_expr (gen_rtx_SET
2172                                   (Pmode, stack_pointer_rtx,
2173                                    plus_constant (Pmode,
2174                                                   stack_pointer_rtx,
2175                                                   -frame_size)));
2176         }
2177       else if (frame_size > 0)
2178         {
2179           if ((frame_size & 0xfff) != frame_size)
2180             {
2181               insn = emit_insn (gen_add2_insn
2182                                 (stack_pointer_rtx,
2183                                  GEN_INT (-(frame_size
2184                                             & ~(HOST_WIDE_INT)0xfff))));
2185               RTX_FRAME_RELATED_P (insn) = 1;
2186             }
2187           if ((frame_size & 0xfff) != 0)
2188             {
2189               insn = emit_insn (gen_add2_insn
2190                                 (stack_pointer_rtx,
2191                                  GEN_INT (-(frame_size
2192                                             & (HOST_WIDE_INT)0xfff))));
2193               RTX_FRAME_RELATED_P (insn) = 1;
2194             }
2195         }
2196     }
2197   else
2198     frame_size = -1;
2199
2200   if (offset > 0)
2201     {
2202       /* Save the frame pointer and lr if the frame pointer is needed
2203          first.  Make the frame pointer point to the location of the
2204          old frame pointer on the stack.  */
2205       if (frame_pointer_needed)
2206         {
2207           rtx mem_fp, mem_lr;
2208
2209           if (fp_offset)
2210             {
2211               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2212                                                GEN_INT (-offset)));
2213               RTX_FRAME_RELATED_P (insn) = 1;
2214               aarch64_set_frame_expr (gen_rtx_SET
2215                                       (Pmode, stack_pointer_rtx,
2216                                        gen_rtx_MINUS (Pmode,
2217                                                       stack_pointer_rtx,
2218                                                       GEN_INT (offset))));
2219               mem_fp = gen_frame_mem (DImode,
2220                                       plus_constant (Pmode,
2221                                                      stack_pointer_rtx,
2222                                                      fp_offset));
2223               mem_lr = gen_frame_mem (DImode,
2224                                       plus_constant (Pmode,
2225                                                      stack_pointer_rtx,
2226                                                      fp_offset
2227                                                      + UNITS_PER_WORD));
2228               insn = emit_insn (gen_store_pairdi (mem_fp,
2229                                                   hard_frame_pointer_rtx,
2230                                                   mem_lr,
2231                                                   gen_rtx_REG (DImode,
2232                                                                LR_REGNUM)));
2233             }
2234           else
2235             {
2236               insn = emit_insn (gen_storewb_pairdi_di
2237                                 (stack_pointer_rtx, stack_pointer_rtx,
2238                                  hard_frame_pointer_rtx,
2239                                  gen_rtx_REG (DImode, LR_REGNUM),
2240                                  GEN_INT (-offset),
2241                                  GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2242               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2243             }
2244
2245           /* The first part of a frame-related parallel insn is always
2246              assumed to be relevant to the frame calculations;
2247              subsequent parts, are only frame-related if explicitly
2248              marked.  */
2249           RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2250           RTX_FRAME_RELATED_P (insn) = 1;
2251
2252           /* Set up frame pointer to point to the location of the
2253              previous frame pointer on the stack.  */
2254           insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2255                                            stack_pointer_rtx,
2256                                            GEN_INT (fp_offset)));
2257           aarch64_set_frame_expr (gen_rtx_SET
2258                                   (Pmode, hard_frame_pointer_rtx,
2259                                    plus_constant (Pmode,
2260                                                   stack_pointer_rtx,
2261                                                   fp_offset)));
2262           RTX_FRAME_RELATED_P (insn) = 1;
2263           insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2264                                            hard_frame_pointer_rtx));
2265         }
2266       else
2267         {
2268           insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2269                                            GEN_INT (-offset)));
2270           RTX_FRAME_RELATED_P (insn) = 1;
2271         }
2272
2273       aarch64_save_or_restore_callee_save_registers
2274         (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2275     }
2276
2277   /* when offset >= 512,
2278      sub sp, sp, #<outgoing_args_size> */
2279   if (frame_size > -1)
2280     {
2281       if (crtl->outgoing_args_size > 0)
2282         {
2283           insn = emit_insn (gen_add2_insn
2284                             (stack_pointer_rtx,
2285                              GEN_INT (- crtl->outgoing_args_size)));
2286           RTX_FRAME_RELATED_P (insn) = 1;
2287         }
2288     }
2289 }
2290
2291 /* Generate the epilogue instructions for returning from a function.  */
2292 void
2293 aarch64_expand_epilogue (bool for_sibcall)
2294 {
2295   HOST_WIDE_INT original_frame_size, frame_size, offset;
2296   HOST_WIDE_INT fp_offset;
2297   rtx insn;
2298   rtx cfa_reg;
2299
2300   aarch64_layout_frame ();
2301   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2302   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2303                 + crtl->outgoing_args_size);
2304   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2305                                           STACK_BOUNDARY / BITS_PER_UNIT);
2306
2307   fp_offset = (offset
2308                - original_frame_size
2309                - cfun->machine->frame.saved_regs_size);
2310
2311   cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2312
2313   /* Store pairs and load pairs have a range only -512 to 504.  */
2314   if (offset >= 512)
2315     {
2316       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2317       if (offset >= 512)
2318         offset = cfun->machine->frame.saved_regs_size;
2319
2320       frame_size -= (offset + crtl->outgoing_args_size);
2321       fp_offset = 0;
2322       if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2323         {
2324           insn = emit_insn (gen_add2_insn
2325                             (stack_pointer_rtx,
2326                              GEN_INT (crtl->outgoing_args_size)));
2327           RTX_FRAME_RELATED_P (insn) = 1;
2328         }
2329     }
2330   else
2331     frame_size = -1;
2332
2333   /* If there were outgoing arguments or we've done dynamic stack
2334      allocation, then restore the stack pointer from the frame
2335      pointer.  This is at most one insn and more efficient than using
2336      GCC's internal mechanism.  */
2337   if (frame_pointer_needed
2338       && (crtl->outgoing_args_size || cfun->calls_alloca))
2339     {
2340       insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2341                                        hard_frame_pointer_rtx,
2342                                        GEN_INT (- fp_offset)));
2343       RTX_FRAME_RELATED_P (insn) = 1;
2344       /* As SP is set to (FP - fp_offset), according to the rules in
2345          dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2346          from the value of SP from now on.  */
2347       cfa_reg = stack_pointer_rtx;
2348     }
2349
2350   aarch64_save_or_restore_callee_save_registers
2351     (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2352
2353   /* Restore the frame pointer and lr if the frame pointer is needed.  */
2354   if (offset > 0)
2355     {
2356       if (frame_pointer_needed)
2357         {
2358           rtx mem_fp, mem_lr;
2359
2360           if (fp_offset)
2361             {
2362               mem_fp = gen_frame_mem (DImode,
2363                                       plus_constant (Pmode,
2364                                                      stack_pointer_rtx,
2365                                                      fp_offset));
2366               mem_lr = gen_frame_mem (DImode,
2367                                       plus_constant (Pmode,
2368                                                      stack_pointer_rtx,
2369                                                      fp_offset
2370                                                      + UNITS_PER_WORD));
2371               insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2372                                                  mem_fp,
2373                                                  gen_rtx_REG (DImode,
2374                                                               LR_REGNUM),
2375                                                  mem_lr));
2376             }
2377           else
2378             {
2379               insn = emit_insn (gen_loadwb_pairdi_di
2380                                 (stack_pointer_rtx,
2381                                  stack_pointer_rtx,
2382                                  hard_frame_pointer_rtx,
2383                                  gen_rtx_REG (DImode, LR_REGNUM),
2384                                  GEN_INT (offset),
2385                                  GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2386               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2387               add_reg_note (insn, REG_CFA_ADJUST_CFA,
2388                             (gen_rtx_SET (Pmode, stack_pointer_rtx,
2389                                           plus_constant (Pmode, cfa_reg,
2390                                                          offset))));
2391             }
2392
2393           /* The first part of a frame-related parallel insn
2394              is always assumed to be relevant to the frame
2395              calculations; subsequent parts, are only
2396              frame-related if explicitly marked.  */
2397           RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2398           RTX_FRAME_RELATED_P (insn) = 1;
2399           add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2400           add_reg_note (insn, REG_CFA_RESTORE,
2401                         gen_rtx_REG (DImode, LR_REGNUM));
2402
2403           if (fp_offset)
2404             {
2405               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2406                                                GEN_INT (offset)));
2407               RTX_FRAME_RELATED_P (insn) = 1;
2408             }
2409         }
2410       else
2411         {
2412           insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2413                                            GEN_INT (offset)));
2414           RTX_FRAME_RELATED_P (insn) = 1;
2415         }
2416     }
2417
2418   /* Stack adjustment for exception handler.  */
2419   if (crtl->calls_eh_return)
2420     {
2421       /* We need to unwind the stack by the offset computed by
2422          EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
2423          based on SP.  Ideally we would update the SP and define the
2424          CFA along the lines of:
2425
2426          SP = SP + EH_RETURN_STACKADJ_RTX
2427          (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2428
2429          However the dwarf emitter only understands a constant
2430          register offset.
2431
2432          The solution chosen here is to use the otherwise unused IP0
2433          as a temporary register to hold the current SP value.  The
2434          CFA is described using IP0 then SP is modified.  */
2435
2436       rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2437
2438       insn = emit_move_insn (ip0, stack_pointer_rtx);
2439       add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2440       RTX_FRAME_RELATED_P (insn) = 1;
2441
2442       emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2443
2444       /* Ensure the assignment to IP0 does not get optimized away.  */
2445       emit_use (ip0);
2446     }
2447
2448   if (frame_size > -1)
2449     {
2450       if (frame_size >= 0x1000000)
2451         {
2452           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2453           emit_move_insn (op0, GEN_INT (frame_size));
2454           emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2455           aarch64_set_frame_expr (gen_rtx_SET
2456                                   (Pmode, stack_pointer_rtx,
2457                                    plus_constant (Pmode,
2458                                                   stack_pointer_rtx,
2459                                                   frame_size)));
2460         }
2461       else if (frame_size > 0)
2462         {
2463           if ((frame_size & 0xfff) != 0)
2464             {
2465               insn = emit_insn (gen_add2_insn
2466                                 (stack_pointer_rtx,
2467                                  GEN_INT ((frame_size
2468                                            & (HOST_WIDE_INT) 0xfff))));
2469               RTX_FRAME_RELATED_P (insn) = 1;
2470             }
2471           if ((frame_size & 0xfff) != frame_size)
2472             {
2473               insn = emit_insn (gen_add2_insn
2474                                 (stack_pointer_rtx,
2475                                  GEN_INT ((frame_size
2476                                            & ~ (HOST_WIDE_INT) 0xfff))));
2477               RTX_FRAME_RELATED_P (insn) = 1;
2478             }
2479         }
2480
2481         aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2482                                              plus_constant (Pmode,
2483                                                             stack_pointer_rtx,
2484                                                             offset)));
2485     }
2486
2487   emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2488   if (!for_sibcall)
2489     emit_jump_insn (ret_rtx);
2490 }
2491
2492 /* Return the place to copy the exception unwinding return address to.
2493    This will probably be a stack slot, but could (in theory be the
2494    return register).  */
2495 rtx
2496 aarch64_final_eh_return_addr (void)
2497 {
2498   HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2499   aarch64_layout_frame ();
2500   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2501   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2502                 + crtl->outgoing_args_size);
2503   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2504                                           STACK_BOUNDARY / BITS_PER_UNIT);
2505   fp_offset = offset
2506     - original_frame_size
2507     - cfun->machine->frame.saved_regs_size;
2508
2509   if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2510     return gen_rtx_REG (DImode, LR_REGNUM);
2511
2512   /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
2513      result in a store to save LR introduced by builtin_eh_return () being
2514      incorrectly deleted because the alias is not detected.
2515      So in the calculation of the address to copy the exception unwinding
2516      return address to, we note 2 cases.
2517      If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2518      we return a SP-relative location since all the addresses are SP-relative
2519      in this case.  This prevents the store from being optimized away.
2520      If the fp_offset is not 0, then the addresses will be FP-relative and
2521      therefore we return a FP-relative location.  */
2522
2523   if (frame_pointer_needed)
2524     {
2525       if (fp_offset)
2526         return gen_frame_mem (DImode,
2527                               plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2528       else
2529         return gen_frame_mem (DImode,
2530                               plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2531     }
2532
2533   /* If FP is not needed, we calculate the location of LR, which would be
2534      at the top of the saved registers block.  */
2535
2536   return gen_frame_mem (DImode,
2537                         plus_constant (Pmode,
2538                                        stack_pointer_rtx,
2539                                        fp_offset
2540                                        + cfun->machine->frame.saved_regs_size
2541                                        - 2 * UNITS_PER_WORD));
2542 }
2543
2544 /* Possibly output code to build up a constant in a register.  For
2545    the benefit of the costs infrastructure, returns the number of
2546    instructions which would be emitted.  GENERATE inhibits or
2547    enables code generation.  */
2548
2549 static int
2550 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2551 {
2552   int insns = 0;
2553
2554   if (aarch64_bitmask_imm (val, DImode))
2555     {
2556       if (generate)
2557         emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2558       insns = 1;
2559     }
2560   else
2561     {
2562       int i;
2563       int ncount = 0;
2564       int zcount = 0;
2565       HOST_WIDE_INT valp = val >> 16;
2566       HOST_WIDE_INT valm;
2567       HOST_WIDE_INT tval;
2568
2569       for (i = 16; i < 64; i += 16)
2570         {
2571           valm = (valp & 0xffff);
2572
2573           if (valm != 0)
2574             ++ zcount;
2575
2576           if (valm != 0xffff)
2577             ++ ncount;
2578
2579           valp >>= 16;
2580         }
2581
2582       /* zcount contains the number of additional MOVK instructions
2583          required if the constant is built up with an initial MOVZ instruction,
2584          while ncount is the number of MOVK instructions required if starting
2585          with a MOVN instruction.  Choose the sequence that yields the fewest
2586          number of instructions, preferring MOVZ instructions when they are both
2587          the same.  */
2588       if (ncount < zcount)
2589         {
2590           if (generate)
2591             emit_move_insn (gen_rtx_REG (Pmode, regnum),
2592                             GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2593           tval = 0xffff;
2594           insns++;
2595         }
2596       else
2597         {
2598           if (generate)
2599             emit_move_insn (gen_rtx_REG (Pmode, regnum),
2600                             GEN_INT (val & 0xffff));
2601           tval = 0;
2602           insns++;
2603         }
2604
2605       val >>= 16;
2606
2607       for (i = 16; i < 64; i += 16)
2608         {
2609           if ((val & 0xffff) != tval)
2610             {
2611               if (generate)
2612                 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2613                                            GEN_INT (i),
2614                                            GEN_INT (val & 0xffff)));
2615               insns++;
2616             }
2617           val >>= 16;
2618         }
2619     }
2620   return insns;
2621 }
2622
2623 static void
2624 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2625 {
2626   HOST_WIDE_INT mdelta = delta;
2627   rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2628   rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2629
2630   if (mdelta < 0)
2631     mdelta = -mdelta;
2632
2633   if (mdelta >= 4096 * 4096)
2634     {
2635       (void) aarch64_build_constant (scratchreg, delta, true);
2636       emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2637     }
2638   else if (mdelta > 0)
2639     {
2640       if (mdelta >= 4096)
2641         {
2642           emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2643           rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2644           if (delta < 0)
2645             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2646                                     gen_rtx_MINUS (Pmode, this_rtx, shift)));
2647           else
2648             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2649                                     gen_rtx_PLUS (Pmode, this_rtx, shift)));
2650         }
2651       if (mdelta % 4096 != 0)
2652         {
2653           scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2654           emit_insn (gen_rtx_SET (Pmode, this_rtx,
2655                                   gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2656         }
2657     }
2658 }
2659
2660 /* Output code to add DELTA to the first argument, and then jump
2661    to FUNCTION.  Used for C++ multiple inheritance.  */
2662 static void
2663 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2664                          HOST_WIDE_INT delta,
2665                          HOST_WIDE_INT vcall_offset,
2666                          tree function)
2667 {
2668   /* The this pointer is always in x0.  Note that this differs from
2669      Arm where the this pointer maybe bumped to r1 if r0 is required
2670      to return a pointer to an aggregate.  On AArch64 a result value
2671      pointer will be in x8.  */
2672   int this_regno = R0_REGNUM;
2673   rtx this_rtx, temp0, temp1, addr, insn, funexp;
2674
2675   reload_completed = 1;
2676   emit_note (NOTE_INSN_PROLOGUE_END);
2677
2678   if (vcall_offset == 0)
2679     aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2680   else
2681     {
2682       gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2683
2684       this_rtx = gen_rtx_REG (Pmode, this_regno);
2685       temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2686       temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2687
2688       addr = this_rtx;
2689       if (delta != 0)
2690         {
2691           if (delta >= -256 && delta < 256)
2692             addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2693                                        plus_constant (Pmode, this_rtx, delta));
2694           else
2695             aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2696         }
2697
2698       if (Pmode == ptr_mode)
2699         aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2700       else
2701         aarch64_emit_move (temp0,
2702                            gen_rtx_ZERO_EXTEND (Pmode,
2703                                                 gen_rtx_MEM (ptr_mode, addr)));
2704
2705       if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2706           addr = plus_constant (Pmode, temp0, vcall_offset);
2707       else
2708         {
2709           (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2710           addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2711         }
2712
2713       if (Pmode == ptr_mode)
2714         aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2715       else
2716         aarch64_emit_move (temp1,
2717                            gen_rtx_SIGN_EXTEND (Pmode,
2718                                                 gen_rtx_MEM (ptr_mode, addr)));
2719
2720       emit_insn (gen_add2_insn (this_rtx, temp1));
2721     }
2722
2723   /* Generate a tail call to the target function.  */
2724   if (!TREE_USED (function))
2725     {
2726       assemble_external (function);
2727       TREE_USED (function) = 1;
2728     }
2729   funexp = XEXP (DECL_RTL (function), 0);
2730   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2731   insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2732   SIBLING_CALL_P (insn) = 1;
2733
2734   insn = get_insns ();
2735   shorten_branches (insn);
2736   final_start_function (insn, file, 1);
2737   final (insn, file, 1);
2738   final_end_function ();
2739
2740   /* Stop pretending to be a post-reload pass.  */
2741   reload_completed = 0;
2742 }
2743
2744 static int
2745 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2746 {
2747   if (GET_CODE (*x) == SYMBOL_REF)
2748     return SYMBOL_REF_TLS_MODEL (*x) != 0;
2749
2750   /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2751      TLS offsets, not real symbol references.  */
2752   if (GET_CODE (*x) == UNSPEC
2753       && XINT (*x, 1) == UNSPEC_TLS)
2754     return -1;
2755
2756   return 0;
2757 }
2758
2759 static bool
2760 aarch64_tls_referenced_p (rtx x)
2761 {
2762   if (!TARGET_HAVE_TLS)
2763     return false;
2764
2765   return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2766 }
2767
2768
2769 static int
2770 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2771 {
2772   const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2773   const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2774
2775   if (*imm1 < *imm2)
2776     return -1;
2777   if (*imm1 > *imm2)
2778     return +1;
2779   return 0;
2780 }
2781
2782
2783 static void
2784 aarch64_build_bitmask_table (void)
2785 {
2786   unsigned HOST_WIDE_INT mask, imm;
2787   unsigned int log_e, e, s, r;
2788   unsigned int nimms = 0;
2789
2790   for (log_e = 1; log_e <= 6; log_e++)
2791     {
2792       e = 1 << log_e;
2793       if (e == 64)
2794         mask = ~(HOST_WIDE_INT) 0;
2795       else
2796         mask = ((HOST_WIDE_INT) 1 << e) - 1;
2797       for (s = 1; s < e; s++)
2798         {
2799           for (r = 0; r < e; r++)
2800             {
2801               /* set s consecutive bits to 1 (s < 64) */
2802               imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2803               /* rotate right by r */
2804               if (r != 0)
2805                 imm = ((imm >> r) | (imm << (e - r))) & mask;
2806               /* replicate the constant depending on SIMD size */
2807               switch (log_e) {
2808               case 1: imm |= (imm <<  2);
2809               case 2: imm |= (imm <<  4);
2810               case 3: imm |= (imm <<  8);
2811               case 4: imm |= (imm << 16);
2812               case 5: imm |= (imm << 32);
2813               case 6:
2814                 break;
2815               default:
2816                 gcc_unreachable ();
2817               }
2818               gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2819               aarch64_bitmasks[nimms++] = imm;
2820             }
2821         }
2822     }
2823
2824   gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2825   qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2826          aarch64_bitmasks_cmp);
2827 }
2828
2829
2830 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2831    a left shift of 0 or 12 bits.  */
2832 bool
2833 aarch64_uimm12_shift (HOST_WIDE_INT val)
2834 {
2835   return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2836           || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2837           );
2838 }
2839
2840
2841 /* Return true if val is an immediate that can be loaded into a
2842    register by a MOVZ instruction.  */
2843 static bool
2844 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2845 {
2846   if (GET_MODE_SIZE (mode) > 4)
2847     {
2848       if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2849           || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2850         return 1;
2851     }
2852   else
2853     {
2854       /* Ignore sign extension.  */
2855       val &= (HOST_WIDE_INT) 0xffffffff;
2856     }
2857   return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2858           || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2859 }
2860
2861
2862 /* Return true if val is a valid bitmask immediate.  */
2863 bool
2864 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2865 {
2866   if (GET_MODE_SIZE (mode) < 8)
2867     {
2868       /* Replicate bit pattern.  */
2869       val &= (HOST_WIDE_INT) 0xffffffff;
2870       val |= val << 32;
2871     }
2872   return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2873                   sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2874 }
2875
2876
2877 /* Return true if val is an immediate that can be loaded into a
2878    register in a single instruction.  */
2879 bool
2880 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2881 {
2882   if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2883     return 1;
2884   return aarch64_bitmask_imm (val, mode);
2885 }
2886
2887 static bool
2888 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2889 {
2890   rtx base, offset;
2891
2892   if (GET_CODE (x) == HIGH)
2893     return true;
2894
2895   split_const (x, &base, &offset);
2896   if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2897     {
2898       if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2899           != SYMBOL_FORCE_TO_MEM)
2900         return true;
2901       else
2902         /* Avoid generating a 64-bit relocation in ILP32; leave
2903            to aarch64_expand_mov_immediate to handle it properly.  */
2904         return mode != ptr_mode;
2905     }
2906
2907   return aarch64_tls_referenced_p (x);
2908 }
2909
2910 /* Return true if register REGNO is a valid index register.
2911    STRICT_P is true if REG_OK_STRICT is in effect.  */
2912
2913 bool
2914 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2915 {
2916   if (!HARD_REGISTER_NUM_P (regno))
2917     {
2918       if (!strict_p)
2919         return true;
2920
2921       if (!reg_renumber)
2922         return false;
2923
2924       regno = reg_renumber[regno];
2925     }
2926   return GP_REGNUM_P (regno);
2927 }
2928
2929 /* Return true if register REGNO is a valid base register for mode MODE.
2930    STRICT_P is true if REG_OK_STRICT is in effect.  */
2931
2932 bool
2933 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2934 {
2935   if (!HARD_REGISTER_NUM_P (regno))
2936     {
2937       if (!strict_p)
2938         return true;
2939
2940       if (!reg_renumber)
2941         return false;
2942
2943       regno = reg_renumber[regno];
2944     }
2945
2946   /* The fake registers will be eliminated to either the stack or
2947      hard frame pointer, both of which are usually valid base registers.
2948      Reload deals with the cases where the eliminated form isn't valid.  */
2949   return (GP_REGNUM_P (regno)
2950           || regno == SP_REGNUM
2951           || regno == FRAME_POINTER_REGNUM
2952           || regno == ARG_POINTER_REGNUM);
2953 }
2954
2955 /* Return true if X is a valid base register for mode MODE.
2956    STRICT_P is true if REG_OK_STRICT is in effect.  */
2957
2958 static bool
2959 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2960 {
2961   if (!strict_p && GET_CODE (x) == SUBREG)
2962     x = SUBREG_REG (x);
2963
2964   return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2965 }
2966
2967 /* Return true if address offset is a valid index.  If it is, fill in INFO
2968    appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */
2969
2970 static bool
2971 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2972                         enum machine_mode mode, bool strict_p)
2973 {
2974   enum aarch64_address_type type;
2975   rtx index;
2976   int shift;
2977
2978   /* (reg:P) */
2979   if ((REG_P (x) || GET_CODE (x) == SUBREG)
2980       && GET_MODE (x) == Pmode)
2981     {
2982       type = ADDRESS_REG_REG;
2983       index = x;
2984       shift = 0;
2985     }
2986   /* (sign_extend:DI (reg:SI)) */
2987   else if ((GET_CODE (x) == SIGN_EXTEND
2988             || GET_CODE (x) == ZERO_EXTEND)
2989            && GET_MODE (x) == DImode
2990            && GET_MODE (XEXP (x, 0)) == SImode)
2991     {
2992       type = (GET_CODE (x) == SIGN_EXTEND)
2993         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2994       index = XEXP (x, 0);
2995       shift = 0;
2996     }
2997   /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2998   else if (GET_CODE (x) == MULT
2999            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3000                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3001            && GET_MODE (XEXP (x, 0)) == DImode
3002            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3003            && CONST_INT_P (XEXP (x, 1)))
3004     {
3005       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3006         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3007       index = XEXP (XEXP (x, 0), 0);
3008       shift = exact_log2 (INTVAL (XEXP (x, 1)));
3009     }
3010   /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3011   else if (GET_CODE (x) == ASHIFT
3012            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3013                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3014            && GET_MODE (XEXP (x, 0)) == DImode
3015            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3016            && CONST_INT_P (XEXP (x, 1)))
3017     {
3018       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3019         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3020       index = XEXP (XEXP (x, 0), 0);
3021       shift = INTVAL (XEXP (x, 1));
3022     }
3023   /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3024   else if ((GET_CODE (x) == SIGN_EXTRACT
3025             || GET_CODE (x) == ZERO_EXTRACT)
3026            && GET_MODE (x) == DImode
3027            && GET_CODE (XEXP (x, 0)) == MULT
3028            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3029            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3030     {
3031       type = (GET_CODE (x) == SIGN_EXTRACT)
3032         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3033       index = XEXP (XEXP (x, 0), 0);
3034       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3035       if (INTVAL (XEXP (x, 1)) != 32 + shift
3036           || INTVAL (XEXP (x, 2)) != 0)
3037         shift = -1;
3038     }
3039   /* (and:DI (mult:DI (reg:DI) (const_int scale))
3040      (const_int 0xffffffff<<shift)) */
3041   else if (GET_CODE (x) == AND
3042            && GET_MODE (x) == DImode
3043            && GET_CODE (XEXP (x, 0)) == MULT
3044            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3045            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3046            && CONST_INT_P (XEXP (x, 1)))
3047     {
3048       type = ADDRESS_REG_UXTW;
3049       index = XEXP (XEXP (x, 0), 0);
3050       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3051       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3052         shift = -1;
3053     }
3054   /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3055   else if ((GET_CODE (x) == SIGN_EXTRACT
3056             || GET_CODE (x) == ZERO_EXTRACT)
3057            && GET_MODE (x) == DImode
3058            && GET_CODE (XEXP (x, 0)) == ASHIFT
3059            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3060            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3061     {
3062       type = (GET_CODE (x) == SIGN_EXTRACT)
3063         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3064       index = XEXP (XEXP (x, 0), 0);
3065       shift = INTVAL (XEXP (XEXP (x, 0), 1));
3066       if (INTVAL (XEXP (x, 1)) != 32 + shift
3067           || INTVAL (XEXP (x, 2)) != 0)
3068         shift = -1;
3069     }
3070   /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3071      (const_int 0xffffffff<<shift)) */
3072   else if (GET_CODE (x) == AND
3073            && GET_MODE (x) == DImode
3074            && GET_CODE (XEXP (x, 0)) == ASHIFT
3075            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3076            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3077            && CONST_INT_P (XEXP (x, 1)))
3078     {
3079       type = ADDRESS_REG_UXTW;
3080       index = XEXP (XEXP (x, 0), 0);
3081       shift = INTVAL (XEXP (XEXP (x, 0), 1));
3082       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3083         shift = -1;
3084     }
3085   /* (mult:P (reg:P) (const_int scale)) */
3086   else if (GET_CODE (x) == MULT
3087            && GET_MODE (x) == Pmode
3088            && GET_MODE (XEXP (x, 0)) == Pmode
3089            && CONST_INT_P (XEXP (x, 1)))
3090     {
3091       type = ADDRESS_REG_REG;
3092       index = XEXP (x, 0);
3093       shift = exact_log2 (INTVAL (XEXP (x, 1)));
3094     }
3095   /* (ashift:P (reg:P) (const_int shift)) */
3096   else if (GET_CODE (x) == ASHIFT
3097            && GET_MODE (x) == Pmode
3098            && GET_MODE (XEXP (x, 0)) == Pmode
3099            && CONST_INT_P (XEXP (x, 1)))
3100     {
3101       type = ADDRESS_REG_REG;
3102       index = XEXP (x, 0);
3103       shift = INTVAL (XEXP (x, 1));
3104     }
3105   else
3106     return false;
3107
3108   if (GET_CODE (index) == SUBREG)
3109     index = SUBREG_REG (index);
3110
3111   if ((shift == 0 ||
3112        (shift > 0 && shift <= 3
3113         && (1 << shift) == GET_MODE_SIZE (mode)))
3114       && REG_P (index)
3115       && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3116     {
3117       info->type = type;
3118       info->offset = index;
3119       info->shift = shift;
3120       return true;
3121     }
3122
3123   return false;
3124 }
3125
3126 static inline bool
3127 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3128 {
3129   return (offset >= -64 * GET_MODE_SIZE (mode)
3130           && offset < 64 * GET_MODE_SIZE (mode)
3131           && offset % GET_MODE_SIZE (mode) == 0);
3132 }
3133
3134 static inline bool
3135 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3136                                HOST_WIDE_INT offset)
3137 {
3138   return offset >= -256 && offset < 256;
3139 }
3140
3141 static inline bool
3142 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3143 {
3144   return (offset >= 0
3145           && offset < 4096 * GET_MODE_SIZE (mode)
3146           && offset % GET_MODE_SIZE (mode) == 0);
3147 }
3148
3149 /* Return true if X is a valid address for machine mode MODE.  If it is,
3150    fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
3151    effect.  OUTER_CODE is PARALLEL for a load/store pair.  */
3152
3153 static bool
3154 aarch64_classify_address (struct aarch64_address_info *info,
3155                           rtx x, enum machine_mode mode,
3156                           RTX_CODE outer_code, bool strict_p)
3157 {
3158   enum rtx_code code = GET_CODE (x);
3159   rtx op0, op1;
3160   bool allow_reg_index_p =
3161     outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3162
3163   /* Don't support anything other than POST_INC or REG addressing for
3164      AdvSIMD.  */
3165   if (aarch64_vector_mode_p (mode)
3166       && (code != POST_INC && code != REG))
3167     return false;
3168
3169   switch (code)
3170     {
3171     case REG:
3172     case SUBREG:
3173       info->type = ADDRESS_REG_IMM;
3174       info->base = x;
3175       info->offset = const0_rtx;
3176       return aarch64_base_register_rtx_p (x, strict_p);
3177
3178     case PLUS:
3179       op0 = XEXP (x, 0);
3180       op1 = XEXP (x, 1);
3181       if (GET_MODE_SIZE (mode) != 0
3182           && CONST_INT_P (op1)
3183           && aarch64_base_register_rtx_p (op0, strict_p))
3184         {
3185           HOST_WIDE_INT offset = INTVAL (op1);
3186
3187           info->type = ADDRESS_REG_IMM;
3188           info->base = op0;
3189           info->offset = op1;
3190
3191           /* TImode and TFmode values are allowed in both pairs of X
3192              registers and individual Q registers.  The available
3193              address modes are:
3194              X,X: 7-bit signed scaled offset
3195              Q:   9-bit signed offset
3196              We conservatively require an offset representable in either mode.
3197            */
3198           if (mode == TImode || mode == TFmode)
3199             return (offset_7bit_signed_scaled_p (mode, offset)
3200                     && offset_9bit_signed_unscaled_p (mode, offset));
3201
3202           if (outer_code == PARALLEL)
3203             return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3204                     && offset_7bit_signed_scaled_p (mode, offset));
3205           else
3206             return (offset_9bit_signed_unscaled_p (mode, offset)
3207                     || offset_12bit_unsigned_scaled_p (mode, offset));
3208         }
3209
3210       if (allow_reg_index_p)
3211         {
3212           /* Look for base + (scaled/extended) index register.  */
3213           if (aarch64_base_register_rtx_p (op0, strict_p)
3214               && aarch64_classify_index (info, op1, mode, strict_p))
3215             {
3216               info->base = op0;
3217               return true;
3218             }
3219           if (aarch64_base_register_rtx_p (op1, strict_p)
3220               && aarch64_classify_index (info, op0, mode, strict_p))
3221             {
3222               info->base = op1;
3223               return true;
3224             }
3225         }
3226
3227       return false;
3228
3229     case POST_INC:
3230     case POST_DEC:
3231     case PRE_INC:
3232     case PRE_DEC:
3233       info->type = ADDRESS_REG_WB;
3234       info->base = XEXP (x, 0);
3235       info->offset = NULL_RTX;
3236       return aarch64_base_register_rtx_p (info->base, strict_p);
3237
3238     case POST_MODIFY:
3239     case PRE_MODIFY:
3240       info->type = ADDRESS_REG_WB;
3241       info->base = XEXP (x, 0);
3242       if (GET_CODE (XEXP (x, 1)) == PLUS
3243           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3244           && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3245           && aarch64_base_register_rtx_p (info->base, strict_p))
3246         {
3247           HOST_WIDE_INT offset;
3248           info->offset = XEXP (XEXP (x, 1), 1);
3249           offset = INTVAL (info->offset);
3250
3251           /* TImode and TFmode values are allowed in both pairs of X
3252              registers and individual Q registers.  The available
3253              address modes are:
3254              X,X: 7-bit signed scaled offset
3255              Q:   9-bit signed offset
3256              We conservatively require an offset representable in either mode.
3257            */
3258           if (mode == TImode || mode == TFmode)
3259             return (offset_7bit_signed_scaled_p (mode, offset)
3260                     && offset_9bit_signed_unscaled_p (mode, offset));
3261
3262           if (outer_code == PARALLEL)
3263             return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3264                     && offset_7bit_signed_scaled_p (mode, offset));
3265           else
3266             return offset_9bit_signed_unscaled_p (mode, offset);
3267         }
3268       return false;
3269
3270     case CONST:
3271     case SYMBOL_REF:
3272     case LABEL_REF:
3273       /* load literal: pc-relative constant pool entry.  Only supported
3274          for SI mode or larger.  */
3275       info->type = ADDRESS_SYMBOLIC;
3276       if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3277         {
3278           rtx sym, addend;
3279
3280           split_const (x, &sym, &addend);
3281           return (GET_CODE (sym) == LABEL_REF
3282                   || (GET_CODE (sym) == SYMBOL_REF
3283                       && CONSTANT_POOL_ADDRESS_P (sym)));
3284         }
3285       return false;
3286
3287     case LO_SUM:
3288       info->type = ADDRESS_LO_SUM;
3289       info->base = XEXP (x, 0);
3290       info->offset = XEXP (x, 1);
3291       if (allow_reg_index_p
3292           && aarch64_base_register_rtx_p (info->base, strict_p))
3293         {
3294           rtx sym, offs;
3295           split_const (info->offset, &sym, &offs);
3296           if (GET_CODE (sym) == SYMBOL_REF
3297               && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3298                   == SYMBOL_SMALL_ABSOLUTE))
3299             {
3300               /* The symbol and offset must be aligned to the access size.  */
3301               unsigned int align;
3302               unsigned int ref_size;
3303
3304               if (CONSTANT_POOL_ADDRESS_P (sym))
3305                 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3306               else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3307                 {
3308                   tree exp = SYMBOL_REF_DECL (sym);
3309                   align = TYPE_ALIGN (TREE_TYPE (exp));
3310                   align = CONSTANT_ALIGNMENT (exp, align);
3311                 }
3312               else if (SYMBOL_REF_DECL (sym))
3313                 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3314               else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3315                        && SYMBOL_REF_BLOCK (sym) != NULL)
3316                 align = SYMBOL_REF_BLOCK (sym)->alignment;
3317               else
3318                 align = BITS_PER_UNIT;
3319
3320               ref_size = GET_MODE_SIZE (mode);
3321               if (ref_size == 0)
3322                 ref_size = GET_MODE_SIZE (DImode);
3323
3324               return ((INTVAL (offs) & (ref_size - 1)) == 0
3325                       && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3326             }
3327         }
3328       return false;
3329
3330     default:
3331       return false;
3332     }
3333 }
3334
3335 bool
3336 aarch64_symbolic_address_p (rtx x)
3337 {
3338   rtx offset;
3339
3340   split_const (x, &x, &offset);
3341   return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3342 }
3343
3344 /* Classify the base of symbolic expression X, given that X appears in
3345    context CONTEXT.  */
3346
3347 enum aarch64_symbol_type
3348 aarch64_classify_symbolic_expression (rtx x,
3349                                       enum aarch64_symbol_context context)
3350 {
3351   rtx offset;
3352
3353   split_const (x, &x, &offset);
3354   return aarch64_classify_symbol (x, context);
3355 }
3356
3357
3358 /* Return TRUE if X is a legitimate address for accessing memory in
3359    mode MODE.  */
3360 static bool
3361 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3362 {
3363   struct aarch64_address_info addr;
3364
3365   return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3366 }
3367
3368 /* Return TRUE if X is a legitimate address for accessing memory in
3369    mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
3370    pair operation.  */
3371 bool
3372 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3373                               RTX_CODE outer_code, bool strict_p)
3374 {
3375   struct aarch64_address_info addr;
3376
3377   return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3378 }
3379
3380 /* Return TRUE if rtx X is immediate constant 0.0 */
3381 bool
3382 aarch64_float_const_zero_rtx_p (rtx x)
3383 {
3384   REAL_VALUE_TYPE r;
3385
3386   if (GET_MODE (x) == VOIDmode)
3387     return false;
3388
3389   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3390   if (REAL_VALUE_MINUS_ZERO (r))
3391     return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3392   return REAL_VALUES_EQUAL (r, dconst0);
3393 }
3394
3395 /* Return the fixed registers used for condition codes.  */
3396
3397 static bool
3398 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3399 {
3400   *p1 = CC_REGNUM;
3401   *p2 = INVALID_REGNUM;
3402   return true;
3403 }
3404
3405 enum machine_mode
3406 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3407 {
3408   /* All floating point compares return CCFP if it is an equality
3409      comparison, and CCFPE otherwise.  */
3410   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3411     {
3412       switch (code)
3413         {
3414         case EQ:
3415         case NE:
3416         case UNORDERED:
3417         case ORDERED:
3418         case UNLT:
3419         case UNLE:
3420         case UNGT:
3421         case UNGE:
3422         case UNEQ:
3423         case LTGT:
3424           return CCFPmode;
3425
3426         case LT:
3427         case LE:
3428         case GT:
3429         case GE:
3430           return CCFPEmode;
3431
3432         default:
3433           gcc_unreachable ();
3434         }
3435     }
3436
3437   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3438       && y == const0_rtx
3439       && (code == EQ || code == NE || code == LT || code == GE)
3440       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3441           || GET_CODE (x) == NEG))
3442     return CC_NZmode;
3443
3444   /* A compare with a shifted operand.  Because of canonicalization,
3445      the comparison will have to be swapped when we emit the assembly
3446      code.  */
3447   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3448       && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3449       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3450           || GET_CODE (x) == LSHIFTRT
3451           || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3452     return CC_SWPmode;
3453
3454   /* Similarly for a negated operand, but we can only do this for
3455      equalities.  */
3456   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3457       && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3458       && (code == EQ || code == NE)
3459       && GET_CODE (x) == NEG)
3460     return CC_Zmode;
3461
3462   /* A compare of a mode narrower than SI mode against zero can be done
3463      by extending the value in the comparison.  */
3464   if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3465       && y == const0_rtx)
3466     /* Only use sign-extension if we really need it.  */
3467     return ((code == GT || code == GE || code == LE || code == LT)
3468             ? CC_SESWPmode : CC_ZESWPmode);
3469
3470   /* For everything else, return CCmode.  */
3471   return CCmode;
3472 }
3473
3474 static unsigned
3475 aarch64_get_condition_code (rtx x)
3476 {
3477   enum machine_mode mode = GET_MODE (XEXP (x, 0));
3478   enum rtx_code comp_code = GET_CODE (x);
3479
3480   if (GET_MODE_CLASS (mode) != MODE_CC)
3481     mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3482
3483   switch (mode)
3484     {
3485     case CCFPmode:
3486     case CCFPEmode:
3487       switch (comp_code)
3488         {
3489         case GE: return AARCH64_GE;
3490         case GT: return AARCH64_GT;
3491         case LE: return AARCH64_LS;
3492         case LT: return AARCH64_MI;
3493         case NE: return AARCH64_NE;
3494         case EQ: return AARCH64_EQ;
3495         case ORDERED: return AARCH64_VC;
3496         case UNORDERED: return AARCH64_VS;
3497         case UNLT: return AARCH64_LT;
3498         case UNLE: return AARCH64_LE;
3499         case UNGT: return AARCH64_HI;
3500         case UNGE: return AARCH64_PL;
3501         default: gcc_unreachable ();
3502         }
3503       break;
3504
3505     case CCmode:
3506       switch (comp_code)
3507         {
3508         case NE: return AARCH64_NE;
3509         case EQ: return AARCH64_EQ;
3510         case GE: return AARCH64_GE;
3511         case GT: return AARCH64_GT;
3512         case LE: return AARCH64_LE;
3513         case LT: return AARCH64_LT;
3514         case GEU: return AARCH64_CS;
3515         case GTU: return AARCH64_HI;
3516         case LEU: return AARCH64_LS;
3517         case LTU: return AARCH64_CC;
3518         default: gcc_unreachable ();
3519         }
3520       break;
3521
3522     case CC_SWPmode:
3523     case CC_ZESWPmode:
3524     case CC_SESWPmode:
3525       switch (comp_code)
3526         {
3527         case NE: return AARCH64_NE;
3528         case EQ: return AARCH64_EQ;
3529         case GE: return AARCH64_LE;
3530         case GT: return AARCH64_LT;
3531         case LE: return AARCH64_GE;
3532         case LT: return AARCH64_GT;
3533         case GEU: return AARCH64_LS;
3534         case GTU: return AARCH64_CC;
3535         case LEU: return AARCH64_CS;
3536         case LTU: return AARCH64_HI;
3537         default: gcc_unreachable ();
3538         }
3539       break;
3540
3541     case CC_NZmode:
3542       switch (comp_code)
3543         {
3544         case NE: return AARCH64_NE;
3545         case EQ: return AARCH64_EQ;
3546         case GE: return AARCH64_PL;
3547         case LT: return AARCH64_MI;
3548         default: gcc_unreachable ();
3549         }
3550       break;
3551
3552     case CC_Zmode:
3553       switch (comp_code)
3554         {
3555         case NE: return AARCH64_NE;
3556         case EQ: return AARCH64_EQ;
3557         default: gcc_unreachable ();
3558         }
3559       break;
3560
3561     default:
3562       gcc_unreachable ();
3563       break;
3564     }
3565 }
3566
3567 static unsigned
3568 bit_count (unsigned HOST_WIDE_INT value)
3569 {
3570   unsigned count = 0;
3571
3572   while (value)
3573     {
3574       count++;
3575       value &= value - 1;
3576     }
3577
3578   return count;
3579 }
3580
3581 void
3582 aarch64_print_operand (FILE *f, rtx x, char code)
3583 {
3584   switch (code)
3585     {
3586     /* An integer or symbol address without a preceding # sign.  */
3587     case 'c':
3588       switch (GET_CODE (x))
3589         {
3590         case CONST_INT:
3591           fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3592           break;
3593
3594         case SYMBOL_REF:
3595           output_addr_const (f, x);
3596           break;
3597
3598         case CONST:
3599           if (GET_CODE (XEXP (x, 0)) == PLUS
3600               && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3601             {
3602               output_addr_const (f, x);
3603               break;
3604             }
3605           /* Fall through.  */
3606
3607         default:
3608           output_operand_lossage ("Unsupported operand for code '%c'", code);
3609         }
3610       break;
3611
3612     case 'e':
3613       /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
3614       {
3615         int n;
3616
3617         if (GET_CODE (x) != CONST_INT
3618             || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3619           {
3620             output_operand_lossage ("invalid operand for '%%%c'", code);
3621             return;
3622           }
3623
3624         switch (n)
3625           {
3626           case 3:
3627             fputc ('b', f);
3628             break;
3629           case 4:
3630             fputc ('h', f);
3631             break;
3632           case 5:
3633             fputc ('w', f);
3634             break;
3635           default:
3636             output_operand_lossage ("invalid operand for '%%%c'", code);
3637             return;
3638           }
3639       }
3640       break;
3641
3642     case 'p':
3643       {
3644         int n;
3645
3646         /* Print N such that 2^N == X.  */
3647         if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3648           {
3649             output_operand_lossage ("invalid operand for '%%%c'", code);
3650             return;
3651           }
3652
3653         asm_fprintf (f, "%d", n);
3654       }
3655       break;
3656
3657     case 'P':
3658       /* Print the number of non-zero bits in X (a const_int).  */
3659       if (GET_CODE (x) != CONST_INT)
3660         {
3661           output_operand_lossage ("invalid operand for '%%%c'", code);
3662           return;
3663         }
3664
3665       asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3666       break;
3667
3668     case 'H':
3669       /* Print the higher numbered register of a pair (TImode) of regs.  */
3670       if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3671         {
3672           output_operand_lossage ("invalid operand for '%%%c'", code);
3673           return;
3674         }
3675
3676       asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3677       break;
3678
3679     case 'm':
3680       /* Print a condition (eq, ne, etc).  */
3681
3682       /* CONST_TRUE_RTX means always -- that's the default.  */
3683       if (x == const_true_rtx)
3684         return;
3685
3686       if (!COMPARISON_P (x))
3687         {
3688           output_operand_lossage ("invalid operand for '%%%c'", code);
3689           return;
3690         }
3691
3692       fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3693       break;
3694
3695     case 'M':
3696       /* Print the inverse of a condition (eq <-> ne, etc).  */
3697
3698       /* CONST_TRUE_RTX means never -- that's the default.  */
3699       if (x == const_true_rtx)
3700         {
3701           fputs ("nv", f);
3702           return;
3703         }
3704
3705       if (!COMPARISON_P (x))
3706         {
3707           output_operand_lossage ("invalid operand for '%%%c'", code);
3708           return;
3709         }
3710
3711       fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3712                                   (aarch64_get_condition_code (x))], f);
3713       break;
3714
3715     case 'b':
3716     case 'h':
3717     case 's':
3718     case 'd':
3719     case 'q':
3720       /* Print a scalar FP/SIMD register name.  */
3721       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3722         {
3723           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3724           return;
3725         }
3726       asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3727       break;
3728
3729     case 'S':
3730     case 'T':
3731     case 'U':
3732     case 'V':
3733       /* Print the first FP/SIMD register name in a list.  */
3734       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3735         {
3736           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3737           return;
3738         }
3739       asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3740       break;
3741
3742     case 'X':
3743       /* Print bottom 16 bits of integer constant in hex.  */
3744       if (GET_CODE (x) != CONST_INT)
3745         {
3746           output_operand_lossage ("invalid operand for '%%%c'", code);
3747           return;
3748         }
3749       asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3750       break;
3751
3752     case 'w':
3753     case 'x':
3754       /* Print a general register name or the zero register (32-bit or
3755          64-bit).  */
3756       if (x == const0_rtx
3757           || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3758         {
3759           asm_fprintf (f, "%czr", code);
3760           break;
3761         }
3762
3763       if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3764         {
3765           asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3766           break;
3767         }
3768
3769       if (REG_P (x) && REGNO (x) == SP_REGNUM)
3770         {
3771           asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3772           break;
3773         }
3774
3775       /* Fall through */
3776
3777     case 0:
3778       /* Print a normal operand, if it's a general register, then we
3779          assume DImode.  */
3780       if (x == NULL)
3781         {
3782           output_operand_lossage ("missing operand");
3783           return;
3784         }
3785
3786       switch (GET_CODE (x))
3787         {
3788         case REG:
3789           asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3790           break;
3791
3792         case MEM:
3793           aarch64_memory_reference_mode = GET_MODE (x);
3794           output_address (XEXP (x, 0));
3795           break;
3796
3797         case LABEL_REF:
3798         case SYMBOL_REF:
3799           output_addr_const (asm_out_file, x);
3800           break;
3801
3802         case CONST_INT:
3803           asm_fprintf (f, "%wd", INTVAL (x));
3804           break;
3805
3806         case CONST_VECTOR:
3807           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3808             {
3809               gcc_assert (aarch64_const_vec_all_same_int_p (x,
3810                                                             HOST_WIDE_INT_MIN,
3811                                                             HOST_WIDE_INT_MAX));
3812               asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3813             }
3814           else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3815             {
3816               fputc ('0', f);
3817             }
3818           else
3819             gcc_unreachable ();
3820           break;
3821
3822         case CONST_DOUBLE:
3823           /* CONST_DOUBLE can represent a double-width integer.
3824              In this case, the mode of x is VOIDmode.  */
3825           if (GET_MODE (x) == VOIDmode)
3826             ; /* Do Nothing.  */
3827           else if (aarch64_float_const_zero_rtx_p (x))
3828             {
3829               fputc ('0', f);
3830               break;
3831             }
3832           else if (aarch64_float_const_representable_p (x))
3833             {
3834 #define buf_size 20
3835               char float_buf[buf_size] = {'\0'};
3836               REAL_VALUE_TYPE r;
3837               REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3838               real_to_decimal_for_mode (float_buf, &r,
3839                                         buf_size, buf_size,
3840                                         1, GET_MODE (x));
3841               asm_fprintf (asm_out_file, "%s", float_buf);
3842               break;
3843 #undef buf_size
3844             }
3845           output_operand_lossage ("invalid constant");
3846           return;
3847         default:
3848           output_operand_lossage ("invalid operand");
3849           return;
3850         }
3851       break;
3852
3853     case 'A':
3854       if (GET_CODE (x) == HIGH)
3855         x = XEXP (x, 0);
3856
3857       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3858         {
3859         case SYMBOL_SMALL_GOT:
3860           asm_fprintf (asm_out_file, ":got:");
3861           break;
3862
3863         case SYMBOL_SMALL_TLSGD:
3864           asm_fprintf (asm_out_file, ":tlsgd:");
3865           break;
3866
3867         case SYMBOL_SMALL_TLSDESC:
3868           asm_fprintf (asm_out_file, ":tlsdesc:");
3869           break;
3870
3871         case SYMBOL_SMALL_GOTTPREL:
3872           asm_fprintf (asm_out_file, ":gottprel:");
3873           break;
3874
3875         case SYMBOL_SMALL_TPREL:
3876           asm_fprintf (asm_out_file, ":tprel:");
3877           break;
3878
3879         case SYMBOL_TINY_GOT:
3880           gcc_unreachable ();
3881           break;
3882
3883         default:
3884           break;
3885         }
3886       output_addr_const (asm_out_file, x);
3887       break;
3888
3889     case 'L':
3890       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3891         {
3892         case SYMBOL_SMALL_GOT:
3893           asm_fprintf (asm_out_file, ":lo12:");
3894           break;
3895
3896         case SYMBOL_SMALL_TLSGD:
3897           asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3898           break;
3899
3900         case SYMBOL_SMALL_TLSDESC:
3901           asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3902           break;
3903
3904         case SYMBOL_SMALL_GOTTPREL:
3905           asm_fprintf (asm_out_file, ":gottprel_lo12:");
3906           break;
3907
3908         case SYMBOL_SMALL_TPREL:
3909           asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3910           break;
3911
3912         case SYMBOL_TINY_GOT:
3913           asm_fprintf (asm_out_file, ":got:");
3914           break;
3915
3916         default:
3917           break;
3918         }
3919       output_addr_const (asm_out_file, x);
3920       break;
3921
3922     case 'G':
3923
3924       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3925         {
3926         case SYMBOL_SMALL_TPREL:
3927           asm_fprintf (asm_out_file, ":tprel_hi12:");
3928           break;
3929         default:
3930           break;
3931         }
3932       output_addr_const (asm_out_file, x);
3933       break;
3934
3935     default:
3936       output_operand_lossage ("invalid operand prefix '%%%c'", code);
3937       return;
3938     }
3939 }
3940
3941 void
3942 aarch64_print_operand_address (FILE *f, rtx x)
3943 {
3944   struct aarch64_address_info addr;
3945
3946   if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3947                              MEM, true))
3948     switch (addr.type)
3949       {
3950       case ADDRESS_REG_IMM:
3951         if (addr.offset == const0_rtx)
3952           asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3953         else
3954           asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
3955                        INTVAL (addr.offset));
3956         return;
3957
3958       case ADDRESS_REG_REG:
3959         if (addr.shift == 0)
3960           asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
3961                        reg_names [REGNO (addr.offset)]);
3962         else
3963           asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
3964                        reg_names [REGNO (addr.offset)], addr.shift);
3965         return;
3966
3967       case ADDRESS_REG_UXTW:
3968         if (addr.shift == 0)
3969           asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
3970                        REGNO (addr.offset) - R0_REGNUM);
3971         else
3972           asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
3973                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
3974         return;
3975
3976       case ADDRESS_REG_SXTW:
3977         if (addr.shift == 0)
3978           asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
3979                        REGNO (addr.offset) - R0_REGNUM);
3980         else
3981           asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
3982                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
3983         return;
3984
3985       case ADDRESS_REG_WB:
3986         switch (GET_CODE (x))
3987           {
3988           case PRE_INC:
3989             asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
3990                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3991             return;
3992           case POST_INC:
3993             asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
3994                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3995             return;
3996           case PRE_DEC:
3997             asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
3998                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3999             return;
4000           case POST_DEC:
4001             asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4002                          GET_MODE_SIZE (aarch64_memory_reference_mode));
4003             return;
4004           case PRE_MODIFY:
4005             asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4006                          INTVAL (addr.offset));
4007             return;
4008           case POST_MODIFY:
4009             asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4010                          INTVAL (addr.offset));
4011             return;
4012           default:
4013             break;
4014           }
4015         break;
4016
4017       case ADDRESS_LO_SUM:
4018         asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4019         output_addr_const (f, addr.offset);
4020         asm_fprintf (f, "]");
4021         return;
4022
4023       case ADDRESS_SYMBOLIC:
4024         break;
4025       }
4026
4027   output_addr_const (f, x);
4028 }
4029
4030 bool
4031 aarch64_label_mentioned_p (rtx x)
4032 {
4033   const char *fmt;
4034   int i;
4035
4036   if (GET_CODE (x) == LABEL_REF)
4037     return true;
4038
4039   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4040      referencing instruction, but they are constant offsets, not
4041      symbols.  */
4042   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4043     return false;
4044
4045   fmt = GET_RTX_FORMAT (GET_CODE (x));
4046   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4047     {
4048       if (fmt[i] == 'E')
4049         {
4050           int j;
4051
4052           for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4053             if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4054               return 1;
4055         }
4056       else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4057         return 1;
4058     }
4059
4060   return 0;
4061 }
4062
4063 /* Implement REGNO_REG_CLASS.  */
4064
4065 enum reg_class
4066 aarch64_regno_regclass (unsigned regno)
4067 {
4068   if (GP_REGNUM_P (regno))
4069     return GENERAL_REGS;
4070
4071   if (regno == SP_REGNUM)
4072     return STACK_REG;
4073
4074   if (regno == FRAME_POINTER_REGNUM
4075       || regno == ARG_POINTER_REGNUM)
4076     return POINTER_REGS;
4077
4078   if (FP_REGNUM_P (regno))
4079     return FP_LO_REGNUM_P (regno) ?  FP_LO_REGS : FP_REGS;
4080
4081   return NO_REGS;
4082 }
4083
4084 /* Try a machine-dependent way of reloading an illegitimate address
4085    operand.  If we find one, push the reload and return the new rtx.  */
4086
4087 rtx
4088 aarch64_legitimize_reload_address (rtx *x_p,
4089                                    enum machine_mode mode,
4090                                    int opnum, int type,
4091                                    int ind_levels ATTRIBUTE_UNUSED)
4092 {
4093   rtx x = *x_p;
4094
4095   /* Do not allow mem (plus (reg, const)) if vector mode.  */
4096   if (aarch64_vector_mode_p (mode)
4097       && GET_CODE (x) == PLUS
4098       && REG_P (XEXP (x, 0))
4099       && CONST_INT_P (XEXP (x, 1)))
4100     {
4101       rtx orig_rtx = x;
4102       x = copy_rtx (x);
4103       push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4104                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4105                    opnum, (enum reload_type) type);
4106       return x;
4107     }
4108
4109   /* We must recognize output that we have already generated ourselves.  */
4110   if (GET_CODE (x) == PLUS
4111       && GET_CODE (XEXP (x, 0)) == PLUS
4112       && REG_P (XEXP (XEXP (x, 0), 0))
4113       && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4114       && CONST_INT_P (XEXP (x, 1)))
4115     {
4116       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4117                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4118                    opnum, (enum reload_type) type);
4119       return x;
4120     }
4121
4122   /* We wish to handle large displacements off a base register by splitting
4123      the addend across an add and the mem insn.  This can cut the number of
4124      extra insns needed from 3 to 1.  It is only useful for load/store of a
4125      single register with 12 bit offset field.  */
4126   if (GET_CODE (x) == PLUS
4127       && REG_P (XEXP (x, 0))
4128       && CONST_INT_P (XEXP (x, 1))
4129       && HARD_REGISTER_P (XEXP (x, 0))
4130       && mode != TImode
4131       && mode != TFmode
4132       && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4133     {
4134       HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4135       HOST_WIDE_INT low = val & 0xfff;
4136       HOST_WIDE_INT high = val - low;
4137       HOST_WIDE_INT offs;
4138       rtx cst;
4139       enum machine_mode xmode = GET_MODE (x);
4140
4141       /* In ILP32, xmode can be either DImode or SImode.  */
4142       gcc_assert (xmode == DImode || xmode == SImode);
4143
4144       /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
4145          BLKmode alignment.  */
4146       if (GET_MODE_SIZE (mode) == 0)
4147         return NULL_RTX;
4148
4149       offs = low % GET_MODE_SIZE (mode);
4150
4151       /* Align misaligned offset by adjusting high part to compensate.  */
4152       if (offs != 0)
4153         {
4154           if (aarch64_uimm12_shift (high + offs))
4155             {
4156               /* Align down.  */
4157               low = low - offs;
4158               high = high + offs;
4159             }
4160           else
4161             {
4162               /* Align up.  */
4163               offs = GET_MODE_SIZE (mode) - offs;
4164               low = low + offs;
4165               high = high + (low & 0x1000) - offs;
4166               low &= 0xfff;
4167             }
4168         }
4169
4170       /* Check for overflow.  */
4171       if (high + low != val)
4172         return NULL_RTX;
4173
4174       cst = GEN_INT (high);
4175       if (!aarch64_uimm12_shift (high))
4176         cst = force_const_mem (xmode, cst);
4177
4178       /* Reload high part into base reg, leaving the low part
4179          in the mem instruction.
4180          Note that replacing this gen_rtx_PLUS with plus_constant is
4181          wrong in this case because we rely on the
4182          (plus (plus reg c1) c2) structure being preserved so that
4183          XEXP (*p, 0) in push_reload below uses the correct term.  */
4184       x = gen_rtx_PLUS (xmode,
4185                         gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4186                         GEN_INT (low));
4187
4188       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4189                    BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4190                    opnum, (enum reload_type) type);
4191       return x;
4192     }
4193
4194   return NULL_RTX;
4195 }
4196
4197
4198 static reg_class_t
4199 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4200                           reg_class_t rclass,
4201                           enum machine_mode mode,
4202                           secondary_reload_info *sri)
4203 {
4204   /* Without the TARGET_SIMD instructions we cannot move a Q register
4205      to a Q register directly.  We need a scratch.  */
4206   if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4207       && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4208       && reg_class_subset_p (rclass, FP_REGS))
4209     {
4210       if (mode == TFmode)
4211         sri->icode = CODE_FOR_aarch64_reload_movtf;
4212       else if (mode == TImode)
4213         sri->icode = CODE_FOR_aarch64_reload_movti;
4214       return NO_REGS;
4215     }
4216
4217   /* A TFmode or TImode memory access should be handled via an FP_REGS
4218      because AArch64 has richer addressing modes for LDR/STR instructions
4219      than LDP/STP instructions.  */
4220   if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
4221       && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4222     return FP_REGS;
4223
4224   if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4225       return GENERAL_REGS;
4226
4227   return NO_REGS;
4228 }
4229
4230 static bool
4231 aarch64_can_eliminate (const int from, const int to)
4232 {
4233   /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4234      HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
4235
4236   if (frame_pointer_needed)
4237     {
4238       if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4239         return true;
4240       if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4241         return false;
4242       if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4243           && !cfun->calls_alloca)
4244         return true;
4245       if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4246         return true;
4247
4248       return false;
4249     }
4250
4251   return true;
4252 }
4253
4254 HOST_WIDE_INT
4255 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4256 {
4257   HOST_WIDE_INT frame_size;
4258   HOST_WIDE_INT offset;
4259
4260   aarch64_layout_frame ();
4261   frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4262                 + crtl->outgoing_args_size
4263                 + cfun->machine->saved_varargs_size);
4264
4265   frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4266   offset = frame_size;
4267
4268   if (to == HARD_FRAME_POINTER_REGNUM)
4269     {
4270       if (from == ARG_POINTER_REGNUM)
4271         return offset - crtl->outgoing_args_size;
4272
4273       if (from == FRAME_POINTER_REGNUM)
4274         return cfun->machine->frame.saved_regs_size + get_frame_size ();
4275     }
4276
4277   if (to == STACK_POINTER_REGNUM)
4278     {
4279       if (from == FRAME_POINTER_REGNUM)
4280         {
4281           HOST_WIDE_INT elim = crtl->outgoing_args_size
4282             + cfun->machine->frame.saved_regs_size
4283             + get_frame_size ();
4284           elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4285           return elim;
4286         }
4287     }
4288
4289   return offset;
4290 }
4291
4292
4293 /* Implement RETURN_ADDR_RTX.  We do not support moving back to a
4294    previous frame.  */
4295
4296 rtx
4297 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4298 {
4299   if (count != 0)
4300     return const0_rtx;
4301   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4302 }
4303
4304
4305 static void
4306 aarch64_asm_trampoline_template (FILE *f)
4307 {
4308   if (TARGET_ILP32)
4309     {
4310       asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4311       asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4312     }
4313   else
4314     {
4315       asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4316       asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4317     }
4318   asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4319   assemble_aligned_integer (4, const0_rtx);
4320   assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4321   assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4322 }
4323
4324 static void
4325 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4326 {
4327   rtx fnaddr, mem, a_tramp;
4328   const int tramp_code_sz = 16;
4329
4330   /* Don't need to copy the trailing D-words, we fill those in below.  */
4331   emit_block_move (m_tramp, assemble_trampoline_template (),
4332                    GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4333   mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4334   fnaddr = XEXP (DECL_RTL (fndecl), 0);
4335   if (GET_MODE (fnaddr) != ptr_mode)
4336     fnaddr = convert_memory_address (ptr_mode, fnaddr);
4337   emit_move_insn (mem, fnaddr);
4338
4339   mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4340   emit_move_insn (mem, chain_value);
4341
4342   /* XXX We should really define a "clear_cache" pattern and use
4343      gen_clear_cache().  */
4344   a_tramp = XEXP (m_tramp, 0);
4345   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4346                      LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4347                      plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4348                      ptr_mode);
4349 }
4350
4351 static unsigned char
4352 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4353 {
4354   switch (regclass)
4355     {
4356     case CALLER_SAVE_REGS:
4357     case POINTER_REGS:
4358     case GENERAL_REGS:
4359     case ALL_REGS:
4360     case FP_REGS:
4361     case FP_LO_REGS:
4362       return
4363         aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4364                                        (GET_MODE_SIZE (mode) + 7) / 8;
4365     case STACK_REG:
4366       return 1;
4367
4368     case NO_REGS:
4369       return 0;
4370
4371     default:
4372       break;
4373     }
4374   gcc_unreachable ();
4375 }
4376
4377 static reg_class_t
4378 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4379 {
4380   if (regclass == POINTER_REGS)
4381     return GENERAL_REGS;
4382
4383   if (regclass == STACK_REG)
4384     {
4385       if (REG_P(x)
4386           && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4387           return regclass;
4388
4389       return NO_REGS;
4390     }
4391
4392   /* If it's an integer immediate that MOVI can't handle, then
4393      FP_REGS is not an option, so we return NO_REGS instead.  */
4394   if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4395       && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4396     return NO_REGS;
4397
4398   /* Register eliminiation can result in a request for
4399      SP+constant->FP_REGS.  We cannot support such operations which
4400      use SP as source and an FP_REG as destination, so reject out
4401      right now.  */
4402   if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4403     {
4404       rtx lhs = XEXP (x, 0);
4405
4406       /* Look through a possible SUBREG introduced by ILP32.  */
4407       if (GET_CODE (lhs) == SUBREG)
4408         lhs = SUBREG_REG (lhs);
4409
4410       gcc_assert (REG_P (lhs));
4411       gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4412                                       POINTER_REGS));
4413       return NO_REGS;
4414     }
4415
4416   return regclass;
4417 }
4418
4419 void
4420 aarch64_asm_output_labelref (FILE* f, const char *name)
4421 {
4422   asm_fprintf (f, "%U%s", name);
4423 }
4424
4425 static void
4426 aarch64_elf_asm_constructor (rtx symbol, int priority)
4427 {
4428   if (priority == DEFAULT_INIT_PRIORITY)
4429     default_ctor_section_asm_out_constructor (symbol, priority);
4430   else
4431     {
4432       section *s;
4433       char buf[18];
4434       snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4435       s = get_section (buf, SECTION_WRITE, NULL);
4436       switch_to_section (s);
4437       assemble_align (POINTER_SIZE);
4438       assemble_aligned_integer (POINTER_BYTES, symbol);
4439     }
4440 }
4441
4442 static void
4443 aarch64_elf_asm_destructor (rtx symbol, int priority)
4444 {
4445   if (priority == DEFAULT_INIT_PRIORITY)
4446     default_dtor_section_asm_out_destructor (symbol, priority);
4447   else
4448     {
4449       section *s;
4450       char buf[18];
4451       snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4452       s = get_section (buf, SECTION_WRITE, NULL);
4453       switch_to_section (s);
4454       assemble_align (POINTER_SIZE);
4455       assemble_aligned_integer (POINTER_BYTES, symbol);
4456     }
4457 }
4458
4459 const char*
4460 aarch64_output_casesi (rtx *operands)
4461 {
4462   char buf[100];
4463   char label[100];
4464   rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4465   int index;
4466   static const char *const patterns[4][2] =
4467   {
4468     {
4469       "ldrb\t%w3, [%0,%w1,uxtw]",
4470       "add\t%3, %4, %w3, sxtb #2"
4471     },
4472     {
4473       "ldrh\t%w3, [%0,%w1,uxtw #1]",
4474       "add\t%3, %4, %w3, sxth #2"
4475     },
4476     {
4477       "ldr\t%w3, [%0,%w1,uxtw #2]",
4478       "add\t%3, %4, %w3, sxtw #2"
4479     },
4480     /* We assume that DImode is only generated when not optimizing and
4481        that we don't really need 64-bit address offsets.  That would
4482        imply an object file with 8GB of code in a single function!  */
4483     {
4484       "ldr\t%w3, [%0,%w1,uxtw #2]",
4485       "add\t%3, %4, %w3, sxtw #2"
4486     }
4487   };
4488
4489   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4490
4491   index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4492
4493   gcc_assert (index >= 0 && index <= 3);
4494
4495   /* Need to implement table size reduction, by chaning the code below.  */
4496   output_asm_insn (patterns[index][0], operands);
4497   ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4498   snprintf (buf, sizeof (buf),
4499             "adr\t%%4, %s", targetm.strip_name_encoding (label));
4500   output_asm_insn (buf, operands);
4501   output_asm_insn (patterns[index][1], operands);
4502   output_asm_insn ("br\t%3", operands);
4503   assemble_label (asm_out_file, label);
4504   return "";
4505 }
4506
4507
4508 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4509    masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4510    operator.  */
4511
4512 int
4513 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4514 {
4515   if (shift >= 0 && shift <= 3)
4516     {
4517       int size;
4518       for (size = 8; size <= 32; size *= 2)
4519         {
4520           HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4521           if (mask == bits << shift)
4522             return size;
4523         }
4524     }
4525   return 0;
4526 }
4527
4528 static bool
4529 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4530                                    const_rtx x ATTRIBUTE_UNUSED)
4531 {
4532   /* We can't use blocks for constants when we're using a per-function
4533      constant pool.  */
4534   return false;
4535 }
4536
4537 static section *
4538 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4539                             rtx x ATTRIBUTE_UNUSED,
4540                             unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4541 {
4542   /* Force all constant pool entries into the current function section.  */
4543   return function_section (current_function_decl);
4544 }
4545
4546
4547 /* Costs.  */
4548
4549 /* Helper function for rtx cost calculation.  Strip a shift expression
4550    from X.  Returns the inner operand if successful, or the original
4551    expression on failure.  */
4552 static rtx
4553 aarch64_strip_shift (rtx x)
4554 {
4555   rtx op = x;
4556
4557   /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4558      we can convert both to ROR during final output.  */
4559   if ((GET_CODE (op) == ASHIFT
4560        || GET_CODE (op) == ASHIFTRT
4561        || GET_CODE (op) == LSHIFTRT
4562        || GET_CODE (op) == ROTATERT
4563        || GET_CODE (op) == ROTATE)
4564       && CONST_INT_P (XEXP (op, 1)))
4565     return XEXP (op, 0);
4566
4567   if (GET_CODE (op) == MULT
4568       && CONST_INT_P (XEXP (op, 1))
4569       && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4570     return XEXP (op, 0);
4571
4572   return x;
4573 }
4574
4575 /* Helper function for rtx cost calculation.  Strip an extend
4576    expression from X.  Returns the inner operand if successful, or the
4577    original expression on failure.  We deal with a number of possible
4578    canonicalization variations here.  */
4579 static rtx
4580 aarch64_strip_extend (rtx x)
4581 {
4582   rtx op = x;
4583
4584   /* Zero and sign extraction of a widened value.  */
4585   if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4586       && XEXP (op, 2) == const0_rtx
4587       && GET_CODE (XEXP (op, 0)) == MULT
4588       && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4589                                          XEXP (op, 1)))
4590     return XEXP (XEXP (op, 0), 0);
4591
4592   /* It can also be represented (for zero-extend) as an AND with an
4593      immediate.  */
4594   if (GET_CODE (op) == AND
4595       && GET_CODE (XEXP (op, 0)) == MULT
4596       && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4597       && CONST_INT_P (XEXP (op, 1))
4598       && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4599                            INTVAL (XEXP (op, 1))) != 0)
4600     return XEXP (XEXP (op, 0), 0);
4601
4602   /* Now handle extended register, as this may also have an optional
4603      left shift by 1..4.  */
4604   if (GET_CODE (op) == ASHIFT
4605       && CONST_INT_P (XEXP (op, 1))
4606       && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4607     op = XEXP (op, 0);
4608
4609   if (GET_CODE (op) == ZERO_EXTEND
4610       || GET_CODE (op) == SIGN_EXTEND)
4611     op = XEXP (op, 0);
4612
4613   if (op != x)
4614     return op;
4615
4616   return x;
4617 }
4618
4619 /* Helper function for rtx cost calculation.  Calculate the cost of
4620    a MULT, which may be part of a multiply-accumulate rtx.  Return
4621    the calculated cost of the expression, recursing manually in to
4622    operands where needed.  */
4623
4624 static int
4625 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4626 {
4627   rtx op0, op1;
4628   const struct cpu_cost_table *extra_cost
4629     = aarch64_tune_params->insn_extra_cost;
4630   int cost = 0;
4631   bool maybe_fma = (outer == PLUS || outer == MINUS);
4632   enum machine_mode mode = GET_MODE (x);
4633
4634   gcc_checking_assert (code == MULT);
4635
4636   op0 = XEXP (x, 0);
4637   op1 = XEXP (x, 1);
4638
4639   if (VECTOR_MODE_P (mode))
4640     mode = GET_MODE_INNER (mode);
4641
4642   /* Integer multiply/fma.  */
4643   if (GET_MODE_CLASS (mode) == MODE_INT)
4644     {
4645       /* The multiply will be canonicalized as a shift, cost it as such.  */
4646       if (CONST_INT_P (op1)
4647           && exact_log2 (INTVAL (op1)) > 0)
4648         {
4649           if (speed)
4650             {
4651               if (maybe_fma)
4652                 /* ADD (shifted register).  */
4653                 cost += extra_cost->alu.arith_shift;
4654               else
4655                 /* LSL (immediate).  */
4656                 cost += extra_cost->alu.shift;
4657             }
4658
4659           cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4660
4661           return cost;
4662         }
4663
4664       /* Integer multiplies or FMAs have zero/sign extending variants.  */
4665       if ((GET_CODE (op0) == ZERO_EXTEND
4666            && GET_CODE (op1) == ZERO_EXTEND)
4667           || (GET_CODE (op0) == SIGN_EXTEND
4668               && GET_CODE (op1) == SIGN_EXTEND))
4669         {
4670           cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4671                   + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4672
4673           if (speed)
4674             {
4675               if (maybe_fma)
4676                 /* MADD/SMADDL/UMADDL.  */
4677                 cost += extra_cost->mult[0].extend_add;
4678               else
4679                 /* MUL/SMULL/UMULL.  */
4680                 cost += extra_cost->mult[0].extend;
4681             }
4682
4683           return cost;
4684         }
4685
4686       /* This is either an integer multiply or an FMA.  In both cases
4687          we want to recurse and cost the operands.  */
4688       cost += rtx_cost (op0, MULT, 0, speed)
4689               + rtx_cost (op1, MULT, 1, speed);
4690
4691       if (speed)
4692         {
4693           if (maybe_fma)
4694             /* MADD.  */
4695             cost += extra_cost->mult[mode == DImode].add;
4696           else
4697             /* MUL.  */
4698             cost += extra_cost->mult[mode == DImode].simple;
4699         }
4700
4701       return cost;
4702     }
4703   else
4704     {
4705       if (speed)
4706         {
4707           /* Floating-point FMA/FMUL can also support negations of the
4708              operands.  */
4709           if (GET_CODE (op0) == NEG)
4710             op0 = XEXP (op0, 0);
4711           if (GET_CODE (op1) == NEG)
4712             op1 = XEXP (op1, 0);
4713
4714           if (maybe_fma)
4715             /* FMADD/FNMADD/FNMSUB/FMSUB.  */
4716             cost += extra_cost->fp[mode == DFmode].fma;
4717           else
4718             /* FMUL/FNMUL.  */
4719             cost += extra_cost->fp[mode == DFmode].mult;
4720         }
4721
4722       cost += rtx_cost (op0, MULT, 0, speed)
4723               + rtx_cost (op1, MULT, 1, speed);
4724       return cost;
4725     }
4726 }
4727
4728 static int
4729 aarch64_address_cost (rtx x,
4730                       enum machine_mode mode,
4731                       addr_space_t as ATTRIBUTE_UNUSED,
4732                       bool speed)
4733 {
4734   enum rtx_code c = GET_CODE (x);
4735   const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4736   struct aarch64_address_info info;
4737   int cost = 0;
4738   info.shift = 0;
4739
4740   if (!aarch64_classify_address (&info, x, mode, c, false))
4741     {
4742       if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4743         {
4744           /* This is a CONST or SYMBOL ref which will be split
4745              in a different way depending on the code model in use.
4746              Cost it through the generic infrastructure.  */
4747           int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4748           /* Divide through by the cost of one instruction to
4749              bring it to the same units as the address costs.  */
4750           cost_symbol_ref /= COSTS_N_INSNS (1);
4751           /* The cost is then the cost of preparing the address,
4752              followed by an immediate (possibly 0) offset.  */
4753           return cost_symbol_ref + addr_cost->imm_offset;
4754         }
4755       else
4756         {
4757           /* This is most likely a jump table from a case
4758              statement.  */
4759           return addr_cost->register_offset;
4760         }
4761     }
4762
4763   switch (info.type)
4764     {
4765       case ADDRESS_LO_SUM:
4766       case ADDRESS_SYMBOLIC:
4767       case ADDRESS_REG_IMM:
4768         cost += addr_cost->imm_offset;
4769         break;
4770
4771       case ADDRESS_REG_WB:
4772         if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4773           cost += addr_cost->pre_modify;
4774         else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4775           cost += addr_cost->post_modify;
4776         else
4777           gcc_unreachable ();
4778
4779         break;
4780
4781       case ADDRESS_REG_REG:
4782         cost += addr_cost->register_offset;
4783         break;
4784
4785       case ADDRESS_REG_UXTW:
4786       case ADDRESS_REG_SXTW:
4787         cost += addr_cost->register_extend;
4788         break;
4789
4790       default:
4791         gcc_unreachable ();
4792     }
4793
4794
4795   if (info.shift > 0)
4796     {
4797       /* For the sake of calculating the cost of the shifted register
4798          component, we can treat same sized modes in the same way.  */
4799       switch (GET_MODE_BITSIZE (mode))
4800         {
4801           case 16:
4802             cost += addr_cost->addr_scale_costs.hi;
4803             break;
4804
4805           case 32:
4806             cost += addr_cost->addr_scale_costs.si;
4807             break;
4808
4809           case 64:
4810             cost += addr_cost->addr_scale_costs.di;
4811             break;
4812
4813           /* We can't tell, or this is a 128-bit vector.  */
4814           default:
4815             cost += addr_cost->addr_scale_costs.ti;
4816             break;
4817         }
4818     }
4819
4820   return cost;
4821 }
4822
4823 /* Return true if the RTX X in mode MODE is a zero or sign extract
4824    usable in an ADD or SUB (extended register) instruction.  */
4825 static bool
4826 aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
4827 {
4828   /* Catch add with a sign extract.
4829      This is add_<optab><mode>_multp2.  */
4830   if (GET_CODE (x) == SIGN_EXTRACT
4831       || GET_CODE (x) == ZERO_EXTRACT)
4832     {
4833       rtx op0 = XEXP (x, 0);
4834       rtx op1 = XEXP (x, 1);
4835       rtx op2 = XEXP (x, 2);
4836
4837       if (GET_CODE (op0) == MULT
4838           && CONST_INT_P (op1)
4839           && op2 == const0_rtx
4840           && CONST_INT_P (XEXP (op0, 1))
4841           && aarch64_is_extend_from_extract (mode,
4842                                              XEXP (op0, 1),
4843                                              op1))
4844         {
4845           return true;
4846         }
4847     }
4848
4849   return false;
4850 }
4851
4852 /* Calculate the cost of calculating X, storing it in *COST.  Result
4853    is true if the total cost of the operation has now been calculated.  */
4854 static bool
4855 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4856                    int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4857 {
4858   rtx op0, op1, op2;
4859   const struct cpu_cost_table *extra_cost
4860     = aarch64_tune_params->insn_extra_cost;
4861   enum machine_mode mode = GET_MODE (x);
4862
4863   /* By default, assume that everything has equivalent cost to the
4864      cheapest instruction.  Any additional costs are applied as a delta
4865      above this default.  */
4866   *cost = COSTS_N_INSNS (1);
4867
4868   /* TODO: The cost infrastructure currently does not handle
4869      vector operations.  Assume that all vector operations
4870      are equally expensive.  */
4871   if (VECTOR_MODE_P (mode))
4872     {
4873       if (speed)
4874         *cost += extra_cost->vect.alu;
4875       return true;
4876     }
4877
4878   switch (code)
4879     {
4880     case SET:
4881       /* The cost depends entirely on the operands to SET.  */
4882       *cost = 0;
4883       op0 = SET_DEST (x);
4884       op1 = SET_SRC (x);
4885
4886       switch (GET_CODE (op0))
4887         {
4888         case MEM:
4889           if (speed)
4890             {
4891               rtx address = XEXP (op0, 0);
4892               if (GET_MODE_CLASS (mode) == MODE_INT)
4893                 *cost += extra_cost->ldst.store;
4894               else if (mode == SFmode)
4895                 *cost += extra_cost->ldst.storef;
4896               else if (mode == DFmode)
4897                 *cost += extra_cost->ldst.stored;
4898
4899               *cost +=
4900                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4901                                                      0, speed));
4902             }
4903
4904           *cost += rtx_cost (op1, SET, 1, speed);
4905           return true;
4906
4907         case SUBREG:
4908           if (! REG_P (SUBREG_REG (op0)))
4909             *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4910
4911           /* Fall through.  */
4912         case REG:
4913           /* const0_rtx is in general free, but we will use an
4914              instruction to set a register to 0.  */
4915           if (REG_P (op1) || op1 == const0_rtx)
4916             {
4917               /* The cost is 1 per register copied.  */
4918               int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
4919                               / UNITS_PER_WORD;
4920               *cost = COSTS_N_INSNS (n_minus_1 + 1);
4921             }
4922           else
4923             /* Cost is just the cost of the RHS of the set.  */
4924             *cost += rtx_cost (op1, SET, 1, speed);
4925           return true;
4926
4927         case ZERO_EXTRACT:
4928         case SIGN_EXTRACT:
4929           /* Bit-field insertion.  Strip any redundant widening of
4930              the RHS to meet the width of the target.  */
4931           if (GET_CODE (op1) == SUBREG)
4932             op1 = SUBREG_REG (op1);
4933           if ((GET_CODE (op1) == ZERO_EXTEND
4934                || GET_CODE (op1) == SIGN_EXTEND)
4935               && GET_CODE (XEXP (op0, 1)) == CONST_INT
4936               && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4937                   >= INTVAL (XEXP (op0, 1))))
4938             op1 = XEXP (op1, 0);
4939
4940           if (CONST_INT_P (op1))
4941             {
4942               /* MOV immediate is assumed to always be cheap.  */
4943               *cost = COSTS_N_INSNS (1);
4944             }
4945           else
4946             {
4947               /* BFM.  */
4948               if (speed)
4949                 *cost += extra_cost->alu.bfi;
4950               *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
4951             }
4952
4953           return true;
4954
4955         default:
4956           /* We can't make sense of this, assume default cost.  */
4957           *cost = COSTS_N_INSNS (1);
4958           break;
4959         }
4960       return false;
4961
4962     case CONST_INT:
4963       /* If an instruction can incorporate a constant within the
4964          instruction, the instruction's expression avoids calling
4965          rtx_cost() on the constant.  If rtx_cost() is called on a
4966          constant, then it is usually because the constant must be
4967          moved into a register by one or more instructions.
4968
4969          The exception is constant 0, which can be expressed
4970          as XZR/WZR and is therefore free.  The exception to this is
4971          if we have (set (reg) (const0_rtx)) in which case we must cost
4972          the move.  However, we can catch that when we cost the SET, so
4973          we don't need to consider that here.  */
4974       if (x == const0_rtx)
4975         *cost = 0;
4976       else
4977         {
4978           /* To an approximation, building any other constant is
4979              proportionally expensive to the number of instructions
4980              required to build that constant.  This is true whether we
4981              are compiling for SPEED or otherwise.  */
4982           *cost = COSTS_N_INSNS (aarch64_build_constant (0,
4983                                                          INTVAL (x),
4984                                                          false));
4985         }
4986       return true;
4987
4988     case CONST_DOUBLE:
4989       if (speed)
4990         {
4991           /* mov[df,sf]_aarch64.  */
4992           if (aarch64_float_const_representable_p (x))
4993             /* FMOV (scalar immediate).  */
4994             *cost += extra_cost->fp[mode == DFmode].fpconst;
4995           else if (!aarch64_float_const_zero_rtx_p (x))
4996             {
4997               /* This will be a load from memory.  */
4998               if (mode == DFmode)
4999                 *cost += extra_cost->ldst.loadd;
5000               else
5001                 *cost += extra_cost->ldst.loadf;
5002             }
5003           else
5004             /* Otherwise this is +0.0.  We get this using MOVI d0, #0
5005                or MOV v0.s[0], wzr - neither of which are modeled by the
5006                cost tables.  Just use the default cost.  */
5007             {
5008             }
5009         }
5010
5011       return true;
5012
5013     case MEM:
5014       if (speed)
5015         {
5016           /* For loads we want the base cost of a load, plus an
5017              approximation for the additional cost of the addressing
5018              mode.  */
5019           rtx address = XEXP (x, 0);
5020           if (GET_MODE_CLASS (mode) == MODE_INT)
5021             *cost += extra_cost->ldst.load;
5022           else if (mode == SFmode)
5023             *cost += extra_cost->ldst.loadf;
5024           else if (mode == DFmode)
5025             *cost += extra_cost->ldst.loadd;
5026
5027           *cost +=
5028                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5029                                                      0, speed));
5030         }
5031
5032       return true;
5033
5034     case NEG:
5035       op0 = XEXP (x, 0);
5036
5037       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5038        {
5039           if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5040               || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5041             {
5042               /* CSETM.  */
5043               *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5044               return true;
5045             }
5046
5047           /* Cost this as SUB wzr, X.  */
5048           op0 = CONST0_RTX (GET_MODE (x));
5049           op1 = XEXP (x, 0);
5050           goto cost_minus;
5051         }
5052
5053       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5054         {
5055           /* Support (neg(fma...)) as a single instruction only if
5056              sign of zeros is unimportant.  This matches the decision
5057              making in aarch64.md.  */
5058           if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5059             {
5060               /* FNMADD.  */
5061               *cost = rtx_cost (op0, NEG, 0, speed);
5062               return true;
5063             }
5064           if (speed)
5065             /* FNEG.  */
5066             *cost += extra_cost->fp[mode == DFmode].neg;
5067           return false;
5068         }
5069
5070       return false;
5071
5072     case COMPARE:
5073       op0 = XEXP (x, 0);
5074       op1 = XEXP (x, 1);
5075
5076       if (op1 == const0_rtx
5077           && GET_CODE (op0) == AND)
5078         {
5079           x = op0;
5080           goto cost_logic;
5081         }
5082
5083       if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5084         {
5085           /* TODO: A write to the CC flags possibly costs extra, this
5086              needs encoding in the cost tables.  */
5087
5088           /* CC_ZESWPmode supports zero extend for free.  */
5089           if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5090             op0 = XEXP (op0, 0);
5091
5092           /* ANDS.  */
5093           if (GET_CODE (op0) == AND)
5094             {
5095               x = op0;
5096               goto cost_logic;
5097             }
5098
5099           if (GET_CODE (op0) == PLUS)
5100             {
5101               /* ADDS (and CMN alias).  */
5102               x = op0;
5103               goto cost_plus;
5104             }
5105
5106           if (GET_CODE (op0) == MINUS)
5107             {
5108               /* SUBS.  */
5109               x = op0;
5110               goto cost_minus;
5111             }
5112
5113           if (GET_CODE (op1) == NEG)
5114             {
5115               /* CMN.  */
5116               if (speed)
5117                 *cost += extra_cost->alu.arith;
5118
5119               *cost += rtx_cost (op0, COMPARE, 0, speed);
5120               *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5121               return true;
5122             }
5123
5124           /* CMP.
5125
5126              Compare can freely swap the order of operands, and
5127              canonicalization puts the more complex operation first.
5128              But the integer MINUS logic expects the shift/extend
5129              operation in op1.  */
5130           if (! (REG_P (op0)
5131                  || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5132           {
5133             op0 = XEXP (x, 1);
5134             op1 = XEXP (x, 0);
5135           }
5136           goto cost_minus;
5137         }
5138
5139       if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5140         {
5141           /* FCMP.  */
5142           if (speed)
5143             *cost += extra_cost->fp[mode == DFmode].compare;
5144
5145           if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5146             {
5147               /* FCMP supports constant 0.0 for no extra cost. */
5148               return true;
5149             }
5150           return false;
5151         }
5152
5153       return false;
5154
5155     case MINUS:
5156       {
5157         op0 = XEXP (x, 0);
5158         op1 = XEXP (x, 1);
5159
5160 cost_minus:
5161         /* Detect valid immediates.  */
5162         if ((GET_MODE_CLASS (mode) == MODE_INT
5163              || (GET_MODE_CLASS (mode) == MODE_CC
5164                  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5165             && CONST_INT_P (op1)
5166             && aarch64_uimm12_shift (INTVAL (op1)))
5167           {
5168             *cost += rtx_cost (op0, MINUS, 0, speed);
5169
5170             if (speed)
5171               /* SUB(S) (immediate).  */
5172               *cost += extra_cost->alu.arith;
5173             return true;
5174
5175           }
5176
5177         /* Look for SUB (extended register).  */
5178         if (aarch64_rtx_arith_op_extract_p (op1, mode))
5179           {
5180             if (speed)
5181               *cost += extra_cost->alu.arith_shift;
5182
5183             *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5184                                (enum rtx_code) GET_CODE (op1),
5185                                0, speed);
5186             return true;
5187           }
5188
5189         rtx new_op1 = aarch64_strip_extend (op1);
5190
5191         /* Cost this as an FMA-alike operation.  */
5192         if ((GET_CODE (new_op1) == MULT
5193              || GET_CODE (new_op1) == ASHIFT)
5194             && code != COMPARE)
5195           {
5196             *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5197                                             (enum rtx_code) code,
5198                                             speed);
5199             *cost += rtx_cost (op0, MINUS, 0, speed);
5200             return true;
5201           }
5202
5203         *cost += rtx_cost (new_op1, MINUS, 1, speed);
5204
5205         if (speed)
5206           {
5207             if (GET_MODE_CLASS (mode) == MODE_INT)
5208               /* SUB(S).  */
5209               *cost += extra_cost->alu.arith;
5210             else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5211               /* FSUB.  */
5212               *cost += extra_cost->fp[mode == DFmode].addsub;
5213           }
5214         return true;
5215       }
5216
5217     case PLUS:
5218       {
5219         rtx new_op0;
5220
5221         op0 = XEXP (x, 0);
5222         op1 = XEXP (x, 1);
5223
5224 cost_plus:
5225         if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5226             || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5227           {
5228             /* CSINC.  */
5229             *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5230             *cost += rtx_cost (op1, PLUS, 1, speed);
5231             return true;
5232           }
5233
5234         if (GET_MODE_CLASS (mode) == MODE_INT
5235             && CONST_INT_P (op1)
5236             && aarch64_uimm12_shift (INTVAL (op1)))
5237           {
5238             *cost += rtx_cost (op0, PLUS, 0, speed);
5239
5240             if (speed)
5241               /* ADD (immediate).  */
5242               *cost += extra_cost->alu.arith;
5243             return true;
5244           }
5245
5246         /* Look for ADD (extended register).  */
5247         if (aarch64_rtx_arith_op_extract_p (op0, mode))
5248           {
5249             if (speed)
5250               *cost += extra_cost->alu.arith_shift;
5251
5252             *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5253                                (enum rtx_code) GET_CODE (op0),
5254                                0, speed);
5255             return true;
5256           }
5257
5258         /* Strip any extend, leave shifts behind as we will
5259            cost them through mult_cost.  */
5260         new_op0 = aarch64_strip_extend (op0);
5261
5262         if (GET_CODE (new_op0) == MULT
5263             || GET_CODE (new_op0) == ASHIFT)
5264           {
5265             *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5266                                             speed);
5267             *cost += rtx_cost (op1, PLUS, 1, speed);
5268             return true;
5269           }
5270
5271         *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5272                   + rtx_cost (op1, PLUS, 1, speed));
5273
5274         if (speed)
5275           {
5276             if (GET_MODE_CLASS (mode) == MODE_INT)
5277               /* ADD.  */
5278               *cost += extra_cost->alu.arith;
5279             else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5280               /* FADD.  */
5281               *cost += extra_cost->fp[mode == DFmode].addsub;
5282           }
5283         return true;
5284       }
5285
5286     case BSWAP:
5287       *cost = COSTS_N_INSNS (1);
5288
5289       if (speed)
5290         *cost += extra_cost->alu.rev;
5291
5292       return false;
5293
5294     case IOR:
5295       if (aarch_rev16_p (x))
5296         {
5297           *cost = COSTS_N_INSNS (1);
5298
5299           if (speed)
5300             *cost += extra_cost->alu.rev;
5301
5302           return true;
5303         }
5304     /* Fall through.  */
5305     case XOR:
5306     case AND:
5307     cost_logic:
5308       op0 = XEXP (x, 0);
5309       op1 = XEXP (x, 1);
5310
5311       if (code == AND
5312           && GET_CODE (op0) == MULT
5313           && CONST_INT_P (XEXP (op0, 1))
5314           && CONST_INT_P (op1)
5315           && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5316                                INTVAL (op1)) != 0)
5317         {
5318           /* This is a UBFM/SBFM.  */
5319           *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5320           if (speed)
5321             *cost += extra_cost->alu.bfx;
5322           return true;
5323         }
5324
5325       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5326         {
5327           /* We possibly get the immediate for free, this is not
5328              modelled.  */
5329           if (CONST_INT_P (op1)
5330               && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5331             {
5332               *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5333
5334               if (speed)
5335                 *cost += extra_cost->alu.logical;
5336
5337               return true;
5338             }
5339           else
5340             {
5341               rtx new_op0 = op0;
5342
5343               /* Handle ORN, EON, or BIC.  */
5344               if (GET_CODE (op0) == NOT)
5345                 op0 = XEXP (op0, 0);
5346
5347               new_op0 = aarch64_strip_shift (op0);
5348
5349               /* If we had a shift on op0 then this is a logical-shift-
5350                  by-register/immediate operation.  Otherwise, this is just
5351                  a logical operation.  */
5352               if (speed)
5353                 {
5354                   if (new_op0 != op0)
5355                     {
5356                       /* Shift by immediate.  */
5357                       if (CONST_INT_P (XEXP (op0, 1)))
5358                         *cost += extra_cost->alu.log_shift;
5359                       else
5360                         *cost += extra_cost->alu.log_shift_reg;
5361                     }
5362                   else
5363                     *cost += extra_cost->alu.logical;
5364                 }
5365
5366               /* In both cases we want to cost both operands.  */
5367               *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5368                        + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5369
5370               return true;
5371             }
5372         }
5373       return false;
5374
5375     case NOT:
5376       /* MVN.  */
5377       if (speed)
5378         *cost += extra_cost->alu.logical;
5379
5380       /* The logical instruction could have the shifted register form,
5381          but the cost is the same if the shift is processed as a separate
5382          instruction, so we don't bother with it here.  */
5383       return false;
5384
5385     case ZERO_EXTEND:
5386
5387       op0 = XEXP (x, 0);
5388       /* If a value is written in SI mode, then zero extended to DI
5389          mode, the operation will in general be free as a write to
5390          a 'w' register implicitly zeroes the upper bits of an 'x'
5391          register.  However, if this is
5392
5393            (set (reg) (zero_extend (reg)))
5394
5395          we must cost the explicit register move.  */
5396       if (mode == DImode
5397           && GET_MODE (op0) == SImode
5398           && outer == SET)
5399         {
5400           int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5401
5402           if (!op_cost && speed)
5403             /* MOV.  */
5404             *cost += extra_cost->alu.extend;
5405           else
5406             /* Free, the cost is that of the SI mode operation.  */
5407             *cost = op_cost;
5408
5409           return true;
5410         }
5411       else if (MEM_P (XEXP (x, 0)))
5412         {
5413           /* All loads can zero extend to any size for free.  */
5414           *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5415           return true;
5416         }
5417
5418       /* UXTB/UXTH.  */
5419       if (speed)
5420         *cost += extra_cost->alu.extend;
5421
5422       return false;
5423
5424     case SIGN_EXTEND:
5425       if (MEM_P (XEXP (x, 0)))
5426         {
5427           /* LDRSH.  */
5428           if (speed)
5429             {
5430               rtx address = XEXP (XEXP (x, 0), 0);
5431               *cost += extra_cost->ldst.load_sign_extend;
5432
5433               *cost +=
5434                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5435                                                      0, speed));
5436             }
5437           return true;
5438         }
5439
5440       if (speed)
5441         *cost += extra_cost->alu.extend;
5442       return false;
5443
5444     case ASHIFT:
5445       op0 = XEXP (x, 0);
5446       op1 = XEXP (x, 1);
5447
5448       if (CONST_INT_P (op1))
5449         {
5450           /* LSL (immediate), UBMF, UBFIZ and friends.  These are all
5451              aliases.  */
5452           if (speed)
5453             *cost += extra_cost->alu.shift;
5454
5455           /* We can incorporate zero/sign extend for free.  */
5456           if (GET_CODE (op0) == ZERO_EXTEND
5457               || GET_CODE (op0) == SIGN_EXTEND)
5458             op0 = XEXP (op0, 0);
5459
5460           *cost += rtx_cost (op0, ASHIFT, 0, speed);
5461           return true;
5462         }
5463       else
5464         {
5465           /* LSLV.  */
5466           if (speed)
5467             *cost += extra_cost->alu.shift_reg;
5468
5469           return false;  /* All arguments need to be in registers.  */
5470         }
5471
5472     case ROTATE:
5473     case ROTATERT:
5474     case LSHIFTRT:
5475     case ASHIFTRT:
5476       op0 = XEXP (x, 0);
5477       op1 = XEXP (x, 1);
5478
5479       if (CONST_INT_P (op1))
5480         {
5481           /* ASR (immediate) and friends.  */
5482           if (speed)
5483             *cost += extra_cost->alu.shift;
5484
5485           *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5486           return true;
5487         }
5488       else
5489         {
5490
5491           /* ASR (register) and friends.  */
5492           if (speed)
5493             *cost += extra_cost->alu.shift_reg;
5494
5495           return false;  /* All arguments need to be in registers.  */
5496         }
5497
5498     case SYMBOL_REF:
5499
5500       if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5501         {
5502           /* LDR.  */
5503           if (speed)
5504             *cost += extra_cost->ldst.load;
5505         }
5506       else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
5507                || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
5508         {
5509           /* ADRP, followed by ADD.  */
5510           *cost += COSTS_N_INSNS (1);
5511           if (speed)
5512             *cost += 2 * extra_cost->alu.arith;
5513         }
5514       else if (aarch64_cmodel == AARCH64_CMODEL_TINY
5515                || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
5516         {
5517           /* ADR.  */
5518           if (speed)
5519             *cost += extra_cost->alu.arith;
5520         }
5521
5522       if (flag_pic)
5523         {
5524           /* One extra load instruction, after accessing the GOT.  */
5525           *cost += COSTS_N_INSNS (1);
5526           if (speed)
5527             *cost += extra_cost->ldst.load;
5528         }
5529       return true;
5530
5531     case HIGH:
5532     case LO_SUM:
5533       /* ADRP/ADD (immediate).  */
5534       if (speed)
5535         *cost += extra_cost->alu.arith;
5536       return true;
5537
5538     case ZERO_EXTRACT:
5539     case SIGN_EXTRACT:
5540       /* UBFX/SBFX.  */
5541       if (speed)
5542         *cost += extra_cost->alu.bfx;
5543
5544       /* We can trust that the immediates used will be correct (there
5545          are no by-register forms), so we need only cost op0.  */
5546       *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
5547       return true;
5548
5549     case MULT:
5550       *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5551       /* aarch64_rtx_mult_cost always handles recursion to its
5552          operands.  */
5553       return true;
5554
5555     case MOD:
5556     case UMOD:
5557       if (speed)
5558         {
5559           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5560             *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5561                       + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5562           else if (GET_MODE (x) == DFmode)
5563             *cost += (extra_cost->fp[1].mult
5564                       + extra_cost->fp[1].div);
5565           else if (GET_MODE (x) == SFmode)
5566             *cost += (extra_cost->fp[0].mult
5567                       + extra_cost->fp[0].div);
5568         }
5569       return false;  /* All arguments need to be in registers.  */
5570
5571     case DIV:
5572     case UDIV:
5573     case SQRT:
5574       if (speed)
5575         {
5576           if (GET_MODE_CLASS (mode) == MODE_INT)
5577             /* There is no integer SQRT, so only DIV and UDIV can get
5578                here.  */
5579             *cost += extra_cost->mult[mode == DImode].idiv;
5580           else
5581             *cost += extra_cost->fp[mode == DFmode].div;
5582         }
5583       return false;  /* All arguments need to be in registers.  */
5584
5585     case IF_THEN_ELSE:
5586       op2 = XEXP (x, 2);
5587       op0 = XEXP (x, 0);
5588       op1 = XEXP (x, 1);
5589
5590       if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5591         {
5592           /* Conditional branch.  */
5593           if (GET_MODE_CLASS (GET_MODE (XEXP (op0, 0))) == MODE_CC)
5594             return true;
5595           else
5596             {
5597               if (GET_CODE (op0) == NE
5598                   || GET_CODE (op0) == EQ)
5599                 {
5600                   rtx inner = XEXP (op0, 0);
5601                   rtx comparator = XEXP (op0, 1);
5602
5603                   if (comparator == const0_rtx)
5604                     {
5605                       /* TBZ/TBNZ/CBZ/CBNZ.  */
5606                       if (GET_CODE (inner) == ZERO_EXTRACT)
5607                         /* TBZ/TBNZ.  */
5608                         *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
5609                                            0, speed);
5610                       else
5611                         /* CBZ/CBNZ.  */
5612                         *cost += rtx_cost (inner, GET_CODE (op0), 0, speed);
5613
5614                       return true;
5615                     }
5616                 }
5617               else if (GET_CODE (op0) == LT
5618                        || GET_CODE (op0) == GE)
5619                 {
5620                   rtx comparator = XEXP (op0, 1);
5621
5622                   /* TBZ/TBNZ.  */
5623                   if (comparator == const0_rtx)
5624                     return true;
5625                 }
5626             }
5627         }
5628       else if (GET_MODE_CLASS (GET_MODE (XEXP (op0, 0))) == MODE_CC)
5629         {
5630           /* It's a conditional operation based on the status flags,
5631              so it must be some flavor of CSEL.  */
5632
5633           /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL.  */
5634           if (GET_CODE (op1) == NEG
5635               || GET_CODE (op1) == NOT
5636               || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5637             op1 = XEXP (op1, 0);
5638
5639           *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
5640           *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
5641           return true;
5642         }
5643
5644       /* We don't know what this is, cost all operands.  */
5645       return false;
5646
5647     case EQ:
5648     case NE:
5649     case GT:
5650     case GTU:
5651     case LT:
5652     case LTU:
5653     case GE:
5654     case GEU:
5655     case LE:
5656     case LEU:
5657
5658       return false; /* All arguments must be in registers.  */
5659
5660     case FMA:
5661       op0 = XEXP (x, 0);
5662       op1 = XEXP (x, 1);
5663       op2 = XEXP (x, 2);
5664
5665       if (speed)
5666         *cost += extra_cost->fp[mode == DFmode].fma;
5667
5668       /* FMSUB, FNMADD, and FNMSUB are free.  */
5669       if (GET_CODE (op0) == NEG)
5670         op0 = XEXP (op0, 0);
5671
5672       if (GET_CODE (op2) == NEG)
5673         op2 = XEXP (op2, 0);
5674
5675       /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5676          and the by-element operand as operand 0.  */
5677       if (GET_CODE (op1) == NEG)
5678         op1 = XEXP (op1, 0);
5679
5680       /* Catch vector-by-element operations.  The by-element operand can
5681          either be (vec_duplicate (vec_select (x))) or just
5682          (vec_select (x)), depending on whether we are multiplying by
5683          a vector or a scalar.
5684
5685          Canonicalization is not very good in these cases, FMA4 will put the
5686          by-element operand as operand 0, FNMA4 will have it as operand 1.  */
5687       if (GET_CODE (op0) == VEC_DUPLICATE)
5688         op0 = XEXP (op0, 0);
5689       else if (GET_CODE (op1) == VEC_DUPLICATE)
5690         op1 = XEXP (op1, 0);
5691
5692       if (GET_CODE (op0) == VEC_SELECT)
5693         op0 = XEXP (op0, 0);
5694       else if (GET_CODE (op1) == VEC_SELECT)
5695         op1 = XEXP (op1, 0);
5696
5697       /* If the remaining parameters are not registers,
5698          get the cost to put them into registers.  */
5699       *cost += rtx_cost (op0, FMA, 0, speed);
5700       *cost += rtx_cost (op1, FMA, 1, speed);
5701       *cost += rtx_cost (op2, FMA, 2, speed);
5702       return true;
5703
5704     case FLOAT_EXTEND:
5705       if (speed)
5706         *cost += extra_cost->fp[mode == DFmode].widen;
5707       return false;
5708
5709     case FLOAT_TRUNCATE:
5710       if (speed)
5711         *cost += extra_cost->fp[mode == DFmode].narrow;
5712       return false;
5713
5714     case ABS:
5715       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5716         {
5717           /* FABS and FNEG are analogous.  */
5718           if (speed)
5719             *cost += extra_cost->fp[mode == DFmode].neg;
5720         }
5721       else
5722         {
5723           /* Integer ABS will either be split to
5724              two arithmetic instructions, or will be an ABS
5725              (scalar), which we don't model.  */
5726           *cost = COSTS_N_INSNS (2);
5727           if (speed)
5728             *cost += 2 * extra_cost->alu.arith;
5729         }
5730       return false;
5731
5732     case SMAX:
5733     case SMIN:
5734       if (speed)
5735         {
5736           /* FMAXNM/FMINNM/FMAX/FMIN.
5737              TODO: This may not be accurate for all implementations, but
5738              we do not model this in the cost tables.  */
5739           *cost += extra_cost->fp[mode == DFmode].addsub;
5740         }
5741       return false;
5742
5743     case TRUNCATE:
5744
5745       /* Decompose <su>muldi3_highpart.  */
5746       if (/* (truncate:DI  */
5747           mode == DImode
5748           /*   (lshiftrt:TI  */
5749           && GET_MODE (XEXP (x, 0)) == TImode
5750           && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5751           /*      (mult:TI  */
5752           && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5753           /*        (ANY_EXTEND:TI (reg:DI))
5754                     (ANY_EXTEND:TI (reg:DI)))  */
5755           && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5756                && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
5757               || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
5758                   && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
5759           && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
5760           && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
5761           /*     (const_int 64)  */
5762           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5763           && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
5764         {
5765           /* UMULH/SMULH.  */
5766           if (speed)
5767             *cost += extra_cost->mult[mode == DImode].extend;
5768           *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
5769                              MULT, 0, speed);
5770           *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
5771                              MULT, 1, speed);
5772           return true;
5773         }
5774
5775       /* Fall through.  */
5776     default:
5777       if (dump_file && (dump_flags & TDF_DETAILS))
5778         fprintf (dump_file,
5779                  "\nFailed to cost RTX.  Assuming default cost.\n");
5780
5781       return true;
5782     }
5783   return false;
5784 }
5785
5786 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5787    calculated for X.  This cost is stored in *COST.  Returns true
5788    if the total cost of X was calculated.  */
5789 static bool
5790 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5791                    int param, int *cost, bool speed)
5792 {
5793   bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5794
5795   if (dump_file && (dump_flags & TDF_DETAILS))
5796     {
5797       print_rtl_single (dump_file, x);
5798       fprintf (dump_file, "\n%s cost: %d (%s)\n",
5799                speed ? "Hot" : "Cold",
5800                *cost, result ? "final" : "partial");
5801     }
5802
5803   return result;
5804 }
5805
5806 static int
5807 aarch64_register_move_cost (enum machine_mode mode,
5808                             reg_class_t from_i, reg_class_t to_i)
5809 {
5810   enum reg_class from = (enum reg_class) from_i;
5811   enum reg_class to = (enum reg_class) to_i;
5812   const struct cpu_regmove_cost *regmove_cost
5813     = aarch64_tune_params->regmove_cost;
5814
5815   /* Moving between GPR and stack cost is the same as GP2GP.  */
5816   if ((from == GENERAL_REGS && to == STACK_REG)
5817       || (to == GENERAL_REGS && from == STACK_REG))
5818     return regmove_cost->GP2GP;
5819
5820   /* To/From the stack register, we move via the gprs.  */
5821   if (to == STACK_REG || from == STACK_REG)
5822     return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5823             + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5824
5825   if (from == GENERAL_REGS && to == GENERAL_REGS)
5826     return regmove_cost->GP2GP;
5827   else if (from == GENERAL_REGS)
5828     return regmove_cost->GP2FP;
5829   else if (to == GENERAL_REGS)
5830     return regmove_cost->FP2GP;
5831
5832   /* When AdvSIMD instructions are disabled it is not possible to move
5833      a 128-bit value directly between Q registers.  This is handled in
5834      secondary reload.  A general register is used as a scratch to move
5835      the upper DI value and the lower DI value is moved directly,
5836      hence the cost is the sum of three moves. */
5837   if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
5838     return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5839
5840   return regmove_cost->FP2FP;
5841 }
5842
5843 static int
5844 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5845                           reg_class_t rclass ATTRIBUTE_UNUSED,
5846                           bool in ATTRIBUTE_UNUSED)
5847 {
5848   return aarch64_tune_params->memmov_cost;
5849 }
5850
5851 /* Return the number of instructions that can be issued per cycle.  */
5852 static int
5853 aarch64_sched_issue_rate (void)
5854 {
5855   return aarch64_tune_params->issue_rate;
5856 }
5857
5858 /* Vectorizer cost model target hooks.  */
5859
5860 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
5861 static int
5862 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5863                                     tree vectype,
5864                                     int misalign ATTRIBUTE_UNUSED)
5865 {
5866   unsigned elements;
5867
5868   switch (type_of_cost)
5869     {
5870       case scalar_stmt:
5871         return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5872
5873       case scalar_load:
5874         return aarch64_tune_params->vec_costs->scalar_load_cost;
5875
5876       case scalar_store:
5877         return aarch64_tune_params->vec_costs->scalar_store_cost;
5878
5879       case vector_stmt:
5880         return aarch64_tune_params->vec_costs->vec_stmt_cost;
5881
5882       case vector_load:
5883         return aarch64_tune_params->vec_costs->vec_align_load_cost;
5884
5885       case vector_store:
5886         return aarch64_tune_params->vec_costs->vec_store_cost;
5887
5888       case vec_to_scalar:
5889         return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
5890
5891       case scalar_to_vec:
5892         return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
5893
5894       case unaligned_load:
5895         return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
5896
5897       case unaligned_store:
5898         return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
5899
5900       case cond_branch_taken:
5901         return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
5902
5903       case cond_branch_not_taken:
5904         return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
5905
5906       case vec_perm:
5907       case vec_promote_demote:
5908         return aarch64_tune_params->vec_costs->vec_stmt_cost;
5909
5910       case vec_construct:
5911         elements = TYPE_VECTOR_SUBPARTS (vectype);
5912         return elements / 2 + 1;
5913
5914       default:
5915         gcc_unreachable ();
5916     }
5917 }
5918
5919 /* Implement targetm.vectorize.add_stmt_cost.  */
5920 static unsigned
5921 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5922                        struct _stmt_vec_info *stmt_info, int misalign,
5923                        enum vect_cost_model_location where)
5924 {
5925   unsigned *cost = (unsigned *) data;
5926   unsigned retval = 0;
5927
5928   if (flag_vect_cost_model)
5929     {
5930       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5931       int stmt_cost =
5932             aarch64_builtin_vectorization_cost (kind, vectype, misalign);
5933
5934       /* Statements in an inner loop relative to the loop being
5935          vectorized are weighted more heavily.  The value here is
5936          a function (linear for now) of the loop nest level.  */
5937       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5938         {
5939           loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5940           struct loop *loop =  LOOP_VINFO_LOOP (loop_info);
5941           unsigned nest_level = loop_depth (loop);
5942
5943           count *= nest_level;
5944         }
5945
5946       retval = (unsigned) (count * stmt_cost);
5947       cost[where] += retval;
5948     }
5949
5950   return retval;
5951 }
5952
5953 static void initialize_aarch64_code_model (void);
5954
5955 /* Parse the architecture extension string.  */
5956
5957 static void
5958 aarch64_parse_extension (char *str)
5959 {
5960   /* The extension string is parsed left to right.  */
5961   const struct aarch64_option_extension *opt = NULL;
5962
5963   /* Flag to say whether we are adding or removing an extension.  */
5964   int adding_ext = -1;
5965
5966   while (str != NULL && *str != 0)
5967     {
5968       char *ext;
5969       size_t len;
5970
5971       str++;
5972       ext = strchr (str, '+');
5973
5974       if (ext != NULL)
5975         len = ext - str;
5976       else
5977         len = strlen (str);
5978
5979       if (len >= 2 && strncmp (str, "no", 2) == 0)
5980         {
5981           adding_ext = 0;
5982           len -= 2;
5983           str += 2;
5984         }
5985       else if (len > 0)
5986         adding_ext = 1;
5987
5988       if (len == 0)
5989         {
5990           error ("missing feature modifier after %qs", "+no");
5991           return;
5992         }
5993
5994       /* Scan over the extensions table trying to find an exact match.  */
5995       for (opt = all_extensions; opt->name != NULL; opt++)
5996         {
5997           if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5998             {
5999               /* Add or remove the extension.  */
6000               if (adding_ext)
6001                 aarch64_isa_flags |= opt->flags_on;
6002               else
6003                 aarch64_isa_flags &= ~(opt->flags_off);
6004               break;
6005             }
6006         }
6007
6008       if (opt->name == NULL)
6009         {
6010           /* Extension not found in list.  */
6011           error ("unknown feature modifier %qs", str);
6012           return;
6013         }
6014
6015       str = ext;
6016     };
6017
6018   return;
6019 }
6020
6021 /* Parse the ARCH string.  */
6022
6023 static void
6024 aarch64_parse_arch (void)
6025 {
6026   char *ext;
6027   const struct processor *arch;
6028   char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6029   size_t len;
6030
6031   strcpy (str, aarch64_arch_string);
6032
6033   ext = strchr (str, '+');
6034
6035   if (ext != NULL)
6036     len = ext - str;
6037   else
6038     len = strlen (str);
6039
6040   if (len == 0)
6041     {
6042       error ("missing arch name in -march=%qs", str);
6043       return;
6044     }
6045
6046   /* Loop through the list of supported ARCHs to find a match.  */
6047   for (arch = all_architectures; arch->name != NULL; arch++)
6048     {
6049       if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6050         {
6051           selected_arch = arch;
6052           aarch64_isa_flags = selected_arch->flags;
6053
6054           if (!selected_cpu)
6055             selected_cpu = &all_cores[selected_arch->core];
6056
6057           if (ext != NULL)
6058             {
6059               /* ARCH string contains at least one extension.  */
6060               aarch64_parse_extension (ext);
6061             }
6062
6063           if (strcmp (selected_arch->arch, selected_cpu->arch))
6064             {
6065               warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6066                        selected_cpu->name, selected_arch->name);
6067             }
6068
6069           return;
6070         }
6071     }
6072
6073   /* ARCH name not found in list.  */
6074   error ("unknown value %qs for -march", str);
6075   return;
6076 }
6077
6078 /* Parse the CPU string.  */
6079
6080 static void
6081 aarch64_parse_cpu (void)
6082 {
6083   char *ext;
6084   const struct processor *cpu;
6085   char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6086   size_t len;
6087
6088   strcpy (str, aarch64_cpu_string);
6089
6090   ext = strchr (str, '+');
6091
6092   if (ext != NULL)
6093     len = ext - str;
6094   else
6095     len = strlen (str);
6096
6097   if (len == 0)
6098     {
6099       error ("missing cpu name in -mcpu=%qs", str);
6100       return;
6101     }
6102
6103   /* Loop through the list of supported CPUs to find a match.  */
6104   for (cpu = all_cores; cpu->name != NULL; cpu++)
6105     {
6106       if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6107         {
6108           selected_cpu = cpu;
6109           selected_tune = cpu;
6110           aarch64_isa_flags = selected_cpu->flags;
6111
6112           if (ext != NULL)
6113             {
6114               /* CPU string contains at least one extension.  */
6115               aarch64_parse_extension (ext);
6116             }
6117
6118           return;
6119         }
6120     }
6121
6122   /* CPU name not found in list.  */
6123   error ("unknown value %qs for -mcpu", str);
6124   return;
6125 }
6126
6127 /* Parse the TUNE string.  */
6128
6129 static void
6130 aarch64_parse_tune (void)
6131 {
6132   const struct processor *cpu;
6133   char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6134   strcpy (str, aarch64_tune_string);
6135
6136   /* Loop through the list of supported CPUs to find a match.  */
6137   for (cpu = all_cores; cpu->name != NULL; cpu++)
6138     {
6139       if (strcmp (cpu->name, str) == 0)
6140         {
6141           selected_tune = cpu;
6142           return;
6143         }
6144     }
6145
6146   /* CPU name not found in list.  */
6147   error ("unknown value %qs for -mtune", str);
6148   return;
6149 }
6150
6151
6152 /* Implement TARGET_OPTION_OVERRIDE.  */
6153
6154 static void
6155 aarch64_override_options (void)
6156 {
6157   /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6158      If either of -march or -mtune is given, they override their
6159      respective component of -mcpu.
6160
6161      So, first parse AARCH64_CPU_STRING, then the others, be careful
6162      with -march as, if -mcpu is not present on the command line, march
6163      must set a sensible default CPU.  */
6164   if (aarch64_cpu_string)
6165     {
6166       aarch64_parse_cpu ();
6167     }
6168
6169   if (aarch64_arch_string)
6170     {
6171       aarch64_parse_arch ();
6172     }
6173
6174   if (aarch64_tune_string)
6175     {
6176       aarch64_parse_tune ();
6177     }
6178
6179 #ifndef HAVE_AS_MABI_OPTION
6180   /* The compiler may have been configured with 2.23.* binutils, which does
6181      not have support for ILP32.  */
6182   if (TARGET_ILP32)
6183     error ("Assembler does not support -mabi=ilp32");
6184 #endif
6185
6186   initialize_aarch64_code_model ();
6187
6188   aarch64_build_bitmask_table ();
6189
6190   /* This target defaults to strict volatile bitfields.  */
6191   if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6192     flag_strict_volatile_bitfields = 1;
6193
6194   /* If the user did not specify a processor, choose the default
6195      one for them.  This will be the CPU set during configuration using
6196      --with-cpu, otherwise it is "generic".  */
6197   if (!selected_cpu)
6198     {
6199       selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6200       aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6201     }
6202
6203   gcc_assert (selected_cpu);
6204
6205   /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
6206   if (!selected_tune)
6207     selected_tune = &all_cores[selected_cpu->core];
6208
6209   aarch64_tune_flags = selected_tune->flags;
6210   aarch64_tune = selected_tune->core;
6211   aarch64_tune_params = selected_tune->tune;
6212
6213   aarch64_override_options_after_change ();
6214 }
6215
6216 /* Implement targetm.override_options_after_change.  */
6217
6218 static void
6219 aarch64_override_options_after_change (void)
6220 {
6221   if (flag_omit_frame_pointer)
6222     flag_omit_leaf_frame_pointer = false;
6223   else if (flag_omit_leaf_frame_pointer)
6224     flag_omit_frame_pointer = true;
6225 }
6226
6227 static struct machine_function *
6228 aarch64_init_machine_status (void)
6229 {
6230   struct machine_function *machine;
6231   machine = ggc_cleared_alloc<machine_function> ();
6232   return machine;
6233 }
6234
6235 void
6236 aarch64_init_expanders (void)
6237 {
6238   init_machine_status = aarch64_init_machine_status;
6239 }
6240
6241 /* A checking mechanism for the implementation of the various code models.  */
6242 static void
6243 initialize_aarch64_code_model (void)
6244 {
6245    if (flag_pic)
6246      {
6247        switch (aarch64_cmodel_var)
6248          {
6249          case AARCH64_CMODEL_TINY:
6250            aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6251            break;
6252          case AARCH64_CMODEL_SMALL:
6253            aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6254            break;
6255          case AARCH64_CMODEL_LARGE:
6256            sorry ("code model %qs with -f%s", "large",
6257                   flag_pic > 1 ? "PIC" : "pic");
6258          default:
6259            gcc_unreachable ();
6260          }
6261      }
6262    else
6263      aarch64_cmodel = aarch64_cmodel_var;
6264 }
6265
6266 /* Return true if SYMBOL_REF X binds locally.  */
6267
6268 static bool
6269 aarch64_symbol_binds_local_p (const_rtx x)
6270 {
6271   return (SYMBOL_REF_DECL (x)
6272           ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6273           : SYMBOL_REF_LOCAL_P (x));
6274 }
6275
6276 /* Return true if SYMBOL_REF X is thread local */
6277 static bool
6278 aarch64_tls_symbol_p (rtx x)
6279 {
6280   if (! TARGET_HAVE_TLS)
6281     return false;
6282
6283   if (GET_CODE (x) != SYMBOL_REF)
6284     return false;
6285
6286   return SYMBOL_REF_TLS_MODEL (x) != 0;
6287 }
6288
6289 /* Classify a TLS symbol into one of the TLS kinds.  */
6290 enum aarch64_symbol_type
6291 aarch64_classify_tls_symbol (rtx x)
6292 {
6293   enum tls_model tls_kind = tls_symbolic_operand_type (x);
6294
6295   switch (tls_kind)
6296     {
6297     case TLS_MODEL_GLOBAL_DYNAMIC:
6298     case TLS_MODEL_LOCAL_DYNAMIC:
6299       return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6300
6301     case TLS_MODEL_INITIAL_EXEC:
6302       return SYMBOL_SMALL_GOTTPREL;
6303
6304     case TLS_MODEL_LOCAL_EXEC:
6305       return SYMBOL_SMALL_TPREL;
6306
6307     case TLS_MODEL_EMULATED:
6308     case TLS_MODEL_NONE:
6309       return SYMBOL_FORCE_TO_MEM;
6310
6311     default:
6312       gcc_unreachable ();
6313     }
6314 }
6315
6316 /* Return the method that should be used to access SYMBOL_REF or
6317    LABEL_REF X in context CONTEXT.  */
6318
6319 enum aarch64_symbol_type
6320 aarch64_classify_symbol (rtx x,
6321                          enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6322 {
6323   if (GET_CODE (x) == LABEL_REF)
6324     {
6325       switch (aarch64_cmodel)
6326         {
6327         case AARCH64_CMODEL_LARGE:
6328           return SYMBOL_FORCE_TO_MEM;
6329
6330         case AARCH64_CMODEL_TINY_PIC:
6331         case AARCH64_CMODEL_TINY:
6332           return SYMBOL_TINY_ABSOLUTE;
6333
6334         case AARCH64_CMODEL_SMALL_PIC:
6335         case AARCH64_CMODEL_SMALL:
6336           return SYMBOL_SMALL_ABSOLUTE;
6337
6338         default:
6339           gcc_unreachable ();
6340         }
6341     }
6342
6343   if (GET_CODE (x) == SYMBOL_REF)
6344     {
6345       if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6346           return SYMBOL_FORCE_TO_MEM;
6347
6348       if (aarch64_tls_symbol_p (x))
6349         return aarch64_classify_tls_symbol (x);
6350
6351       switch (aarch64_cmodel)
6352         {
6353         case AARCH64_CMODEL_TINY:
6354           if (SYMBOL_REF_WEAK (x))
6355             return SYMBOL_FORCE_TO_MEM;
6356           return SYMBOL_TINY_ABSOLUTE;
6357
6358         case AARCH64_CMODEL_SMALL:
6359           if (SYMBOL_REF_WEAK (x))
6360             return SYMBOL_FORCE_TO_MEM;
6361           return SYMBOL_SMALL_ABSOLUTE;
6362
6363         case AARCH64_CMODEL_TINY_PIC:
6364           if (!aarch64_symbol_binds_local_p (x))
6365             return SYMBOL_TINY_GOT;
6366           return SYMBOL_TINY_ABSOLUTE;
6367
6368         case AARCH64_CMODEL_SMALL_PIC:
6369           if (!aarch64_symbol_binds_local_p (x))
6370             return SYMBOL_SMALL_GOT;
6371           return SYMBOL_SMALL_ABSOLUTE;
6372
6373         default:
6374           gcc_unreachable ();
6375         }
6376     }
6377
6378   /* By default push everything into the constant pool.  */
6379   return SYMBOL_FORCE_TO_MEM;
6380 }
6381
6382 bool
6383 aarch64_constant_address_p (rtx x)
6384 {
6385   return (CONSTANT_P (x) && memory_address_p (DImode, x));
6386 }
6387
6388 bool
6389 aarch64_legitimate_pic_operand_p (rtx x)
6390 {
6391   if (GET_CODE (x) == SYMBOL_REF
6392       || (GET_CODE (x) == CONST
6393           && GET_CODE (XEXP (x, 0)) == PLUS
6394           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6395      return false;
6396
6397   return true;
6398 }
6399
6400 /* Return true if X holds either a quarter-precision or
6401      floating-point +0.0 constant.  */
6402 static bool
6403 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
6404 {
6405   if (!CONST_DOUBLE_P (x))
6406     return false;
6407
6408   /* TODO: We could handle moving 0.0 to a TFmode register,
6409      but first we would like to refactor the movtf_aarch64
6410      to be more amicable to split moves properly and
6411      correctly gate on TARGET_SIMD.  For now - reject all
6412      constants which are not to SFmode or DFmode registers.  */
6413   if (!(mode == SFmode || mode == DFmode))
6414     return false;
6415
6416   if (aarch64_float_const_zero_rtx_p (x))
6417     return true;
6418   return aarch64_float_const_representable_p (x);
6419 }
6420
6421 static bool
6422 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6423 {
6424   /* Do not allow vector struct mode constants.  We could support
6425      0 and -1 easily, but they need support in aarch64-simd.md.  */
6426   if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6427     return false;
6428
6429   /* This could probably go away because
6430      we now decompose CONST_INTs according to expand_mov_immediate.  */
6431   if ((GET_CODE (x) == CONST_VECTOR
6432        && aarch64_simd_valid_immediate (x, mode, false, NULL))
6433       || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6434         return !targetm.cannot_force_const_mem (mode, x);
6435
6436   if (GET_CODE (x) == HIGH
6437       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6438     return true;
6439
6440   return aarch64_constant_address_p (x);
6441 }
6442
6443 rtx
6444 aarch64_load_tp (rtx target)
6445 {
6446   if (!target
6447       || GET_MODE (target) != Pmode
6448       || !register_operand (target, Pmode))
6449     target = gen_reg_rtx (Pmode);
6450
6451   /* Can return in any reg.  */
6452   emit_insn (gen_aarch64_load_tp_hard (target));
6453   return target;
6454 }
6455
6456 /* On AAPCS systems, this is the "struct __va_list".  */
6457 static GTY(()) tree va_list_type;
6458
6459 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6460    Return the type to use as __builtin_va_list.
6461
6462    AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6463
6464    struct __va_list
6465    {
6466      void *__stack;
6467      void *__gr_top;
6468      void *__vr_top;
6469      int   __gr_offs;
6470      int   __vr_offs;
6471    };  */
6472
6473 static tree
6474 aarch64_build_builtin_va_list (void)
6475 {
6476   tree va_list_name;
6477   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6478
6479   /* Create the type.  */
6480   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6481   /* Give it the required name.  */
6482   va_list_name = build_decl (BUILTINS_LOCATION,
6483                              TYPE_DECL,
6484                              get_identifier ("__va_list"),
6485                              va_list_type);
6486   DECL_ARTIFICIAL (va_list_name) = 1;
6487   TYPE_NAME (va_list_type) = va_list_name;
6488   TYPE_STUB_DECL (va_list_type) = va_list_name;
6489
6490   /* Create the fields.  */
6491   f_stack = build_decl (BUILTINS_LOCATION,
6492                         FIELD_DECL, get_identifier ("__stack"),
6493                         ptr_type_node);
6494   f_grtop = build_decl (BUILTINS_LOCATION,
6495                         FIELD_DECL, get_identifier ("__gr_top"),
6496                         ptr_type_node);
6497   f_vrtop = build_decl (BUILTINS_LOCATION,
6498                         FIELD_DECL, get_identifier ("__vr_top"),
6499                         ptr_type_node);
6500   f_groff = build_decl (BUILTINS_LOCATION,
6501                         FIELD_DECL, get_identifier ("__gr_offs"),
6502                         integer_type_node);
6503   f_vroff = build_decl (BUILTINS_LOCATION,
6504                         FIELD_DECL, get_identifier ("__vr_offs"),
6505                         integer_type_node);
6506
6507   DECL_ARTIFICIAL (f_stack) = 1;
6508   DECL_ARTIFICIAL (f_grtop) = 1;
6509   DECL_ARTIFICIAL (f_vrtop) = 1;
6510   DECL_ARTIFICIAL (f_groff) = 1;
6511   DECL_ARTIFICIAL (f_vroff) = 1;
6512
6513   DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6514   DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6515   DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6516   DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6517   DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6518
6519   TYPE_FIELDS (va_list_type) = f_stack;
6520   DECL_CHAIN (f_stack) = f_grtop;
6521   DECL_CHAIN (f_grtop) = f_vrtop;
6522   DECL_CHAIN (f_vrtop) = f_groff;
6523   DECL_CHAIN (f_groff) = f_vroff;
6524
6525   /* Compute its layout.  */
6526   layout_type (va_list_type);
6527
6528   return va_list_type;
6529 }
6530
6531 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
6532 static void
6533 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6534 {
6535   const CUMULATIVE_ARGS *cum;
6536   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6537   tree stack, grtop, vrtop, groff, vroff;
6538   tree t;
6539   int gr_save_area_size;
6540   int vr_save_area_size;
6541   int vr_offset;
6542
6543   cum = &crtl->args.info;
6544   gr_save_area_size
6545     = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6546   vr_save_area_size
6547     = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6548
6549   if (TARGET_GENERAL_REGS_ONLY)
6550     {
6551       if (cum->aapcs_nvrn > 0)
6552         sorry ("%qs and floating point or vector arguments",
6553                "-mgeneral-regs-only");
6554       vr_save_area_size = 0;
6555     }
6556
6557   f_stack = TYPE_FIELDS (va_list_type_node);
6558   f_grtop = DECL_CHAIN (f_stack);
6559   f_vrtop = DECL_CHAIN (f_grtop);
6560   f_groff = DECL_CHAIN (f_vrtop);
6561   f_vroff = DECL_CHAIN (f_groff);
6562
6563   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6564                   NULL_TREE);
6565   grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6566                   NULL_TREE);
6567   vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6568                   NULL_TREE);
6569   groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6570                   NULL_TREE);
6571   vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6572                   NULL_TREE);
6573
6574   /* Emit code to initialize STACK, which points to the next varargs stack
6575      argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
6576      by named arguments.  STACK is 8-byte aligned.  */
6577   t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6578   if (cum->aapcs_stack_size > 0)
6579     t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6580   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6581   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6582
6583   /* Emit code to initialize GRTOP, the top of the GR save area.
6584      virtual_incoming_args_rtx should have been 16 byte aligned.  */
6585   t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6586   t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6587   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6588
6589   /* Emit code to initialize VRTOP, the top of the VR save area.
6590      This address is gr_save_area_bytes below GRTOP, rounded
6591      down to the next 16-byte boundary.  */
6592   t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6593   vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6594                              STACK_BOUNDARY / BITS_PER_UNIT);
6595
6596   if (vr_offset)
6597     t = fold_build_pointer_plus_hwi (t, -vr_offset);
6598   t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6599   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6600
6601   /* Emit code to initialize GROFF, the offset from GRTOP of the
6602      next GPR argument.  */
6603   t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6604               build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6605   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6606
6607   /* Likewise emit code to initialize VROFF, the offset from FTOP
6608      of the next VR argument.  */
6609   t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6610               build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6611   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6612 }
6613
6614 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
6615
6616 static tree
6617 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6618                               gimple_seq *post_p ATTRIBUTE_UNUSED)
6619 {
6620   tree addr;
6621   bool indirect_p;
6622   bool is_ha;           /* is HFA or HVA.  */
6623   bool dw_align;        /* double-word align.  */
6624   enum machine_mode ag_mode = VOIDmode;
6625   int nregs;
6626   enum machine_mode mode;
6627
6628   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6629   tree stack, f_top, f_off, off, arg, roundup, on_stack;
6630   HOST_WIDE_INT size, rsize, adjust, align;
6631   tree t, u, cond1, cond2;
6632
6633   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6634   if (indirect_p)
6635     type = build_pointer_type (type);
6636
6637   mode = TYPE_MODE (type);
6638
6639   f_stack = TYPE_FIELDS (va_list_type_node);
6640   f_grtop = DECL_CHAIN (f_stack);
6641   f_vrtop = DECL_CHAIN (f_grtop);
6642   f_groff = DECL_CHAIN (f_vrtop);
6643   f_vroff = DECL_CHAIN (f_groff);
6644
6645   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6646                   f_stack, NULL_TREE);
6647   size = int_size_in_bytes (type);
6648   align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6649
6650   dw_align = false;
6651   adjust = 0;
6652   if (aarch64_vfp_is_call_or_return_candidate (mode,
6653                                                type,
6654                                                &ag_mode,
6655                                                &nregs,
6656                                                &is_ha))
6657     {
6658       /* TYPE passed in fp/simd registers.  */
6659       if (TARGET_GENERAL_REGS_ONLY)
6660         sorry ("%qs and floating point or vector arguments",
6661                "-mgeneral-regs-only");
6662
6663       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6664                       unshare_expr (valist), f_vrtop, NULL_TREE);
6665       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6666                       unshare_expr (valist), f_vroff, NULL_TREE);
6667
6668       rsize = nregs * UNITS_PER_VREG;
6669
6670       if (is_ha)
6671         {
6672           if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6673             adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6674         }
6675       else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6676                && size < UNITS_PER_VREG)
6677         {
6678           adjust = UNITS_PER_VREG - size;
6679         }
6680     }
6681   else
6682     {
6683       /* TYPE passed in general registers.  */
6684       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6685                       unshare_expr (valist), f_grtop, NULL_TREE);
6686       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6687                       unshare_expr (valist), f_groff, NULL_TREE);
6688       rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6689       nregs = rsize / UNITS_PER_WORD;
6690
6691       if (align > 8)
6692         dw_align = true;
6693
6694       if (BLOCK_REG_PADDING (mode, type, 1) == downward
6695           && size < UNITS_PER_WORD)
6696         {
6697           adjust = UNITS_PER_WORD  - size;
6698         }
6699     }
6700
6701   /* Get a local temporary for the field value.  */
6702   off = get_initialized_tmp_var (f_off, pre_p, NULL);
6703
6704   /* Emit code to branch if off >= 0.  */
6705   t = build2 (GE_EXPR, boolean_type_node, off,
6706               build_int_cst (TREE_TYPE (off), 0));
6707   cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6708
6709   if (dw_align)
6710     {
6711       /* Emit: offs = (offs + 15) & -16.  */
6712       t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6713                   build_int_cst (TREE_TYPE (off), 15));
6714       t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6715                   build_int_cst (TREE_TYPE (off), -16));
6716       roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6717     }
6718   else
6719     roundup = NULL;
6720
6721   /* Update ap.__[g|v]r_offs  */
6722   t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6723               build_int_cst (TREE_TYPE (off), rsize));
6724   t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6725
6726   /* String up.  */
6727   if (roundup)
6728     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6729
6730   /* [cond2] if (ap.__[g|v]r_offs > 0)  */
6731   u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6732               build_int_cst (TREE_TYPE (f_off), 0));
6733   cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6734
6735   /* String up: make sure the assignment happens before the use.  */
6736   t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6737   COND_EXPR_ELSE (cond1) = t;
6738
6739   /* Prepare the trees handling the argument that is passed on the stack;
6740      the top level node will store in ON_STACK.  */
6741   arg = get_initialized_tmp_var (stack, pre_p, NULL);
6742   if (align > 8)
6743     {
6744       /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
6745       t = fold_convert (intDI_type_node, arg);
6746       t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6747                   build_int_cst (TREE_TYPE (t), 15));
6748       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6749                   build_int_cst (TREE_TYPE (t), -16));
6750       t = fold_convert (TREE_TYPE (arg), t);
6751       roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6752     }
6753   else
6754     roundup = NULL;
6755   /* Advance ap.__stack  */
6756   t = fold_convert (intDI_type_node, arg);
6757   t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6758               build_int_cst (TREE_TYPE (t), size + 7));
6759   t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6760               build_int_cst (TREE_TYPE (t), -8));
6761   t = fold_convert (TREE_TYPE (arg), t);
6762   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6763   /* String up roundup and advance.  */
6764   if (roundup)
6765     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6766   /* String up with arg */
6767   on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6768   /* Big-endianness related address adjustment.  */
6769   if (BLOCK_REG_PADDING (mode, type, 1) == downward
6770       && size < UNITS_PER_WORD)
6771   {
6772     t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6773                 size_int (UNITS_PER_WORD - size));
6774     on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6775   }
6776
6777   COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6778   COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6779
6780   /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
6781   t = off;
6782   if (adjust)
6783     t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6784                 build_int_cst (TREE_TYPE (off), adjust));
6785
6786   t = fold_convert (sizetype, t);
6787   t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6788
6789   if (is_ha)
6790     {
6791       /* type ha; // treat as "struct {ftype field[n];}"
6792          ... [computing offs]
6793          for (i = 0; i <nregs; ++i, offs += 16)
6794            ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6795          return ha;  */
6796       int i;
6797       tree tmp_ha, field_t, field_ptr_t;
6798
6799       /* Declare a local variable.  */
6800       tmp_ha = create_tmp_var_raw (type, "ha");
6801       gimple_add_tmp_var (tmp_ha);
6802
6803       /* Establish the base type.  */
6804       switch (ag_mode)
6805         {
6806         case SFmode:
6807           field_t = float_type_node;
6808           field_ptr_t = float_ptr_type_node;
6809           break;
6810         case DFmode:
6811           field_t = double_type_node;
6812           field_ptr_t = double_ptr_type_node;
6813           break;
6814         case TFmode:
6815           field_t = long_double_type_node;
6816           field_ptr_t = long_double_ptr_type_node;
6817           break;
6818 /* The half precision and quad precision are not fully supported yet.  Enable
6819    the following code after the support is complete.  Need to find the correct
6820    type node for __fp16 *.  */
6821 #if 0
6822         case HFmode:
6823           field_t = float_type_node;
6824           field_ptr_t = float_ptr_type_node;
6825           break;
6826 #endif
6827         case V2SImode:
6828         case V4SImode:
6829             {
6830               tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6831               field_t = build_vector_type_for_mode (innertype, ag_mode);
6832               field_ptr_t = build_pointer_type (field_t);
6833             }
6834           break;
6835         default:
6836           gcc_assert (0);
6837         }
6838
6839       /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
6840       tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6841       addr = t;
6842       t = fold_convert (field_ptr_t, addr);
6843       t = build2 (MODIFY_EXPR, field_t,
6844                   build1 (INDIRECT_REF, field_t, tmp_ha),
6845                   build1 (INDIRECT_REF, field_t, t));
6846
6847       /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
6848       for (i = 1; i < nregs; ++i)
6849         {
6850           addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6851           u = fold_convert (field_ptr_t, addr);
6852           u = build2 (MODIFY_EXPR, field_t,
6853                       build2 (MEM_REF, field_t, tmp_ha,
6854                               build_int_cst (field_ptr_t,
6855                                              (i *
6856                                               int_size_in_bytes (field_t)))),
6857                       build1 (INDIRECT_REF, field_t, u));
6858           t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6859         }
6860
6861       u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6862       t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6863     }
6864
6865   COND_EXPR_ELSE (cond2) = t;
6866   addr = fold_convert (build_pointer_type (type), cond1);
6867   addr = build_va_arg_indirect_ref (addr);
6868
6869   if (indirect_p)
6870     addr = build_va_arg_indirect_ref (addr);
6871
6872   return addr;
6873 }
6874
6875 /* Implement TARGET_SETUP_INCOMING_VARARGS.  */
6876
6877 static void
6878 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6879                                 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6880                                 int no_rtl)
6881 {
6882   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6883   CUMULATIVE_ARGS local_cum;
6884   int gr_saved, vr_saved;
6885
6886   /* The caller has advanced CUM up to, but not beyond, the last named
6887      argument.  Advance a local copy of CUM past the last "real" named
6888      argument, to find out how many registers are left over.  */
6889   local_cum = *cum;
6890   aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
6891
6892   /* Found out how many registers we need to save.  */
6893   gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
6894   vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
6895
6896   if (TARGET_GENERAL_REGS_ONLY)
6897     {
6898       if (local_cum.aapcs_nvrn > 0)
6899         sorry ("%qs and floating point or vector arguments",
6900                "-mgeneral-regs-only");
6901       vr_saved = 0;
6902     }
6903
6904   if (!no_rtl)
6905     {
6906       if (gr_saved > 0)
6907         {
6908           rtx ptr, mem;
6909
6910           /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
6911           ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6912                                - gr_saved * UNITS_PER_WORD);
6913           mem = gen_frame_mem (BLKmode, ptr);
6914           set_mem_alias_set (mem, get_varargs_alias_set ());
6915
6916           move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
6917                                mem, gr_saved);
6918         }
6919       if (vr_saved > 0)
6920         {
6921           /* We can't use move_block_from_reg, because it will use
6922              the wrong mode, storing D regs only.  */
6923           enum machine_mode mode = TImode;
6924           int off, i;
6925
6926           /* Set OFF to the offset from virtual_incoming_args_rtx of
6927              the first vector register.  The VR save area lies below
6928              the GR one, and is aligned to 16 bytes.  */
6929           off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6930                                    STACK_BOUNDARY / BITS_PER_UNIT);
6931           off -= vr_saved * UNITS_PER_VREG;
6932
6933           for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
6934             {
6935               rtx ptr, mem;
6936
6937               ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
6938               mem = gen_frame_mem (mode, ptr);
6939               set_mem_alias_set (mem, get_varargs_alias_set ());
6940               aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6941               off += UNITS_PER_VREG;
6942             }
6943         }
6944     }
6945
6946   /* We don't save the size into *PRETEND_SIZE because we want to avoid
6947      any complication of having crtl->args.pretend_args_size changed.  */
6948   cfun->machine->saved_varargs_size
6949     = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6950                       STACK_BOUNDARY / BITS_PER_UNIT)
6951        + vr_saved * UNITS_PER_VREG);
6952 }
6953
6954 static void
6955 aarch64_conditional_register_usage (void)
6956 {
6957   int i;
6958   if (!TARGET_FLOAT)
6959     {
6960       for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6961         {
6962           fixed_regs[i] = 1;
6963           call_used_regs[i] = 1;
6964         }
6965     }
6966 }
6967
6968 /* Walk down the type tree of TYPE counting consecutive base elements.
6969    If *MODEP is VOIDmode, then set it to the first valid floating point
6970    type.  If a non-floating point type is found, or if a floating point
6971    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6972    otherwise return the count in the sub-tree.  */
6973 static int
6974 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6975 {
6976   enum machine_mode mode;
6977   HOST_WIDE_INT size;
6978
6979   switch (TREE_CODE (type))
6980     {
6981     case REAL_TYPE:
6982       mode = TYPE_MODE (type);
6983       if (mode != DFmode && mode != SFmode && mode != TFmode)
6984         return -1;
6985
6986       if (*modep == VOIDmode)
6987         *modep = mode;
6988
6989       if (*modep == mode)
6990         return 1;
6991
6992       break;
6993
6994     case COMPLEX_TYPE:
6995       mode = TYPE_MODE (TREE_TYPE (type));
6996       if (mode != DFmode && mode != SFmode && mode != TFmode)
6997         return -1;
6998
6999       if (*modep == VOIDmode)
7000         *modep = mode;
7001
7002       if (*modep == mode)
7003         return 2;
7004
7005       break;
7006
7007     case VECTOR_TYPE:
7008       /* Use V2SImode and V4SImode as representatives of all 64-bit
7009          and 128-bit vector types.  */
7010       size = int_size_in_bytes (type);
7011       switch (size)
7012         {
7013         case 8:
7014           mode = V2SImode;
7015           break;
7016         case 16:
7017           mode = V4SImode;
7018           break;
7019         default:
7020           return -1;
7021         }
7022
7023       if (*modep == VOIDmode)
7024         *modep = mode;
7025
7026       /* Vector modes are considered to be opaque: two vectors are
7027          equivalent for the purposes of being homogeneous aggregates
7028          if they are the same size.  */
7029       if (*modep == mode)
7030         return 1;
7031
7032       break;
7033
7034     case ARRAY_TYPE:
7035       {
7036         int count;
7037         tree index = TYPE_DOMAIN (type);
7038
7039         /* Can't handle incomplete types nor sizes that are not
7040            fixed.  */
7041         if (!COMPLETE_TYPE_P (type)
7042             || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7043           return -1;
7044
7045         count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7046         if (count == -1
7047             || !index
7048             || !TYPE_MAX_VALUE (index)
7049             || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
7050             || !TYPE_MIN_VALUE (index)
7051             || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
7052             || count < 0)
7053           return -1;
7054
7055         count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7056                       - tree_to_uhwi (TYPE_MIN_VALUE (index)));
7057
7058         /* There must be no padding.  */
7059         if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7060           return -1;
7061
7062         return count;
7063       }
7064
7065     case RECORD_TYPE:
7066       {
7067         int count = 0;
7068         int sub_count;
7069         tree field;
7070
7071         /* Can't handle incomplete types nor sizes that are not
7072            fixed.  */
7073         if (!COMPLETE_TYPE_P (type)
7074             || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7075           return -1;
7076
7077         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7078           {
7079             if (TREE_CODE (field) != FIELD_DECL)
7080               continue;
7081
7082             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7083             if (sub_count < 0)
7084               return -1;
7085             count += sub_count;
7086           }
7087
7088         /* There must be no padding.  */
7089         if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7090           return -1;
7091
7092         return count;
7093       }
7094
7095     case UNION_TYPE:
7096     case QUAL_UNION_TYPE:
7097       {
7098         /* These aren't very interesting except in a degenerate case.  */
7099         int count = 0;
7100         int sub_count;
7101         tree field;
7102
7103         /* Can't handle incomplete types nor sizes that are not
7104            fixed.  */
7105         if (!COMPLETE_TYPE_P (type)
7106             || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7107           return -1;
7108
7109         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7110           {
7111             if (TREE_CODE (field) != FIELD_DECL)
7112               continue;
7113
7114             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7115             if (sub_count < 0)
7116               return -1;
7117             count = count > sub_count ? count : sub_count;
7118           }
7119
7120         /* There must be no padding.  */
7121         if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7122           return -1;
7123
7124         return count;
7125       }
7126
7127     default:
7128       break;
7129     }
7130
7131   return -1;
7132 }
7133
7134 /* Return true if we use LRA instead of reload pass.  */
7135 static bool
7136 aarch64_lra_p (void)
7137 {
7138   return aarch64_lra_flag;
7139 }
7140
7141 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7142    type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
7143    array types.  The C99 floating-point complex types are also considered
7144    as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
7145    types, which are GCC extensions and out of the scope of AAPCS64, are
7146    treated as composite types here as well.
7147
7148    Note that MODE itself is not sufficient in determining whether a type
7149    is such a composite type or not.  This is because
7150    stor-layout.c:compute_record_mode may have already changed the MODE
7151    (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
7152    structure with only one field may have its MODE set to the mode of the
7153    field.  Also an integer mode whose size matches the size of the
7154    RECORD_TYPE type may be used to substitute the original mode
7155    (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
7156    solely relied on.  */
7157
7158 static bool
7159 aarch64_composite_type_p (const_tree type,
7160                           enum machine_mode mode)
7161 {
7162   if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7163     return true;
7164
7165   if (mode == BLKmode
7166       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7167       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7168     return true;
7169
7170   return false;
7171 }
7172
7173 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7174    type as described in AAPCS64 \S 4.1.2.
7175
7176    See the comment above aarch64_composite_type_p for the notes on MODE.  */
7177
7178 static bool
7179 aarch64_short_vector_p (const_tree type,
7180                         enum machine_mode mode)
7181 {
7182   HOST_WIDE_INT size = -1;
7183
7184   if (type && TREE_CODE (type) == VECTOR_TYPE)
7185     size = int_size_in_bytes (type);
7186   else if (!aarch64_composite_type_p (type, mode)
7187            && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7188                || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7189     size = GET_MODE_SIZE (mode);
7190
7191   return (size == 8 || size == 16) ? true : false;
7192 }
7193
7194 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
7195    shall be passed or returned in simd/fp register(s) (providing these
7196    parameter passing registers are available).
7197
7198    Upon successful return, *COUNT returns the number of needed registers,
7199    *BASE_MODE returns the mode of the individual register and when IS_HAF
7200    is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7201    floating-point aggregate or a homogeneous short-vector aggregate.  */
7202
7203 static bool
7204 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
7205                                          const_tree type,
7206                                          enum machine_mode *base_mode,
7207                                          int *count,
7208                                          bool *is_ha)
7209 {
7210   enum machine_mode new_mode = VOIDmode;
7211   bool composite_p = aarch64_composite_type_p (type, mode);
7212
7213   if (is_ha != NULL) *is_ha = false;
7214
7215   if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7216       || aarch64_short_vector_p (type, mode))
7217     {
7218       *count = 1;
7219       new_mode = mode;
7220     }
7221   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7222     {
7223       if (is_ha != NULL) *is_ha = true;
7224       *count = 2;
7225       new_mode = GET_MODE_INNER (mode);
7226     }
7227   else if (type && composite_p)
7228     {
7229       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7230
7231       if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7232         {
7233           if (is_ha != NULL) *is_ha = true;
7234           *count = ag_count;
7235         }
7236       else
7237         return false;
7238     }
7239   else
7240     return false;
7241
7242   *base_mode = new_mode;
7243   return true;
7244 }
7245
7246 /* Implement TARGET_STRUCT_VALUE_RTX.  */
7247
7248 static rtx
7249 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7250                           int incoming ATTRIBUTE_UNUSED)
7251 {
7252   return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7253 }
7254
7255 /* Implements target hook vector_mode_supported_p.  */
7256 static bool
7257 aarch64_vector_mode_supported_p (enum machine_mode mode)
7258 {
7259   if (TARGET_SIMD
7260       && (mode == V4SImode  || mode == V8HImode
7261           || mode == V16QImode || mode == V2DImode
7262           || mode == V2SImode  || mode == V4HImode
7263           || mode == V8QImode || mode == V2SFmode
7264           || mode == V4SFmode || mode == V2DFmode))
7265     return true;
7266
7267   return false;
7268 }
7269
7270 /* Return appropriate SIMD container
7271    for MODE within a vector of WIDTH bits.  */
7272 static enum machine_mode
7273 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
7274 {
7275   gcc_assert (width == 64 || width == 128);
7276   if (TARGET_SIMD)
7277     {
7278       if (width == 128)
7279         switch (mode)
7280           {
7281           case DFmode:
7282             return V2DFmode;
7283           case SFmode:
7284             return V4SFmode;
7285           case SImode:
7286             return V4SImode;
7287           case HImode:
7288             return V8HImode;
7289           case QImode:
7290             return V16QImode;
7291           case DImode:
7292             return V2DImode;
7293           default:
7294             break;
7295           }
7296       else
7297         switch (mode)
7298           {
7299           case SFmode:
7300             return V2SFmode;
7301           case SImode:
7302             return V2SImode;
7303           case HImode:
7304             return V4HImode;
7305           case QImode:
7306             return V8QImode;
7307           default:
7308             break;
7309           }
7310     }
7311   return word_mode;
7312 }
7313
7314 /* Return 128-bit container as the preferred SIMD mode for MODE.  */
7315 static enum machine_mode
7316 aarch64_preferred_simd_mode (enum machine_mode mode)
7317 {
7318   return aarch64_simd_container_mode (mode, 128);
7319 }
7320
7321 /* Return the bitmask of possible vector sizes for the vectorizer
7322    to iterate over.  */
7323 static unsigned int
7324 aarch64_autovectorize_vector_sizes (void)
7325 {
7326   return (16 | 8);
7327 }
7328
7329 /* A table to help perform AArch64-specific name mangling for AdvSIMD
7330    vector types in order to conform to the AAPCS64 (see "Procedure
7331    Call Standard for the ARM 64-bit Architecture", Appendix A).  To
7332    qualify for emission with the mangled names defined in that document,
7333    a vector type must not only be of the correct mode but also be
7334    composed of AdvSIMD vector element types (e.g.
7335    _builtin_aarch64_simd_qi); these types are registered by
7336    aarch64_init_simd_builtins ().  In other words, vector types defined
7337    in other ways e.g. via vector_size attribute will get default
7338    mangled names.  */
7339 typedef struct
7340 {
7341   enum machine_mode mode;
7342   const char *element_type_name;
7343   const char *mangled_name;
7344 } aarch64_simd_mangle_map_entry;
7345
7346 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
7347   /* 64-bit containerized types.  */
7348   { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
7349   { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
7350   { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
7351   { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
7352   { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
7353   { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
7354   { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
7355   { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
7356   { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7357   /* 128-bit containerized types.  */
7358   { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
7359   { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
7360   { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
7361   { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
7362   { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
7363   { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
7364   { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
7365   { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
7366   { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
7367   { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
7368   { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
7369   { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7370   { V2DImode,  "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
7371   { VOIDmode, NULL, NULL }
7372 };
7373
7374 /* Implement TARGET_MANGLE_TYPE.  */
7375
7376 static const char *
7377 aarch64_mangle_type (const_tree type)
7378 {
7379   /* The AArch64 ABI documents say that "__va_list" has to be
7380      managled as if it is in the "std" namespace.  */
7381   if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7382     return "St9__va_list";
7383
7384   /* Check the mode of the vector type, and the name of the vector
7385      element type, against the table.  */
7386   if (TREE_CODE (type) == VECTOR_TYPE)
7387     {
7388       aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7389
7390       while (pos->mode != VOIDmode)
7391         {
7392           tree elt_type = TREE_TYPE (type);
7393
7394           if (pos->mode == TYPE_MODE (type)
7395               && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7396               && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7397                           pos->element_type_name))
7398             return pos->mangled_name;
7399
7400           pos++;
7401         }
7402     }
7403
7404   /* Use the default mangling.  */
7405   return NULL;
7406 }
7407
7408 /* Return the equivalent letter for size.  */
7409 static char
7410 sizetochar (int size)
7411 {
7412   switch (size)
7413     {
7414     case 64: return 'd';
7415     case 32: return 's';
7416     case 16: return 'h';
7417     case 8 : return 'b';
7418     default: gcc_unreachable ();
7419     }
7420 }
7421
7422 /* Return true iff x is a uniform vector of floating-point
7423    constants, and the constant can be represented in
7424    quarter-precision form.  Note, as aarch64_float_const_representable
7425    rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
7426 static bool
7427 aarch64_vect_float_const_representable_p (rtx x)
7428 {
7429   int i = 0;
7430   REAL_VALUE_TYPE r0, ri;
7431   rtx x0, xi;
7432
7433   if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7434     return false;
7435
7436   x0 = CONST_VECTOR_ELT (x, 0);
7437   if (!CONST_DOUBLE_P (x0))
7438     return false;
7439
7440   REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7441
7442   for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7443     {
7444       xi = CONST_VECTOR_ELT (x, i);
7445       if (!CONST_DOUBLE_P (xi))
7446         return false;
7447
7448       REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7449       if (!REAL_VALUES_EQUAL (r0, ri))
7450         return false;
7451     }
7452
7453   return aarch64_float_const_representable_p (x0);
7454 }
7455
7456 /* Return true for valid and false for invalid.  */
7457 bool
7458 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7459                               struct simd_immediate_info *info)
7460 {
7461 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)  \
7462   matches = 1;                                          \
7463   for (i = 0; i < idx; i += (STRIDE))                   \
7464     if (!(TEST))                                        \
7465       matches = 0;                                      \
7466   if (matches)                                          \
7467     {                                                   \
7468       immtype = (CLASS);                                \
7469       elsize = (ELSIZE);                                \
7470       eshift = (SHIFT);                                 \
7471       emvn = (NEG);                                     \
7472       break;                                            \
7473     }
7474
7475   unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7476   unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7477   unsigned char bytes[16];
7478   int immtype = -1, matches;
7479   unsigned int invmask = inverse ? 0xff : 0;
7480   int eshift, emvn;
7481
7482   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7483     {
7484       if (! (aarch64_simd_imm_zero_p (op, mode)
7485              || aarch64_vect_float_const_representable_p (op)))
7486         return false;
7487
7488       if (info)
7489         {
7490           info->value = CONST_VECTOR_ELT (op, 0);
7491           info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7492           info->mvn = false;
7493           info->shift = 0;
7494         }
7495
7496       return true;
7497     }
7498
7499   /* Splat vector constant out into a byte vector.  */
7500   for (i = 0; i < n_elts; i++)
7501     {
7502       /* The vector is provided in gcc endian-neutral fashion.  For aarch64_be,
7503          it must be laid out in the vector register in reverse order.  */
7504       rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7505       unsigned HOST_WIDE_INT elpart;
7506       unsigned int part, parts;
7507
7508       if (GET_CODE (el) == CONST_INT)
7509         {
7510           elpart = INTVAL (el);
7511           parts = 1;
7512         }
7513       else if (GET_CODE (el) == CONST_DOUBLE)
7514         {
7515           elpart = CONST_DOUBLE_LOW (el);
7516           parts = 2;
7517         }
7518       else
7519         gcc_unreachable ();
7520
7521       for (part = 0; part < parts; part++)
7522         {
7523           unsigned int byte;
7524           for (byte = 0; byte < innersize; byte++)
7525             {
7526               bytes[idx++] = (elpart & 0xff) ^ invmask;
7527               elpart >>= BITS_PER_UNIT;
7528             }
7529           if (GET_CODE (el) == CONST_DOUBLE)
7530             elpart = CONST_DOUBLE_HIGH (el);
7531         }
7532     }
7533
7534   /* Sanity check.  */
7535   gcc_assert (idx == GET_MODE_SIZE (mode));
7536
7537   do
7538     {
7539       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7540              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7541
7542       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7543              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7544
7545       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7546              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7547
7548       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7549              && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7550
7551       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7552
7553       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7554
7555       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7556              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7557
7558       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7559              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7560
7561       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7562              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7563
7564       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7565              && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7566
7567       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7568
7569       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7570
7571       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7572              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7573
7574       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7575              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7576
7577       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7578              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7579
7580       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7581              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7582
7583       CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7584
7585       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7586              && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7587     }
7588   while (0);
7589
7590   if (immtype == -1)
7591     return false;
7592
7593   if (info)
7594     {
7595       info->element_width = elsize;
7596       info->mvn = emvn != 0;
7597       info->shift = eshift;
7598
7599       unsigned HOST_WIDE_INT imm = 0;
7600
7601       if (immtype >= 12 && immtype <= 15)
7602         info->msl = true;
7603
7604       /* Un-invert bytes of recognized vector, if necessary.  */
7605       if (invmask != 0)
7606         for (i = 0; i < idx; i++)
7607           bytes[i] ^= invmask;
7608
7609       if (immtype == 17)
7610         {
7611           /* FIXME: Broken on 32-bit H_W_I hosts.  */
7612           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7613
7614           for (i = 0; i < 8; i++)
7615             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7616               << (i * BITS_PER_UNIT);
7617
7618
7619           info->value = GEN_INT (imm);
7620         }
7621       else
7622         {
7623           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7624             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7625
7626           /* Construct 'abcdefgh' because the assembler cannot handle
7627              generic constants.  */
7628           if (info->mvn)
7629             imm = ~imm;
7630           imm = (imm >> info->shift) & 0xff;
7631           info->value = GEN_INT (imm);
7632         }
7633     }
7634
7635   return true;
7636 #undef CHECK
7637 }
7638
7639 static bool
7640 aarch64_const_vec_all_same_int_p (rtx x,
7641                                   HOST_WIDE_INT minval,
7642                                   HOST_WIDE_INT maxval)
7643 {
7644   HOST_WIDE_INT firstval;
7645   int count, i;
7646
7647   if (GET_CODE (x) != CONST_VECTOR
7648       || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7649     return false;
7650
7651   firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7652   if (firstval < minval || firstval > maxval)
7653     return false;
7654
7655   count = CONST_VECTOR_NUNITS (x);
7656   for (i = 1; i < count; i++)
7657     if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7658       return false;
7659
7660   return true;
7661 }
7662
7663 /* Check of immediate shift constants are within range.  */
7664 bool
7665 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7666 {
7667   int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7668   if (left)
7669     return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7670   else
7671     return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7672 }
7673
7674 /* Return true if X is a uniform vector where all elements
7675    are either the floating-point constant 0.0 or the
7676    integer constant 0.  */
7677 bool
7678 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7679 {
7680   return x == CONST0_RTX (mode);
7681 }
7682
7683 bool
7684 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7685 {
7686   HOST_WIDE_INT imm = INTVAL (x);
7687   int i;
7688
7689   for (i = 0; i < 8; i++)
7690     {
7691       unsigned int byte = imm & 0xff;
7692       if (byte != 0xff && byte != 0)
7693        return false;
7694       imm >>= 8;
7695     }
7696
7697   return true;
7698 }
7699
7700 bool
7701 aarch64_mov_operand_p (rtx x,
7702                        enum aarch64_symbol_context context,
7703                        enum machine_mode mode)
7704 {
7705   if (GET_CODE (x) == HIGH
7706       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7707     return true;
7708
7709   if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7710     return true;
7711
7712   if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7713     return true;
7714
7715   return aarch64_classify_symbolic_expression (x, context)
7716     == SYMBOL_TINY_ABSOLUTE;
7717 }
7718
7719 /* Return a const_int vector of VAL.  */
7720 rtx
7721 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7722 {
7723   int nunits = GET_MODE_NUNITS (mode);
7724   rtvec v = rtvec_alloc (nunits);
7725   int i;
7726
7727   for (i=0; i < nunits; i++)
7728     RTVEC_ELT (v, i) = GEN_INT (val);
7729
7730   return gen_rtx_CONST_VECTOR (mode, v);
7731 }
7732
7733 /* Check OP is a legal scalar immediate for the MOVI instruction.  */
7734
7735 bool
7736 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7737 {
7738   enum machine_mode vmode;
7739
7740   gcc_assert (!VECTOR_MODE_P (mode));
7741   vmode = aarch64_preferred_simd_mode (mode);
7742   rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7743   return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7744 }
7745
7746 /* Construct and return a PARALLEL RTX vector.  */
7747 rtx
7748 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7749 {
7750   int nunits = GET_MODE_NUNITS (mode);
7751   rtvec v = rtvec_alloc (nunits / 2);
7752   int base = high ? nunits / 2 : 0;
7753   rtx t1;
7754   int i;
7755
7756   for (i=0; i < nunits / 2; i++)
7757     RTVEC_ELT (v, i) = GEN_INT (base + i);
7758
7759   t1 = gen_rtx_PARALLEL (mode, v);
7760   return t1;
7761 }
7762
7763 /* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
7764    HIGH (exclusive).  */
7765 void
7766 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7767 {
7768   HOST_WIDE_INT lane;
7769   gcc_assert (GET_CODE (operand) == CONST_INT);
7770   lane = INTVAL (operand);
7771
7772   if (lane < low || lane >= high)
7773     error ("lane out of range");
7774 }
7775
7776 void
7777 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7778 {
7779   gcc_assert (GET_CODE (operand) == CONST_INT);
7780   HOST_WIDE_INT lane = INTVAL (operand);
7781
7782   if (lane < low || lane >= high)
7783     error ("constant out of range");
7784 }
7785
7786 /* Emit code to reinterpret one AdvSIMD type as another,
7787    without altering bits.  */
7788 void
7789 aarch64_simd_reinterpret (rtx dest, rtx src)
7790 {
7791   emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7792 }
7793
7794 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
7795    registers).  */
7796 void
7797 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7798                             rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7799                             rtx op1)
7800 {
7801   rtx mem = gen_rtx_MEM (mode, destaddr);
7802   rtx tmp1 = gen_reg_rtx (mode);
7803   rtx tmp2 = gen_reg_rtx (mode);
7804
7805   emit_insn (intfn (tmp1, op1, tmp2));
7806
7807   emit_move_insn (mem, tmp1);
7808   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7809   emit_move_insn (mem, tmp2);
7810 }
7811
7812 /* Return TRUE if OP is a valid vector addressing mode.  */
7813 bool
7814 aarch64_simd_mem_operand_p (rtx op)
7815 {
7816   return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7817                         || GET_CODE (XEXP (op, 0)) == REG);
7818 }
7819
7820 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7821    not to early-clobber SRC registers in the process.
7822
7823    We assume that the operands described by SRC and DEST represent a
7824    decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
7825    number of components into which the copy has been decomposed.  */
7826 void
7827 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7828                                 rtx *src, unsigned int count)
7829 {
7830   unsigned int i;
7831
7832   if (!reg_overlap_mentioned_p (operands[0], operands[1])
7833       || REGNO (operands[0]) < REGNO (operands[1]))
7834     {
7835       for (i = 0; i < count; i++)
7836         {
7837           operands[2 * i] = dest[i];
7838           operands[2 * i + 1] = src[i];
7839         }
7840     }
7841   else
7842     {
7843       for (i = 0; i < count; i++)
7844         {
7845           operands[2 * i] = dest[count - i - 1];
7846           operands[2 * i + 1] = src[count - i - 1];
7847         }
7848     }
7849 }
7850
7851 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7852    one of VSTRUCT modes: OI, CI or XI.  */
7853 int
7854 aarch64_simd_attr_length_move (rtx insn)
7855 {
7856   enum machine_mode mode;
7857
7858   extract_insn_cached (insn);
7859
7860   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7861     {
7862       mode = GET_MODE (recog_data.operand[0]);
7863       switch (mode)
7864         {
7865         case OImode:
7866           return 8;
7867         case CImode:
7868           return 12;
7869         case XImode:
7870           return 16;
7871         default:
7872           gcc_unreachable ();
7873         }
7874     }
7875   return 4;
7876 }
7877
7878 /* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
7879    alignment of a vector to 128 bits.  */
7880 static HOST_WIDE_INT
7881 aarch64_simd_vector_alignment (const_tree type)
7882 {
7883   HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
7884   return MIN (align, 128);
7885 }
7886
7887 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
7888 static bool
7889 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
7890 {
7891   if (is_packed)
7892     return false;
7893
7894   /* We guarantee alignment for vectors up to 128-bits.  */
7895   if (tree_int_cst_compare (TYPE_SIZE (type),
7896                             bitsize_int (BIGGEST_ALIGNMENT)) > 0)
7897     return false;
7898
7899   /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
7900   return true;
7901 }
7902
7903 /* If VALS is a vector constant that can be loaded into a register
7904    using DUP, generate instructions to do so and return an RTX to
7905    assign to the register.  Otherwise return NULL_RTX.  */
7906 static rtx
7907 aarch64_simd_dup_constant (rtx vals)
7908 {
7909   enum machine_mode mode = GET_MODE (vals);
7910   enum machine_mode inner_mode = GET_MODE_INNER (mode);
7911   int n_elts = GET_MODE_NUNITS (mode);
7912   bool all_same = true;
7913   rtx x;
7914   int i;
7915
7916   if (GET_CODE (vals) != CONST_VECTOR)
7917     return NULL_RTX;
7918
7919   for (i = 1; i < n_elts; ++i)
7920     {
7921       x = CONST_VECTOR_ELT (vals, i);
7922       if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
7923         all_same = false;
7924     }
7925
7926   if (!all_same)
7927     return NULL_RTX;
7928
7929   /* We can load this constant by using DUP and a constant in a
7930      single ARM register.  This will be cheaper than a vector
7931      load.  */
7932   x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
7933   return gen_rtx_VEC_DUPLICATE (mode, x);
7934 }
7935
7936
7937 /* Generate code to load VALS, which is a PARALLEL containing only
7938    constants (for vec_init) or CONST_VECTOR, efficiently into a
7939    register.  Returns an RTX to copy into the register, or NULL_RTX
7940    for a PARALLEL that can not be converted into a CONST_VECTOR.  */
7941 static rtx
7942 aarch64_simd_make_constant (rtx vals)
7943 {
7944   enum machine_mode mode = GET_MODE (vals);
7945   rtx const_dup;
7946   rtx const_vec = NULL_RTX;
7947   int n_elts = GET_MODE_NUNITS (mode);
7948   int n_const = 0;
7949   int i;
7950
7951   if (GET_CODE (vals) == CONST_VECTOR)
7952     const_vec = vals;
7953   else if (GET_CODE (vals) == PARALLEL)
7954     {
7955       /* A CONST_VECTOR must contain only CONST_INTs and
7956          CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7957          Only store valid constants in a CONST_VECTOR.  */
7958       for (i = 0; i < n_elts; ++i)
7959         {
7960           rtx x = XVECEXP (vals, 0, i);
7961           if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7962             n_const++;
7963         }
7964       if (n_const == n_elts)
7965         const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7966     }
7967   else
7968     gcc_unreachable ();
7969
7970   if (const_vec != NULL_RTX
7971       && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7972     /* Load using MOVI/MVNI.  */
7973     return const_vec;
7974   else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7975     /* Loaded using DUP.  */
7976     return const_dup;
7977   else if (const_vec != NULL_RTX)
7978     /* Load from constant pool. We can not take advantage of single-cycle
7979        LD1 because we need a PC-relative addressing mode.  */
7980     return const_vec;
7981   else
7982     /* A PARALLEL containing something not valid inside CONST_VECTOR.
7983        We can not construct an initializer.  */
7984     return NULL_RTX;
7985 }
7986
7987 void
7988 aarch64_expand_vector_init (rtx target, rtx vals)
7989 {
7990   enum machine_mode mode = GET_MODE (target);
7991   enum machine_mode inner_mode = GET_MODE_INNER (mode);
7992   int n_elts = GET_MODE_NUNITS (mode);
7993   int n_var = 0, one_var = -1;
7994   bool all_same = true;
7995   rtx x, mem;
7996   int i;
7997
7998   x = XVECEXP (vals, 0, 0);
7999   if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8000     n_var = 1, one_var = 0;
8001
8002   for (i = 1; i < n_elts; ++i)
8003     {
8004       x = XVECEXP (vals, 0, i);
8005       if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8006         ++n_var, one_var = i;
8007
8008       if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8009         all_same = false;
8010     }
8011
8012   if (n_var == 0)
8013     {
8014       rtx constant = aarch64_simd_make_constant (vals);
8015       if (constant != NULL_RTX)
8016         {
8017           emit_move_insn (target, constant);
8018           return;
8019         }
8020     }
8021
8022   /* Splat a single non-constant element if we can.  */
8023   if (all_same)
8024     {
8025       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8026       aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8027       return;
8028     }
8029
8030   /* One field is non-constant.  Load constant then overwrite varying
8031      field.  This is more efficient than using the stack.  */
8032   if (n_var == 1)
8033     {
8034       rtx copy = copy_rtx (vals);
8035       rtx index = GEN_INT (one_var);
8036       enum insn_code icode;
8037
8038       /* Load constant part of vector, substitute neighboring value for
8039          varying element.  */
8040       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8041       aarch64_expand_vector_init (target, copy);
8042
8043       /* Insert variable.  */
8044       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8045       icode = optab_handler (vec_set_optab, mode);
8046       gcc_assert (icode != CODE_FOR_nothing);
8047       emit_insn (GEN_FCN (icode) (target, x, index));
8048       return;
8049     }
8050
8051   /* Construct the vector in memory one field at a time
8052      and load the whole vector.  */
8053   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8054   for (i = 0; i < n_elts; i++)
8055     emit_move_insn (adjust_address_nv (mem, inner_mode,
8056                                     i * GET_MODE_SIZE (inner_mode)),
8057                     XVECEXP (vals, 0, i));
8058   emit_move_insn (target, mem);
8059
8060 }
8061
8062 static unsigned HOST_WIDE_INT
8063 aarch64_shift_truncation_mask (enum machine_mode mode)
8064 {
8065   return
8066     (aarch64_vector_mode_supported_p (mode)
8067      || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8068 }
8069
8070 #ifndef TLS_SECTION_ASM_FLAG
8071 #define TLS_SECTION_ASM_FLAG 'T'
8072 #endif
8073
8074 void
8075 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8076                                tree decl ATTRIBUTE_UNUSED)
8077 {
8078   char flagchars[10], *f = flagchars;
8079
8080   /* If we have already declared this section, we can use an
8081      abbreviated form to switch back to it -- unless this section is
8082      part of a COMDAT groups, in which case GAS requires the full
8083      declaration every time.  */
8084   if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8085       && (flags & SECTION_DECLARED))
8086     {
8087       fprintf (asm_out_file, "\t.section\t%s\n", name);
8088       return;
8089     }
8090
8091   if (!(flags & SECTION_DEBUG))
8092     *f++ = 'a';
8093   if (flags & SECTION_WRITE)
8094     *f++ = 'w';
8095   if (flags & SECTION_CODE)
8096     *f++ = 'x';
8097   if (flags & SECTION_SMALL)
8098     *f++ = 's';
8099   if (flags & SECTION_MERGE)
8100     *f++ = 'M';
8101   if (flags & SECTION_STRINGS)
8102     *f++ = 'S';
8103   if (flags & SECTION_TLS)
8104     *f++ = TLS_SECTION_ASM_FLAG;
8105   if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8106     *f++ = 'G';
8107   *f = '\0';
8108
8109   fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8110
8111   if (!(flags & SECTION_NOTYPE))
8112     {
8113       const char *type;
8114       const char *format;
8115
8116       if (flags & SECTION_BSS)
8117         type = "nobits";
8118       else
8119         type = "progbits";
8120
8121 #ifdef TYPE_OPERAND_FMT
8122       format = "," TYPE_OPERAND_FMT;
8123 #else
8124       format = ",@%s";
8125 #endif
8126
8127       fprintf (asm_out_file, format, type);
8128
8129       if (flags & SECTION_ENTSIZE)
8130         fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8131       if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8132         {
8133           if (TREE_CODE (decl) == IDENTIFIER_NODE)
8134             fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8135           else
8136             fprintf (asm_out_file, ",%s,comdat",
8137                      IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8138         }
8139     }
8140
8141   putc ('\n', asm_out_file);
8142 }
8143
8144 /* Select a format to encode pointers in exception handling data.  */
8145 int
8146 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8147 {
8148    int type;
8149    switch (aarch64_cmodel)
8150      {
8151      case AARCH64_CMODEL_TINY:
8152      case AARCH64_CMODEL_TINY_PIC:
8153      case AARCH64_CMODEL_SMALL:
8154      case AARCH64_CMODEL_SMALL_PIC:
8155        /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
8156           for everything.  */
8157        type = DW_EH_PE_sdata4;
8158        break;
8159      default:
8160        /* No assumptions here.  8-byte relocs required.  */
8161        type = DW_EH_PE_sdata8;
8162        break;
8163      }
8164    return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8165 }
8166
8167 /* Emit load exclusive.  */
8168
8169 static void
8170 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
8171                              rtx mem, rtx model_rtx)
8172 {
8173   rtx (*gen) (rtx, rtx, rtx);
8174
8175   switch (mode)
8176     {
8177     case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8178     case HImode: gen = gen_aarch64_load_exclusivehi; break;
8179     case SImode: gen = gen_aarch64_load_exclusivesi; break;
8180     case DImode: gen = gen_aarch64_load_exclusivedi; break;
8181     default:
8182       gcc_unreachable ();
8183     }
8184
8185   emit_insn (gen (rval, mem, model_rtx));
8186 }
8187
8188 /* Emit store exclusive.  */
8189
8190 static void
8191 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
8192                               rtx rval, rtx mem, rtx model_rtx)
8193 {
8194   rtx (*gen) (rtx, rtx, rtx, rtx);
8195
8196   switch (mode)
8197     {
8198     case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8199     case HImode: gen = gen_aarch64_store_exclusivehi; break;
8200     case SImode: gen = gen_aarch64_store_exclusivesi; break;
8201     case DImode: gen = gen_aarch64_store_exclusivedi; break;
8202     default:
8203       gcc_unreachable ();
8204     }
8205
8206   emit_insn (gen (bval, rval, mem, model_rtx));
8207 }
8208
8209 /* Mark the previous jump instruction as unlikely.  */
8210
8211 static void
8212 aarch64_emit_unlikely_jump (rtx insn)
8213 {
8214   int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
8215
8216   insn = emit_jump_insn (insn);
8217   add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
8218 }
8219
8220 /* Expand a compare and swap pattern.  */
8221
8222 void
8223 aarch64_expand_compare_and_swap (rtx operands[])
8224 {
8225   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8226   enum machine_mode mode, cmp_mode;
8227   rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8228
8229   bval = operands[0];
8230   rval = operands[1];
8231   mem = operands[2];
8232   oldval = operands[3];
8233   newval = operands[4];
8234   is_weak = operands[5];
8235   mod_s = operands[6];
8236   mod_f = operands[7];
8237   mode = GET_MODE (mem);
8238   cmp_mode = mode;
8239
8240   /* Normally the succ memory model must be stronger than fail, but in the
8241      unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8242      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
8243
8244   if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8245       && INTVAL (mod_s) == MEMMODEL_RELEASE)
8246     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8247
8248   switch (mode)
8249     {
8250     case QImode:
8251     case HImode:
8252       /* For short modes, we're going to perform the comparison in SImode,
8253          so do the zero-extension now.  */
8254       cmp_mode = SImode;
8255       rval = gen_reg_rtx (SImode);
8256       oldval = convert_modes (SImode, mode, oldval, true);
8257       /* Fall through.  */
8258
8259     case SImode:
8260     case DImode:
8261       /* Force the value into a register if needed.  */
8262       if (!aarch64_plus_operand (oldval, mode))
8263         oldval = force_reg (cmp_mode, oldval);
8264       break;
8265
8266     default:
8267       gcc_unreachable ();
8268     }
8269
8270   switch (mode)
8271     {
8272     case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8273     case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8274     case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8275     case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8276     default:
8277       gcc_unreachable ();
8278     }
8279
8280   emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8281
8282   if (mode == QImode || mode == HImode)
8283     emit_move_insn (operands[1], gen_lowpart (mode, rval));
8284
8285   x = gen_rtx_REG (CCmode, CC_REGNUM);
8286   x = gen_rtx_EQ (SImode, x, const0_rtx);
8287   emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8288 }
8289
8290 /* Split a compare and swap pattern.  */
8291
8292 void
8293 aarch64_split_compare_and_swap (rtx operands[])
8294 {
8295   rtx rval, mem, oldval, newval, scratch;
8296   enum machine_mode mode;
8297   bool is_weak;
8298   rtx label1, label2, x, cond;
8299
8300   rval = operands[0];
8301   mem = operands[1];
8302   oldval = operands[2];
8303   newval = operands[3];
8304   is_weak = (operands[4] != const0_rtx);
8305   scratch = operands[7];
8306   mode = GET_MODE (mem);
8307
8308   label1 = NULL_RTX;
8309   if (!is_weak)
8310     {
8311       label1 = gen_label_rtx ();
8312       emit_label (label1);
8313     }
8314   label2 = gen_label_rtx ();
8315
8316   aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8317
8318   cond = aarch64_gen_compare_reg (NE, rval, oldval);
8319   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8320   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8321                             gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8322   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8323
8324   aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8325
8326   if (!is_weak)
8327     {
8328       x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8329       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8330                                 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8331       aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8332     }
8333   else
8334     {
8335       cond = gen_rtx_REG (CCmode, CC_REGNUM);
8336       x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8337       emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8338     }
8339
8340   emit_label (label2);
8341 }
8342
8343 /* Split an atomic operation.  */
8344
8345 void
8346 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8347                      rtx value, rtx model_rtx, rtx cond)
8348 {
8349   enum machine_mode mode = GET_MODE (mem);
8350   enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8351   rtx label, x;
8352
8353   label = gen_label_rtx ();
8354   emit_label (label);
8355
8356   if (new_out)
8357     new_out = gen_lowpart (wmode, new_out);
8358   if (old_out)
8359     old_out = gen_lowpart (wmode, old_out);
8360   else
8361     old_out = new_out;
8362   value = simplify_gen_subreg (wmode, value, mode, 0);
8363
8364   aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8365
8366   switch (code)
8367     {
8368     case SET:
8369       new_out = value;
8370       break;
8371
8372     case NOT:
8373       x = gen_rtx_AND (wmode, old_out, value);
8374       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8375       x = gen_rtx_NOT (wmode, new_out);
8376       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8377       break;
8378
8379     case MINUS:
8380       if (CONST_INT_P (value))
8381         {
8382           value = GEN_INT (-INTVAL (value));
8383           code = PLUS;
8384         }
8385       /* Fall through.  */
8386
8387     default:
8388       x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8389       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8390       break;
8391     }
8392
8393   aarch64_emit_store_exclusive (mode, cond, mem,
8394                                 gen_lowpart (mode, new_out), model_rtx);
8395
8396   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8397   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8398                             gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8399   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8400 }
8401
8402 static void
8403 aarch64_print_extension (void)
8404 {
8405   const struct aarch64_option_extension *opt = NULL;
8406
8407   for (opt = all_extensions; opt->name != NULL; opt++)
8408     if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8409       asm_fprintf (asm_out_file, "+%s", opt->name);
8410
8411   asm_fprintf (asm_out_file, "\n");
8412 }
8413
8414 static void
8415 aarch64_start_file (void)
8416 {
8417   if (selected_arch)
8418     {
8419       asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8420       aarch64_print_extension ();
8421     }
8422   else if (selected_cpu)
8423     {
8424       const char *truncated_name
8425             = aarch64_rewrite_selected_cpu (selected_cpu->name);
8426       asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8427       aarch64_print_extension ();
8428     }
8429   default_file_start();
8430 }
8431
8432 /* Target hook for c_mode_for_suffix.  */
8433 static enum machine_mode
8434 aarch64_c_mode_for_suffix (char suffix)
8435 {
8436   if (suffix == 'q')
8437     return TFmode;
8438
8439   return VOIDmode;
8440 }
8441
8442 /* We can only represent floating point constants which will fit in
8443    "quarter-precision" values.  These values are characterised by
8444    a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
8445    by:
8446
8447    (-1)^s * (n/16) * 2^r
8448
8449    Where:
8450      's' is the sign bit.
8451      'n' is an integer in the range 16 <= n <= 31.
8452      'r' is an integer in the range -3 <= r <= 4.  */
8453
8454 /* Return true iff X can be represented by a quarter-precision
8455    floating point immediate operand X.  Note, we cannot represent 0.0.  */
8456 bool
8457 aarch64_float_const_representable_p (rtx x)
8458 {
8459   /* This represents our current view of how many bits
8460      make up the mantissa.  */
8461   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8462   int exponent;
8463   unsigned HOST_WIDE_INT mantissa, mask;
8464   REAL_VALUE_TYPE r, m;
8465   bool fail;
8466
8467   if (!CONST_DOUBLE_P (x))
8468     return false;
8469
8470   if (GET_MODE (x) == VOIDmode)
8471     return false;
8472
8473   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8474
8475   /* We cannot represent infinities, NaNs or +/-zero.  We won't
8476      know if we have +zero until we analyse the mantissa, but we
8477      can reject the other invalid values.  */
8478   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8479       || REAL_VALUE_MINUS_ZERO (r))
8480     return false;
8481
8482   /* Extract exponent.  */
8483   r = real_value_abs (&r);
8484   exponent = REAL_EXP (&r);
8485
8486   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8487      highest (sign) bit, with a fixed binary point at bit point_pos.
8488      m1 holds the low part of the mantissa, m2 the high part.
8489      WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8490      bits for the mantissa, this can fail (low bits will be lost).  */
8491   real_ldexp (&m, &r, point_pos - exponent);
8492   wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
8493
8494   /* If the low part of the mantissa has bits set we cannot represent
8495      the value.  */
8496   if (w.elt (0) != 0)
8497     return false;
8498   /* We have rejected the lower HOST_WIDE_INT, so update our
8499      understanding of how many bits lie in the mantissa and
8500      look only at the high HOST_WIDE_INT.  */
8501   mantissa = w.elt (1);
8502   point_pos -= HOST_BITS_PER_WIDE_INT;
8503
8504   /* We can only represent values with a mantissa of the form 1.xxxx.  */
8505   mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8506   if ((mantissa & mask) != 0)
8507     return false;
8508
8509   /* Having filtered unrepresentable values, we may now remove all
8510      but the highest 5 bits.  */
8511   mantissa >>= point_pos - 5;
8512
8513   /* We cannot represent the value 0.0, so reject it.  This is handled
8514      elsewhere.  */
8515   if (mantissa == 0)
8516     return false;
8517
8518   /* Then, as bit 4 is always set, we can mask it off, leaving
8519      the mantissa in the range [0, 15].  */
8520   mantissa &= ~(1 << 4);
8521   gcc_assert (mantissa <= 15);
8522
8523   /* GCC internally does not use IEEE754-like encoding (where normalized
8524      significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
8525      Our mantissa values are shifted 4 places to the left relative to
8526      normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8527      by 5 places to correct for GCC's representation.  */
8528   exponent = 5 - exponent;
8529
8530   return (exponent >= 0 && exponent <= 7);
8531 }
8532
8533 char*
8534 aarch64_output_simd_mov_immediate (rtx const_vector,
8535                                    enum machine_mode mode,
8536                                    unsigned width)
8537 {
8538   bool is_valid;
8539   static char templ[40];
8540   const char *mnemonic;
8541   const char *shift_op;
8542   unsigned int lane_count = 0;
8543   char element_char;
8544
8545   struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8546
8547   /* This will return true to show const_vector is legal for use as either
8548      a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
8549      also update INFO to show how the immediate should be generated.  */
8550   is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8551   gcc_assert (is_valid);
8552
8553   element_char = sizetochar (info.element_width);
8554   lane_count = width / info.element_width;
8555
8556   mode = GET_MODE_INNER (mode);
8557   if (mode == SFmode || mode == DFmode)
8558     {
8559       gcc_assert (info.shift == 0 && ! info.mvn);
8560       if (aarch64_float_const_zero_rtx_p (info.value))
8561         info.value = GEN_INT (0);
8562       else
8563         {
8564 #define buf_size 20
8565           REAL_VALUE_TYPE r;
8566           REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8567           char float_buf[buf_size] = {'\0'};
8568           real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8569 #undef buf_size
8570
8571           if (lane_count == 1)
8572             snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8573           else
8574             snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8575                       lane_count, element_char, float_buf);
8576           return templ;
8577         }
8578     }
8579
8580   mnemonic = info.mvn ? "mvni" : "movi";
8581   shift_op = info.msl ? "msl" : "lsl";
8582
8583   if (lane_count == 1)
8584     snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8585               mnemonic, UINTVAL (info.value));
8586   else if (info.shift)
8587     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8588               ", %s %d", mnemonic, lane_count, element_char,
8589               UINTVAL (info.value), shift_op, info.shift);
8590   else
8591     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8592               mnemonic, lane_count, element_char, UINTVAL (info.value));
8593   return templ;
8594 }
8595
8596 char*
8597 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8598                                           enum machine_mode mode)
8599 {
8600   enum machine_mode vmode;
8601
8602   gcc_assert (!VECTOR_MODE_P (mode));
8603   vmode = aarch64_simd_container_mode (mode, 64);
8604   rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8605   return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8606 }
8607
8608 /* Split operands into moves from op[1] + op[2] into op[0].  */
8609
8610 void
8611 aarch64_split_combinev16qi (rtx operands[3])
8612 {
8613   unsigned int dest = REGNO (operands[0]);
8614   unsigned int src1 = REGNO (operands[1]);
8615   unsigned int src2 = REGNO (operands[2]);
8616   enum machine_mode halfmode = GET_MODE (operands[1]);
8617   unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8618   rtx destlo, desthi;
8619
8620   gcc_assert (halfmode == V16QImode);
8621
8622   if (src1 == dest && src2 == dest + halfregs)
8623     {
8624       /* No-op move.  Can't split to nothing; emit something.  */
8625       emit_note (NOTE_INSN_DELETED);
8626       return;
8627     }
8628
8629   /* Preserve register attributes for variable tracking.  */
8630   destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8631   desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8632                                GET_MODE_SIZE (halfmode));
8633
8634   /* Special case of reversed high/low parts.  */
8635   if (reg_overlap_mentioned_p (operands[2], destlo)
8636       && reg_overlap_mentioned_p (operands[1], desthi))
8637     {
8638       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8639       emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8640       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8641     }
8642   else if (!reg_overlap_mentioned_p (operands[2], destlo))
8643     {
8644       /* Try to avoid unnecessary moves if part of the result
8645          is in the right place already.  */
8646       if (src1 != dest)
8647         emit_move_insn (destlo, operands[1]);
8648       if (src2 != dest + halfregs)
8649         emit_move_insn (desthi, operands[2]);
8650     }
8651   else
8652     {
8653       if (src2 != dest + halfregs)
8654         emit_move_insn (desthi, operands[2]);
8655       if (src1 != dest)
8656         emit_move_insn (destlo, operands[1]);
8657     }
8658 }
8659
8660 /* vec_perm support.  */
8661
8662 #define MAX_VECT_LEN 16
8663
8664 struct expand_vec_perm_d
8665 {
8666   rtx target, op0, op1;
8667   unsigned char perm[MAX_VECT_LEN];
8668   enum machine_mode vmode;
8669   unsigned char nelt;
8670   bool one_vector_p;
8671   bool testing_p;
8672 };
8673
8674 /* Generate a variable permutation.  */
8675
8676 static void
8677 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8678 {
8679   enum machine_mode vmode = GET_MODE (target);
8680   bool one_vector_p = rtx_equal_p (op0, op1);
8681
8682   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8683   gcc_checking_assert (GET_MODE (op0) == vmode);
8684   gcc_checking_assert (GET_MODE (op1) == vmode);
8685   gcc_checking_assert (GET_MODE (sel) == vmode);
8686   gcc_checking_assert (TARGET_SIMD);
8687
8688   if (one_vector_p)
8689     {
8690       if (vmode == V8QImode)
8691         {
8692           /* Expand the argument to a V16QI mode by duplicating it.  */
8693           rtx pair = gen_reg_rtx (V16QImode);
8694           emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8695           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8696         }
8697       else
8698         {
8699           emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8700         }
8701     }
8702   else
8703     {
8704       rtx pair;
8705
8706       if (vmode == V8QImode)
8707         {
8708           pair = gen_reg_rtx (V16QImode);
8709           emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8710           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8711         }
8712       else
8713         {
8714           pair = gen_reg_rtx (OImode);
8715           emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8716           emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8717         }
8718     }
8719 }
8720
8721 void
8722 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8723 {
8724   enum machine_mode vmode = GET_MODE (target);
8725   unsigned int i, nelt = GET_MODE_NUNITS (vmode);
8726   bool one_vector_p = rtx_equal_p (op0, op1);
8727   rtx rmask[MAX_VECT_LEN], mask;
8728
8729   gcc_checking_assert (!BYTES_BIG_ENDIAN);
8730
8731   /* The TBL instruction does not use a modulo index, so we must take care
8732      of that ourselves.  */
8733   mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
8734   for (i = 0; i < nelt; ++i)
8735     rmask[i] = mask;
8736   mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
8737   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8738
8739   aarch64_expand_vec_perm_1 (target, op0, op1, sel);
8740 }
8741
8742 /* Recognize patterns suitable for the TRN instructions.  */
8743 static bool
8744 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8745 {
8746   unsigned int i, odd, mask, nelt = d->nelt;
8747   rtx out, in0, in1, x;
8748   rtx (*gen) (rtx, rtx, rtx);
8749   enum machine_mode vmode = d->vmode;
8750
8751   if (GET_MODE_UNIT_SIZE (vmode) > 8)
8752     return false;
8753
8754   /* Note that these are little-endian tests.
8755      We correct for big-endian later.  */
8756   if (d->perm[0] == 0)
8757     odd = 0;
8758   else if (d->perm[0] == 1)
8759     odd = 1;
8760   else
8761     return false;
8762   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8763
8764   for (i = 0; i < nelt; i += 2)
8765     {
8766       if (d->perm[i] != i + odd)
8767         return false;
8768       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8769         return false;
8770     }
8771
8772   /* Success!  */
8773   if (d->testing_p)
8774     return true;
8775
8776   in0 = d->op0;
8777   in1 = d->op1;
8778   if (BYTES_BIG_ENDIAN)
8779     {
8780       x = in0, in0 = in1, in1 = x;
8781       odd = !odd;
8782     }
8783   out = d->target;
8784
8785   if (odd)
8786     {
8787       switch (vmode)
8788         {
8789         case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8790         case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8791         case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8792         case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8793         case V4SImode: gen = gen_aarch64_trn2v4si; break;
8794         case V2SImode: gen = gen_aarch64_trn2v2si; break;
8795         case V2DImode: gen = gen_aarch64_trn2v2di; break;
8796         case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8797         case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8798         case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8799         default:
8800           return false;
8801         }
8802     }
8803   else
8804     {
8805       switch (vmode)
8806         {
8807         case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8808         case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8809         case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8810         case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8811         case V4SImode: gen = gen_aarch64_trn1v4si; break;
8812         case V2SImode: gen = gen_aarch64_trn1v2si; break;
8813         case V2DImode: gen = gen_aarch64_trn1v2di; break;
8814         case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8815         case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8816         case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8817         default:
8818           return false;
8819         }
8820     }
8821
8822   emit_insn (gen (out, in0, in1));
8823   return true;
8824 }
8825
8826 /* Recognize patterns suitable for the UZP instructions.  */
8827 static bool
8828 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8829 {
8830   unsigned int i, odd, mask, nelt = d->nelt;
8831   rtx out, in0, in1, x;
8832   rtx (*gen) (rtx, rtx, rtx);
8833   enum machine_mode vmode = d->vmode;
8834
8835   if (GET_MODE_UNIT_SIZE (vmode) > 8)
8836     return false;
8837
8838   /* Note that these are little-endian tests.
8839      We correct for big-endian later.  */
8840   if (d->perm[0] == 0)
8841     odd = 0;
8842   else if (d->perm[0] == 1)
8843     odd = 1;
8844   else
8845     return false;
8846   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8847
8848   for (i = 0; i < nelt; i++)
8849     {
8850       unsigned elt = (i * 2 + odd) & mask;
8851       if (d->perm[i] != elt)
8852         return false;
8853     }
8854
8855   /* Success!  */
8856   if (d->testing_p)
8857     return true;
8858
8859   in0 = d->op0;
8860   in1 = d->op1;
8861   if (BYTES_BIG_ENDIAN)
8862     {
8863       x = in0, in0 = in1, in1 = x;
8864       odd = !odd;
8865     }
8866   out = d->target;
8867
8868   if (odd)
8869     {
8870       switch (vmode)
8871         {
8872         case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
8873         case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
8874         case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
8875         case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
8876         case V4SImode: gen = gen_aarch64_uzp2v4si; break;
8877         case V2SImode: gen = gen_aarch64_uzp2v2si; break;
8878         case V2DImode: gen = gen_aarch64_uzp2v2di; break;
8879         case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
8880         case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
8881         case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
8882         default:
8883           return false;
8884         }
8885     }
8886   else
8887     {
8888       switch (vmode)
8889         {
8890         case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
8891         case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
8892         case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
8893         case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
8894         case V4SImode: gen = gen_aarch64_uzp1v4si; break;
8895         case V2SImode: gen = gen_aarch64_uzp1v2si; break;
8896         case V2DImode: gen = gen_aarch64_uzp1v2di; break;
8897         case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
8898         case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
8899         case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
8900         default:
8901           return false;
8902         }
8903     }
8904
8905   emit_insn (gen (out, in0, in1));
8906   return true;
8907 }
8908
8909 /* Recognize patterns suitable for the ZIP instructions.  */
8910 static bool
8911 aarch64_evpc_zip (struct expand_vec_perm_d *d)
8912 {
8913   unsigned int i, high, mask, nelt = d->nelt;
8914   rtx out, in0, in1, x;
8915   rtx (*gen) (rtx, rtx, rtx);
8916   enum machine_mode vmode = d->vmode;
8917
8918   if (GET_MODE_UNIT_SIZE (vmode) > 8)
8919     return false;
8920
8921   /* Note that these are little-endian tests.
8922      We correct for big-endian later.  */
8923   high = nelt / 2;
8924   if (d->perm[0] == high)
8925     /* Do Nothing.  */
8926     ;
8927   else if (d->perm[0] == 0)
8928     high = 0;
8929   else
8930     return false;
8931   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8932
8933   for (i = 0; i < nelt / 2; i++)
8934     {
8935       unsigned elt = (i + high) & mask;
8936       if (d->perm[i * 2] != elt)
8937         return false;
8938       elt = (elt + nelt) & mask;
8939       if (d->perm[i * 2 + 1] != elt)
8940         return false;
8941     }
8942
8943   /* Success!  */
8944   if (d->testing_p)
8945     return true;
8946
8947   in0 = d->op0;
8948   in1 = d->op1;
8949   if (BYTES_BIG_ENDIAN)
8950     {
8951       x = in0, in0 = in1, in1 = x;
8952       high = !high;
8953     }
8954   out = d->target;
8955
8956   if (high)
8957     {
8958       switch (vmode)
8959         {
8960         case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8961         case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8962         case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8963         case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8964         case V4SImode: gen = gen_aarch64_zip2v4si; break;
8965         case V2SImode: gen = gen_aarch64_zip2v2si; break;
8966         case V2DImode: gen = gen_aarch64_zip2v2di; break;
8967         case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8968         case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8969         case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8970         default:
8971           return false;
8972         }
8973     }
8974   else
8975     {
8976       switch (vmode)
8977         {
8978         case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8979         case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8980         case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8981         case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8982         case V4SImode: gen = gen_aarch64_zip1v4si; break;
8983         case V2SImode: gen = gen_aarch64_zip1v2si; break;
8984         case V2DImode: gen = gen_aarch64_zip1v2di; break;
8985         case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8986         case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8987         case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8988         default:
8989           return false;
8990         }
8991     }
8992
8993   emit_insn (gen (out, in0, in1));
8994   return true;
8995 }
8996
8997 /* Recognize patterns for the EXT insn.  */
8998
8999 static bool
9000 aarch64_evpc_ext (struct expand_vec_perm_d *d)
9001 {
9002   unsigned int i, nelt = d->nelt;
9003   rtx (*gen) (rtx, rtx, rtx, rtx);
9004   rtx offset;
9005
9006   unsigned int location = d->perm[0]; /* Always < nelt.  */
9007
9008   /* Check if the extracted indices are increasing by one.  */
9009   for (i = 1; i < nelt; i++)
9010     {
9011       unsigned int required = location + i;
9012       if (d->one_vector_p)
9013         {
9014           /* We'll pass the same vector in twice, so allow indices to wrap.  */
9015           required &= (nelt - 1);
9016         }
9017       if (d->perm[i] != required)
9018         return false;
9019     }
9020
9021   /* The mid-end handles masks that just return one of the input vectors.  */
9022   gcc_assert (location != 0);
9023
9024   switch (d->vmode)
9025     {
9026     case V16QImode: gen = gen_aarch64_extv16qi; break;
9027     case V8QImode: gen = gen_aarch64_extv8qi; break;
9028     case V4HImode: gen = gen_aarch64_extv4hi; break;
9029     case V8HImode: gen = gen_aarch64_extv8hi; break;
9030     case V2SImode: gen = gen_aarch64_extv2si; break;
9031     case V4SImode: gen = gen_aarch64_extv4si; break;
9032     case V2SFmode: gen = gen_aarch64_extv2sf; break;
9033     case V4SFmode: gen = gen_aarch64_extv4sf; break;
9034     case V2DImode: gen = gen_aarch64_extv2di; break;
9035     case V2DFmode: gen = gen_aarch64_extv2df; break;
9036     default:
9037       return false;
9038     }
9039
9040   /* Success! */
9041   if (d->testing_p)
9042     return true;
9043
9044   if (BYTES_BIG_ENDIAN)
9045     {
9046       /* After setup, we want the high elements of the first vector (stored
9047          at the LSB end of the register), and the low elements of the second
9048          vector (stored at the MSB end of the register). So swap.  */
9049       rtx temp = d->op0;
9050       d->op0 = d->op1;
9051       d->op1 = temp;
9052       /* location != 0 (above), so safe to assume (nelt - location) < nelt.  */
9053       location = nelt - location;
9054     }
9055
9056   offset = GEN_INT (location);
9057   emit_insn (gen (d->target, d->op0, d->op1, offset));
9058   return true;
9059 }
9060
9061 static bool
9062 aarch64_evpc_dup (struct expand_vec_perm_d *d)
9063 {
9064   rtx (*gen) (rtx, rtx, rtx);
9065   rtx out = d->target;
9066   rtx in0;
9067   enum machine_mode vmode = d->vmode;
9068   unsigned int i, elt, nelt = d->nelt;
9069   rtx lane;
9070
9071   /* TODO: This may not be big-endian safe.  */
9072   if (BYTES_BIG_ENDIAN)
9073     return false;
9074
9075   elt = d->perm[0];
9076   for (i = 1; i < nelt; i++)
9077     {
9078       if (elt != d->perm[i])
9079         return false;
9080     }
9081
9082   /* The generic preparation in aarch64_expand_vec_perm_const_1
9083      swaps the operand order and the permute indices if it finds
9084      d->perm[0] to be in the second operand.  Thus, we can always
9085      use d->op0 and need not do any extra arithmetic to get the
9086      correct lane number.  */
9087   in0 = d->op0;
9088   lane = GEN_INT (elt);
9089
9090   switch (vmode)
9091     {
9092     case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9093     case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9094     case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9095     case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9096     case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9097     case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9098     case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9099     case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9100     case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9101     case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9102     default:
9103       return false;
9104     }
9105
9106   emit_insn (gen (out, in0, lane));
9107   return true;
9108 }
9109
9110 static bool
9111 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9112 {
9113   rtx rperm[MAX_VECT_LEN], sel;
9114   enum machine_mode vmode = d->vmode;
9115   unsigned int i, nelt = d->nelt;
9116
9117   if (d->testing_p)
9118     return true;
9119
9120   /* Generic code will try constant permutation twice.  Once with the
9121      original mode and again with the elements lowered to QImode.
9122      So wait and don't do the selector expansion ourselves.  */
9123   if (vmode != V8QImode && vmode != V16QImode)
9124     return false;
9125
9126   for (i = 0; i < nelt; ++i)
9127     {
9128       int nunits = GET_MODE_NUNITS (vmode);
9129
9130       /* If big-endian and two vectors we end up with a weird mixed-endian
9131          mode on NEON.  Reverse the index within each word but not the word
9132          itself.  */
9133       rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9134                                            : d->perm[i]);
9135     }
9136   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9137   sel = force_reg (vmode, sel);
9138
9139   aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9140   return true;
9141 }
9142
9143 static bool
9144 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9145 {
9146   /* The pattern matching functions above are written to look for a small
9147      number to begin the sequence (0, 1, N/2).  If we begin with an index
9148      from the second operand, we can swap the operands.  */
9149   if (d->perm[0] >= d->nelt)
9150     {
9151       unsigned i, nelt = d->nelt;
9152       rtx x;
9153
9154       gcc_assert (nelt == (nelt & -nelt));
9155       for (i = 0; i < nelt; ++i)
9156         d->perm[i] ^= nelt; /* Keep the same index, but in the other vector.  */
9157
9158       x = d->op0;
9159       d->op0 = d->op1;
9160       d->op1 = x;
9161     }
9162
9163   if (TARGET_SIMD)
9164     {
9165       if (aarch64_evpc_ext (d))
9166         return true;
9167       else if (aarch64_evpc_zip (d))
9168         return true;
9169       else if (aarch64_evpc_uzp (d))
9170         return true;
9171       else if (aarch64_evpc_trn (d))
9172         return true;
9173       else if (aarch64_evpc_dup (d))
9174         return true;
9175       return aarch64_evpc_tbl (d);
9176     }
9177   return false;
9178 }
9179
9180 /* Expand a vec_perm_const pattern.  */
9181
9182 bool
9183 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9184 {
9185   struct expand_vec_perm_d d;
9186   int i, nelt, which;
9187
9188   d.target = target;
9189   d.op0 = op0;
9190   d.op1 = op1;
9191
9192   d.vmode = GET_MODE (target);
9193   gcc_assert (VECTOR_MODE_P (d.vmode));
9194   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9195   d.testing_p = false;
9196
9197   for (i = which = 0; i < nelt; ++i)
9198     {
9199       rtx e = XVECEXP (sel, 0, i);
9200       int ei = INTVAL (e) & (2 * nelt - 1);
9201       which |= (ei < nelt ? 1 : 2);
9202       d.perm[i] = ei;
9203     }
9204
9205   switch (which)
9206     {
9207     default:
9208       gcc_unreachable ();
9209
9210     case 3:
9211       d.one_vector_p = false;
9212       if (!rtx_equal_p (op0, op1))
9213         break;
9214
9215       /* The elements of PERM do not suggest that only the first operand
9216          is used, but both operands are identical.  Allow easier matching
9217          of the permutation by folding the permutation into the single
9218          input vector.  */
9219       /* Fall Through.  */
9220     case 2:
9221       for (i = 0; i < nelt; ++i)
9222         d.perm[i] &= nelt - 1;
9223       d.op0 = op1;
9224       d.one_vector_p = true;
9225       break;
9226
9227     case 1:
9228       d.op1 = op0;
9229       d.one_vector_p = true;
9230       break;
9231     }
9232
9233   return aarch64_expand_vec_perm_const_1 (&d);
9234 }
9235
9236 static bool
9237 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
9238                                      const unsigned char *sel)
9239 {
9240   struct expand_vec_perm_d d;
9241   unsigned int i, nelt, which;
9242   bool ret;
9243
9244   d.vmode = vmode;
9245   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9246   d.testing_p = true;
9247   memcpy (d.perm, sel, nelt);
9248
9249   /* Calculate whether all elements are in one vector.  */
9250   for (i = which = 0; i < nelt; ++i)
9251     {
9252       unsigned char e = d.perm[i];
9253       gcc_assert (e < 2 * nelt);
9254       which |= (e < nelt ? 1 : 2);
9255     }
9256
9257   /* If all elements are from the second vector, reindex as if from the
9258      first vector.  */
9259   if (which == 2)
9260     for (i = 0; i < nelt; ++i)
9261       d.perm[i] -= nelt;
9262
9263   /* Check whether the mask can be applied to a single vector.  */
9264   d.one_vector_p = (which != 3);
9265
9266   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
9267   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
9268   if (!d.one_vector_p)
9269     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
9270
9271   start_sequence ();
9272   ret = aarch64_expand_vec_perm_const_1 (&d);
9273   end_sequence ();
9274
9275   return ret;
9276 }
9277
9278 /* Implement target hook CANNOT_CHANGE_MODE_CLASS.  */
9279 bool
9280 aarch64_cannot_change_mode_class (enum machine_mode from,
9281                                   enum machine_mode to,
9282                                   enum reg_class rclass)
9283 {
9284   /* Full-reg subregs are allowed on general regs or any class if they are
9285      the same size.  */
9286   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9287       || !reg_classes_intersect_p (FP_REGS, rclass))
9288     return false;
9289
9290   /* Limited combinations of subregs are safe on FPREGs.  Particularly,
9291      1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9292      2. Scalar to Scalar for integer modes or same size float modes.
9293      3. Vector to Vector modes.
9294      4. On little-endian only, Vector-Structure to Vector modes.  */
9295   if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
9296     {
9297       if (aarch64_vector_mode_supported_p (from)
9298           && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
9299         return false;
9300
9301       if (GET_MODE_NUNITS (from) == 1
9302           && GET_MODE_NUNITS (to) == 1
9303           && (GET_MODE_CLASS (from) == MODE_INT
9304               || from == to))
9305         return false;
9306
9307       if (aarch64_vector_mode_supported_p (from)
9308           && aarch64_vector_mode_supported_p (to))
9309         return false;
9310
9311       /* Within an vector structure straddling multiple vector registers
9312          we are in a mixed-endian representation.  As such, we can't
9313          easily change modes for BYTES_BIG_ENDIAN.  Otherwise, we can
9314          switch between vectors and vector structures cheaply.  */
9315       if (!BYTES_BIG_ENDIAN)
9316         if ((aarch64_vector_mode_supported_p (from)
9317               && aarch64_vect_struct_mode_p (to))
9318             || (aarch64_vector_mode_supported_p (to)
9319               && aarch64_vect_struct_mode_p (from)))
9320           return false;
9321     }
9322
9323   return true;
9324 }
9325
9326 /* Implement MODES_TIEABLE_P.  */
9327
9328 bool
9329 aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9330 {
9331   if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
9332     return true;
9333
9334   /* We specifically want to allow elements of "structure" modes to
9335      be tieable to the structure.  This more general condition allows
9336      other rarer situations too.  */
9337   if (TARGET_SIMD
9338       && aarch64_vector_mode_p (mode1)
9339       && aarch64_vector_mode_p (mode2))
9340     return true;
9341
9342   return false;
9343 }
9344
9345 #undef TARGET_ADDRESS_COST
9346 #define TARGET_ADDRESS_COST aarch64_address_cost
9347
9348 /* This hook will determines whether unnamed bitfields affect the alignment
9349    of the containing structure.  The hook returns true if the structure
9350    should inherit the alignment requirements of an unnamed bitfield's
9351    type.  */
9352 #undef TARGET_ALIGN_ANON_BITFIELD
9353 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
9354
9355 #undef TARGET_ASM_ALIGNED_DI_OP
9356 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
9357
9358 #undef TARGET_ASM_ALIGNED_HI_OP
9359 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
9360
9361 #undef TARGET_ASM_ALIGNED_SI_OP
9362 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
9363
9364 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9365 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
9366   hook_bool_const_tree_hwi_hwi_const_tree_true
9367
9368 #undef TARGET_ASM_FILE_START
9369 #define TARGET_ASM_FILE_START aarch64_start_file
9370
9371 #undef TARGET_ASM_OUTPUT_MI_THUNK
9372 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
9373
9374 #undef TARGET_ASM_SELECT_RTX_SECTION
9375 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
9376
9377 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
9378 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
9379
9380 #undef TARGET_BUILD_BUILTIN_VA_LIST
9381 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
9382
9383 #undef TARGET_CALLEE_COPIES
9384 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
9385
9386 #undef TARGET_CAN_ELIMINATE
9387 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
9388
9389 #undef TARGET_CANNOT_FORCE_CONST_MEM
9390 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
9391
9392 #undef TARGET_CONDITIONAL_REGISTER_USAGE
9393 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
9394
9395 /* Only the least significant bit is used for initialization guard
9396    variables.  */
9397 #undef TARGET_CXX_GUARD_MASK_BIT
9398 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
9399
9400 #undef TARGET_C_MODE_FOR_SUFFIX
9401 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
9402
9403 #ifdef TARGET_BIG_ENDIAN_DEFAULT
9404 #undef  TARGET_DEFAULT_TARGET_FLAGS
9405 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
9406 #endif
9407
9408 #undef TARGET_CLASS_MAX_NREGS
9409 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9410
9411 #undef TARGET_BUILTIN_DECL
9412 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
9413
9414 #undef  TARGET_EXPAND_BUILTIN
9415 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9416
9417 #undef TARGET_EXPAND_BUILTIN_VA_START
9418 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9419
9420 #undef TARGET_FOLD_BUILTIN
9421 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9422
9423 #undef TARGET_FUNCTION_ARG
9424 #define TARGET_FUNCTION_ARG aarch64_function_arg
9425
9426 #undef TARGET_FUNCTION_ARG_ADVANCE
9427 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9428
9429 #undef TARGET_FUNCTION_ARG_BOUNDARY
9430 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9431
9432 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9433 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9434
9435 #undef TARGET_FUNCTION_VALUE
9436 #define TARGET_FUNCTION_VALUE aarch64_function_value
9437
9438 #undef TARGET_FUNCTION_VALUE_REGNO_P
9439 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9440
9441 #undef TARGET_FRAME_POINTER_REQUIRED
9442 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9443
9444 #undef TARGET_GIMPLE_FOLD_BUILTIN
9445 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9446
9447 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9448 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9449
9450 #undef  TARGET_INIT_BUILTINS
9451 #define TARGET_INIT_BUILTINS  aarch64_init_builtins
9452
9453 #undef TARGET_LEGITIMATE_ADDRESS_P
9454 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9455
9456 #undef TARGET_LEGITIMATE_CONSTANT_P
9457 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9458
9459 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9460 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9461
9462 #undef TARGET_LRA_P
9463 #define TARGET_LRA_P aarch64_lra_p
9464
9465 #undef TARGET_MANGLE_TYPE
9466 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9467
9468 #undef TARGET_MEMORY_MOVE_COST
9469 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9470
9471 #undef TARGET_MUST_PASS_IN_STACK
9472 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9473
9474 /* This target hook should return true if accesses to volatile bitfields
9475    should use the narrowest mode possible.  It should return false if these
9476    accesses should use the bitfield container type.  */
9477 #undef TARGET_NARROW_VOLATILE_BITFIELD
9478 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9479
9480 #undef  TARGET_OPTION_OVERRIDE
9481 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9482
9483 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9484 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9485   aarch64_override_options_after_change
9486
9487 #undef TARGET_PASS_BY_REFERENCE
9488 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9489
9490 #undef TARGET_PREFERRED_RELOAD_CLASS
9491 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9492
9493 #undef TARGET_SECONDARY_RELOAD
9494 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9495
9496 #undef TARGET_SHIFT_TRUNCATION_MASK
9497 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9498
9499 #undef TARGET_SETUP_INCOMING_VARARGS
9500 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9501
9502 #undef TARGET_STRUCT_VALUE_RTX
9503 #define TARGET_STRUCT_VALUE_RTX   aarch64_struct_value_rtx
9504
9505 #undef TARGET_REGISTER_MOVE_COST
9506 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9507
9508 #undef TARGET_RETURN_IN_MEMORY
9509 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9510
9511 #undef TARGET_RETURN_IN_MSB
9512 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9513
9514 #undef TARGET_RTX_COSTS
9515 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
9516
9517 #undef TARGET_SCHED_ISSUE_RATE
9518 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9519
9520 #undef TARGET_TRAMPOLINE_INIT
9521 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9522
9523 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9524 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9525
9526 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9527 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9528
9529 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9530 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9531
9532 #undef TARGET_VECTORIZE_ADD_STMT_COST
9533 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9534
9535 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9536 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9537   aarch64_builtin_vectorization_cost
9538
9539 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9540 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9541
9542 #undef TARGET_VECTORIZE_BUILTINS
9543 #define TARGET_VECTORIZE_BUILTINS
9544
9545 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9546 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9547   aarch64_builtin_vectorized_function
9548
9549 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9550 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9551   aarch64_autovectorize_vector_sizes
9552
9553 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
9554 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
9555   aarch64_atomic_assign_expand_fenv
9556
9557 /* Section anchor support.  */
9558
9559 #undef TARGET_MIN_ANCHOR_OFFSET
9560 #define TARGET_MIN_ANCHOR_OFFSET -256
9561
9562 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9563    byte offset; we can do much more for larger data types, but have no way
9564    to determine the size of the access.  We assume accesses are aligned.  */
9565 #undef TARGET_MAX_ANCHOR_OFFSET
9566 #define TARGET_MAX_ANCHOR_OFFSET 4095
9567
9568 #undef TARGET_VECTOR_ALIGNMENT
9569 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9570
9571 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9572 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9573   aarch64_simd_vector_alignment_reachable
9574
9575 /* vec_perm support.  */
9576
9577 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9578 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9579   aarch64_vectorize_vec_perm_const_ok
9580
9581
9582 #undef TARGET_FIXED_CONDITION_CODE_REGS
9583 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9584
9585 #undef TARGET_FLAGS_REGNUM
9586 #define TARGET_FLAGS_REGNUM CC_REGNUM
9587
9588 struct gcc_target targetm = TARGET_INITIALIZER;
9589
9590 #include "gt-aarch64.h"