gcc/config/aarch64/aarch64.c
[AArch64] Optimize prologue when there is no frame pointer.
bdb7bf8a 1/* Machine description for AArch64 architecture.
23a5b65a 2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "insn-codes.h"
26#include "rtl.h"
27#include "insn-attr.h"
28#include "tree.h"
29#include "stringpool.h"
30#include "stor-layout.h"
31#include "calls.h"
32#include "varasm.h"
33#include "regs.h"
34#include "df.h"
35#include "hard-reg-set.h"
36#include "output.h"
37#include "expr.h"
38#include "reload.h"
39#include "toplev.h"
40#include "target.h"
41#include "target-def.h"
42#include "targhooks.h"
43#include "ggc.h"
44#include "function.h"
45#include "tm_p.h"
46#include "recog.h"
47#include "langhooks.h"
48#include "diagnostic-core.h"
49#include "pointer-set.h"
50#include "hash-table.h"
51#include "vec.h"
52#include "basic-block.h"
53#include "tree-ssa-alias.h"
54#include "internal-fn.h"
55#include "gimple-fold.h"
56#include "tree-eh.h"
57#include "gimple-expr.h"
58#include "is-a.h"
18f429e2 59#include "gimple.h"
45b0be94 60#include "gimplify.h"
61#include "optabs.h"
62#include "dwarf2.h"
63#include "cfgloop.h"
64#include "tree-vectorizer.h"
73250c4c 65#include "config/arm/aarch-cost-tables.h"
0ee859b5 66#include "dumpfile.h"
9b2b7279 67#include "builtins.h"
43e9d192 68
69/* Defined for convenience. */
70#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
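/* For example, POINTER_BYTES is 8 under the default LP64 ABI and 4 under
   ILP32, where pointers are 32 bits wide.  */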
71
72/* Classifies an address.
73
74 ADDRESS_REG_IMM
75 A simple base register plus immediate offset.
76
77 ADDRESS_REG_WB
78 A base register indexed by immediate offset with writeback.
79
80 ADDRESS_REG_REG
81 A base register indexed by (optionally scaled) register.
82
83 ADDRESS_REG_UXTW
84 A base register indexed by (optionally scaled) zero-extended register.
85
86 ADDRESS_REG_SXTW
87 A base register indexed by (optionally scaled) sign-extended register.
88
89 ADDRESS_LO_SUM
90 A LO_SUM rtx with a base register and "LO12" symbol relocation.
91
92 ADDRESS_SYMBOLIC:
93 A constant symbolic address, in pc-relative literal pool. */
94
95enum aarch64_address_type {
96 ADDRESS_REG_IMM,
97 ADDRESS_REG_WB,
98 ADDRESS_REG_REG,
99 ADDRESS_REG_UXTW,
100 ADDRESS_REG_SXTW,
101 ADDRESS_LO_SUM,
102 ADDRESS_SYMBOLIC
103};
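/* Illustrative AArch64 operands for the classes above:
     ADDRESS_REG_IMM    [x0, #16]
     ADDRESS_REG_WB     [x0, #16]! or [x0], #16
     ADDRESS_REG_REG    [x0, x1, lsl #3]
     ADDRESS_REG_UXTW   [x0, w1, uxtw #2]
     ADDRESS_REG_SXTW   [x0, w1, sxtw #2]
     ADDRESS_LO_SUM     [x0, #:lo12:foo]
     ADDRESS_SYMBOLIC   a pc-relative literal-pool reference (ldr x0, =foo).  */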
104
105struct aarch64_address_info {
106 enum aarch64_address_type type;
107 rtx base;
108 rtx offset;
109 int shift;
110 enum aarch64_symbol_type symbol_type;
111};
112
113struct simd_immediate_info
114{
115 rtx value;
116 int shift;
117 int element_width;
48063b9d 118 bool mvn;
e4f0f84d 119 bool msl;
120};
121
122/* The current code model. */
123enum aarch64_code_model aarch64_cmodel;
124
125#ifdef HAVE_AS_TLS
126#undef TARGET_HAVE_TLS
127#define TARGET_HAVE_TLS 1
128#endif
129
38e8f663 130static bool aarch64_lra_p (void);
131static bool aarch64_composite_type_p (const_tree, enum machine_mode);
132static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
133 const_tree,
134 enum machine_mode *, int *,
135 bool *);
136static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
137static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
43e9d192 138static void aarch64_override_options_after_change (void);
139static bool aarch64_vector_mode_supported_p (enum machine_mode);
140static unsigned bit_count (unsigned HOST_WIDE_INT);
141static bool aarch64_const_vec_all_same_int_p (rtx,
142 HOST_WIDE_INT, HOST_WIDE_INT);
143
144static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
145 const unsigned char *sel);
2961177e 146static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
88b08073 147
43e9d192 148/* The processor for which instructions should be scheduled. */
02fdbd5b 149enum aarch64_processor aarch64_tune = cortexa53;
150
151/* The current tuning set. */
152const struct tune_params *aarch64_tune_params;
153
154/* Mask to specify which instructions we are allowed to generate. */
155unsigned long aarch64_isa_flags = 0;
156
157/* Mask to specify which instruction scheduling options should be used. */
158unsigned long aarch64_tune_flags = 0;
159
160/* Tuning parameters. */
161
162#if HAVE_DESIGNATED_INITIALIZERS
163#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
164#else
165#define NAMED_PARAM(NAME, VAL) (VAL)
166#endif
167
168#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
169__extension__
170#endif
171
172#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
173__extension__
174#endif
175static const struct cpu_addrcost_table generic_addrcost_table =
176{
177#if HAVE_DESIGNATED_INITIALIZERS
178 .addr_scale_costs =
179#endif
180 {
181 NAMED_PARAM (qi, 0),
182 NAMED_PARAM (hi, 0),
183 NAMED_PARAM (si, 0),
184 NAMED_PARAM (ti, 0),
185 },
186 NAMED_PARAM (pre_modify, 0),
187 NAMED_PARAM (post_modify, 0),
188 NAMED_PARAM (register_offset, 0),
189 NAMED_PARAM (register_extend, 0),
190 NAMED_PARAM (imm_offset, 0)
191};
192
193#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
194__extension__
195#endif
196static const struct cpu_addrcost_table cortexa57_addrcost_table =
197{
198#if HAVE_DESIGNATED_INITIALIZERS
199 .addr_scale_costs =
200#endif
201 {
202 NAMED_PARAM (qi, 0),
203 NAMED_PARAM (hi, 1),
204 NAMED_PARAM (si, 0),
205 NAMED_PARAM (ti, 1),
206 },
207 NAMED_PARAM (pre_modify, 0),
208 NAMED_PARAM (post_modify, 0),
209 NAMED_PARAM (register_offset, 0),
210 NAMED_PARAM (register_extend, 0),
211 NAMED_PARAM (imm_offset, 0),
212};
213
214#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
215__extension__
216#endif
217static const struct cpu_regmove_cost generic_regmove_cost =
218{
219 NAMED_PARAM (GP2GP, 1),
220 NAMED_PARAM (GP2FP, 2),
221 NAMED_PARAM (FP2GP, 2),
222 /* We currently do not provide direct support for TFmode Q->Q move.
223 Therefore we need to raise the cost above 2 in order to have
224 reload handle the situation. */
225 NAMED_PARAM (FP2FP, 4)
226};
227
228/* Generic costs for vector insn classes. */
229#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
230__extension__
231#endif
232static const struct cpu_vector_cost generic_vector_cost =
233{
234 NAMED_PARAM (scalar_stmt_cost, 1),
235 NAMED_PARAM (scalar_load_cost, 1),
236 NAMED_PARAM (scalar_store_cost, 1),
237 NAMED_PARAM (vec_stmt_cost, 1),
238 NAMED_PARAM (vec_to_scalar_cost, 1),
239 NAMED_PARAM (scalar_to_vec_cost, 1),
240 NAMED_PARAM (vec_align_load_cost, 1),
241 NAMED_PARAM (vec_unalign_load_cost, 1),
242 NAMED_PARAM (vec_unalign_store_cost, 1),
243 NAMED_PARAM (vec_store_cost, 1),
244 NAMED_PARAM (cond_taken_branch_cost, 3),
245 NAMED_PARAM (cond_not_taken_branch_cost, 1)
246};
247
248/* Generic costs for vector insn classes. */
249#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
250__extension__
251#endif
252static const struct cpu_vector_cost cortexa57_vector_cost =
253{
254 NAMED_PARAM (scalar_stmt_cost, 1),
255 NAMED_PARAM (scalar_load_cost, 4),
256 NAMED_PARAM (scalar_store_cost, 1),
257 NAMED_PARAM (vec_stmt_cost, 3),
258 NAMED_PARAM (vec_to_scalar_cost, 8),
259 NAMED_PARAM (scalar_to_vec_cost, 8),
260 NAMED_PARAM (vec_align_load_cost, 5),
261 NAMED_PARAM (vec_unalign_load_cost, 5),
262 NAMED_PARAM (vec_unalign_store_cost, 1),
263 NAMED_PARAM (vec_store_cost, 1),
264 NAMED_PARAM (cond_taken_branch_cost, 1),
265 NAMED_PARAM (cond_not_taken_branch_cost, 1)
266};
267
268#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
269__extension__
270#endif
271static const struct tune_params generic_tunings =
272{
4e2cd668 273 &cortexa57_extra_costs,
274 &generic_addrcost_table,
275 &generic_regmove_cost,
8990e73a 276 &generic_vector_cost,
277 NAMED_PARAM (memmov_cost, 4),
278 NAMED_PARAM (issue_rate, 2)
279};
280
281static const struct tune_params cortexa53_tunings =
282{
283 &cortexa53_extra_costs,
284 &generic_addrcost_table,
285 &generic_regmove_cost,
286 &generic_vector_cost,
287 NAMED_PARAM (memmov_cost, 4),
288 NAMED_PARAM (issue_rate, 2)
289};
290
291static const struct tune_params cortexa57_tunings =
292{
293 &cortexa57_extra_costs,
60bff090 294 &cortexa57_addrcost_table,
4fd92af6 295 &generic_regmove_cost,
60bff090 296 &cortexa57_vector_cost,
297 NAMED_PARAM (memmov_cost, 4),
298 NAMED_PARAM (issue_rate, 3)
299};
300
301/* A processor implementing AArch64. */
302struct processor
303{
304 const char *const name;
305 enum aarch64_processor core;
306 const char *arch;
307 const unsigned long flags;
308 const struct tune_params *const tune;
309};
310
311/* Processor cores implementing AArch64. */
312static const struct processor all_cores[] =
313{
192ed1dd 314#define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
315 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
316#include "aarch64-cores.def"
317#undef AARCH64_CORE
02fdbd5b 318 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
319 {NULL, aarch64_none, NULL, 0, NULL}
320};
321
322/* Architectures implementing AArch64. */
323static const struct processor all_architectures[] =
324{
325#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
326 {NAME, CORE, #ARCH, FLAGS, NULL},
327#include "aarch64-arches.def"
328#undef AARCH64_ARCH
329 {NULL, aarch64_none, NULL, 0, NULL}
330};
331
 332/* Target specification. These are populated as command-line arguments
333 are processed, or NULL if not specified. */
334static const struct processor *selected_arch;
335static const struct processor *selected_cpu;
336static const struct processor *selected_tune;
337
338#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
339
340/* An ISA extension in the co-processor and main instruction set space. */
341struct aarch64_option_extension
342{
343 const char *const name;
344 const unsigned long flags_on;
345 const unsigned long flags_off;
346};
347
348/* ISA extensions in AArch64. */
349static const struct aarch64_option_extension all_extensions[] =
350{
351#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
352 {NAME, FLAGS_ON, FLAGS_OFF},
353#include "aarch64-option-extensions.def"
354#undef AARCH64_OPT_EXTENSION
355 {NULL, 0, 0}
356};
357
358/* Used to track the size of an address when generating a pre/post
359 increment address. */
360static enum machine_mode aarch64_memory_reference_mode;
361
362/* Used to force GTY into this file. */
363static GTY(()) int gty_dummy;
364
365/* A table of valid AArch64 "bitmask immediate" values for
366 logical instructions. */
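/* For illustration: a bitmask immediate is a repetition of identical 2, 4,
   8, 16, 32 or 64-bit elements, each holding a single (possibly rotated)
   contiguous run of ones.  0x00ff00ff00ff00ff and 0x5555555555555555 are
   representable; an arbitrary value such as 0x1234 is not.  The table
   below holds the AARCH64_NUM_BITMASKS distinct 64-bit values of this
   form.  */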
367
368#define AARCH64_NUM_BITMASKS 5334
369static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
370
371typedef enum aarch64_cond_code
372{
373 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
374 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
375 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
376}
377aarch64_cc;
378
379#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
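/* The enumeration above pairs each condition with its inverse in adjacent
   values, so flipping the low bit inverts the condition: EQ <-> NE,
   CS <-> CC, GE <-> LT, GT <-> LE, and so on.  */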
380
381/* The condition codes of the processor, and the inverse function. */
382static const char * const aarch64_condition_codes[] =
383{
384 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
385 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
386};
387
388/* Provide a mapping from gcc register numbers to dwarf register numbers. */
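/* In the AArch64 DWARF numbering, x0-x30 map to 0-30, sp to 31 and
   v0-v31 to 64-95; for example, x5 maps to 5 and v3 to 67.  */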
389unsigned
390aarch64_dbx_register_number (unsigned regno)
391{
392 if (GP_REGNUM_P (regno))
393 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
394 else if (regno == SP_REGNUM)
395 return AARCH64_DWARF_SP;
396 else if (FP_REGNUM_P (regno))
397 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
398
399 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
400 equivalent DWARF register. */
401 return DWARF_FRAME_REGISTERS;
402}
403
404/* Return TRUE if MODE is any of the large INT modes. */
405static bool
406aarch64_vect_struct_mode_p (enum machine_mode mode)
407{
408 return mode == OImode || mode == CImode || mode == XImode;
409}
410
411/* Return TRUE if MODE is any of the vector modes. */
412static bool
413aarch64_vector_mode_p (enum machine_mode mode)
414{
415 return aarch64_vector_mode_supported_p (mode)
416 || aarch64_vect_struct_mode_p (mode);
417}
418
419/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
420static bool
421aarch64_array_mode_supported_p (enum machine_mode mode,
422 unsigned HOST_WIDE_INT nelems)
423{
424 if (TARGET_SIMD
425 && AARCH64_VALID_SIMD_QREG_MODE (mode)
426 && (nelems >= 2 && nelems <= 4))
427 return true;
428
429 return false;
430}
431
432/* Implement HARD_REGNO_NREGS. */
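/* For example, a TImode value occupies two X registers (UNITS_PER_WORD is
   8 bytes) but only one 128-bit V register (UNITS_PER_VREG is 16 bytes),
   while an OImode vector-structure value needs two V registers.  */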
433
434int
435aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
436{
437 switch (aarch64_regno_regclass (regno))
438 {
439 case FP_REGS:
440 case FP_LO_REGS:
441 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
442 default:
443 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
444 }
445 gcc_unreachable ();
446}
447
448/* Implement HARD_REGNO_MODE_OK. */
449
450int
451aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
452{
453 if (GET_MODE_CLASS (mode) == MODE_CC)
454 return regno == CC_REGNUM;
455
456 if (regno == SP_REGNUM)
457 /* The purpose of comparing with ptr_mode is to support the
458 global register variable associated with the stack pointer
459 register via the syntax of asm ("wsp") in ILP32. */
460 return mode == Pmode || mode == ptr_mode;
461
462 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
463 return mode == Pmode;
464
465 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
466 return 1;
467
468 if (FP_REGNUM_P (regno))
469 {
470 if (aarch64_vect_struct_mode_p (mode))
471 return
472 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
473 else
474 return 1;
475 }
476
477 return 0;
478}
479
480/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
481enum machine_mode
482aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
483 enum machine_mode mode)
484{
485 /* Handle modes that fit within single registers. */
486 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
487 {
488 if (GET_MODE_SIZE (mode) >= 4)
489 return mode;
490 else
491 return SImode;
492 }
493 /* Fall back to generic for multi-reg and very large modes. */
494 else
495 return choose_hard_reg_mode (regno, nregs, false);
496}
497
498/* Return true if calls to DECL should be treated as
 499 long-calls (i.e. called via a register). */
500static bool
501aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
502{
503 return false;
504}
505
506/* Return true if calls to symbol-ref SYM should be treated as
 507 long-calls (i.e. called via a register). */
508bool
509aarch64_is_long_call_p (rtx sym)
510{
511 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
512}
513
514/* Return true if the offsets to a zero/sign-extract operation
515 represent an expression that matches an extend operation. The
 516 operands represent the parameters from
517
4745e701 518 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
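/* For illustration: in DImode, MULT_IMM == 4 together with EXTRACT_IMM ==
   34 satisfies the test below, because extracting the low 34 bits of
   (reg * 4) is equivalent to extending a 32-bit value and shifting it
   left by 2, as in the extended-register operand form "uxtw #2".  */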
519bool
520aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
521 rtx extract_imm)
522{
523 HOST_WIDE_INT mult_val, extract_val;
524
525 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
526 return false;
527
528 mult_val = INTVAL (mult_imm);
529 extract_val = INTVAL (extract_imm);
530
531 if (extract_val > 8
532 && extract_val < GET_MODE_BITSIZE (mode)
533 && exact_log2 (extract_val & ~7) > 0
534 && (extract_val & 7) <= 4
535 && mult_val == (1 << (extract_val & 7)))
536 return true;
537
538 return false;
539}
540
541/* Emit an insn that's a simple single-set. Both the operands must be
542 known to be valid. */
543inline static rtx
544emit_set_insn (rtx x, rtx y)
545{
546 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
547}
548
549/* X and Y are two things to compare using CODE. Emit the compare insn and
550 return the rtx for register 0 in the proper mode. */
551rtx
552aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
553{
554 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
555 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
556
557 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
558 return cc_reg;
559}
560
561/* Build the SYMBOL_REF for __tls_get_addr. */
562
563static GTY(()) rtx tls_get_addr_libfunc;
564
565rtx
566aarch64_tls_get_addr (void)
567{
568 if (!tls_get_addr_libfunc)
569 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
570 return tls_get_addr_libfunc;
571}
572
573/* Return the TLS model to use for ADDR. */
574
575static enum tls_model
576tls_symbolic_operand_type (rtx addr)
577{
578 enum tls_model tls_kind = TLS_MODEL_NONE;
579 rtx sym, addend;
580
581 if (GET_CODE (addr) == CONST)
582 {
583 split_const (addr, &sym, &addend);
584 if (GET_CODE (sym) == SYMBOL_REF)
585 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
586 }
587 else if (GET_CODE (addr) == SYMBOL_REF)
588 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
589
590 return tls_kind;
591}
592
593/* We'll allow lo_sum's in addresses in our legitimate addresses
594 so that combine would take care of combining addresses where
595 necessary, but for generation purposes, we'll generate the address
596 as :
597 RTL Absolute
598 tmp = hi (symbol_ref); adrp x1, foo
599 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
600 nop
601
602 PIC TLS
603 adrp x1, :got:foo adrp tmp, :tlsgd:foo
604 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
605 bl __tls_get_addr
606 nop
607
608 Load TLS symbol, depending on TLS mechanism and TLS access model.
609
610 Global Dynamic - Traditional TLS:
611 adrp tmp, :tlsgd:imm
612 add dest, tmp, #:tlsgd_lo12:imm
613 bl __tls_get_addr
614
615 Global Dynamic - TLS Descriptors:
616 adrp dest, :tlsdesc:imm
617 ldr tmp, [dest, #:tlsdesc_lo12:imm]
618 add dest, dest, #:tlsdesc_lo12:imm
619 blr tmp
620 mrs tp, tpidr_el0
621 add dest, dest, tp
622
623 Initial Exec:
624 mrs tp, tpidr_el0
625 adrp tmp, :gottprel:imm
626 ldr dest, [tmp, #:gottprel_lo12:imm]
627 add dest, dest, tp
628
629 Local Exec:
630 mrs tp, tpidr_el0
631 add t0, tp, #:tprel_hi12:imm
632 add t0, #:tprel_lo12_nc:imm
633*/
634
635static void
636aarch64_load_symref_appropriately (rtx dest, rtx imm,
637 enum aarch64_symbol_type type)
638{
639 switch (type)
640 {
641 case SYMBOL_SMALL_ABSOLUTE:
642 {
28514dda 643 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 644 rtx tmp_reg = dest;
645 enum machine_mode mode = GET_MODE (dest);
646
647 gcc_assert (mode == Pmode || mode == ptr_mode);
648
43e9d192 649 if (can_create_pseudo_p ())
28514dda 650 tmp_reg = gen_reg_rtx (mode);
43e9d192 651
28514dda 652 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
653 emit_insn (gen_add_losym (dest, tmp_reg, imm));
654 return;
655 }
656
657 case SYMBOL_TINY_ABSOLUTE:
658 emit_insn (gen_rtx_SET (Pmode, dest, imm));
659 return;
660
661 case SYMBOL_SMALL_GOT:
662 {
663 /* In ILP32, the mode of dest can be either SImode or DImode,
664 while the got entry is always of SImode size. The mode of
665 dest depends on how dest is used: if dest is assigned to a
666 pointer (e.g. in the memory), it has SImode; it may have
 667 DImode if dest is dereferenced to access the memory.
668 This is why we have to handle three different ldr_got_small
669 patterns here (two patterns for ILP32). */
43e9d192 670 rtx tmp_reg = dest;
671 enum machine_mode mode = GET_MODE (dest);
672
43e9d192 673 if (can_create_pseudo_p ())
674 tmp_reg = gen_reg_rtx (mode);
675
676 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
677 if (mode == ptr_mode)
678 {
679 if (mode == DImode)
680 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
681 else
682 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
683 }
684 else
685 {
686 gcc_assert (mode == Pmode);
687 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
688 }
689
690 return;
691 }
692
693 case SYMBOL_SMALL_TLSGD:
694 {
695 rtx insns;
696 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
697
698 start_sequence ();
78607708 699 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
700 insns = get_insns ();
701 end_sequence ();
702
703 RTL_CONST_CALL_P (insns) = 1;
704 emit_libcall_block (insns, dest, result, imm);
705 return;
706 }
707
708 case SYMBOL_SMALL_TLSDESC:
709 {
710 enum machine_mode mode = GET_MODE (dest);
711 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
712 rtx tp;
713
714 gcc_assert (mode == Pmode || mode == ptr_mode);
715
716 /* In ILP32, the got entry is always of SImode size. Unlike
717 small GOT, the dest is fixed at reg 0. */
718 if (TARGET_ILP32)
719 emit_insn (gen_tlsdesc_small_si (imm));
720 else
721 emit_insn (gen_tlsdesc_small_di (imm));
43e9d192 722 tp = aarch64_load_tp (NULL);
723
724 if (mode != Pmode)
725 tp = gen_lowpart (mode, tp);
726
727 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
728 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
729 return;
730 }
731
732 case SYMBOL_SMALL_GOTTPREL:
733 {
734 /* In ILP32, the mode of dest can be either SImode or DImode,
735 while the got entry is always of SImode size. The mode of
736 dest depends on how dest is used: if dest is assigned to a
737 pointer (e.g. in the memory), it has SImode; it may have
 738 DImode if dest is dereferenced to access the memory.
739 This is why we have to handle three different tlsie_small
740 patterns here (two patterns for ILP32). */
741 enum machine_mode mode = GET_MODE (dest);
742 rtx tmp_reg = gen_reg_rtx (mode);
43e9d192 743 rtx tp = aarch64_load_tp (NULL);
744
745 if (mode == ptr_mode)
746 {
747 if (mode == DImode)
748 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
749 else
750 {
751 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
752 tp = gen_lowpart (mode, tp);
753 }
754 }
755 else
756 {
757 gcc_assert (mode == Pmode);
758 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
759 }
760
761 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
762 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
763 return;
764 }
765
766 case SYMBOL_SMALL_TPREL:
767 {
768 rtx tp = aarch64_load_tp (NULL);
769 emit_insn (gen_tlsle_small (dest, tp, imm));
770 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
771 return;
772 }
773
774 case SYMBOL_TINY_GOT:
775 emit_insn (gen_ldr_got_tiny (dest, imm));
776 return;
777
778 default:
779 gcc_unreachable ();
780 }
781}
782
783/* Emit a move from SRC to DEST. Assume that the move expanders can
784 handle all moves if !can_create_pseudo_p (). The distinction is
785 important because, unlike emit_move_insn, the move expanders know
786 how to force Pmode objects into the constant pool even when the
787 constant pool address is not itself legitimate. */
788static rtx
789aarch64_emit_move (rtx dest, rtx src)
790{
791 return (can_create_pseudo_p ()
792 ? emit_move_insn (dest, src)
793 : emit_move_insn_1 (dest, src));
794}
795
796/* Split a 128-bit move operation into two 64-bit move operations,
797 taking care to handle partial overlap of register to register
798 copies. Special cases are needed when moving between GP regs and
799 FP regs. SRC can be a register, constant or memory; DST a register
800 or memory. If either operand is memory it must not have any side
801 effects. */
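/* For example, a TImode register-to-register copy becomes two DImode
   moves of the low and high halves, ordered so that an overlapping
   destination register never clobbers a source half before it has been
   read.  */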
802void
803aarch64_split_128bit_move (rtx dst, rtx src)
804{
805 rtx dst_lo, dst_hi;
806 rtx src_lo, src_hi;
43e9d192 807
030d03b8 808 enum machine_mode mode = GET_MODE (dst);
12dc6974 809
810 gcc_assert (mode == TImode || mode == TFmode);
811 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
812 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
813
814 if (REG_P (dst) && REG_P (src))
815 {
816 int src_regno = REGNO (src);
817 int dst_regno = REGNO (dst);
43e9d192 818
030d03b8 819 /* Handle FP <-> GP regs. */
820 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
821 {
822 src_lo = gen_lowpart (word_mode, src);
823 src_hi = gen_highpart (word_mode, src);
824
825 if (mode == TImode)
826 {
827 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
828 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
829 }
830 else
831 {
832 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
833 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
834 }
835 return;
836 }
837 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
838 {
839 dst_lo = gen_lowpart (word_mode, dst);
840 dst_hi = gen_highpart (word_mode, dst);
841
842 if (mode == TImode)
843 {
844 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
845 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
846 }
847 else
848 {
849 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
850 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
851 }
852 return;
43e9d192 853 }
854 }
855
856 dst_lo = gen_lowpart (word_mode, dst);
857 dst_hi = gen_highpart (word_mode, dst);
858 src_lo = gen_lowpart (word_mode, src);
859 src_hi = gen_highpart_mode (word_mode, mode, src);
860
861 /* At most one pairing may overlap. */
862 if (reg_overlap_mentioned_p (dst_lo, src_hi))
863 {
864 aarch64_emit_move (dst_hi, src_hi);
865 aarch64_emit_move (dst_lo, src_lo);
866 }
867 else
868 {
869 aarch64_emit_move (dst_lo, src_lo);
870 aarch64_emit_move (dst_hi, src_hi);
871 }
872}
873
874bool
875aarch64_split_128bit_move_p (rtx dst, rtx src)
876{
877 return (! REG_P (src)
878 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
879}
880
881/* Split a complex SIMD combine. */
882
883void
884aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
885{
886 enum machine_mode src_mode = GET_MODE (src1);
887 enum machine_mode dst_mode = GET_MODE (dst);
888
889 gcc_assert (VECTOR_MODE_P (dst_mode));
890
891 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
892 {
893 rtx (*gen) (rtx, rtx, rtx);
894
895 switch (src_mode)
896 {
897 case V8QImode:
898 gen = gen_aarch64_simd_combinev8qi;
899 break;
900 case V4HImode:
901 gen = gen_aarch64_simd_combinev4hi;
902 break;
903 case V2SImode:
904 gen = gen_aarch64_simd_combinev2si;
905 break;
906 case V2SFmode:
907 gen = gen_aarch64_simd_combinev2sf;
908 break;
909 case DImode:
910 gen = gen_aarch64_simd_combinedi;
911 break;
912 case DFmode:
913 gen = gen_aarch64_simd_combinedf;
914 break;
915 default:
916 gcc_unreachable ();
917 }
918
919 emit_insn (gen (dst, src1, src2));
920 return;
921 }
922}
923
924/* Split a complex SIMD move. */
925
926void
927aarch64_split_simd_move (rtx dst, rtx src)
928{
929 enum machine_mode src_mode = GET_MODE (src);
930 enum machine_mode dst_mode = GET_MODE (dst);
931
932 gcc_assert (VECTOR_MODE_P (dst_mode));
933
934 if (REG_P (dst) && REG_P (src))
935 {
936 rtx (*gen) (rtx, rtx);
937
938 gcc_assert (VECTOR_MODE_P (src_mode));
939
940 switch (src_mode)
941 {
942 case V16QImode:
c59b7e28 943 gen = gen_aarch64_split_simd_movv16qi;
944 break;
945 case V8HImode:
c59b7e28 946 gen = gen_aarch64_split_simd_movv8hi;
947 break;
948 case V4SImode:
c59b7e28 949 gen = gen_aarch64_split_simd_movv4si;
950 break;
951 case V2DImode:
c59b7e28 952 gen = gen_aarch64_split_simd_movv2di;
953 break;
954 case V4SFmode:
c59b7e28 955 gen = gen_aarch64_split_simd_movv4sf;
956 break;
957 case V2DFmode:
c59b7e28 958 gen = gen_aarch64_split_simd_movv2df;
959 break;
960 default:
961 gcc_unreachable ();
962 }
963
964 emit_insn (gen (dst, src));
965 return;
966 }
967}
968
43e9d192 969static rtx
e18b4a81 970aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
971{
972 if (can_create_pseudo_p ())
e18b4a81 973 return force_reg (mode, value);
974 else
975 {
976 x = aarch64_emit_move (x, value);
977 return x;
978 }
979}
980
981
982static rtx
983aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
984{
9c023bf0 985 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
986 {
987 rtx high;
988 /* Load the full offset into a register. This
989 might be improvable in the future. */
990 high = GEN_INT (offset);
991 offset = 0;
992 high = aarch64_force_temporary (mode, temp, high);
993 reg = aarch64_force_temporary (mode, temp,
994 gen_rtx_PLUS (mode, high, reg));
995 }
996 return plus_constant (mode, reg, offset);
997}
998
999void
1000aarch64_expand_mov_immediate (rtx dest, rtx imm)
1001{
1002 enum machine_mode mode = GET_MODE (dest);
1003 unsigned HOST_WIDE_INT mask;
1004 int i;
1005 bool first;
1006 unsigned HOST_WIDE_INT val;
1007 bool subtargets;
1008 rtx subtarget;
1009 int one_match, zero_match;
1010
1011 gcc_assert (mode == SImode || mode == DImode);
1012
1013 /* Check on what type of symbol it is. */
1014 if (GET_CODE (imm) == SYMBOL_REF
1015 || GET_CODE (imm) == LABEL_REF
1016 || GET_CODE (imm) == CONST)
1017 {
1018 rtx mem, base, offset;
1019 enum aarch64_symbol_type sty;
1020
1021 /* If we have (const (plus symbol offset)), separate out the offset
1022 before we start classifying the symbol. */
1023 split_const (imm, &base, &offset);
1024
1025 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1026 switch (sty)
1027 {
1028 case SYMBOL_FORCE_TO_MEM:
1029 if (offset != const0_rtx
1030 && targetm.cannot_force_const_mem (mode, imm))
1031 {
aef66c94 1032 gcc_assert (can_create_pseudo_p ());
e18b4a81 1033 base = aarch64_force_temporary (mode, dest, base);
1034 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1035 aarch64_emit_move (dest, base);
1036 return;
1037 }
28514dda 1038 mem = force_const_mem (ptr_mode, imm);
43e9d192 1039 gcc_assert (mem);
1040 if (mode != ptr_mode)
1041 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1042 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1043 return;
1044
1045 case SYMBOL_SMALL_TLSGD:
1046 case SYMBOL_SMALL_TLSDESC:
1047 case SYMBOL_SMALL_GOTTPREL:
1048 case SYMBOL_SMALL_GOT:
87dd8ab0 1049 case SYMBOL_TINY_GOT:
1050 if (offset != const0_rtx)
1051 {
1052 gcc_assert(can_create_pseudo_p ());
e18b4a81 1053 base = aarch64_force_temporary (mode, dest, base);
1054 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1055 aarch64_emit_move (dest, base);
1056 return;
1057 }
1058 /* FALLTHRU */
1059
1060 case SYMBOL_SMALL_TPREL:
1061 case SYMBOL_SMALL_ABSOLUTE:
a5350ddc 1062 case SYMBOL_TINY_ABSOLUTE:
1063 aarch64_load_symref_appropriately (dest, imm, sty);
1064 return;
1065
1066 default:
1067 gcc_unreachable ();
1068 }
1069 }
1070
1071 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1072 {
1073 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1074 return;
1075 }
1076
1077 if (!CONST_INT_P (imm))
1078 {
1079 if (GET_CODE (imm) == HIGH)
1080 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1081 else
1082 {
1083 rtx mem = force_const_mem (mode, imm);
1084 gcc_assert (mem);
1085 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1086 }
1087
1088 return;
1089 }
1090
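  /* Illustrative SImode case: 0x12345678 is built with a MOVZ of the low
     halfword followed by a MOVK of the high one, i.e.
	mov	w0, 0x5678
	movk	w0, 0x1234, lsl 16.  */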
1091 if (mode == SImode)
1092 {
1093 /* We know we can't do this in 1 insn, and we must be able to do it
1094 in two; so don't mess around looking for sequences that don't buy
1095 us anything. */
1096 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1097 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1098 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1099 return;
1100 }
1101
1102 /* Remaining cases are all for DImode. */
1103
1104 val = INTVAL (imm);
1105 subtargets = optimize && can_create_pseudo_p ();
1106
1107 one_match = 0;
1108 zero_match = 0;
1109 mask = 0xffff;
1110
1111 for (i = 0; i < 64; i += 16, mask <<= 16)
1112 {
1113 if ((val & mask) == 0)
1114 zero_match++;
1115 else if ((val & mask) == mask)
1116 one_match++;
1117 }
1118
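  /* When exactly two halfwords are 0xffff the constant can be built with
     a MOVN followed by a single MOVK; e.g. 0xffff1234ffff5678 is formed
     by materialising 0xffff1234ffffffff and then inserting 0x5678 into
     the low halfword.  */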
1119 if (one_match == 2)
1120 {
1121 mask = 0xffff;
1122 for (i = 0; i < 64; i += 16, mask <<= 16)
1123 {
1124 if ((val & mask) != mask)
1125 {
1126 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1127 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1128 GEN_INT ((val >> i) & 0xffff)));
1129 return;
1130 }
1131 }
1132 gcc_unreachable ();
1133 }
1134
1135 if (zero_match == 2)
1136 goto simple_sequence;
1137
1138 mask = 0x0ffff0000UL;
1139 for (i = 16; i < 64; i += 16, mask <<= 16)
1140 {
1141 HOST_WIDE_INT comp = mask & ~(mask - 1);
1142
1143 if (aarch64_uimm12_shift (val - (val & mask)))
1144 {
1145 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1146
1147 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1148 emit_insn (gen_adddi3 (dest, subtarget,
1149 GEN_INT (val - (val & mask))));
1150 return;
1151 }
1152 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1153 {
1154 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1155
1156 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1157 GEN_INT ((val + comp) & mask)));
1158 emit_insn (gen_adddi3 (dest, subtarget,
1159 GEN_INT (val - ((val + comp) & mask))));
1160 return;
1161 }
1162 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1163 {
1164 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1165
1166 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1167 GEN_INT ((val - comp) | ~mask)));
1168 emit_insn (gen_adddi3 (dest, subtarget,
1169 GEN_INT (val - ((val - comp) | ~mask))));
1170 return;
1171 }
1172 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1173 {
1174 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1175
1176 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1177 GEN_INT (val | ~mask)));
1178 emit_insn (gen_adddi3 (dest, subtarget,
1179 GEN_INT (val - (val | ~mask))));
1180 return;
1181 }
1182 }
1183
1184 /* See if we can do it by arithmetically combining two
1185 immediates. */
1186 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1187 {
1188 int j;
1189 mask = 0xffff;
1190
1191 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1192 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1193 {
1194 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1195 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1196 GEN_INT (aarch64_bitmasks[i])));
1197 emit_insn (gen_adddi3 (dest, subtarget,
1198 GEN_INT (val - aarch64_bitmasks[i])));
1199 return;
1200 }
1201
1202 for (j = 0; j < 64; j += 16, mask <<= 16)
1203 {
1204 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1205 {
1206 emit_insn (gen_rtx_SET (VOIDmode, dest,
1207 GEN_INT (aarch64_bitmasks[i])));
1208 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1209 GEN_INT ((val >> j) & 0xffff)));
1210 return;
1211 }
1212 }
1213 }
1214
1215 /* See if we can do it by logically combining two immediates. */
1216 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1217 {
1218 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1219 {
1220 int j;
1221
1222 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1223 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1224 {
1225 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1226 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1227 GEN_INT (aarch64_bitmasks[i])));
1228 emit_insn (gen_iordi3 (dest, subtarget,
1229 GEN_INT (aarch64_bitmasks[j])));
1230 return;
1231 }
1232 }
1233 else if ((val & aarch64_bitmasks[i]) == val)
1234 {
1235 int j;
1236
1237 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1238 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1239 {
1240
1241 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1242 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1243 GEN_INT (aarch64_bitmasks[j])));
1244 emit_insn (gen_anddi3 (dest, subtarget,
1245 GEN_INT (aarch64_bitmasks[i])));
1246 return;
1247 }
1248 }
1249 }
1250
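  /* Fall back to a MOVZ of the first non-zero halfword followed by a MOVK
     for each remaining non-zero halfword; e.g. 0x0000123400005678 becomes
     "mov x0, 0x5678" then "movk x0, 0x1234, lsl 32".  */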
1251 simple_sequence:
1252 first = true;
1253 mask = 0xffff;
1254 for (i = 0; i < 64; i += 16, mask <<= 16)
1255 {
1256 if ((val & mask) != 0)
1257 {
1258 if (first)
1259 {
1260 emit_insn (gen_rtx_SET (VOIDmode, dest,
1261 GEN_INT (val & mask)));
1262 first = false;
1263 }
1264 else
1265 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1266 GEN_INT ((val >> i) & 0xffff)));
1267 }
1268 }
1269}
1270
1271static bool
1272aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1273 tree exp ATTRIBUTE_UNUSED)
43e9d192 1274{
fee9ba42 1275 /* Currently, always true. */
1276 return true;
1277}
1278
1279/* Implement TARGET_PASS_BY_REFERENCE. */
1280
1281static bool
1282aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1283 enum machine_mode mode,
1284 const_tree type,
1285 bool named ATTRIBUTE_UNUSED)
1286{
1287 HOST_WIDE_INT size;
1288 enum machine_mode dummymode;
1289 int nregs;
1290
1291 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1292 size = (mode == BLKmode && type)
1293 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1294
1295 /* Aggregates are passed by reference based on their size. */
1296 if (type && AGGREGATE_TYPE_P (type))
43e9d192 1297 {
aadc1c43 1298 size = int_size_in_bytes (type);
1299 }
1300
1301 /* Variable sized arguments are always returned by reference. */
1302 if (size < 0)
1303 return true;
1304
1305 /* Can this be a candidate to be passed in fp/simd register(s)? */
1306 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1307 &dummymode, &nregs,
1308 NULL))
1309 return false;
1310
1311 /* Arguments which are variable sized or larger than 2 registers are
1312 passed by reference unless they are a homogenous floating point
1313 aggregate. */
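  /* For illustration: a plain 24-byte structure is passed by reference,
     whereas a structure of four doubles is a homogeneous floating-point
     aggregate and is passed in SIMD/FP registers even though it is
     32 bytes.  */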
1314 return size > 2 * UNITS_PER_WORD;
1315}
1316
1317/* Return TRUE if VALTYPE is padded to its least significant bits. */
1318static bool
1319aarch64_return_in_msb (const_tree valtype)
1320{
1321 enum machine_mode dummy_mode;
1322 int dummy_int;
1323
1324 /* Never happens in little-endian mode. */
1325 if (!BYTES_BIG_ENDIAN)
1326 return false;
1327
1328 /* Only composite types smaller than or equal to 16 bytes can
1329 be potentially returned in registers. */
1330 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1331 || int_size_in_bytes (valtype) <= 0
1332 || int_size_in_bytes (valtype) > 16)
1333 return false;
1334
1335 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1336 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1337 is always passed/returned in the least significant bits of fp/simd
1338 register(s). */
1339 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1340 &dummy_mode, &dummy_int, NULL))
1341 return false;
1342
1343 return true;
1344}
1345
1346/* Implement TARGET_FUNCTION_VALUE.
1347 Define how to find the value returned by a function. */
1348
1349static rtx
1350aarch64_function_value (const_tree type, const_tree func,
1351 bool outgoing ATTRIBUTE_UNUSED)
1352{
1353 enum machine_mode mode;
1354 int unsignedp;
1355 int count;
1356 enum machine_mode ag_mode;
1357
1358 mode = TYPE_MODE (type);
1359 if (INTEGRAL_TYPE_P (type))
1360 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1361
1362 if (aarch64_return_in_msb (type))
1363 {
1364 HOST_WIDE_INT size = int_size_in_bytes (type);
1365
1366 if (size % UNITS_PER_WORD != 0)
1367 {
1368 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1369 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1370 }
1371 }
1372
1373 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1374 &ag_mode, &count, NULL))
1375 {
1376 if (!aarch64_composite_type_p (type, mode))
1377 {
1378 gcc_assert (count == 1 && mode == ag_mode);
1379 return gen_rtx_REG (mode, V0_REGNUM);
1380 }
1381 else
1382 {
1383 int i;
1384 rtx par;
1385
1386 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1387 for (i = 0; i < count; i++)
1388 {
1389 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1390 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1391 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1392 XVECEXP (par, 0, i) = tmp;
1393 }
1394 return par;
1395 }
1396 }
1397 else
1398 return gen_rtx_REG (mode, R0_REGNUM);
1399}
1400
1401/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1402 Return true if REGNO is the number of a hard register in which the values
1403 of called function may come back. */
1404
1405static bool
1406aarch64_function_value_regno_p (const unsigned int regno)
1407{
1408 /* Maximum of 16 bytes can be returned in the general registers. Examples
1409 of 16-byte return values are: 128-bit integers and 16-byte small
1410 structures (excluding homogeneous floating-point aggregates). */
1411 if (regno == R0_REGNUM || regno == R1_REGNUM)
1412 return true;
1413
1414 /* Up to four fp/simd registers can return a function value, e.g. a
1415 homogeneous floating-point aggregate having four members. */
1416 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1417 return !TARGET_GENERAL_REGS_ONLY;
1418
1419 return false;
1420}
1421
1422/* Implement TARGET_RETURN_IN_MEMORY.
1423
1424 If the type T of the result of a function is such that
1425 void func (T arg)
1426 would require that arg be passed as a value in a register (or set of
1427 registers) according to the parameter passing rules, then the result
1428 is returned in the same registers as would be used for such an
1429 argument. */
1430
1431static bool
1432aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1433{
1434 HOST_WIDE_INT size;
1435 enum machine_mode ag_mode;
1436 int count;
1437
1438 if (!AGGREGATE_TYPE_P (type)
1439 && TREE_CODE (type) != COMPLEX_TYPE
1440 && TREE_CODE (type) != VECTOR_TYPE)
1441 /* Simple scalar types always returned in registers. */
1442 return false;
1443
1444 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1445 type,
1446 &ag_mode,
1447 &count,
1448 NULL))
1449 return false;
1450
1451 /* Types larger than 2 registers returned in memory. */
1452 size = int_size_in_bytes (type);
1453 return (size < 0 || size > 2 * UNITS_PER_WORD);
1454}
1455
1456static bool
1457aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1458 const_tree type, int *nregs)
1459{
1460 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1461 return aarch64_vfp_is_call_or_return_candidate (mode,
1462 type,
1463 &pcum->aapcs_vfp_rmode,
1464 nregs,
1465 NULL);
1466}
1467
1468/* Given MODE and TYPE of a function argument, return the alignment in
1469 bits. The idea is to suppress any stronger alignment requested by
1470 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1471 This is a helper function for local use only. */
1472
1473static unsigned int
1474aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1475{
1476 unsigned int alignment;
1477
1478 if (type)
1479 {
1480 if (!integer_zerop (TYPE_SIZE (type)))
1481 {
1482 if (TYPE_MODE (type) == mode)
1483 alignment = TYPE_ALIGN (type);
1484 else
1485 alignment = GET_MODE_ALIGNMENT (mode);
1486 }
1487 else
1488 alignment = 0;
1489 }
1490 else
1491 alignment = GET_MODE_ALIGNMENT (mode);
1492
1493 return alignment;
1494}
1495
1496/* Layout a function argument according to the AAPCS64 rules. The rule
1497 numbers refer to the rule numbers in the AAPCS64. */
1498
1499static void
1500aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1501 const_tree type,
1502 bool named ATTRIBUTE_UNUSED)
1503{
1504 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1505 int ncrn, nvrn, nregs;
1506 bool allocate_ncrn, allocate_nvrn;
3abf17cf 1507 HOST_WIDE_INT size;
1508
1509 /* We need to do this once per argument. */
1510 if (pcum->aapcs_arg_processed)
1511 return;
1512
1513 pcum->aapcs_arg_processed = true;
1514
1515 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1516 size
1517 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1518 UNITS_PER_WORD);
1519
1520 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1521 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1522 mode,
1523 type,
1524 &nregs);
1525
1526 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1527 The following code thus handles passing by SIMD/FP registers first. */
1528
1529 nvrn = pcum->aapcs_nvrn;
1530
1531 /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1532 and homogenous short-vector aggregates (HVA). */
1533 if (allocate_nvrn)
1534 {
1535 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1536 {
1537 pcum->aapcs_nextnvrn = nvrn + nregs;
1538 if (!aarch64_composite_type_p (type, mode))
1539 {
1540 gcc_assert (nregs == 1);
1541 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1542 }
1543 else
1544 {
1545 rtx par;
1546 int i;
1547 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1548 for (i = 0; i < nregs; i++)
1549 {
1550 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1551 V0_REGNUM + nvrn + i);
1552 tmp = gen_rtx_EXPR_LIST
1553 (VOIDmode, tmp,
1554 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1555 XVECEXP (par, 0, i) = tmp;
1556 }
1557 pcum->aapcs_reg = par;
1558 }
1559 return;
1560 }
1561 else
1562 {
1563 /* C.3 NSRN is set to 8. */
1564 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1565 goto on_stack;
1566 }
1567 }
1568
1569 ncrn = pcum->aapcs_ncrn;
3abf17cf 1570 nregs = size / UNITS_PER_WORD;
1571
 1572 /* C6 - C9, though the sign and zero extension semantics are
1573 handled elsewhere. This is the case where the argument fits
1574 entirely general registers. */
1575 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1576 {
1577 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1578
1579 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1580
1581 /* C.8 if the argument has an alignment of 16 then the NGRN is
1582 rounded up to the next even number. */
1583 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1584 {
1585 ++ncrn;
1586 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1587 }
1588 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1589 A reg is still generated for it, but the caller should be smart
1590 enough not to use it. */
1591 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1592 {
1593 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1594 }
1595 else
1596 {
1597 rtx par;
1598 int i;
1599
1600 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1601 for (i = 0; i < nregs; i++)
1602 {
1603 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1604 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1605 GEN_INT (i * UNITS_PER_WORD));
1606 XVECEXP (par, 0, i) = tmp;
1607 }
1608 pcum->aapcs_reg = par;
1609 }
1610
1611 pcum->aapcs_nextncrn = ncrn + nregs;
1612 return;
1613 }
1614
1615 /* C.11 */
1616 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1617
1618 /* The argument is passed on stack; record the needed number of words for
3abf17cf 1619 this argument and align the total size if necessary. */
43e9d192 1620on_stack:
3abf17cf 1621 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1622 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1623 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
3abf17cf 1624 16 / UNITS_PER_WORD);
1625 return;
1626}
1627
1628/* Implement TARGET_FUNCTION_ARG. */
1629
1630static rtx
1631aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1632 const_tree type, bool named)
1633{
1634 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1635 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1636
1637 if (mode == VOIDmode)
1638 return NULL_RTX;
1639
1640 aarch64_layout_arg (pcum_v, mode, type, named);
1641 return pcum->aapcs_reg;
1642}
1643
1644void
1645aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1646 const_tree fntype ATTRIBUTE_UNUSED,
1647 rtx libname ATTRIBUTE_UNUSED,
1648 const_tree fndecl ATTRIBUTE_UNUSED,
1649 unsigned n_named ATTRIBUTE_UNUSED)
1650{
1651 pcum->aapcs_ncrn = 0;
1652 pcum->aapcs_nvrn = 0;
1653 pcum->aapcs_nextncrn = 0;
1654 pcum->aapcs_nextnvrn = 0;
1655 pcum->pcs_variant = ARM_PCS_AAPCS64;
1656 pcum->aapcs_reg = NULL_RTX;
1657 pcum->aapcs_arg_processed = false;
1658 pcum->aapcs_stack_words = 0;
1659 pcum->aapcs_stack_size = 0;
1660
1661 return;
1662}
1663
1664static void
1665aarch64_function_arg_advance (cumulative_args_t pcum_v,
1666 enum machine_mode mode,
1667 const_tree type,
1668 bool named)
1669{
1670 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1671 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1672 {
1673 aarch64_layout_arg (pcum_v, mode, type, named);
1674 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1675 != (pcum->aapcs_stack_words != 0));
1676 pcum->aapcs_arg_processed = false;
1677 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1678 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1679 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1680 pcum->aapcs_stack_words = 0;
1681 pcum->aapcs_reg = NULL_RTX;
1682 }
1683}
1684
1685bool
1686aarch64_function_arg_regno_p (unsigned regno)
1687{
1688 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1689 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1690}
1691
1692/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1693 PARM_BOUNDARY bits of alignment, but will be given anything up
1694 to STACK_BOUNDARY bits if the type requires it. This makes sure
1695 that both before and after the layout of each argument, the Next
1696 Stacked Argument Address (NSAA) will have a minimum alignment of
1697 8 bytes. */
1698
1699static unsigned int
1700aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1701{
1702 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1703
1704 if (alignment < PARM_BOUNDARY)
1705 alignment = PARM_BOUNDARY;
1706 if (alignment > STACK_BOUNDARY)
1707 alignment = STACK_BOUNDARY;
1708 return alignment;
1709}
1710
1711/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1712
1713 Return true if an argument passed on the stack should be padded upwards,
1714 i.e. if the least-significant byte of the stack slot has useful data.
1715
1716 Small aggregate types are placed in the lowest memory address.
1717
1718 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1719
1720bool
1721aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1722{
1723 /* On little-endian targets, the least significant byte of every stack
1724 argument is passed at the lowest byte address of the stack slot. */
1725 if (!BYTES_BIG_ENDIAN)
1726 return true;
1727
00edcfbe 1728 /* Otherwise, integral, floating-point and pointer types are padded downward:
1729 the least significant byte of a stack argument is passed at the highest
1730 byte address of the stack slot. */
1731 if (type
1732 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1733 || POINTER_TYPE_P (type))
1734 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1735 return false;
1736
1737 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1738 return true;
1739}
1740
1741/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1742
1743 It specifies padding for the last (may also be the only)
 1744 element of a block move between registers and memory. If the
 1745 block is assumed to be in memory, padding upward means that
 1746 the last element is padded after its most significant byte,
 1747 while with downward padding the last element is padded on
 1748 its least significant byte side.
1749
1750 Small aggregates and small complex types are always padded
1751 upwards.
1752
1753 We don't need to worry about homogeneous floating-point or
1754 short-vector aggregates; their move is not affected by the
1755 padding direction determined here. Regardless of endianness,
1756 each element of such an aggregate is put in the least
1757 significant bits of a fp/simd register.
1758
1759 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1760 register has useful data, and return the opposite if the most
1761 significant byte does. */
1762
1763bool
1764aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1765 bool first ATTRIBUTE_UNUSED)
1766{
1767
1768 /* Small composite types are always padded upward. */
1769 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1770 {
1771 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1772 : GET_MODE_SIZE (mode));
1773 if (size < 2 * UNITS_PER_WORD)
1774 return true;
1775 }
1776
1777 /* Otherwise, use the default padding. */
1778 return !BYTES_BIG_ENDIAN;
1779}
1780
1781static enum machine_mode
1782aarch64_libgcc_cmp_return_mode (void)
1783{
1784 return SImode;
1785}
1786
1787static bool
1788aarch64_frame_pointer_required (void)
1789{
1790 /* If the function contains dynamic stack allocations, we need to
1791 use the frame pointer to access the static parts of the frame. */
1792 if (cfun->calls_alloca)
1793 return true;
1794
1795 /* In aarch64_override_options_after_change
1796 flag_omit_leaf_frame_pointer turns off the frame pointer by
1797 default. Turn it back on now if we've not got a leaf
1798 function. */
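  /* For example, with -fomit-leaf-frame-pointer a leaf function that does
     not clobber LR runs without a frame pointer, while any function that
     makes a call keeps one.  */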
1799 if (flag_omit_leaf_frame_pointer
1800 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1801 return true;
43e9d192 1802
0b7f8166 1803 return false;
1804}
1805
1806/* Mark the registers that need to be saved by the callee and calculate
1807 the size of the callee-saved registers area and frame record (both FP
1808 and LR may be omitted). */
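/* For example, a function that needs a frame pointer and also saves x19
   and x20 gets x29 at offset 0, x30 at 8, x19 at 16 and x20 at 24, i.e. a
   saved_regs_size of 32; hard_fp_offset then adds the saved varargs area
   and the local frame, rounded to a 16-byte boundary, and frame_size adds
   the outgoing argument area on top of that.  */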
1809static void
1810aarch64_layout_frame (void)
1811{
1812 HOST_WIDE_INT offset = 0;
1813 int regno;
1814
1815 if (reload_completed && cfun->machine->frame.laid_out)
1816 return;
1817
1818#define SLOT_NOT_REQUIRED (-2)
1819#define SLOT_REQUIRED (-1)
1820
1821 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
1822 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
1823
1824 /* First mark all the registers that really need to be saved... */
1825 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 1826 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
1827
1828 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 1829 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
1830
1831 /* ... that includes the eh data registers (if needed)... */
1832 if (crtl->calls_eh_return)
1833 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1834 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
1835 = SLOT_REQUIRED;
1836
1837 /* ... and any callee saved register that dataflow says is live. */
1838 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1839 if (df_regs_ever_live_p (regno)
1840 && !call_used_regs[regno])
97826595 1841 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1842
1843 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1844 if (df_regs_ever_live_p (regno)
1845 && !call_used_regs[regno])
97826595 1846 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1847
1848 if (frame_pointer_needed)
1849 {
2e1cdae5 1850 /* FP and LR are placed in the linkage record. */
43e9d192 1851 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 1852 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 1853 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 1854 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
43e9d192 1855 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
2e1cdae5 1856 offset += 2 * UNITS_PER_WORD;
1857 }
1858
1859 /* Now assign stack slots for them. */
2e1cdae5 1860 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 1861 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
1862 {
1863 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
1864 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1865 cfun->machine->frame.wb_candidate1 = regno;
1866 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
1867 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
1868 offset += UNITS_PER_WORD;
1869 }
1870
1871 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 1872 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
1873 {
1874 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
1875 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1876 cfun->machine->frame.wb_candidate1 = regno;
1877 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
1878 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
1879 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
1880 offset += UNITS_PER_WORD;
1881 }
1882
43e9d192
IB
1883 cfun->machine->frame.padding0 =
1884 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1885 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1886
1887 cfun->machine->frame.saved_regs_size = offset;
1c960e02
MS
1888
1889 cfun->machine->frame.hard_fp_offset
1890 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
1891 + get_frame_size ()
1892 + cfun->machine->frame.saved_regs_size,
1893 STACK_BOUNDARY / BITS_PER_UNIT);
1894
1895 cfun->machine->frame.frame_size
1896 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
1897 + crtl->outgoing_args_size,
1898 STACK_BOUNDARY / BITS_PER_UNIT);
1899
43e9d192
IB
1900 cfun->machine->frame.laid_out = true;
1901}
1902
1903/* Make the last instruction frame-related and note that it performs
1904 the operation described by FRAME_PATTERN. */
1905
1906static void
1907aarch64_set_frame_expr (rtx frame_pattern)
1908{
1909 rtx insn;
1910
1911 insn = get_last_insn ();
1912 RTX_FRAME_RELATED_P (insn) = 1;
1913 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1914 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1915 frame_pattern,
1916 REG_NOTES (insn));
1917}
1918
1919static bool
1920aarch64_register_saved_on_entry (int regno)
1921{
97826595 1922 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
1923}
1924
64dedd72
JW
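/* Return the next register in the range [REGNO, LIMIT] that needs to be
   saved on entry to the current function, or a value greater than LIMIT
   if there is no such register.  */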
1925static unsigned
1926aarch64_next_callee_save (unsigned regno, unsigned limit)
1927{
1928 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
1929 regno ++;
1930 return regno;
1931}
43e9d192 1932
c5e1f66e
JW
1933static void
1934aarch64_pushwb_single_reg (enum machine_mode mode, unsigned regno,
1935 HOST_WIDE_INT adjustment)
1936{
1937 rtx base_rtx = stack_pointer_rtx;
1938 rtx insn, reg, mem;
1939
1940 reg = gen_rtx_REG (mode, regno);
1941 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
1942 plus_constant (Pmode, base_rtx, -adjustment));
1943 mem = gen_rtx_MEM (mode, mem);
1944
1945 insn = emit_move_insn (mem, reg);
1946 RTX_FRAME_RELATED_P (insn) = 1;
1947}
1948
80c11907
JW
1949static rtx
1950aarch64_gen_storewb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
1951 HOST_WIDE_INT adjustment)
1952{
1953 switch (mode)
1954 {
1955 case DImode:
1956 return gen_storewb_pairdi_di (base, base, reg, reg2,
1957 GEN_INT (-adjustment),
1958 GEN_INT (UNITS_PER_WORD - adjustment));
1959 case DFmode:
1960 return gen_storewb_pairdf_di (base, base, reg, reg2,
1961 GEN_INT (-adjustment),
1962 GEN_INT (UNITS_PER_WORD - adjustment));
1963 default:
1964 gcc_unreachable ();
1965 }
1966}
1967
1968static void
1969aarch64_pushwb_pair_reg (enum machine_mode mode, unsigned regno1,
1970 unsigned regno2, HOST_WIDE_INT adjustment)
1971{
1972 rtx insn;
1973 rtx reg1 = gen_rtx_REG (mode, regno1);
1974 rtx reg2 = gen_rtx_REG (mode, regno2);
1975
1976 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
1977 reg2, adjustment));
1978 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1979
1980 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1981 RTX_FRAME_RELATED_P (insn) = 1;
1982}
1983
159313d9
JW
1984static rtx
1985aarch64_gen_loadwb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
1986 HOST_WIDE_INT adjustment)
1987{
1988 switch (mode)
1989 {
1990 case DImode:
1991 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
1992 GEN_INT (adjustment + UNITS_PER_WORD));
1993 case DFmode:
1994 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
1995 GEN_INT (adjustment + UNITS_PER_WORD));
1996 default:
1997 gcc_unreachable ();
1998 }
1999}
2000
2001static void
2002aarch64_popwb_pair_reg (enum machine_mode mode, unsigned regno1,
2003 unsigned regno2, HOST_WIDE_INT adjustment, rtx cfa)
2004{
2005 rtx insn;
2006 rtx reg1 = gen_rtx_REG (mode, regno1);
2007 rtx reg2 = gen_rtx_REG (mode, regno2);
2008
2009 insn = emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
2010 reg2, adjustment));
2011 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2012 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2013 RTX_FRAME_RELATED_P (insn) = 1;
2014
2015 if (cfa)
2016 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2017 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2018 plus_constant (Pmode, cfa, adjustment))));
2019
2020 add_reg_note (insn, REG_CFA_RESTORE, reg1);
2021 add_reg_note (insn, REG_CFA_RESTORE, reg2);
2022}
2023
72df5c1f
JW
2024static rtx
2025aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
2026 rtx reg2)
2027{
2028 switch (mode)
2029 {
2030 case DImode:
2031 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2032
2033 case DFmode:
2034 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2035
2036 default:
2037 gcc_unreachable ();
2038 }
2039}
2040
2041static rtx
2042aarch64_gen_load_pair (enum machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
2043 rtx mem2)
2044{
2045 switch (mode)
2046 {
2047 case DImode:
2048 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2049
2050 case DFmode:
2051 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2052
2053 default:
2054 gcc_unreachable ();
2055 }
2056}
2057
43e9d192 2058
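/* Emit code to save the callee-saved registers of mode MODE in the range
   [START, LIMIT], addressing each one at START_OFFSET plus its frame
   offset from the stack pointer.  Adjacent slots are stored as a pair
   where possible.  If SKIP_WB, skip the registers already handled by the
   write-back push in the prologue.  */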
43e9d192 2059static void
8ed2fc62 2060aarch64_save_callee_saves (enum machine_mode mode, HOST_WIDE_INT start_offset,
ae13fce3 2061 unsigned start, unsigned limit, bool skip_wb)
43e9d192
IB
2062{
2063 rtx insn;
a007a21c
JW
2064 rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
2065 ? gen_frame_mem : gen_rtx_MEM);
43e9d192
IB
2066 unsigned regno;
2067 unsigned regno2;
2068
0ec74a1e 2069 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
2070 regno <= limit;
2071 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 2072 {
ae13fce3
JW
2073 rtx reg, mem;
2074 HOST_WIDE_INT offset;
64dedd72 2075
ae13fce3
JW
2076 if (skip_wb
2077 && (regno == cfun->machine->frame.wb_candidate1
2078 || regno == cfun->machine->frame.wb_candidate2))
2079 continue;
2080
2081 reg = gen_rtx_REG (mode, regno);
2082 offset = start_offset + cfun->machine->frame.reg_offset[regno];
0ec74a1e
JW
2083 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2084 offset));
64dedd72
JW
2085
2086 regno2 = aarch64_next_callee_save (regno + 1, limit);
2087
2088 if (regno2 <= limit
2089 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2090 == cfun->machine->frame.reg_offset[regno2]))
2091
43e9d192 2092 {
0ec74a1e 2093 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
2094 rtx mem2;
2095
2096 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
8ed2fc62
JW
2097 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2098 offset));
2099 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2100 reg2));
0b4a9743 2101
64dedd72
JW
2102 /* The first part of a frame-related parallel insn is
2103 always assumed to be relevant to the frame
 2104 calculations; subsequent parts are only
2105 frame-related if explicitly marked. */
2106 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2107 regno = regno2;
2108 }
2109 else
8ed2fc62
JW
2110 insn = emit_move_insn (mem, reg);
2111
2112 RTX_FRAME_RELATED_P (insn) = 1;
2113 }
2114}
2115
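/* Emit code to restore the callee-saved registers of mode MODE in the
   range [START, LIMIT] from the slots laid out by aarch64_layout_frame,
   based at START_OFFSET from the stack pointer, loading pairs where
   possible and attaching the CFA restore notes.  If SKIP_WB, skip the
   registers handled by the write-back pop in the epilogue.  */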
2116static void
2117aarch64_restore_callee_saves (enum machine_mode mode,
2118 HOST_WIDE_INT start_offset, unsigned start,
ae13fce3 2119 unsigned limit, bool skip_wb)
8ed2fc62
JW
2120{
2121 rtx insn;
2122 rtx base_rtx = stack_pointer_rtx;
2123 rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
2124 ? gen_frame_mem : gen_rtx_MEM);
2125 unsigned regno;
2126 unsigned regno2;
2127 HOST_WIDE_INT offset;
2128
2129 for (regno = aarch64_next_callee_save (start, limit);
2130 regno <= limit;
2131 regno = aarch64_next_callee_save (regno + 1, limit))
2132 {
ae13fce3 2133 rtx reg, mem;
8ed2fc62 2134
ae13fce3
JW
2135 if (skip_wb
2136 && (regno == cfun->machine->frame.wb_candidate1
2137 || regno == cfun->machine->frame.wb_candidate2))
2138 continue;
2139
2140 reg = gen_rtx_REG (mode, regno);
8ed2fc62
JW
2141 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2142 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2143
2144 regno2 = aarch64_next_callee_save (regno + 1, limit);
2145
2146 if (regno2 <= limit
2147 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2148 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 2149 {
8ed2fc62
JW
2150 rtx reg2 = gen_rtx_REG (mode, regno2);
2151 rtx mem2;
2152
2153 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2154 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2155 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2,
2156 mem2));
2157 add_reg_note (insn, REG_CFA_RESTORE, reg);
2158 add_reg_note (insn, REG_CFA_RESTORE, reg2);
2159
2160 /* The first part of a frame-related parallel insn is
2161 always assumed to be relevant to the frame
 2162 calculations; subsequent parts are only
2163 frame-related if explicitly marked. */
2164 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2165 regno = regno2;
43e9d192 2166 }
8ed2fc62
JW
2167 else
2168 {
2169 insn = emit_move_insn (reg, mem);
2170 add_reg_note (insn, REG_CFA_RESTORE, reg);
2171 }
2172
64dedd72 2173 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192 2174 }
43e9d192
IB
2175}
2176
2177/* AArch64 stack frames generated by this compiler look like:
2178
2179 +-------------------------------+
2180 | |
2181 | incoming stack arguments |
2182 | |
34834420
MS
2183 +-------------------------------+
2184 | | <-- incoming stack pointer (aligned)
43e9d192
IB
2185 | callee-allocated save area |
2186 | for register varargs |
2187 | |
34834420
MS
2188 +-------------------------------+
2189 | local variables | <-- frame_pointer_rtx
43e9d192
IB
2190 | |
2191 +-------------------------------+
454fdba9
RL
2192 | padding0 | \
2193 +-------------------------------+ |
454fdba9 2194 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
2195 +-------------------------------+ |
2196 | LR' | |
2197 +-------------------------------+ |
34834420
MS
2198 | FP' | / <- hard_frame_pointer_rtx (aligned)
2199 +-------------------------------+
43e9d192
IB
2200 | dynamic allocation |
2201 +-------------------------------+
34834420
MS
2202 | padding |
2203 +-------------------------------+
2204 | outgoing stack arguments | <-- arg_pointer
2205 | |
2206 +-------------------------------+
2207 | | <-- stack_pointer_rtx (aligned)
43e9d192 2208
34834420
MS
2209 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2210 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2211 unchanged. */
43e9d192
IB
2212
2213/* Generate the prologue instructions for entry into a function.
2214 Establish the stack frame by decreasing the stack pointer with a
2215 properly calculated size and, if necessary, create a frame record
2216 filled with the values of LR and previous frame pointer. The
6991c977 2217 current FP is also set up if it is in use. */
43e9d192
IB
2218
2219void
2220aarch64_expand_prologue (void)
2221{
2222 /* sub sp, sp, #<frame_size>
2223 stp {fp, lr}, [sp, #<frame_size> - 16]
2224 add fp, sp, #<frame_size> - hardfp_offset
2225 stp {cs_reg}, [fp, #-16] etc.
2226
2227 sub sp, sp, <final_adjustment_if_any>
2228 */
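      /* As an illustrative sketch (assuming a function with 16 bytes of
	 locals that also saves x19/x20, with no varargs and no outgoing
	 argument area), frame_size is 48 and fp_offset is 0, so the
	 sequence above becomes roughly:

	    stp  x29, x30, [sp, #-48]!
	    mov  x29, sp
	    stp  x19, x20, [sp, #16]  */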
43e9d192 2229 HOST_WIDE_INT frame_size, offset;
1c960e02 2230 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
43e9d192
IB
2231 rtx insn;
2232
2233 aarch64_layout_frame ();
43e9d192
IB
2234
2235 if (flag_stack_usage_info)
1c960e02
MS
2236 current_function_static_stack_size = cfun->machine->frame.frame_size;
2237
2238 frame_size = cfun->machine->frame.frame_size;
2239 offset = cfun->machine->frame.frame_size;
43e9d192 2240
1c960e02
MS
2241 fp_offset = cfun->machine->frame.frame_size
2242 - cfun->machine->frame.hard_fp_offset;
43e9d192 2243
44c0e7b9 2244 /* Store pairs and load pairs have a range of only -512 to 504. */
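      /* (LDP/STP of X registers take a signed 7-bit immediate scaled by 8,
	 i.e. offsets -512 to 504 in steps of 8, hence the 512-byte
	 threshold tested below.)  */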
43e9d192
IB
2245 if (offset >= 512)
2246 {
2247 /* When the frame has a large size, an initial decrease is done on
2248 the stack pointer to jump over the callee-allocated save area for
2249 register varargs, the local variable area and/or the callee-saved
2250 register area. This will allow the pre-index write-back
2251 store pair instructions to be used for setting up the stack frame
2252 efficiently. */
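      /* For example, a residual frame_size of 0x12345 bytes is allocated
	 below as
	    sub  sp, sp, #0x12000
	    sub  sp, sp, #0x345
	 while residual sizes of 0x1000000 (16MB) or more are first loaded
	 into IP0 and added to SP in one step.  */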
1c960e02 2253 offset = cfun->machine->frame.hard_fp_offset;
43e9d192
IB
2254 if (offset >= 512)
2255 offset = cfun->machine->frame.saved_regs_size;
2256
2257 frame_size -= (offset + crtl->outgoing_args_size);
2258 fp_offset = 0;
2259
2260 if (frame_size >= 0x1000000)
2261 {
2262 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2263 emit_move_insn (op0, GEN_INT (-frame_size));
2264 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2265 aarch64_set_frame_expr (gen_rtx_SET
2266 (Pmode, stack_pointer_rtx,
f6fe771a
RL
2267 plus_constant (Pmode,
2268 stack_pointer_rtx,
2269 -frame_size)));
43e9d192
IB
2270 }
2271 else if (frame_size > 0)
2272 {
2273 if ((frame_size & 0xfff) != frame_size)
2274 {
2275 insn = emit_insn (gen_add2_insn
2276 (stack_pointer_rtx,
2277 GEN_INT (-(frame_size
2278 & ~(HOST_WIDE_INT)0xfff))));
2279 RTX_FRAME_RELATED_P (insn) = 1;
2280 }
2281 if ((frame_size & 0xfff) != 0)
2282 {
2283 insn = emit_insn (gen_add2_insn
2284 (stack_pointer_rtx,
2285 GEN_INT (-(frame_size
2286 & (HOST_WIDE_INT)0xfff))));
2287 RTX_FRAME_RELATED_P (insn) = 1;
2288 }
2289 }
2290 }
2291 else
2292 frame_size = -1;
2293
2294 if (offset > 0)
2295 {
ae13fce3
JW
2296 bool skip_wb = false;
2297
43e9d192
IB
2298 if (frame_pointer_needed)
2299 {
c5e1f66e
JW
2300 skip_wb = true;
2301
43e9d192
IB
2302 if (fp_offset)
2303 {
2304 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2305 GEN_INT (-offset)));
2306 RTX_FRAME_RELATED_P (insn) = 1;
2307 aarch64_set_frame_expr (gen_rtx_SET
2308 (Pmode, stack_pointer_rtx,
c5e1f66e 2309 gen_rtx_MINUS (Pmode, stack_pointer_rtx,
43e9d192 2310 GEN_INT (offset))));
80c11907
JW
2311
2312 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
c5e1f66e 2313 R30_REGNUM, false);
43e9d192
IB
2314 }
2315 else
80c11907 2316 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
43e9d192
IB
2317
2318 /* Set up frame pointer to point to the location of the
2319 previous frame pointer on the stack. */
2320 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2321 stack_pointer_rtx,
2322 GEN_INT (fp_offset)));
2323 aarch64_set_frame_expr (gen_rtx_SET
2324 (Pmode, hard_frame_pointer_rtx,
f6fe771a
RL
2325 plus_constant (Pmode,
2326 stack_pointer_rtx,
2327 fp_offset)));
43e9d192
IB
2328 RTX_FRAME_RELATED_P (insn) = 1;
2329 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2330 hard_frame_pointer_rtx));
2331 }
2332 else
2333 {
c5e1f66e
JW
2334 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2335 unsigned reg2 = cfun->machine->frame.wb_candidate2;
80c11907 2336
c5e1f66e
JW
2337 if (fp_offset
2338 || reg1 == FIRST_PSEUDO_REGISTER
2339 || (reg2 == FIRST_PSEUDO_REGISTER
2340 && offset >= 256))
2341 {
2342 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2343 GEN_INT (-offset)));
2344 RTX_FRAME_RELATED_P (insn) = 1;
2345 }
2346 else
2347 {
2348 enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2349
2350 skip_wb = true;
2351
2352 if (reg2 == FIRST_PSEUDO_REGISTER)
2353 aarch64_pushwb_single_reg (mode1, reg1, offset);
2354 else
2355 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2356 }
43e9d192
IB
2357 }
2358
c5e1f66e
JW
2359 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2360 skip_wb);
ae13fce3
JW
2361 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2362 skip_wb);
43e9d192
IB
2363 }
2364
 2365 /* When offset >= 512,
2366 sub sp, sp, #<outgoing_args_size> */
2367 if (frame_size > -1)
2368 {
2369 if (crtl->outgoing_args_size > 0)
2370 {
2371 insn = emit_insn (gen_add2_insn
2372 (stack_pointer_rtx,
2373 GEN_INT (- crtl->outgoing_args_size)));
2374 RTX_FRAME_RELATED_P (insn) = 1;
2375 }
2376 }
2377}
2378
2379/* Generate the epilogue instructions for returning from a function. */
2380void
2381aarch64_expand_epilogue (bool for_sibcall)
2382{
1c960e02 2383 HOST_WIDE_INT frame_size, offset;
43e9d192
IB
2384 HOST_WIDE_INT fp_offset;
2385 rtx insn;
44c0e7b9 2386 rtx cfa_reg;
ae13fce3 2387 bool skip_wb = false;
43e9d192
IB
2388
2389 aarch64_layout_frame ();
43e9d192 2390
1c960e02
MS
2391 offset = frame_size = cfun->machine->frame.frame_size;
2392 fp_offset = cfun->machine->frame.frame_size
2393 - cfun->machine->frame.hard_fp_offset;
43e9d192 2394
44c0e7b9
YZ
2395 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2396
 2397 /* Store pairs and load pairs have a range of only -512 to 504. */
43e9d192
IB
2398 if (offset >= 512)
2399 {
1c960e02 2400 offset = cfun->machine->frame.hard_fp_offset;
43e9d192
IB
2401 if (offset >= 512)
2402 offset = cfun->machine->frame.saved_regs_size;
2403
2404 frame_size -= (offset + crtl->outgoing_args_size);
2405 fp_offset = 0;
2406 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2407 {
2408 insn = emit_insn (gen_add2_insn
2409 (stack_pointer_rtx,
2410 GEN_INT (crtl->outgoing_args_size)));
2411 RTX_FRAME_RELATED_P (insn) = 1;
2412 }
2413 }
2414 else
2415 frame_size = -1;
2416
2417 /* If there were outgoing arguments or we've done dynamic stack
2418 allocation, then restore the stack pointer from the frame
2419 pointer. This is at most one insn and more efficient than using
2420 GCC's internal mechanism. */
2421 if (frame_pointer_needed
2422 && (crtl->outgoing_args_size || cfun->calls_alloca))
2423 {
2424 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2425 hard_frame_pointer_rtx,
8f454e9f
JW
2426 GEN_INT (0)));
2427 offset = offset - fp_offset;
43e9d192 2428 RTX_FRAME_RELATED_P (insn) = 1;
44c0e7b9
YZ
2429 /* As SP is set to (FP - fp_offset), according to the rules in
2430 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2431 from the value of SP from now on. */
2432 cfa_reg = stack_pointer_rtx;
43e9d192
IB
2433 }
2434
8f454e9f 2435 aarch64_restore_callee_saves (DFmode, frame_pointer_needed ? 0 : fp_offset,
ae13fce3 2436 V0_REGNUM, V31_REGNUM, skip_wb);
43e9d192 2437
43e9d192
IB
2438 if (offset > 0)
2439 {
2440 if (frame_pointer_needed)
2441 {
ae13fce3
JW
2442 aarch64_restore_callee_saves (DImode, 0, R0_REGNUM, R28_REGNUM,
2443 skip_wb);
8f454e9f
JW
2444 aarch64_popwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset,
2445 cfa_reg);
43e9d192 2446 }
43e9d192
IB
2447 else
2448 {
159313d9 2449 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM,
ae13fce3 2450 R30_REGNUM, skip_wb);
43e9d192
IB
2451 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2452 GEN_INT (offset)));
2453 RTX_FRAME_RELATED_P (insn) = 1;
2454 }
2455 }
2456
2457 /* Stack adjustment for exception handler. */
2458 if (crtl->calls_eh_return)
2459 {
2460 /* We need to unwind the stack by the offset computed by
2461 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2462 based on SP. Ideally we would update the SP and define the
2463 CFA along the lines of:
2464
2465 SP = SP + EH_RETURN_STACKADJ_RTX
2466 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2467
 2468 However, the dwarf emitter only understands a constant
 2469 offset from a register.
2470
631b20a7 2471 The solution chosen here is to use the otherwise unused IP0
43e9d192
IB
2472 as a temporary register to hold the current SP value. The
2473 CFA is described using IP0 then SP is modified. */
2474
2475 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2476
2477 insn = emit_move_insn (ip0, stack_pointer_rtx);
2478 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2479 RTX_FRAME_RELATED_P (insn) = 1;
2480
2481 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2482
2483 /* Ensure the assignment to IP0 does not get optimized away. */
2484 emit_use (ip0);
2485 }
2486
2487 if (frame_size > -1)
2488 {
2489 if (frame_size >= 0x1000000)
2490 {
2491 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2492 emit_move_insn (op0, GEN_INT (frame_size));
2493 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2494 aarch64_set_frame_expr (gen_rtx_SET
2495 (Pmode, stack_pointer_rtx,
f6fe771a
RL
2496 plus_constant (Pmode,
2497 stack_pointer_rtx,
2498 frame_size)));
43e9d192
IB
2499 }
2500 else if (frame_size > 0)
2501 {
2502 if ((frame_size & 0xfff) != 0)
2503 {
2504 insn = emit_insn (gen_add2_insn
2505 (stack_pointer_rtx,
2506 GEN_INT ((frame_size
2507 & (HOST_WIDE_INT) 0xfff))));
2508 RTX_FRAME_RELATED_P (insn) = 1;
2509 }
2510 if ((frame_size & 0xfff) != frame_size)
2511 {
2512 insn = emit_insn (gen_add2_insn
2513 (stack_pointer_rtx,
2514 GEN_INT ((frame_size
2515 & ~ (HOST_WIDE_INT) 0xfff))));
2516 RTX_FRAME_RELATED_P (insn) = 1;
2517 }
2518 }
2519
a007a21c
JW
2520 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2521 plus_constant (Pmode,
2522 stack_pointer_rtx,
2523 offset)));
43e9d192
IB
2524 }
2525
2526 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2527 if (!for_sibcall)
2528 emit_jump_insn (ret_rtx);
2529}
2530
2531/* Return the place to copy the exception unwinding return address to.
 2532 This will probably be a stack slot, but could (in theory) be the
 2533 return register. */
2534rtx
2535aarch64_final_eh_return_addr (void)
2536{
1c960e02
MS
2537 HOST_WIDE_INT fp_offset;
2538
43e9d192 2539 aarch64_layout_frame ();
1c960e02
MS
2540
2541 fp_offset = cfun->machine->frame.frame_size
2542 - cfun->machine->frame.hard_fp_offset;
43e9d192
IB
2543
2544 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2545 return gen_rtx_REG (DImode, LR_REGNUM);
2546
2547 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2548 result in a store to save LR introduced by builtin_eh_return () being
2549 incorrectly deleted because the alias is not detected.
 2550 So when calculating the address to copy the exception unwinding
 2551 return address to, we consider two cases.
2552 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2553 we return a SP-relative location since all the addresses are SP-relative
2554 in this case. This prevents the store from being optimized away.
2555 If the fp_offset is not 0, then the addresses will be FP-relative and
2556 therefore we return a FP-relative location. */
2557
2558 if (frame_pointer_needed)
2559 {
2560 if (fp_offset)
2561 return gen_frame_mem (DImode,
2562 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2563 else
2564 return gen_frame_mem (DImode,
2565 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2566 }
2567
2568 /* If FP is not needed, we calculate the location of LR, which would be
2569 at the top of the saved registers block. */
2570
2571 return gen_frame_mem (DImode,
2572 plus_constant (Pmode,
2573 stack_pointer_rtx,
2574 fp_offset
2575 + cfun->machine->frame.saved_regs_size
2576 - 2 * UNITS_PER_WORD));
2577}
2578
9dfc162c
JG
2579/* Possibly output code to build up a constant in a register. For
2580 the benefit of the costs infrastructure, returns the number of
2581 instructions which would be emitted. GENERATE inhibits or
2582 enables code generation. */
2583
2584static int
2585aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
43e9d192 2586{
9dfc162c
JG
2587 int insns = 0;
2588
43e9d192 2589 if (aarch64_bitmask_imm (val, DImode))
9dfc162c
JG
2590 {
2591 if (generate)
2592 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2593 insns = 1;
2594 }
43e9d192
IB
2595 else
2596 {
2597 int i;
2598 int ncount = 0;
2599 int zcount = 0;
2600 HOST_WIDE_INT valp = val >> 16;
2601 HOST_WIDE_INT valm;
2602 HOST_WIDE_INT tval;
2603
2604 for (i = 16; i < 64; i += 16)
2605 {
2606 valm = (valp & 0xffff);
2607
2608 if (valm != 0)
2609 ++ zcount;
2610
2611 if (valm != 0xffff)
2612 ++ ncount;
2613
2614 valp >>= 16;
2615 }
2616
2617 /* zcount contains the number of additional MOVK instructions
2618 required if the constant is built up with an initial MOVZ instruction,
2619 while ncount is the number of MOVK instructions required if starting
 2620 with a MOVN instruction. Choose the sequence that yields the smaller
 2621 number of instructions, preferring MOVZ instructions when the two
 2622 counts are equal. */
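      /* For example, 0x12345678 has zcount == 1 and ncount == 3, so it is
	 built as MOVZ #0x5678 followed by MOVK #0x1234, LSL #16, whereas
	 0xffffffffffff1234 (zcount == 3, ncount == 0) is handled by a
	 single MOVN-style move.  */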
2623 if (ncount < zcount)
2624 {
9dfc162c
JG
2625 if (generate)
2626 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2627 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
43e9d192 2628 tval = 0xffff;
9dfc162c 2629 insns++;
43e9d192
IB
2630 }
2631 else
2632 {
9dfc162c
JG
2633 if (generate)
2634 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2635 GEN_INT (val & 0xffff));
43e9d192 2636 tval = 0;
9dfc162c 2637 insns++;
43e9d192
IB
2638 }
2639
2640 val >>= 16;
2641
2642 for (i = 16; i < 64; i += 16)
2643 {
2644 if ((val & 0xffff) != tval)
9dfc162c
JG
2645 {
2646 if (generate)
2647 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2648 GEN_INT (i),
2649 GEN_INT (val & 0xffff)));
2650 insns++;
2651 }
43e9d192
IB
2652 val >>= 16;
2653 }
2654 }
9dfc162c 2655 return insns;
43e9d192
IB
2656}
2657
2658static void
d9600ae5 2659aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
43e9d192
IB
2660{
2661 HOST_WIDE_INT mdelta = delta;
d9600ae5
SN
2662 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2663 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
43e9d192
IB
2664
2665 if (mdelta < 0)
2666 mdelta = -mdelta;
2667
2668 if (mdelta >= 4096 * 4096)
2669 {
9dfc162c 2670 (void) aarch64_build_constant (scratchreg, delta, true);
d9600ae5 2671 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
43e9d192
IB
2672 }
2673 else if (mdelta > 0)
2674 {
43e9d192 2675 if (mdelta >= 4096)
d9600ae5
SN
2676 {
2677 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2678 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2679 if (delta < 0)
2680 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2681 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2682 else
2683 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2684 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2685 }
43e9d192 2686 if (mdelta % 4096 != 0)
d9600ae5
SN
2687 {
2688 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2689 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2690 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2691 }
43e9d192
IB
2692 }
2693}
2694
2695/* Output code to add DELTA to the first argument, and then jump
2696 to FUNCTION. Used for C++ multiple inheritance. */
2697static void
2698aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2699 HOST_WIDE_INT delta,
2700 HOST_WIDE_INT vcall_offset,
2701 tree function)
2702{
2703 /* The this pointer is always in x0. Note that this differs from
 2704 Arm where the this pointer may be bumped to r1 if r0 is required
2705 to return a pointer to an aggregate. On AArch64 a result value
2706 pointer will be in x8. */
2707 int this_regno = R0_REGNUM;
75f1d6fc 2708 rtx this_rtx, temp0, temp1, addr, insn, funexp;
43e9d192 2709
75f1d6fc
SN
2710 reload_completed = 1;
2711 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
2712
2713 if (vcall_offset == 0)
d9600ae5 2714 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2715 else
2716 {
28514dda 2717 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 2718
75f1d6fc
SN
2719 this_rtx = gen_rtx_REG (Pmode, this_regno);
2720 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2721 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 2722
75f1d6fc
SN
2723 addr = this_rtx;
2724 if (delta != 0)
2725 {
2726 if (delta >= -256 && delta < 256)
2727 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2728 plus_constant (Pmode, this_rtx, delta));
2729 else
d9600ae5 2730 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2731 }
2732
28514dda
YZ
2733 if (Pmode == ptr_mode)
2734 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2735 else
2736 aarch64_emit_move (temp0,
2737 gen_rtx_ZERO_EXTEND (Pmode,
2738 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 2739
28514dda 2740 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 2741 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
2742 else
2743 {
9dfc162c 2744 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
75f1d6fc 2745 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
2746 }
2747
28514dda
YZ
2748 if (Pmode == ptr_mode)
2749 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2750 else
2751 aarch64_emit_move (temp1,
2752 gen_rtx_SIGN_EXTEND (Pmode,
2753 gen_rtx_MEM (ptr_mode, addr)));
2754
75f1d6fc 2755 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
2756 }
2757
75f1d6fc
SN
2758 /* Generate a tail call to the target function. */
2759 if (!TREE_USED (function))
2760 {
2761 assemble_external (function);
2762 TREE_USED (function) = 1;
2763 }
2764 funexp = XEXP (DECL_RTL (function), 0);
2765 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2766 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2767 SIBLING_CALL_P (insn) = 1;
2768
2769 insn = get_insns ();
2770 shorten_branches (insn);
2771 final_start_function (insn, file, 1);
2772 final (insn, file, 1);
43e9d192 2773 final_end_function ();
75f1d6fc
SN
2774
2775 /* Stop pretending to be a post-reload pass. */
2776 reload_completed = 0;
43e9d192
IB
2777}
2778
43e9d192
IB
2779static int
2780aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2781{
2782 if (GET_CODE (*x) == SYMBOL_REF)
2783 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2784
2785 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2786 TLS offsets, not real symbol references. */
2787 if (GET_CODE (*x) == UNSPEC
2788 && XINT (*x, 1) == UNSPEC_TLS)
2789 return -1;
2790
2791 return 0;
2792}
2793
2794static bool
2795aarch64_tls_referenced_p (rtx x)
2796{
2797 if (!TARGET_HAVE_TLS)
2798 return false;
2799
2800 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2801}
2802
2803
2804static int
2805aarch64_bitmasks_cmp (const void *i1, const void *i2)
2806{
2807 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2808 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2809
2810 if (*imm1 < *imm2)
2811 return -1;
2812 if (*imm1 > *imm2)
2813 return +1;
2814 return 0;
2815}
2816
2817
2818static void
2819aarch64_build_bitmask_table (void)
2820{
2821 unsigned HOST_WIDE_INT mask, imm;
2822 unsigned int log_e, e, s, r;
2823 unsigned int nimms = 0;
2824
2825 for (log_e = 1; log_e <= 6; log_e++)
2826 {
2827 e = 1 << log_e;
2828 if (e == 64)
2829 mask = ~(HOST_WIDE_INT) 0;
2830 else
2831 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2832 for (s = 1; s < e; s++)
2833 {
2834 for (r = 0; r < e; r++)
2835 {
 2836 /* Set s consecutive bits to 1 (s < 64). */
2837 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
 2838 /* Rotate right by r. */
2839 if (r != 0)
2840 imm = ((imm >> r) | (imm << (e - r))) & mask;
 2841 /* Replicate the constant depending on the element size; the switch cases below deliberately fall through. */
2842 switch (log_e) {
2843 case 1: imm |= (imm << 2);
2844 case 2: imm |= (imm << 4);
2845 case 3: imm |= (imm << 8);
2846 case 4: imm |= (imm << 16);
2847 case 5: imm |= (imm << 32);
2848 case 6:
2849 break;
2850 default:
2851 gcc_unreachable ();
2852 }
2853 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2854 aarch64_bitmasks[nimms++] = imm;
2855 }
2856 }
2857 }
2858
2859 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2860 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2861 aarch64_bitmasks_cmp);
2862}
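/* As an example of the encoding above: e == 8, s == 3, r == 1 yields the
   byte 0b10000011 (0x83), which replication expands to the 64-bit bitmask
   immediate 0x8383838383838383.  */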
2863
2864
2865/* Return true if val can be encoded as a 12-bit unsigned immediate with
2866 a left shift of 0 or 12 bits. */
2867bool
2868aarch64_uimm12_shift (HOST_WIDE_INT val)
2869{
2870 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2871 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2872 );
2873}
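/* For instance, 0xabc and 0xabc000 are accepted (ADD/SUB immediates with
   LSL #0 and LSL #12 respectively), while 0x1001, which has bits set in
   both 12-bit fields, is rejected.  */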
2874
2875
2876/* Return true if val is an immediate that can be loaded into a
2877 register by a MOVZ instruction. */
2878static bool
2879aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2880{
2881 if (GET_MODE_SIZE (mode) > 4)
2882 {
2883 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2884 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2885 return 1;
2886 }
2887 else
2888 {
2889 /* Ignore sign extension. */
2890 val &= (HOST_WIDE_INT) 0xffffffff;
2891 }
2892 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2893 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2894}
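/* For example, 0xffff0000 is accepted for DImode (MOVZ #0xffff, LSL #16),
   while 0x10001 is rejected, since its set bits span two 16-bit fields.  */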
2895
2896
2897/* Return true if val is a valid bitmask immediate. */
2898bool
2899aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2900{
2901 if (GET_MODE_SIZE (mode) < 8)
2902 {
2903 /* Replicate bit pattern. */
2904 val &= (HOST_WIDE_INT) 0xffffffff;
2905 val |= val << 32;
2906 }
2907 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2908 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2909}
2910
2911
2912/* Return true if val is an immediate that can be loaded into a
2913 register in a single instruction. */
2914bool
2915aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2916{
2917 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2918 return 1;
2919 return aarch64_bitmask_imm (val, mode);
2920}
2921
2922static bool
2923aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2924{
2925 rtx base, offset;
7eda14e1 2926
43e9d192
IB
2927 if (GET_CODE (x) == HIGH)
2928 return true;
2929
2930 split_const (x, &base, &offset);
2931 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda
YZ
2932 {
2933 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2934 != SYMBOL_FORCE_TO_MEM)
2935 return true;
2936 else
2937 /* Avoid generating a 64-bit relocation in ILP32; leave
2938 to aarch64_expand_mov_immediate to handle it properly. */
2939 return mode != ptr_mode;
2940 }
43e9d192
IB
2941
2942 return aarch64_tls_referenced_p (x);
2943}
2944
2945/* Return true if register REGNO is a valid index register.
2946 STRICT_P is true if REG_OK_STRICT is in effect. */
2947
2948bool
2949aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2950{
2951 if (!HARD_REGISTER_NUM_P (regno))
2952 {
2953 if (!strict_p)
2954 return true;
2955
2956 if (!reg_renumber)
2957 return false;
2958
2959 regno = reg_renumber[regno];
2960 }
2961 return GP_REGNUM_P (regno);
2962}
2963
 2964/* Return true if register REGNO is a valid base register.
2965 STRICT_P is true if REG_OK_STRICT is in effect. */
2966
2967bool
2968aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2969{
2970 if (!HARD_REGISTER_NUM_P (regno))
2971 {
2972 if (!strict_p)
2973 return true;
2974
2975 if (!reg_renumber)
2976 return false;
2977
2978 regno = reg_renumber[regno];
2979 }
2980
2981 /* The fake registers will be eliminated to either the stack or
2982 hard frame pointer, both of which are usually valid base registers.
2983 Reload deals with the cases where the eliminated form isn't valid. */
2984 return (GP_REGNUM_P (regno)
2985 || regno == SP_REGNUM
2986 || regno == FRAME_POINTER_REGNUM
2987 || regno == ARG_POINTER_REGNUM);
2988}
2989
 2990/* Return true if X is a valid base register.
2991 STRICT_P is true if REG_OK_STRICT is in effect. */
2992
2993static bool
2994aarch64_base_register_rtx_p (rtx x, bool strict_p)
2995{
2996 if (!strict_p && GET_CODE (x) == SUBREG)
2997 x = SUBREG_REG (x);
2998
2999 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3000}
3001
3002/* Return true if address offset is a valid index. If it is, fill in INFO
3003 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3004
3005static bool
3006aarch64_classify_index (struct aarch64_address_info *info, rtx x,
3007 enum machine_mode mode, bool strict_p)
3008{
3009 enum aarch64_address_type type;
3010 rtx index;
3011 int shift;
3012
3013 /* (reg:P) */
3014 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3015 && GET_MODE (x) == Pmode)
3016 {
3017 type = ADDRESS_REG_REG;
3018 index = x;
3019 shift = 0;
3020 }
3021 /* (sign_extend:DI (reg:SI)) */
3022 else if ((GET_CODE (x) == SIGN_EXTEND
3023 || GET_CODE (x) == ZERO_EXTEND)
3024 && GET_MODE (x) == DImode
3025 && GET_MODE (XEXP (x, 0)) == SImode)
3026 {
3027 type = (GET_CODE (x) == SIGN_EXTEND)
3028 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3029 index = XEXP (x, 0);
3030 shift = 0;
3031 }
3032 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3033 else if (GET_CODE (x) == MULT
3034 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3035 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3036 && GET_MODE (XEXP (x, 0)) == DImode
3037 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3038 && CONST_INT_P (XEXP (x, 1)))
3039 {
3040 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3041 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3042 index = XEXP (XEXP (x, 0), 0);
3043 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3044 }
3045 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3046 else if (GET_CODE (x) == ASHIFT
3047 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3048 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3049 && GET_MODE (XEXP (x, 0)) == DImode
3050 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3051 && CONST_INT_P (XEXP (x, 1)))
3052 {
3053 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3054 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3055 index = XEXP (XEXP (x, 0), 0);
3056 shift = INTVAL (XEXP (x, 1));
3057 }
3058 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3059 else if ((GET_CODE (x) == SIGN_EXTRACT
3060 || GET_CODE (x) == ZERO_EXTRACT)
3061 && GET_MODE (x) == DImode
3062 && GET_CODE (XEXP (x, 0)) == MULT
3063 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3064 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3065 {
3066 type = (GET_CODE (x) == SIGN_EXTRACT)
3067 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3068 index = XEXP (XEXP (x, 0), 0);
3069 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3070 if (INTVAL (XEXP (x, 1)) != 32 + shift
3071 || INTVAL (XEXP (x, 2)) != 0)
3072 shift = -1;
3073 }
3074 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3075 (const_int 0xffffffff<<shift)) */
3076 else if (GET_CODE (x) == AND
3077 && GET_MODE (x) == DImode
3078 && GET_CODE (XEXP (x, 0)) == MULT
3079 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3080 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3081 && CONST_INT_P (XEXP (x, 1)))
3082 {
3083 type = ADDRESS_REG_UXTW;
3084 index = XEXP (XEXP (x, 0), 0);
3085 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3086 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3087 shift = -1;
3088 }
3089 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3090 else if ((GET_CODE (x) == SIGN_EXTRACT
3091 || GET_CODE (x) == ZERO_EXTRACT)
3092 && GET_MODE (x) == DImode
3093 && GET_CODE (XEXP (x, 0)) == ASHIFT
3094 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3095 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3096 {
3097 type = (GET_CODE (x) == SIGN_EXTRACT)
3098 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3099 index = XEXP (XEXP (x, 0), 0);
3100 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3101 if (INTVAL (XEXP (x, 1)) != 32 + shift
3102 || INTVAL (XEXP (x, 2)) != 0)
3103 shift = -1;
3104 }
3105 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3106 (const_int 0xffffffff<<shift)) */
3107 else if (GET_CODE (x) == AND
3108 && GET_MODE (x) == DImode
3109 && GET_CODE (XEXP (x, 0)) == ASHIFT
3110 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3111 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3112 && CONST_INT_P (XEXP (x, 1)))
3113 {
3114 type = ADDRESS_REG_UXTW;
3115 index = XEXP (XEXP (x, 0), 0);
3116 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3117 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3118 shift = -1;
3119 }
3120 /* (mult:P (reg:P) (const_int scale)) */
3121 else if (GET_CODE (x) == MULT
3122 && GET_MODE (x) == Pmode
3123 && GET_MODE (XEXP (x, 0)) == Pmode
3124 && CONST_INT_P (XEXP (x, 1)))
3125 {
3126 type = ADDRESS_REG_REG;
3127 index = XEXP (x, 0);
3128 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3129 }
3130 /* (ashift:P (reg:P) (const_int shift)) */
3131 else if (GET_CODE (x) == ASHIFT
3132 && GET_MODE (x) == Pmode
3133 && GET_MODE (XEXP (x, 0)) == Pmode
3134 && CONST_INT_P (XEXP (x, 1)))
3135 {
3136 type = ADDRESS_REG_REG;
3137 index = XEXP (x, 0);
3138 shift = INTVAL (XEXP (x, 1));
3139 }
3140 else
3141 return false;
3142
3143 if (GET_CODE (index) == SUBREG)
3144 index = SUBREG_REG (index);
3145
3146 if ((shift == 0 ||
3147 (shift > 0 && shift <= 3
3148 && (1 << shift) == GET_MODE_SIZE (mode)))
3149 && REG_P (index)
3150 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3151 {
3152 info->type = type;
3153 info->offset = index;
3154 info->shift = shift;
3155 return true;
3156 }
3157
3158 return false;
3159}
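/* For instance, with an SImode access the index
      (mult:DI (sign_extend:DI (reg:SI)) (const_int 4))
   is classified as ADDRESS_REG_SXTW with shift == 2, i.e. the
   [base, wN, sxtw #2] addressing form.  */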
3160
3161static inline bool
3162offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3163{
3164 return (offset >= -64 * GET_MODE_SIZE (mode)
3165 && offset < 64 * GET_MODE_SIZE (mode)
3166 && offset % GET_MODE_SIZE (mode) == 0);
3167}
3168
3169static inline bool
3170offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3171 HOST_WIDE_INT offset)
3172{
3173 return offset >= -256 && offset < 256;
3174}
3175
3176static inline bool
3177offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3178{
3179 return (offset >= 0
3180 && offset < 4096 * GET_MODE_SIZE (mode)
3181 && offset % GET_MODE_SIZE (mode) == 0);
3182}
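/* For a DImode access the three predicates above accept offsets in
   [-512, 504] in steps of 8, [-256, 255] unscaled, and [0, 32760] in
   steps of 8 respectively.  */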
3183
3184/* Return true if X is a valid address for machine mode MODE. If it is,
3185 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3186 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3187
3188static bool
3189aarch64_classify_address (struct aarch64_address_info *info,
3190 rtx x, enum machine_mode mode,
3191 RTX_CODE outer_code, bool strict_p)
3192{
3193 enum rtx_code code = GET_CODE (x);
3194 rtx op0, op1;
3195 bool allow_reg_index_p =
348d4b0a
BC
3196 outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
3197 || aarch64_vector_mode_supported_p (mode));
43e9d192
IB
3198 /* Don't support anything other than POST_INC or REG addressing for
3199 AdvSIMD. */
348d4b0a 3200 if (aarch64_vect_struct_mode_p (mode)
43e9d192
IB
3201 && (code != POST_INC && code != REG))
3202 return false;
3203
3204 switch (code)
3205 {
3206 case REG:
3207 case SUBREG:
3208 info->type = ADDRESS_REG_IMM;
3209 info->base = x;
3210 info->offset = const0_rtx;
3211 return aarch64_base_register_rtx_p (x, strict_p);
3212
3213 case PLUS:
3214 op0 = XEXP (x, 0);
3215 op1 = XEXP (x, 1);
3216 if (GET_MODE_SIZE (mode) != 0
3217 && CONST_INT_P (op1)
3218 && aarch64_base_register_rtx_p (op0, strict_p))
3219 {
3220 HOST_WIDE_INT offset = INTVAL (op1);
3221
3222 info->type = ADDRESS_REG_IMM;
3223 info->base = op0;
3224 info->offset = op1;
3225
3226 /* TImode and TFmode values are allowed in both pairs of X
3227 registers and individual Q registers. The available
3228 address modes are:
3229 X,X: 7-bit signed scaled offset
3230 Q: 9-bit signed offset
3231 We conservatively require an offset representable in either mode.
3232 */
3233 if (mode == TImode || mode == TFmode)
3234 return (offset_7bit_signed_scaled_p (mode, offset)
3235 && offset_9bit_signed_unscaled_p (mode, offset));
3236
3237 if (outer_code == PARALLEL)
3238 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3239 && offset_7bit_signed_scaled_p (mode, offset));
3240 else
3241 return (offset_9bit_signed_unscaled_p (mode, offset)
3242 || offset_12bit_unsigned_scaled_p (mode, offset));
3243 }
3244
3245 if (allow_reg_index_p)
3246 {
3247 /* Look for base + (scaled/extended) index register. */
3248 if (aarch64_base_register_rtx_p (op0, strict_p)
3249 && aarch64_classify_index (info, op1, mode, strict_p))
3250 {
3251 info->base = op0;
3252 return true;
3253 }
3254 if (aarch64_base_register_rtx_p (op1, strict_p)
3255 && aarch64_classify_index (info, op0, mode, strict_p))
3256 {
3257 info->base = op1;
3258 return true;
3259 }
3260 }
3261
3262 return false;
3263
3264 case POST_INC:
3265 case POST_DEC:
3266 case PRE_INC:
3267 case PRE_DEC:
3268 info->type = ADDRESS_REG_WB;
3269 info->base = XEXP (x, 0);
3270 info->offset = NULL_RTX;
3271 return aarch64_base_register_rtx_p (info->base, strict_p);
3272
3273 case POST_MODIFY:
3274 case PRE_MODIFY:
3275 info->type = ADDRESS_REG_WB;
3276 info->base = XEXP (x, 0);
3277 if (GET_CODE (XEXP (x, 1)) == PLUS
3278 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3279 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3280 && aarch64_base_register_rtx_p (info->base, strict_p))
3281 {
3282 HOST_WIDE_INT offset;
3283 info->offset = XEXP (XEXP (x, 1), 1);
3284 offset = INTVAL (info->offset);
3285
3286 /* TImode and TFmode values are allowed in both pairs of X
3287 registers and individual Q registers. The available
3288 address modes are:
3289 X,X: 7-bit signed scaled offset
3290 Q: 9-bit signed offset
3291 We conservatively require an offset representable in either mode.
3292 */
3293 if (mode == TImode || mode == TFmode)
3294 return (offset_7bit_signed_scaled_p (mode, offset)
3295 && offset_9bit_signed_unscaled_p (mode, offset));
3296
3297 if (outer_code == PARALLEL)
3298 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3299 && offset_7bit_signed_scaled_p (mode, offset));
3300 else
3301 return offset_9bit_signed_unscaled_p (mode, offset);
3302 }
3303 return false;
3304
3305 case CONST:
3306 case SYMBOL_REF:
3307 case LABEL_REF:
79517551
SN
 3308 /* Load literal: a pc-relative constant pool entry. Only supported
3309 for SI mode or larger. */
43e9d192 3310 info->type = ADDRESS_SYMBOLIC;
79517551 3311 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
3312 {
3313 rtx sym, addend;
3314
3315 split_const (x, &sym, &addend);
3316 return (GET_CODE (sym) == LABEL_REF
3317 || (GET_CODE (sym) == SYMBOL_REF
3318 && CONSTANT_POOL_ADDRESS_P (sym)));
3319 }
3320 return false;
3321
3322 case LO_SUM:
3323 info->type = ADDRESS_LO_SUM;
3324 info->base = XEXP (x, 0);
3325 info->offset = XEXP (x, 1);
3326 if (allow_reg_index_p
3327 && aarch64_base_register_rtx_p (info->base, strict_p))
3328 {
3329 rtx sym, offs;
3330 split_const (info->offset, &sym, &offs);
3331 if (GET_CODE (sym) == SYMBOL_REF
3332 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3333 == SYMBOL_SMALL_ABSOLUTE))
3334 {
3335 /* The symbol and offset must be aligned to the access size. */
3336 unsigned int align;
3337 unsigned int ref_size;
3338
3339 if (CONSTANT_POOL_ADDRESS_P (sym))
3340 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3341 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3342 {
3343 tree exp = SYMBOL_REF_DECL (sym);
3344 align = TYPE_ALIGN (TREE_TYPE (exp));
3345 align = CONSTANT_ALIGNMENT (exp, align);
3346 }
3347 else if (SYMBOL_REF_DECL (sym))
3348 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
3349 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3350 && SYMBOL_REF_BLOCK (sym) != NULL)
3351 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
3352 else
3353 align = BITS_PER_UNIT;
3354
3355 ref_size = GET_MODE_SIZE (mode);
3356 if (ref_size == 0)
3357 ref_size = GET_MODE_SIZE (DImode);
3358
3359 return ((INTVAL (offs) & (ref_size - 1)) == 0
3360 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3361 }
3362 }
3363 return false;
3364
3365 default:
3366 return false;
3367 }
3368}
3369
3370bool
3371aarch64_symbolic_address_p (rtx x)
3372{
3373 rtx offset;
3374
3375 split_const (x, &x, &offset);
3376 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3377}
3378
3379/* Classify the base of symbolic expression X, given that X appears in
3380 context CONTEXT. */
da4f13a4
MS
3381
3382enum aarch64_symbol_type
3383aarch64_classify_symbolic_expression (rtx x,
3384 enum aarch64_symbol_context context)
43e9d192
IB
3385{
3386 rtx offset;
da4f13a4 3387
43e9d192
IB
3388 split_const (x, &x, &offset);
3389 return aarch64_classify_symbol (x, context);
3390}
3391
3392
3393/* Return TRUE if X is a legitimate address for accessing memory in
3394 mode MODE. */
3395static bool
3396aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3397{
3398 struct aarch64_address_info addr;
3399
3400 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3401}
3402
3403/* Return TRUE if X is a legitimate address for accessing memory in
3404 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3405 pair operation. */
3406bool
3407aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
aef66c94 3408 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
3409{
3410 struct aarch64_address_info addr;
3411
3412 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3413}
3414
 3415/* Return TRUE if rtx X is the immediate constant 0.0. */
3416bool
3520f7cc 3417aarch64_float_const_zero_rtx_p (rtx x)
43e9d192
IB
3418{
3419 REAL_VALUE_TYPE r;
3420
3421 if (GET_MODE (x) == VOIDmode)
3422 return false;
3423
3424 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3425 if (REAL_VALUE_MINUS_ZERO (r))
3426 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3427 return REAL_VALUES_EQUAL (r, dconst0);
3428}
3429
70f09188
AP
3430/* Return the fixed registers used for condition codes. */
3431
3432static bool
3433aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3434{
3435 *p1 = CC_REGNUM;
3436 *p2 = INVALID_REGNUM;
3437 return true;
3438}
3439
78607708
TV
3440/* Emit call insn with PAT and do aarch64-specific handling. */
3441
d07a3fed 3442void
78607708
TV
3443aarch64_emit_call_insn (rtx pat)
3444{
3445 rtx insn = emit_call_insn (pat);
3446
3447 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3448 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3449 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3450}
3451
43e9d192
IB
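/* Return the CC mode that a comparison of code CODE between operands X
   and Y should use.  */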
3452enum machine_mode
3453aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3454{
3455 /* All floating point compares return CCFP if it is an equality
3456 comparison, and CCFPE otherwise. */
3457 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3458 {
3459 switch (code)
3460 {
3461 case EQ:
3462 case NE:
3463 case UNORDERED:
3464 case ORDERED:
3465 case UNLT:
3466 case UNLE:
3467 case UNGT:
3468 case UNGE:
3469 case UNEQ:
3470 case LTGT:
3471 return CCFPmode;
3472
3473 case LT:
3474 case LE:
3475 case GT:
3476 case GE:
3477 return CCFPEmode;
3478
3479 default:
3480 gcc_unreachable ();
3481 }
3482 }
3483
3484 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3485 && y == const0_rtx
3486 && (code == EQ || code == NE || code == LT || code == GE)
b056c910
N
3487 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3488 || GET_CODE (x) == NEG))
43e9d192
IB
3489 return CC_NZmode;
3490
1c992d1e 3491 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
3492 the comparison will have to be swapped when we emit the assembly
3493 code. */
3494 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3495 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3496 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3497 || GET_CODE (x) == LSHIFTRT
1c992d1e 3498 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
3499 return CC_SWPmode;
3500
1c992d1e
RE
3501 /* Similarly for a negated operand, but we can only do this for
3502 equalities. */
3503 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3504 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3505 && (code == EQ || code == NE)
3506 && GET_CODE (x) == NEG)
3507 return CC_Zmode;
3508
43e9d192
IB
3509 /* A compare of a mode narrower than SI mode against zero can be done
3510 by extending the value in the comparison. */
3511 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3512 && y == const0_rtx)
3513 /* Only use sign-extension if we really need it. */
3514 return ((code == GT || code == GE || code == LE || code == LT)
3515 ? CC_SESWPmode : CC_ZESWPmode);
3516
3517 /* For everything else, return CCmode. */
3518 return CCmode;
3519}
3520
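/* Return the AArch64 condition code (AARCH64_EQ etc.) corresponding to
   comparison rtx X, taking the mode of the comparison into account.  */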
3521static unsigned
3522aarch64_get_condition_code (rtx x)
3523{
3524 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3525 enum rtx_code comp_code = GET_CODE (x);
3526
3527 if (GET_MODE_CLASS (mode) != MODE_CC)
3528 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3529
3530 switch (mode)
3531 {
3532 case CCFPmode:
3533 case CCFPEmode:
3534 switch (comp_code)
3535 {
3536 case GE: return AARCH64_GE;
3537 case GT: return AARCH64_GT;
3538 case LE: return AARCH64_LS;
3539 case LT: return AARCH64_MI;
3540 case NE: return AARCH64_NE;
3541 case EQ: return AARCH64_EQ;
3542 case ORDERED: return AARCH64_VC;
3543 case UNORDERED: return AARCH64_VS;
3544 case UNLT: return AARCH64_LT;
3545 case UNLE: return AARCH64_LE;
3546 case UNGT: return AARCH64_HI;
3547 case UNGE: return AARCH64_PL;
3548 default: gcc_unreachable ();
3549 }
3550 break;
3551
3552 case CCmode:
3553 switch (comp_code)
3554 {
3555 case NE: return AARCH64_NE;
3556 case EQ: return AARCH64_EQ;
3557 case GE: return AARCH64_GE;
3558 case GT: return AARCH64_GT;
3559 case LE: return AARCH64_LE;
3560 case LT: return AARCH64_LT;
3561 case GEU: return AARCH64_CS;
3562 case GTU: return AARCH64_HI;
3563 case LEU: return AARCH64_LS;
3564 case LTU: return AARCH64_CC;
3565 default: gcc_unreachable ();
3566 }
3567 break;
3568
3569 case CC_SWPmode:
3570 case CC_ZESWPmode:
3571 case CC_SESWPmode:
3572 switch (comp_code)
3573 {
3574 case NE: return AARCH64_NE;
3575 case EQ: return AARCH64_EQ;
3576 case GE: return AARCH64_LE;
3577 case GT: return AARCH64_LT;
3578 case LE: return AARCH64_GE;
3579 case LT: return AARCH64_GT;
3580 case GEU: return AARCH64_LS;
3581 case GTU: return AARCH64_CC;
3582 case LEU: return AARCH64_CS;
3583 case LTU: return AARCH64_HI;
3584 default: gcc_unreachable ();
3585 }
3586 break;
3587
3588 case CC_NZmode:
3589 switch (comp_code)
3590 {
3591 case NE: return AARCH64_NE;
3592 case EQ: return AARCH64_EQ;
3593 case GE: return AARCH64_PL;
3594 case LT: return AARCH64_MI;
3595 default: gcc_unreachable ();
3596 }
3597 break;
3598
1c992d1e
RE
3599 case CC_Zmode:
3600 switch (comp_code)
3601 {
3602 case NE: return AARCH64_NE;
3603 case EQ: return AARCH64_EQ;
3604 default: gcc_unreachable ();
3605 }
3606 break;
3607
43e9d192
IB
3608 default:
3609 gcc_unreachable ();
3610 break;
3611 }
3612}
3613
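/* Return the number of bits set in VALUE; each loop iteration clears the
   lowest set bit.  */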
3614static unsigned
3615bit_count (unsigned HOST_WIDE_INT value)
3616{
3617 unsigned count = 0;
3618
3619 while (value)
3620 {
3621 count++;
3622 value &= value - 1;
3623 }
3624
3625 return count;
3626}
3627
3628void
3629aarch64_print_operand (FILE *f, rtx x, char code)
3630{
3631 switch (code)
3632 {
f541a481
KT
3633 /* An integer or symbol address without a preceding # sign. */
3634 case 'c':
3635 switch (GET_CODE (x))
3636 {
3637 case CONST_INT:
3638 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3639 break;
3640
3641 case SYMBOL_REF:
3642 output_addr_const (f, x);
3643 break;
3644
3645 case CONST:
3646 if (GET_CODE (XEXP (x, 0)) == PLUS
3647 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3648 {
3649 output_addr_const (f, x);
3650 break;
3651 }
3652 /* Fall through. */
3653
3654 default:
3655 output_operand_lossage ("Unsupported operand for code '%c'", code);
3656 }
3657 break;
3658
43e9d192
IB
3659 case 'e':
3660 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3661 {
3662 int n;
3663
3664 if (GET_CODE (x) != CONST_INT
3665 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3666 {
3667 output_operand_lossage ("invalid operand for '%%%c'", code);
3668 return;
3669 }
3670
3671 switch (n)
3672 {
3673 case 3:
3674 fputc ('b', f);
3675 break;
3676 case 4:
3677 fputc ('h', f);
3678 break;
3679 case 5:
3680 fputc ('w', f);
3681 break;
3682 default:
3683 output_operand_lossage ("invalid operand for '%%%c'", code);
3684 return;
3685 }
3686 }
3687 break;
3688
3689 case 'p':
3690 {
3691 int n;
3692
3693 /* Print N such that 2^N == X. */
3694 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3695 {
3696 output_operand_lossage ("invalid operand for '%%%c'", code);
3697 return;
3698 }
3699
3700 asm_fprintf (f, "%d", n);
3701 }
3702 break;
3703
3704 case 'P':
3705 /* Print the number of non-zero bits in X (a const_int). */
3706 if (GET_CODE (x) != CONST_INT)
3707 {
3708 output_operand_lossage ("invalid operand for '%%%c'", code);
3709 return;
3710 }
3711
3712 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3713 break;
3714
3715 case 'H':
3716 /* Print the higher numbered register of a pair (TImode) of regs. */
3717 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3718 {
3719 output_operand_lossage ("invalid operand for '%%%c'", code);
3720 return;
3721 }
3722
01a3a324 3723 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
3724 break;
3725
43e9d192
IB
3726 case 'm':
3727 /* Print a condition (eq, ne, etc). */
3728
3729 /* CONST_TRUE_RTX means always -- that's the default. */
3730 if (x == const_true_rtx)
3731 return;
3732
3733 if (!COMPARISON_P (x))
3734 {
3735 output_operand_lossage ("invalid operand for '%%%c'", code);
3736 return;
3737 }
3738
3739 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3740 break;
3741
3742 case 'M':
3743 /* Print the inverse of a condition (eq <-> ne, etc). */
3744
3745 /* CONST_TRUE_RTX means never -- that's the default. */
3746 if (x == const_true_rtx)
3747 {
3748 fputs ("nv", f);
3749 return;
3750 }
3751
3752 if (!COMPARISON_P (x))
3753 {
3754 output_operand_lossage ("invalid operand for '%%%c'", code);
3755 return;
3756 }
3757
3758 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3759 (aarch64_get_condition_code (x))], f);
3760 break;
3761
3762 case 'b':
3763 case 'h':
3764 case 's':
3765 case 'd':
3766 case 'q':
3767 /* Print a scalar FP/SIMD register name. */
3768 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3769 {
3770 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3771 return;
3772 }
50ce6f88 3773 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
3774 break;
3775
3776 case 'S':
3777 case 'T':
3778 case 'U':
3779 case 'V':
3780 /* Print the first FP/SIMD register name in a list. */
3781 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3782 {
3783 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3784 return;
3785 }
50ce6f88 3786 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
3787 break;
3788
a05c0ddf 3789 case 'X':
50d38551 3790 /* Print bottom 16 bits of integer constant in hex. */
a05c0ddf
IB
3791 if (GET_CODE (x) != CONST_INT)
3792 {
3793 output_operand_lossage ("invalid operand for '%%%c'", code);
3794 return;
3795 }
50d38551 3796 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
3797 break;
3798
43e9d192
IB
3799 case 'w':
3800 case 'x':
3801 /* Print a general register name or the zero register (32-bit or
3802 64-bit). */
3520f7cc
JG
3803 if (x == const0_rtx
3804 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 3805 {
50ce6f88 3806 asm_fprintf (f, "%czr", code);
43e9d192
IB
3807 break;
3808 }
3809
3810 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3811 {
50ce6f88 3812 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
3813 break;
3814 }
3815
3816 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3817 {
50ce6f88 3818 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
3819 break;
3820 }
3821
3822 /* Fall through */
3823
3824 case 0:
3825 /* Print a normal operand, if it's a general register, then we
3826 assume DImode. */
3827 if (x == NULL)
3828 {
3829 output_operand_lossage ("missing operand");
3830 return;
3831 }
3832
3833 switch (GET_CODE (x))
3834 {
3835 case REG:
01a3a324 3836 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
3837 break;
3838
3839 case MEM:
3840 aarch64_memory_reference_mode = GET_MODE (x);
3841 output_address (XEXP (x, 0));
3842 break;
3843
3844 case LABEL_REF:
3845 case SYMBOL_REF:
3846 output_addr_const (asm_out_file, x);
3847 break;
3848
3849 case CONST_INT:
3850 asm_fprintf (f, "%wd", INTVAL (x));
3851 break;
3852
3853 case CONST_VECTOR:
3520f7cc
JG
3854 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3855 {
3856 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3857 HOST_WIDE_INT_MIN,
3858 HOST_WIDE_INT_MAX));
3859 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3860 }
3861 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3862 {
3863 fputc ('0', f);
3864 }
3865 else
3866 gcc_unreachable ();
43e9d192
IB
3867 break;
3868
3520f7cc
JG
3869 case CONST_DOUBLE:
3870 /* CONST_DOUBLE can represent a double-width integer.
3871 In this case, the mode of x is VOIDmode. */
3872 if (GET_MODE (x) == VOIDmode)
3873 ; /* Do Nothing. */
3874 else if (aarch64_float_const_zero_rtx_p (x))
3875 {
3876 fputc ('0', f);
3877 break;
3878 }
3879 else if (aarch64_float_const_representable_p (x))
3880 {
3881#define buf_size 20
3882 char float_buf[buf_size] = {'\0'};
3883 REAL_VALUE_TYPE r;
3884 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3885 real_to_decimal_for_mode (float_buf, &r,
3886 buf_size, buf_size,
3887 1, GET_MODE (x));
3888 asm_fprintf (asm_out_file, "%s", float_buf);
3889 break;
3890#undef buf_size
3891 }
3892 output_operand_lossage ("invalid constant");
3893 return;
43e9d192
IB
3894 default:
3895 output_operand_lossage ("invalid operand");
3896 return;
3897 }
3898 break;
3899
3900 case 'A':
3901 if (GET_CODE (x) == HIGH)
3902 x = XEXP (x, 0);
3903
3904 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3905 {
3906 case SYMBOL_SMALL_GOT:
3907 asm_fprintf (asm_out_file, ":got:");
3908 break;
3909
3910 case SYMBOL_SMALL_TLSGD:
3911 asm_fprintf (asm_out_file, ":tlsgd:");
3912 break;
3913
3914 case SYMBOL_SMALL_TLSDESC:
3915 asm_fprintf (asm_out_file, ":tlsdesc:");
3916 break;
3917
3918 case SYMBOL_SMALL_GOTTPREL:
3919 asm_fprintf (asm_out_file, ":gottprel:");
3920 break;
3921
3922 case SYMBOL_SMALL_TPREL:
3923 asm_fprintf (asm_out_file, ":tprel:");
3924 break;
3925
87dd8ab0
MS
3926 case SYMBOL_TINY_GOT:
3927 gcc_unreachable ();
3928 break;
3929
43e9d192
IB
3930 default:
3931 break;
3932 }
3933 output_addr_const (asm_out_file, x);
3934 break;
3935
3936 case 'L':
3937 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3938 {
3939 case SYMBOL_SMALL_GOT:
3940 asm_fprintf (asm_out_file, ":lo12:");
3941 break;
3942
3943 case SYMBOL_SMALL_TLSGD:
3944 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3945 break;
3946
3947 case SYMBOL_SMALL_TLSDESC:
3948 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3949 break;
3950
3951 case SYMBOL_SMALL_GOTTPREL:
3952 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3953 break;
3954
3955 case SYMBOL_SMALL_TPREL:
3956 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3957 break;
3958
87dd8ab0
MS
3959 case SYMBOL_TINY_GOT:
3960 asm_fprintf (asm_out_file, ":got:");
3961 break;
3962
43e9d192
IB
3963 default:
3964 break;
3965 }
3966 output_addr_const (asm_out_file, x);
3967 break;
3968
3969 case 'G':
3970
3971 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3972 {
3973 case SYMBOL_SMALL_TPREL:
3974 asm_fprintf (asm_out_file, ":tprel_hi12:");
3975 break;
3976 default:
3977 break;
3978 }
3979 output_addr_const (asm_out_file, x);
3980 break;
3981
3982 default:
3983 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3984 return;
3985 }
3986}
3987
3988void
3989aarch64_print_operand_address (FILE *f, rtx x)
3990{
3991 struct aarch64_address_info addr;
3992
3993 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3994 MEM, true))
3995 switch (addr.type)
3996 {
3997 case ADDRESS_REG_IMM:
3998 if (addr.offset == const0_rtx)
01a3a324 3999 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 4000 else
16a3246f 4001 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
4002 INTVAL (addr.offset));
4003 return;
4004
4005 case ADDRESS_REG_REG:
4006 if (addr.shift == 0)
16a3246f 4007 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 4008 reg_names [REGNO (addr.offset)]);
43e9d192 4009 else
16a3246f 4010 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 4011 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
4012 return;
4013
4014 case ADDRESS_REG_UXTW:
4015 if (addr.shift == 0)
16a3246f 4016 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4017 REGNO (addr.offset) - R0_REGNUM);
4018 else
16a3246f 4019 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4020 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4021 return;
4022
4023 case ADDRESS_REG_SXTW:
4024 if (addr.shift == 0)
16a3246f 4025 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4026 REGNO (addr.offset) - R0_REGNUM);
4027 else
16a3246f 4028 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4029 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4030 return;
4031
4032 case ADDRESS_REG_WB:
4033 switch (GET_CODE (x))
4034 {
4035 case PRE_INC:
16a3246f 4036 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4037 GET_MODE_SIZE (aarch64_memory_reference_mode));
4038 return;
4039 case POST_INC:
16a3246f 4040 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
43e9d192
IB
4041 GET_MODE_SIZE (aarch64_memory_reference_mode));
4042 return;
4043 case PRE_DEC:
16a3246f 4044 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4045 GET_MODE_SIZE (aarch64_memory_reference_mode));
4046 return;
4047 case POST_DEC:
16a3246f 4048 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
43e9d192
IB
4049 GET_MODE_SIZE (aarch64_memory_reference_mode));
4050 return;
4051 case PRE_MODIFY:
16a3246f 4052 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4053 INTVAL (addr.offset));
4054 return;
4055 case POST_MODIFY:
16a3246f 4056 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
4057 INTVAL (addr.offset));
4058 return;
4059 default:
4060 break;
4061 }
4062 break;
4063
4064 case ADDRESS_LO_SUM:
16a3246f 4065 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
4066 output_addr_const (f, addr.offset);
4067 asm_fprintf (f, "]");
4068 return;
4069
4070 case ADDRESS_SYMBOLIC:
4071 break;
4072 }
4073
4074 output_addr_const (f, x);
4075}
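/* Examples of the assembly syntax produced above (illustrative; the exact
   registers and offsets depend on the operands):

     ADDRESS_REG_IMM    [x0]             [x0, 16]
     ADDRESS_REG_REG    [x0, x1]         [x0, x1, lsl 3]
     ADDRESS_REG_UXTW   [x0, w1, uxtw]   [x0, w1, uxtw 2]
     ADDRESS_REG_SXTW   [x0, w1, sxtw]   [x0, w1, sxtw 2]
     ADDRESS_REG_WB     [x0, 8]!   [x0], 8   [x0, -8]!   [x0], -8
     ADDRESS_LO_SUM     [x0, #:lo12:symbol]  */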
4076
43e9d192
IB
4077bool
4078aarch64_label_mentioned_p (rtx x)
4079{
4080 const char *fmt;
4081 int i;
4082
4083 if (GET_CODE (x) == LABEL_REF)
4084 return true;
4085
4086 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4087 referencing instruction, but they are constant offsets, not
4088 symbols. */
4089 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4090 return false;
4091
4092 fmt = GET_RTX_FORMAT (GET_CODE (x));
4093 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4094 {
4095 if (fmt[i] == 'E')
4096 {
4097 int j;
4098
4099 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4100 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4101 return 1;
4102 }
4103 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4104 return 1;
4105 }
4106
4107 return 0;
4108}
4109
4110/* Implement REGNO_REG_CLASS. */
4111
4112enum reg_class
4113aarch64_regno_regclass (unsigned regno)
4114{
4115 if (GP_REGNUM_P (regno))
a4a182c6 4116 return GENERAL_REGS;
43e9d192
IB
4117
4118 if (regno == SP_REGNUM)
4119 return STACK_REG;
4120
4121 if (regno == FRAME_POINTER_REGNUM
4122 || regno == ARG_POINTER_REGNUM)
f24bb080 4123 return POINTER_REGS;
43e9d192
IB
4124
4125 if (FP_REGNUM_P (regno))
4126 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4127
4128 return NO_REGS;
4129}
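/* Following the tests above: x0-x30 map to GENERAL_REGS, the stack pointer
   to STACK_REG, the soft frame and argument pointers to POINTER_REGS,
   v0-v15 to FP_LO_REGS, v16-v31 to FP_REGS, and any other register
   (for instance the CC register) to NO_REGS.  */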
4130
4131/* Try a machine-dependent way of reloading an illegitimate address
4132 operand. If we find one, push the reload and return the new rtx. */
4133
4134rtx
4135aarch64_legitimize_reload_address (rtx *x_p,
4136 enum machine_mode mode,
4137 int opnum, int type,
4138 int ind_levels ATTRIBUTE_UNUSED)
4139{
4140 rtx x = *x_p;
4141
348d4b0a
BC
4142 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4143 if (aarch64_vect_struct_mode_p (mode)
43e9d192
IB
4144 && GET_CODE (x) == PLUS
4145 && REG_P (XEXP (x, 0))
4146 && CONST_INT_P (XEXP (x, 1)))
4147 {
4148 rtx orig_rtx = x;
4149 x = copy_rtx (x);
4150 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4151 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4152 opnum, (enum reload_type) type);
4153 return x;
4154 }
4155
4156 /* We must recognize output that we have already generated ourselves. */
4157 if (GET_CODE (x) == PLUS
4158 && GET_CODE (XEXP (x, 0)) == PLUS
4159 && REG_P (XEXP (XEXP (x, 0), 0))
4160 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4161 && CONST_INT_P (XEXP (x, 1)))
4162 {
4163 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4164 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4165 opnum, (enum reload_type) type);
4166 return x;
4167 }
4168
 4169 /* We wish to handle large displacements off a base register by splitting
 4170 the addend across an add and the mem insn. This can cut the number of
 4171 extra insns needed from 3 to 1. It is only useful for a load/store of a
 4172 single register with a 12-bit offset field. */
4173 if (GET_CODE (x) == PLUS
4174 && REG_P (XEXP (x, 0))
4175 && CONST_INT_P (XEXP (x, 1))
4176 && HARD_REGISTER_P (XEXP (x, 0))
4177 && mode != TImode
4178 && mode != TFmode
4179 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4180 {
4181 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4182 HOST_WIDE_INT low = val & 0xfff;
4183 HOST_WIDE_INT high = val - low;
4184 HOST_WIDE_INT offs;
4185 rtx cst;
28514dda
YZ
4186 enum machine_mode xmode = GET_MODE (x);
4187
4188 /* In ILP32, xmode can be either DImode or SImode. */
4189 gcc_assert (xmode == DImode || xmode == SImode);
43e9d192
IB
4190
 4191 /* Do not handle BLKmode (zero-size) offsets here: we cannot ascertain
 4192 BLKmode alignment, so leave them to the generic reload code. */
4193 if (GET_MODE_SIZE (mode) == 0)
4194 return NULL_RTX;
4195
4196 offs = low % GET_MODE_SIZE (mode);
4197
4198 /* Align misaligned offset by adjusting high part to compensate. */
4199 if (offs != 0)
4200 {
4201 if (aarch64_uimm12_shift (high + offs))
4202 {
4203 /* Align down. */
4204 low = low - offs;
4205 high = high + offs;
4206 }
4207 else
4208 {
4209 /* Align up. */
4210 offs = GET_MODE_SIZE (mode) - offs;
4211 low = low + offs;
4212 high = high + (low & 0x1000) - offs;
4213 low &= 0xfff;
4214 }
4215 }
4216
4217 /* Check for overflow. */
4218 if (high + low != val)
4219 return NULL_RTX;
4220
4221 cst = GEN_INT (high);
4222 if (!aarch64_uimm12_shift (high))
28514dda 4223 cst = force_const_mem (xmode, cst);
43e9d192
IB
4224
4225 /* Reload high part into base reg, leaving the low part
acf67b8e
KT
4226 in the mem instruction.
4227 Note that replacing this gen_rtx_PLUS with plus_constant is
4228 wrong in this case because we rely on the
4229 (plus (plus reg c1) c2) structure being preserved so that
4230 XEXP (*p, 0) in push_reload below uses the correct term. */
4d0e5b54
JG
4231 x = gen_rtx_PLUS (xmode,
4232 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4233 GEN_INT (low));
43e9d192
IB
4234
4235 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
28514dda 4236 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
43e9d192
IB
4237 opnum, (enum reload_type) type);
4238 return x;
4239 }
4240
4241 return NULL_RTX;
4242}
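/* A worked example of the high/low split above, with illustrative values:
   reloading an SImode access at base + 0x12344 gives

     low  = 0x12344 & 0xfff = 0x344    (a multiple of the mode size)
     high = 0x12344 - 0x344 = 0x12000  (a 12-bit immediate shifted by 12)

   so the access becomes roughly

     add  x1, x0, #0x12, lsl #12
     ldr  w2, [x1, #0x344]

   rather than building the full constant separately.  */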
4243
4244
4245static reg_class_t
4246aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4247 reg_class_t rclass,
4248 enum machine_mode mode,
4249 secondary_reload_info *sri)
4250{
43e9d192
IB
4251 /* Without the TARGET_SIMD instructions we cannot move a Q register
4252 to a Q register directly. We need a scratch. */
4253 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4254 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4255 && reg_class_subset_p (rclass, FP_REGS))
4256 {
4257 if (mode == TFmode)
4258 sri->icode = CODE_FOR_aarch64_reload_movtf;
4259 else if (mode == TImode)
4260 sri->icode = CODE_FOR_aarch64_reload_movti;
4261 return NO_REGS;
4262 }
4263
 4264 /* A TFmode or TImode memory access should be handled via FP_REGS
 4265 because AArch64 has richer addressing modes for LDR/STR instructions
 4266 than for LDP/STP instructions. */
a4a182c6 4267 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
43e9d192
IB
4268 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4269 return FP_REGS;
4270
4271 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 4272 return GENERAL_REGS;
43e9d192
IB
4273
4274 return NO_REGS;
4275}
4276
4277static bool
4278aarch64_can_eliminate (const int from, const int to)
4279{
4280 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4281 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4282
4283 if (frame_pointer_needed)
4284 {
4285 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4286 return true;
4287 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4288 return false;
4289 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4290 && !cfun->calls_alloca)
4291 return true;
4292 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4293 return true;
0b7f8166
MS
4294
4295 return false;
43e9d192 4296 }
777e6976 4297
43e9d192
IB
4298 return true;
4299}
4300
4301HOST_WIDE_INT
4302aarch64_initial_elimination_offset (unsigned from, unsigned to)
4303{
43e9d192 4304 aarch64_layout_frame ();
78c29983
MS
4305
4306 if (to == HARD_FRAME_POINTER_REGNUM)
4307 {
4308 if (from == ARG_POINTER_REGNUM)
1c960e02 4309 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
78c29983
MS
4310
4311 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
4312 return (cfun->machine->frame.hard_fp_offset
4313 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
4314 }
4315
4316 if (to == STACK_POINTER_REGNUM)
4317 {
4318 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
4319 return (cfun->machine->frame.frame_size
4320 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
4321 }
4322
1c960e02 4323 return cfun->machine->frame.frame_size;
43e9d192
IB
4324}
4325
43e9d192
IB
4326/* Implement RETURN_ADDR_RTX. We do not support moving back to a
4327 previous frame. */
4328
4329rtx
4330aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4331{
4332 if (count != 0)
4333 return const0_rtx;
4334 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4335}
4336
4337
4338static void
4339aarch64_asm_trampoline_template (FILE *f)
4340{
28514dda
YZ
4341 if (TARGET_ILP32)
4342 {
4343 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4344 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4345 }
4346 else
4347 {
4348 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4349 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4350 }
01a3a324 4351 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 4352 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
4353 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4354 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
4355}
4356
4357static void
4358aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4359{
4360 rtx fnaddr, mem, a_tramp;
28514dda 4361 const int tramp_code_sz = 16;
43e9d192
IB
4362
4363 /* Don't need to copy the trailing D-words, we fill those in below. */
4364 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
4365 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4366 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 4367 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
4368 if (GET_MODE (fnaddr) != ptr_mode)
4369 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
4370 emit_move_insn (mem, fnaddr);
4371
28514dda 4372 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
4373 emit_move_insn (mem, chain_value);
4374
4375 /* XXX We should really define a "clear_cache" pattern and use
4376 gen_clear_cache(). */
4377 a_tramp = XEXP (m_tramp, 0);
4378 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
4379 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4380 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4381 ptr_mode);
43e9d192
IB
4382}
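/* Assuming the usual register choices (x17 for IP1, x18 for the static
   chain), the LP64 trampoline emitted by aarch64_asm_trampoline_template
   and filled in by aarch64_trampoline_init above is laid out roughly as:

     offset  0:  ldr   x17, .+16        ; load the target function address
     offset  4:  ldr   x18, .+20        ; load the static chain value
     offset  8:  br    x17
     offset 12:  .word 0                ; padding
     offset 16:  .dword <function address>
     offset 24:  .dword <static chain>  */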
4383
4384static unsigned char
4385aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4386{
4387 switch (regclass)
4388 {
fee9ba42 4389 case CALLER_SAVE_REGS:
43e9d192
IB
4390 case POINTER_REGS:
4391 case GENERAL_REGS:
4392 case ALL_REGS:
4393 case FP_REGS:
4394 case FP_LO_REGS:
4395 return
4396 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
aef66c94 4397 (GET_MODE_SIZE (mode) + 7) / 8;
43e9d192
IB
4398 case STACK_REG:
4399 return 1;
4400
4401 case NO_REGS:
4402 return 0;
4403
4404 default:
4405 break;
4406 }
4407 gcc_unreachable ();
4408}
4409
4410static reg_class_t
78d8b9f0 4411aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 4412{
51bb310d 4413 if (regclass == POINTER_REGS)
78d8b9f0
IB
4414 return GENERAL_REGS;
4415
51bb310d
MS
4416 if (regclass == STACK_REG)
4417 {
4418 if (REG_P(x)
4419 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4420 return regclass;
4421
4422 return NO_REGS;
4423 }
4424
78d8b9f0
IB
4425 /* If it's an integer immediate that MOVI can't handle, then
4426 FP_REGS is not an option, so we return NO_REGS instead. */
4427 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4428 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4429 return NO_REGS;
4430
27bd251b
IB
 4431 /* Register elimination can result in a request for
 4432 SP+constant->FP_REGS. We cannot support such operations, which
 4433 use SP as source and an FP_REG as destination, so reject them
 4434 right now. */
4435 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4436 {
4437 rtx lhs = XEXP (x, 0);
4438
4439 /* Look through a possible SUBREG introduced by ILP32. */
4440 if (GET_CODE (lhs) == SUBREG)
4441 lhs = SUBREG_REG (lhs);
4442
4443 gcc_assert (REG_P (lhs));
4444 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4445 POINTER_REGS));
4446 return NO_REGS;
4447 }
4448
78d8b9f0 4449 return regclass;
43e9d192
IB
4450}
4451
4452void
4453aarch64_asm_output_labelref (FILE* f, const char *name)
4454{
4455 asm_fprintf (f, "%U%s", name);
4456}
4457
4458static void
4459aarch64_elf_asm_constructor (rtx symbol, int priority)
4460{
4461 if (priority == DEFAULT_INIT_PRIORITY)
4462 default_ctor_section_asm_out_constructor (symbol, priority);
4463 else
4464 {
4465 section *s;
4466 char buf[18];
4467 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4468 s = get_section (buf, SECTION_WRITE, NULL);
4469 switch_to_section (s);
4470 assemble_align (POINTER_SIZE);
28514dda 4471 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4472 }
4473}
4474
4475static void
4476aarch64_elf_asm_destructor (rtx symbol, int priority)
4477{
4478 if (priority == DEFAULT_INIT_PRIORITY)
4479 default_dtor_section_asm_out_destructor (symbol, priority);
4480 else
4481 {
4482 section *s;
4483 char buf[18];
4484 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4485 s = get_section (buf, SECTION_WRITE, NULL);
4486 switch_to_section (s);
4487 assemble_align (POINTER_SIZE);
28514dda 4488 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4489 }
4490}
4491
4492const char*
4493aarch64_output_casesi (rtx *operands)
4494{
4495 char buf[100];
4496 char label[100];
592a16fc 4497 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
43e9d192
IB
4498 int index;
4499 static const char *const patterns[4][2] =
4500 {
4501 {
4502 "ldrb\t%w3, [%0,%w1,uxtw]",
4503 "add\t%3, %4, %w3, sxtb #2"
4504 },
4505 {
4506 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4507 "add\t%3, %4, %w3, sxth #2"
4508 },
4509 {
4510 "ldr\t%w3, [%0,%w1,uxtw #2]",
4511 "add\t%3, %4, %w3, sxtw #2"
4512 },
4513 /* We assume that DImode is only generated when not optimizing and
4514 that we don't really need 64-bit address offsets. That would
4515 imply an object file with 8GB of code in a single function! */
4516 {
4517 "ldr\t%w3, [%0,%w1,uxtw #2]",
4518 "add\t%3, %4, %w3, sxtw #2"
4519 }
4520 };
4521
4522 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4523
4524 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4525
4526 gcc_assert (index >= 0 && index <= 3);
4527
 4528 /* Need to implement table size reduction, by changing the code below. */
4529 output_asm_insn (patterns[index][0], operands);
4530 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4531 snprintf (buf, sizeof (buf),
4532 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4533 output_asm_insn (buf, operands);
4534 output_asm_insn (patterns[index][1], operands);
4535 output_asm_insn ("br\t%3", operands);
4536 assemble_label (asm_out_file, label);
4537 return "";
4538}
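/* For a QImode dispatch table the routine above emits a sequence roughly
   like the following (concrete registers shown only for illustration):

     ldrb  w3, [x0, w1, uxtw]      ; load the table entry
     adr   x4, .Lrtx<N>            ; address of the label emitted below
     add   x3, x4, w3, sxtb #2     ; scale the entry by 4 and add
     br    x3
   .Lrtx<N>:  */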
4539
4540
4541/* Return size in bits of an arithmetic operand which is shifted/scaled and
4542 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4543 operator. */
4544
4545int
4546aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4547{
4548 if (shift >= 0 && shift <= 3)
4549 {
4550 int size;
4551 for (size = 8; size <= 32; size *= 2)
4552 {
4553 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4554 if (mask == bits << shift)
4555 return size;
4556 }
4557 }
4558 return 0;
4559}
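/* Worked examples: a mask of 0xff at shift 0 or 0x1fe at shift 1 yields 8
   (UXTB), 0x3fffc at shift 2 yields 16 (UXTH), 0xffffffff at shift 0
   yields 32 (UXTW); any other mask, or a shift outside 0..3, yields 0.  */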
4560
4561static bool
4562aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4563 const_rtx x ATTRIBUTE_UNUSED)
4564{
4565 /* We can't use blocks for constants when we're using a per-function
4566 constant pool. */
4567 return false;
4568}
4569
4570static section *
4571aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4572 rtx x ATTRIBUTE_UNUSED,
4573 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4574{
4575 /* Force all constant pool entries into the current function section. */
4576 return function_section (current_function_decl);
4577}
4578
4579
4580/* Costs. */
4581
4582/* Helper function for rtx cost calculation. Strip a shift expression
4583 from X. Returns the inner operand if successful, or the original
4584 expression on failure. */
4585static rtx
4586aarch64_strip_shift (rtx x)
4587{
4588 rtx op = x;
4589
57b77d46
RE
4590 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4591 we can convert both to ROR during final output. */
43e9d192
IB
4592 if ((GET_CODE (op) == ASHIFT
4593 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
4594 || GET_CODE (op) == LSHIFTRT
4595 || GET_CODE (op) == ROTATERT
4596 || GET_CODE (op) == ROTATE)
43e9d192
IB
4597 && CONST_INT_P (XEXP (op, 1)))
4598 return XEXP (op, 0);
4599
4600 if (GET_CODE (op) == MULT
4601 && CONST_INT_P (XEXP (op, 1))
4602 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4603 return XEXP (op, 0);
4604
4605 return x;
4606}
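/* For example (illustrative RTL): applied to (ashift (reg x1) (const_int 3))
   or to (mult (reg x1) (const_int 8)) this returns (reg x1); applied to an
   expression that is neither a constant shift nor a power-of-two multiply
   it returns the expression unchanged.  */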
4607
4745e701 4608/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
4609 expression from X. Returns the inner operand if successful, or the
4610 original expression on failure. We deal with a number of possible
4611 canonicalization variations here. */
4612static rtx
4745e701 4613aarch64_strip_extend (rtx x)
43e9d192
IB
4614{
4615 rtx op = x;
4616
4617 /* Zero and sign extraction of a widened value. */
4618 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4619 && XEXP (op, 2) == const0_rtx
4745e701 4620 && GET_CODE (XEXP (op, 0)) == MULT
43e9d192
IB
4621 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4622 XEXP (op, 1)))
4623 return XEXP (XEXP (op, 0), 0);
4624
4625 /* It can also be represented (for zero-extend) as an AND with an
4626 immediate. */
4627 if (GET_CODE (op) == AND
4628 && GET_CODE (XEXP (op, 0)) == MULT
4629 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4630 && CONST_INT_P (XEXP (op, 1))
4631 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4632 INTVAL (XEXP (op, 1))) != 0)
4633 return XEXP (XEXP (op, 0), 0);
4634
4635 /* Now handle extended register, as this may also have an optional
4636 left shift by 1..4. */
4637 if (GET_CODE (op) == ASHIFT
4638 && CONST_INT_P (XEXP (op, 1))
4639 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4640 op = XEXP (op, 0);
4641
4642 if (GET_CODE (op) == ZERO_EXTEND
4643 || GET_CODE (op) == SIGN_EXTEND)
4644 op = XEXP (op, 0);
4645
4646 if (op != x)
4647 return op;
4648
4745e701
JG
4649 return x;
4650}
4651
4652/* Helper function for rtx cost calculation. Calculate the cost of
4653 a MULT, which may be part of a multiply-accumulate rtx. Return
4654 the calculated cost of the expression, recursing manually in to
4655 operands where needed. */
4656
4657static int
4658aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4659{
4660 rtx op0, op1;
4661 const struct cpu_cost_table *extra_cost
4662 = aarch64_tune_params->insn_extra_cost;
4663 int cost = 0;
4664 bool maybe_fma = (outer == PLUS || outer == MINUS);
4665 enum machine_mode mode = GET_MODE (x);
4666
4667 gcc_checking_assert (code == MULT);
4668
4669 op0 = XEXP (x, 0);
4670 op1 = XEXP (x, 1);
4671
4672 if (VECTOR_MODE_P (mode))
4673 mode = GET_MODE_INNER (mode);
4674
4675 /* Integer multiply/fma. */
4676 if (GET_MODE_CLASS (mode) == MODE_INT)
4677 {
4678 /* The multiply will be canonicalized as a shift, cost it as such. */
4679 if (CONST_INT_P (op1)
4680 && exact_log2 (INTVAL (op1)) > 0)
4681 {
4682 if (speed)
4683 {
4684 if (maybe_fma)
4685 /* ADD (shifted register). */
4686 cost += extra_cost->alu.arith_shift;
4687 else
4688 /* LSL (immediate). */
4689 cost += extra_cost->alu.shift;
4690 }
4691
4692 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4693
4694 return cost;
4695 }
4696
4697 /* Integer multiplies or FMAs have zero/sign extending variants. */
4698 if ((GET_CODE (op0) == ZERO_EXTEND
4699 && GET_CODE (op1) == ZERO_EXTEND)
4700 || (GET_CODE (op0) == SIGN_EXTEND
4701 && GET_CODE (op1) == SIGN_EXTEND))
4702 {
4703 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4704 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4705
4706 if (speed)
4707 {
4708 if (maybe_fma)
4709 /* MADD/SMADDL/UMADDL. */
4710 cost += extra_cost->mult[0].extend_add;
4711 else
4712 /* MUL/SMULL/UMULL. */
4713 cost += extra_cost->mult[0].extend;
4714 }
4715
4716 return cost;
4717 }
4718
4719 /* This is either an integer multiply or an FMA. In both cases
4720 we want to recurse and cost the operands. */
4721 cost += rtx_cost (op0, MULT, 0, speed)
4722 + rtx_cost (op1, MULT, 1, speed);
4723
4724 if (speed)
4725 {
4726 if (maybe_fma)
4727 /* MADD. */
4728 cost += extra_cost->mult[mode == DImode].add;
4729 else
4730 /* MUL. */
4731 cost += extra_cost->mult[mode == DImode].simple;
4732 }
4733
4734 return cost;
4735 }
4736 else
4737 {
4738 if (speed)
4739 {
3d840f7d 4740 /* Floating-point FMA/FMUL can also support negations of the
4745e701
JG
4741 operands. */
4742 if (GET_CODE (op0) == NEG)
3d840f7d 4743 op0 = XEXP (op0, 0);
4745e701 4744 if (GET_CODE (op1) == NEG)
3d840f7d 4745 op1 = XEXP (op1, 0);
4745e701
JG
4746
4747 if (maybe_fma)
4748 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4749 cost += extra_cost->fp[mode == DFmode].fma;
4750 else
3d840f7d 4751 /* FMUL/FNMUL. */
4745e701
JG
4752 cost += extra_cost->fp[mode == DFmode].mult;
4753 }
4754
4755 cost += rtx_cost (op0, MULT, 0, speed)
4756 + rtx_cost (op1, MULT, 1, speed);
4757 return cost;
4758 }
43e9d192
IB
4759}
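/* As an illustration of the scheme above: (mult x (const_int 8)) is costed
   as a shift (LSL #3, or an ADD with shifted register when it feeds a
   PLUS/MINUS), while (plus (mult (sign_extend a) (sign_extend b)) c) is
   costed as the extend-and-add form (SMADDL).  */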
4760
67747367
JG
4761static int
4762aarch64_address_cost (rtx x,
4763 enum machine_mode mode,
4764 addr_space_t as ATTRIBUTE_UNUSED,
4765 bool speed)
4766{
4767 enum rtx_code c = GET_CODE (x);
4768 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4769 struct aarch64_address_info info;
4770 int cost = 0;
4771 info.shift = 0;
4772
4773 if (!aarch64_classify_address (&info, x, mode, c, false))
4774 {
4775 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4776 {
4777 /* This is a CONST or SYMBOL ref which will be split
4778 in a different way depending on the code model in use.
4779 Cost it through the generic infrastructure. */
4780 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4781 /* Divide through by the cost of one instruction to
4782 bring it to the same units as the address costs. */
4783 cost_symbol_ref /= COSTS_N_INSNS (1);
4784 /* The cost is then the cost of preparing the address,
4785 followed by an immediate (possibly 0) offset. */
4786 return cost_symbol_ref + addr_cost->imm_offset;
4787 }
4788 else
4789 {
4790 /* This is most likely a jump table from a case
4791 statement. */
4792 return addr_cost->register_offset;
4793 }
4794 }
4795
4796 switch (info.type)
4797 {
4798 case ADDRESS_LO_SUM:
4799 case ADDRESS_SYMBOLIC:
4800 case ADDRESS_REG_IMM:
4801 cost += addr_cost->imm_offset;
4802 break;
4803
4804 case ADDRESS_REG_WB:
4805 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4806 cost += addr_cost->pre_modify;
4807 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4808 cost += addr_cost->post_modify;
4809 else
4810 gcc_unreachable ();
4811
4812 break;
4813
4814 case ADDRESS_REG_REG:
4815 cost += addr_cost->register_offset;
4816 break;
4817
4818 case ADDRESS_REG_UXTW:
4819 case ADDRESS_REG_SXTW:
4820 cost += addr_cost->register_extend;
4821 break;
4822
4823 default:
4824 gcc_unreachable ();
4825 }
4826
4827
4828 if (info.shift > 0)
4829 {
4830 /* For the sake of calculating the cost of the shifted register
4831 component, we can treat same sized modes in the same way. */
4832 switch (GET_MODE_BITSIZE (mode))
4833 {
4834 case 16:
4835 cost += addr_cost->addr_scale_costs.hi;
4836 break;
4837
4838 case 32:
4839 cost += addr_cost->addr_scale_costs.si;
4840 break;
4841
4842 case 64:
4843 cost += addr_cost->addr_scale_costs.di;
4844 break;
4845
4846 /* We can't tell, or this is a 128-bit vector. */
4847 default:
4848 cost += addr_cost->addr_scale_costs.ti;
4849 break;
4850 }
4851 }
4852
4853 return cost;
4854}
4855
7cc2145f
JG
4856/* Return true if the RTX X in mode MODE is a zero or sign extract
4857 usable in an ADD or SUB (extended register) instruction. */
4858static bool
4859aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
4860{
4861 /* Catch add with a sign extract.
4862 This is add_<optab><mode>_multp2. */
4863 if (GET_CODE (x) == SIGN_EXTRACT
4864 || GET_CODE (x) == ZERO_EXTRACT)
4865 {
4866 rtx op0 = XEXP (x, 0);
4867 rtx op1 = XEXP (x, 1);
4868 rtx op2 = XEXP (x, 2);
4869
4870 if (GET_CODE (op0) == MULT
4871 && CONST_INT_P (op1)
4872 && op2 == const0_rtx
4873 && CONST_INT_P (XEXP (op0, 1))
4874 && aarch64_is_extend_from_extract (mode,
4875 XEXP (op0, 1),
4876 op1))
4877 {
4878 return true;
4879 }
4880 }
4881
4882 return false;
4883}
4884
61263118
KT
4885static bool
4886aarch64_frint_unspec_p (unsigned int u)
4887{
4888 switch (u)
4889 {
4890 case UNSPEC_FRINTZ:
4891 case UNSPEC_FRINTP:
4892 case UNSPEC_FRINTM:
4893 case UNSPEC_FRINTA:
4894 case UNSPEC_FRINTN:
4895 case UNSPEC_FRINTX:
4896 case UNSPEC_FRINTI:
4897 return true;
4898
4899 default:
4900 return false;
4901 }
4902}
4903
2d5ffe46
AP
4904/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
4905 storing it in *COST. Result is true if the total cost of the operation
4906 has now been calculated. */
4907static bool
4908aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
4909{
b9e3afe9
AP
4910 rtx inner;
4911 rtx comparator;
4912 enum rtx_code cmpcode;
4913
4914 if (COMPARISON_P (op0))
4915 {
4916 inner = XEXP (op0, 0);
4917 comparator = XEXP (op0, 1);
4918 cmpcode = GET_CODE (op0);
4919 }
4920 else
4921 {
4922 inner = op0;
4923 comparator = const0_rtx;
4924 cmpcode = NE;
4925 }
4926
2d5ffe46
AP
4927 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
4928 {
4929 /* Conditional branch. */
b9e3afe9 4930 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
4931 return true;
4932 else
4933 {
b9e3afe9 4934 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 4935 {
2d5ffe46
AP
4936 if (comparator == const0_rtx)
4937 {
4938 /* TBZ/TBNZ/CBZ/CBNZ. */
4939 if (GET_CODE (inner) == ZERO_EXTRACT)
4940 /* TBZ/TBNZ. */
4941 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
4942 0, speed);
4943 else
4944 /* CBZ/CBNZ. */
b9e3afe9 4945 *cost += rtx_cost (inner, cmpcode, 0, speed);
2d5ffe46
AP
4946
4947 return true;
4948 }
4949 }
b9e3afe9 4950 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 4951 {
2d5ffe46
AP
4952 /* TBZ/TBNZ. */
4953 if (comparator == const0_rtx)
4954 return true;
4955 }
4956 }
4957 }
b9e3afe9 4958 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
4959 {
4960 /* It's a conditional operation based on the status flags,
4961 so it must be some flavor of CSEL. */
4962
4963 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
4964 if (GET_CODE (op1) == NEG
4965 || GET_CODE (op1) == NOT
4966 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
4967 op1 = XEXP (op1, 0);
4968
4969 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
4970 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
4971 return true;
4972 }
4973
4974 /* We don't know what this is, cost all operands. */
4975 return false;
4976}
4977
43e9d192
IB
4978/* Calculate the cost of calculating X, storing it in *COST. Result
4979 is true if the total cost of the operation has now been calculated. */
4980static bool
4981aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4982 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4983{
a8eecd00 4984 rtx op0, op1, op2;
73250c4c 4985 const struct cpu_cost_table *extra_cost
43e9d192 4986 = aarch64_tune_params->insn_extra_cost;
9dfc162c 4987 enum machine_mode mode = GET_MODE (x);
43e9d192 4988
7fc5ef02
JG
4989 /* By default, assume that everything has equivalent cost to the
4990 cheapest instruction. Any additional costs are applied as a delta
4991 above this default. */
4992 *cost = COSTS_N_INSNS (1);
4993
4994 /* TODO: The cost infrastructure currently does not handle
4995 vector operations. Assume that all vector operations
4996 are equally expensive. */
4997 if (VECTOR_MODE_P (mode))
4998 {
4999 if (speed)
5000 *cost += extra_cost->vect.alu;
5001 return true;
5002 }
5003
43e9d192
IB
5004 switch (code)
5005 {
5006 case SET:
ba123b0d
JG
5007 /* The cost depends entirely on the operands to SET. */
5008 *cost = 0;
43e9d192
IB
5009 op0 = SET_DEST (x);
5010 op1 = SET_SRC (x);
5011
5012 switch (GET_CODE (op0))
5013 {
5014 case MEM:
5015 if (speed)
2961177e
JG
5016 {
5017 rtx address = XEXP (op0, 0);
5018 if (GET_MODE_CLASS (mode) == MODE_INT)
5019 *cost += extra_cost->ldst.store;
5020 else if (mode == SFmode)
5021 *cost += extra_cost->ldst.storef;
5022 else if (mode == DFmode)
5023 *cost += extra_cost->ldst.stored;
5024
5025 *cost +=
5026 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5027 0, speed));
5028 }
43e9d192 5029
ba123b0d 5030 *cost += rtx_cost (op1, SET, 1, speed);
43e9d192
IB
5031 return true;
5032
5033 case SUBREG:
5034 if (! REG_P (SUBREG_REG (op0)))
5035 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
ba123b0d 5036
43e9d192
IB
5037 /* Fall through. */
5038 case REG:
ba123b0d
JG
5039 /* const0_rtx is in general free, but we will use an
5040 instruction to set a register to 0. */
5041 if (REG_P (op1) || op1 == const0_rtx)
5042 {
5043 /* The cost is 1 per register copied. */
5044 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5045 / UNITS_PER_WORD;
5046 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5047 }
5048 else
5049 /* Cost is just the cost of the RHS of the set. */
5050 *cost += rtx_cost (op1, SET, 1, speed);
43e9d192
IB
5051 return true;
5052
ba123b0d 5053 case ZERO_EXTRACT:
43e9d192 5054 case SIGN_EXTRACT:
ba123b0d
JG
5055 /* Bit-field insertion. Strip any redundant widening of
5056 the RHS to meet the width of the target. */
43e9d192
IB
5057 if (GET_CODE (op1) == SUBREG)
5058 op1 = SUBREG_REG (op1);
5059 if ((GET_CODE (op1) == ZERO_EXTEND
5060 || GET_CODE (op1) == SIGN_EXTEND)
5061 && GET_CODE (XEXP (op0, 1)) == CONST_INT
5062 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5063 >= INTVAL (XEXP (op0, 1))))
5064 op1 = XEXP (op1, 0);
ba123b0d
JG
5065
5066 if (CONST_INT_P (op1))
5067 {
5068 /* MOV immediate is assumed to always be cheap. */
5069 *cost = COSTS_N_INSNS (1);
5070 }
5071 else
5072 {
5073 /* BFM. */
5074 if (speed)
5075 *cost += extra_cost->alu.bfi;
5076 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5077 }
5078
43e9d192
IB
5079 return true;
5080
5081 default:
ba123b0d
JG
5082 /* We can't make sense of this, assume default cost. */
5083 *cost = COSTS_N_INSNS (1);
61263118 5084 return false;
43e9d192
IB
5085 }
5086 return false;
5087
9dfc162c
JG
5088 case CONST_INT:
5089 /* If an instruction can incorporate a constant within the
5090 instruction, the instruction's expression avoids calling
5091 rtx_cost() on the constant. If rtx_cost() is called on a
5092 constant, then it is usually because the constant must be
5093 moved into a register by one or more instructions.
5094
5095 The exception is constant 0, which can be expressed
5096 as XZR/WZR and is therefore free. The exception to this is
5097 if we have (set (reg) (const0_rtx)) in which case we must cost
5098 the move. However, we can catch that when we cost the SET, so
5099 we don't need to consider that here. */
5100 if (x == const0_rtx)
5101 *cost = 0;
5102 else
5103 {
5104 /* To an approximation, building any other constant is
5105 proportionally expensive to the number of instructions
5106 required to build that constant. This is true whether we
5107 are compiling for SPEED or otherwise. */
5108 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
5109 INTVAL (x),
5110 false));
5111 }
5112 return true;
5113
5114 case CONST_DOUBLE:
5115 if (speed)
5116 {
5117 /* mov[df,sf]_aarch64. */
5118 if (aarch64_float_const_representable_p (x))
5119 /* FMOV (scalar immediate). */
5120 *cost += extra_cost->fp[mode == DFmode].fpconst;
5121 else if (!aarch64_float_const_zero_rtx_p (x))
5122 {
5123 /* This will be a load from memory. */
5124 if (mode == DFmode)
5125 *cost += extra_cost->ldst.loadd;
5126 else
5127 *cost += extra_cost->ldst.loadf;
5128 }
5129 else
5130 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5131 or MOV v0.s[0], wzr - neither of which are modeled by the
5132 cost tables. Just use the default cost. */
5133 {
5134 }
5135 }
5136
5137 return true;
5138
43e9d192
IB
5139 case MEM:
5140 if (speed)
2961177e
JG
5141 {
5142 /* For loads we want the base cost of a load, plus an
5143 approximation for the additional cost of the addressing
5144 mode. */
5145 rtx address = XEXP (x, 0);
5146 if (GET_MODE_CLASS (mode) == MODE_INT)
5147 *cost += extra_cost->ldst.load;
5148 else if (mode == SFmode)
5149 *cost += extra_cost->ldst.loadf;
5150 else if (mode == DFmode)
5151 *cost += extra_cost->ldst.loadd;
5152
5153 *cost +=
5154 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5155 0, speed));
5156 }
43e9d192
IB
5157
5158 return true;
5159
5160 case NEG:
4745e701
JG
5161 op0 = XEXP (x, 0);
5162
5163 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5164 {
5165 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5166 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5167 {
5168 /* CSETM. */
5169 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5170 return true;
5171 }
5172
5173 /* Cost this as SUB wzr, X. */
5174 op0 = CONST0_RTX (GET_MODE (x));
5175 op1 = XEXP (x, 0);
5176 goto cost_minus;
5177 }
5178
5179 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5180 {
5181 /* Support (neg(fma...)) as a single instruction only if
5182 sign of zeros is unimportant. This matches the decision
5183 making in aarch64.md. */
5184 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5185 {
5186 /* FNMADD. */
5187 *cost = rtx_cost (op0, NEG, 0, speed);
5188 return true;
5189 }
5190 if (speed)
5191 /* FNEG. */
5192 *cost += extra_cost->fp[mode == DFmode].neg;
5193 return false;
5194 }
5195
5196 return false;
43e9d192 5197
781aeb73
KT
5198 case CLRSB:
5199 case CLZ:
5200 if (speed)
5201 *cost += extra_cost->alu.clz;
5202
5203 return false;
5204
43e9d192
IB
5205 case COMPARE:
5206 op0 = XEXP (x, 0);
5207 op1 = XEXP (x, 1);
5208
5209 if (op1 == const0_rtx
5210 && GET_CODE (op0) == AND)
5211 {
5212 x = op0;
5213 goto cost_logic;
5214 }
5215
a8eecd00
JG
5216 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5217 {
5218 /* TODO: A write to the CC flags possibly costs extra, this
5219 needs encoding in the cost tables. */
5220
5221 /* CC_ZESWPmode supports zero extend for free. */
5222 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5223 op0 = XEXP (op0, 0);
5224
5225 /* ANDS. */
5226 if (GET_CODE (op0) == AND)
5227 {
5228 x = op0;
5229 goto cost_logic;
5230 }
5231
5232 if (GET_CODE (op0) == PLUS)
5233 {
5234 /* ADDS (and CMN alias). */
5235 x = op0;
5236 goto cost_plus;
5237 }
5238
5239 if (GET_CODE (op0) == MINUS)
5240 {
5241 /* SUBS. */
5242 x = op0;
5243 goto cost_minus;
5244 }
5245
5246 if (GET_CODE (op1) == NEG)
5247 {
5248 /* CMN. */
5249 if (speed)
5250 *cost += extra_cost->alu.arith;
5251
5252 *cost += rtx_cost (op0, COMPARE, 0, speed);
5253 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5254 return true;
5255 }
5256
5257 /* CMP.
5258
5259 Compare can freely swap the order of operands, and
5260 canonicalization puts the more complex operation first.
5261 But the integer MINUS logic expects the shift/extend
5262 operation in op1. */
5263 if (! (REG_P (op0)
5264 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5265 {
5266 op0 = XEXP (x, 1);
5267 op1 = XEXP (x, 0);
5268 }
5269 goto cost_minus;
5270 }
5271
5272 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5273 {
5274 /* FCMP. */
5275 if (speed)
5276 *cost += extra_cost->fp[mode == DFmode].compare;
5277
5278 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5279 {
5280 /* FCMP supports constant 0.0 for no extra cost. */
5281 return true;
5282 }
5283 return false;
5284 }
5285
5286 return false;
43e9d192
IB
5287
5288 case MINUS:
4745e701
JG
5289 {
5290 op0 = XEXP (x, 0);
5291 op1 = XEXP (x, 1);
5292
5293cost_minus:
5294 /* Detect valid immediates. */
5295 if ((GET_MODE_CLASS (mode) == MODE_INT
5296 || (GET_MODE_CLASS (mode) == MODE_CC
5297 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5298 && CONST_INT_P (op1)
5299 && aarch64_uimm12_shift (INTVAL (op1)))
5300 {
5301 *cost += rtx_cost (op0, MINUS, 0, speed);
43e9d192 5302
4745e701
JG
5303 if (speed)
5304 /* SUB(S) (immediate). */
5305 *cost += extra_cost->alu.arith;
5306 return true;
5307
5308 }
5309
7cc2145f
JG
5310 /* Look for SUB (extended register). */
5311 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5312 {
5313 if (speed)
5314 *cost += extra_cost->alu.arith_shift;
5315
5316 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5317 (enum rtx_code) GET_CODE (op1),
5318 0, speed);
5319 return true;
5320 }
5321
4745e701
JG
5322 rtx new_op1 = aarch64_strip_extend (op1);
5323
5324 /* Cost this as an FMA-alike operation. */
5325 if ((GET_CODE (new_op1) == MULT
5326 || GET_CODE (new_op1) == ASHIFT)
5327 && code != COMPARE)
5328 {
5329 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5330 (enum rtx_code) code,
5331 speed);
43e9d192 5332 *cost += rtx_cost (op0, MINUS, 0, speed);
4745e701
JG
5333 return true;
5334 }
43e9d192 5335
4745e701 5336 *cost += rtx_cost (new_op1, MINUS, 1, speed);
43e9d192 5337
4745e701
JG
5338 if (speed)
5339 {
5340 if (GET_MODE_CLASS (mode) == MODE_INT)
5341 /* SUB(S). */
5342 *cost += extra_cost->alu.arith;
5343 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5344 /* FSUB. */
5345 *cost += extra_cost->fp[mode == DFmode].addsub;
5346 }
5347 return true;
5348 }
43e9d192
IB
5349
5350 case PLUS:
4745e701
JG
5351 {
5352 rtx new_op0;
43e9d192 5353
4745e701
JG
5354 op0 = XEXP (x, 0);
5355 op1 = XEXP (x, 1);
43e9d192 5356
a8eecd00 5357cost_plus:
4745e701
JG
5358 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5359 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5360 {
5361 /* CSINC. */
5362 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5363 *cost += rtx_cost (op1, PLUS, 1, speed);
5364 return true;
5365 }
43e9d192 5366
4745e701
JG
5367 if (GET_MODE_CLASS (mode) == MODE_INT
5368 && CONST_INT_P (op1)
5369 && aarch64_uimm12_shift (INTVAL (op1)))
5370 {
5371 *cost += rtx_cost (op0, PLUS, 0, speed);
43e9d192 5372
4745e701
JG
5373 if (speed)
5374 /* ADD (immediate). */
5375 *cost += extra_cost->alu.arith;
5376 return true;
5377 }
5378
7cc2145f
JG
5379 /* Look for ADD (extended register). */
5380 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5381 {
5382 if (speed)
5383 *cost += extra_cost->alu.arith_shift;
5384
5385 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5386 (enum rtx_code) GET_CODE (op0),
5387 0, speed);
5388 return true;
5389 }
5390
4745e701
JG
5391 /* Strip any extend, leave shifts behind as we will
5392 cost them through mult_cost. */
5393 new_op0 = aarch64_strip_extend (op0);
5394
5395 if (GET_CODE (new_op0) == MULT
5396 || GET_CODE (new_op0) == ASHIFT)
5397 {
5398 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5399 speed);
5400 *cost += rtx_cost (op1, PLUS, 1, speed);
5401 return true;
5402 }
5403
5404 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5405 + rtx_cost (op1, PLUS, 1, speed));
5406
5407 if (speed)
5408 {
5409 if (GET_MODE_CLASS (mode) == MODE_INT)
5410 /* ADD. */
5411 *cost += extra_cost->alu.arith;
5412 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5413 /* FADD. */
5414 *cost += extra_cost->fp[mode == DFmode].addsub;
5415 }
5416 return true;
5417 }
43e9d192 5418
18b42b2a
KT
5419 case BSWAP:
5420 *cost = COSTS_N_INSNS (1);
5421
5422 if (speed)
5423 *cost += extra_cost->alu.rev;
5424
5425 return false;
5426
43e9d192 5427 case IOR:
f7d5cf8d
KT
5428 if (aarch_rev16_p (x))
5429 {
5430 *cost = COSTS_N_INSNS (1);
5431
5432 if (speed)
5433 *cost += extra_cost->alu.rev;
5434
5435 return true;
5436 }
5437 /* Fall through. */
43e9d192
IB
5438 case XOR:
5439 case AND:
5440 cost_logic:
5441 op0 = XEXP (x, 0);
5442 op1 = XEXP (x, 1);
5443
268c3b47
JG
5444 if (code == AND
5445 && GET_CODE (op0) == MULT
5446 && CONST_INT_P (XEXP (op0, 1))
5447 && CONST_INT_P (op1)
5448 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5449 INTVAL (op1)) != 0)
5450 {
5451 /* This is a UBFM/SBFM. */
5452 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5453 if (speed)
5454 *cost += extra_cost->alu.bfx;
5455 return true;
5456 }
5457
43e9d192
IB
5458 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5459 {
268c3b47
JG
5460 /* We possibly get the immediate for free, this is not
5461 modelled. */
43e9d192
IB
5462 if (CONST_INT_P (op1)
5463 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5464 {
268c3b47
JG
5465 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5466
5467 if (speed)
5468 *cost += extra_cost->alu.logical;
5469
5470 return true;
43e9d192
IB
5471 }
5472 else
5473 {
268c3b47
JG
5474 rtx new_op0 = op0;
5475
5476 /* Handle ORN, EON, or BIC. */
43e9d192
IB
5477 if (GET_CODE (op0) == NOT)
5478 op0 = XEXP (op0, 0);
268c3b47
JG
5479
5480 new_op0 = aarch64_strip_shift (op0);
5481
5482 /* If we had a shift on op0 then this is a logical-shift-
5483 by-register/immediate operation. Otherwise, this is just
5484 a logical operation. */
5485 if (speed)
5486 {
5487 if (new_op0 != op0)
5488 {
5489 /* Shift by immediate. */
5490 if (CONST_INT_P (XEXP (op0, 1)))
5491 *cost += extra_cost->alu.log_shift;
5492 else
5493 *cost += extra_cost->alu.log_shift_reg;
5494 }
5495 else
5496 *cost += extra_cost->alu.logical;
5497 }
5498
5499 /* In both cases we want to cost both operands. */
5500 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5501 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5502
5503 return true;
43e9d192 5504 }
43e9d192
IB
5505 }
5506 return false;
5507
268c3b47
JG
5508 case NOT:
5509 /* MVN. */
5510 if (speed)
5511 *cost += extra_cost->alu.logical;
5512
5513 /* The logical instruction could have the shifted register form,
5514 but the cost is the same if the shift is processed as a separate
5515 instruction, so we don't bother with it here. */
5516 return false;
5517
43e9d192 5518 case ZERO_EXTEND:
b1685e62
JG
5519
5520 op0 = XEXP (x, 0);
5521 /* If a value is written in SI mode, then zero extended to DI
5522 mode, the operation will in general be free as a write to
5523 a 'w' register implicitly zeroes the upper bits of an 'x'
5524 register. However, if this is
5525
5526 (set (reg) (zero_extend (reg)))
5527
5528 we must cost the explicit register move. */
5529 if (mode == DImode
5530 && GET_MODE (op0) == SImode
5531 && outer == SET)
5532 {
5533 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5534
5535 if (!op_cost && speed)
5536 /* MOV. */
5537 *cost += extra_cost->alu.extend;
5538 else
5539 /* Free, the cost is that of the SI mode operation. */
5540 *cost = op_cost;
5541
5542 return true;
5543 }
5544 else if (MEM_P (XEXP (x, 0)))
43e9d192 5545 {
b1685e62
JG
5546 /* All loads can zero extend to any size for free. */
5547 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
43e9d192
IB
5548 return true;
5549 }
b1685e62
JG
5550
5551 /* UXTB/UXTH. */
5552 if (speed)
5553 *cost += extra_cost->alu.extend;
5554
43e9d192
IB
5555 return false;
5556
5557 case SIGN_EXTEND:
b1685e62 5558 if (MEM_P (XEXP (x, 0)))
43e9d192 5559 {
b1685e62
JG
5560 /* LDRSH. */
5561 if (speed)
5562 {
5563 rtx address = XEXP (XEXP (x, 0), 0);
5564 *cost += extra_cost->ldst.load_sign_extend;
5565
5566 *cost +=
5567 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5568 0, speed));
5569 }
43e9d192
IB
5570 return true;
5571 }
b1685e62
JG
5572
5573 if (speed)
5574 *cost += extra_cost->alu.extend;
43e9d192
IB
5575 return false;
5576
ba0cfa17
JG
5577 case ASHIFT:
5578 op0 = XEXP (x, 0);
5579 op1 = XEXP (x, 1);
5580
5581 if (CONST_INT_P (op1))
5582 {
5583 /* LSL (immediate), UBMF, UBFIZ and friends. These are all
5584 aliases. */
5585 if (speed)
5586 *cost += extra_cost->alu.shift;
5587
5588 /* We can incorporate zero/sign extend for free. */
5589 if (GET_CODE (op0) == ZERO_EXTEND
5590 || GET_CODE (op0) == SIGN_EXTEND)
5591 op0 = XEXP (op0, 0);
5592
5593 *cost += rtx_cost (op0, ASHIFT, 0, speed);
5594 return true;
5595 }
5596 else
5597 {
5598 /* LSLV. */
5599 if (speed)
5600 *cost += extra_cost->alu.shift_reg;
5601
5602 return false; /* All arguments need to be in registers. */
5603 }
5604
43e9d192 5605 case ROTATE:
43e9d192
IB
5606 case ROTATERT:
5607 case LSHIFTRT:
43e9d192 5608 case ASHIFTRT:
ba0cfa17
JG
5609 op0 = XEXP (x, 0);
5610 op1 = XEXP (x, 1);
43e9d192 5611
ba0cfa17
JG
5612 if (CONST_INT_P (op1))
5613 {
5614 /* ASR (immediate) and friends. */
5615 if (speed)
5616 *cost += extra_cost->alu.shift;
43e9d192 5617
ba0cfa17
JG
5618 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5619 return true;
5620 }
5621 else
5622 {
5623
5624 /* ASR (register) and friends. */
5625 if (speed)
5626 *cost += extra_cost->alu.shift_reg;
5627
5628 return false; /* All arguments need to be in registers. */
5629 }
43e9d192 5630
909734be
JG
5631 case SYMBOL_REF:
5632
5633 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5634 {
5635 /* LDR. */
5636 if (speed)
5637 *cost += extra_cost->ldst.load;
5638 }
5639 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
5640 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
5641 {
5642 /* ADRP, followed by ADD. */
5643 *cost += COSTS_N_INSNS (1);
5644 if (speed)
5645 *cost += 2 * extra_cost->alu.arith;
5646 }
5647 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
5648 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
5649 {
5650 /* ADR. */
5651 if (speed)
5652 *cost += extra_cost->alu.arith;
5653 }
5654
5655 if (flag_pic)
5656 {
5657 /* One extra load instruction, after accessing the GOT. */
5658 *cost += COSTS_N_INSNS (1);
5659 if (speed)
5660 *cost += extra_cost->ldst.load;
5661 }
43e9d192
IB
5662 return true;
5663
909734be 5664 case HIGH:
43e9d192 5665 case LO_SUM:
909734be
JG
5666 /* ADRP/ADD (immediate). */
5667 if (speed)
5668 *cost += extra_cost->alu.arith;
43e9d192
IB
5669 return true;
5670
5671 case ZERO_EXTRACT:
5672 case SIGN_EXTRACT:
7cc2145f
JG
5673 /* UBFX/SBFX. */
5674 if (speed)
5675 *cost += extra_cost->alu.bfx;
5676
5677 /* We can trust that the immediates used will be correct (there
5678 are no by-register forms), so we need only cost op0. */
5679 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
43e9d192
IB
5680 return true;
5681
5682 case MULT:
4745e701
JG
5683 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5684 /* aarch64_rtx_mult_cost always handles recursion to its
5685 operands. */
5686 return true;
43e9d192
IB
5687
5688 case MOD:
5689 case UMOD:
43e9d192
IB
5690 if (speed)
5691 {
5692 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
73250c4c
KT
5693 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5694 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
43e9d192 5695 else if (GET_MODE (x) == DFmode)
73250c4c
KT
5696 *cost += (extra_cost->fp[1].mult
5697 + extra_cost->fp[1].div);
43e9d192 5698 else if (GET_MODE (x) == SFmode)
73250c4c
KT
5699 *cost += (extra_cost->fp[0].mult
5700 + extra_cost->fp[0].div);
43e9d192
IB
5701 }
5702 return false; /* All arguments need to be in registers. */
5703
5704 case DIV:
5705 case UDIV:
4105fe38 5706 case SQRT:
43e9d192
IB
5707 if (speed)
5708 {
4105fe38
JG
5709 if (GET_MODE_CLASS (mode) == MODE_INT)
5710 /* There is no integer SQRT, so only DIV and UDIV can get
5711 here. */
5712 *cost += extra_cost->mult[mode == DImode].idiv;
5713 else
5714 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
5715 }
5716 return false; /* All arguments need to be in registers. */
5717
a8eecd00 5718 case IF_THEN_ELSE:
2d5ffe46
AP
5719 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
5720 XEXP (x, 2), cost, speed);
a8eecd00
JG
5721
5722 case EQ:
5723 case NE:
5724 case GT:
5725 case GTU:
5726 case LT:
5727 case LTU:
5728 case GE:
5729 case GEU:
5730 case LE:
5731 case LEU:
5732
5733 return false; /* All arguments must be in registers. */
5734
b292109f
JG
5735 case FMA:
5736 op0 = XEXP (x, 0);
5737 op1 = XEXP (x, 1);
5738 op2 = XEXP (x, 2);
5739
5740 if (speed)
5741 *cost += extra_cost->fp[mode == DFmode].fma;
5742
5743 /* FMSUB, FNMADD, and FNMSUB are free. */
5744 if (GET_CODE (op0) == NEG)
5745 op0 = XEXP (op0, 0);
5746
5747 if (GET_CODE (op2) == NEG)
5748 op2 = XEXP (op2, 0);
5749
5750 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5751 and the by-element operand as operand 0. */
5752 if (GET_CODE (op1) == NEG)
5753 op1 = XEXP (op1, 0);
5754
5755 /* Catch vector-by-element operations. The by-element operand can
5756 either be (vec_duplicate (vec_select (x))) or just
5757 (vec_select (x)), depending on whether we are multiplying by
5758 a vector or a scalar.
5759
 5760 Canonicalization is not very good in these cases: FMA4 will put the
 5761 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
5762 if (GET_CODE (op0) == VEC_DUPLICATE)
5763 op0 = XEXP (op0, 0);
5764 else if (GET_CODE (op1) == VEC_DUPLICATE)
5765 op1 = XEXP (op1, 0);
5766
5767 if (GET_CODE (op0) == VEC_SELECT)
5768 op0 = XEXP (op0, 0);
5769 else if (GET_CODE (op1) == VEC_SELECT)
5770 op1 = XEXP (op1, 0);
5771
5772 /* If the remaining parameters are not registers,
5773 get the cost to put them into registers. */
5774 *cost += rtx_cost (op0, FMA, 0, speed);
5775 *cost += rtx_cost (op1, FMA, 1, speed);
5776 *cost += rtx_cost (op2, FMA, 2, speed);
5777 return true;
5778
5779 case FLOAT_EXTEND:
5780 if (speed)
5781 *cost += extra_cost->fp[mode == DFmode].widen;
5782 return false;
5783
5784 case FLOAT_TRUNCATE:
5785 if (speed)
5786 *cost += extra_cost->fp[mode == DFmode].narrow;
5787 return false;
5788
61263118
KT
5789 case FIX:
5790 case UNSIGNED_FIX:
5791 x = XEXP (x, 0);
5792 /* Strip the rounding part. They will all be implemented
5793 by the fcvt* family of instructions anyway. */
5794 if (GET_CODE (x) == UNSPEC)
5795 {
5796 unsigned int uns_code = XINT (x, 1);
5797
5798 if (uns_code == UNSPEC_FRINTA
5799 || uns_code == UNSPEC_FRINTM
5800 || uns_code == UNSPEC_FRINTN
5801 || uns_code == UNSPEC_FRINTP
5802 || uns_code == UNSPEC_FRINTZ)
5803 x = XVECEXP (x, 0, 0);
5804 }
5805
5806 if (speed)
5807 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
5808
5809 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
5810 return true;
5811
b292109f
JG
5812 case ABS:
5813 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5814 {
5815 /* FABS and FNEG are analogous. */
5816 if (speed)
5817 *cost += extra_cost->fp[mode == DFmode].neg;
5818 }
5819 else
5820 {
5821 /* Integer ABS will either be split to
5822 two arithmetic instructions, or will be an ABS
5823 (scalar), which we don't model. */
5824 *cost = COSTS_N_INSNS (2);
5825 if (speed)
5826 *cost += 2 * extra_cost->alu.arith;
5827 }
5828 return false;
5829
5830 case SMAX:
5831 case SMIN:
5832 if (speed)
5833 {
5834 /* FMAXNM/FMINNM/FMAX/FMIN.
5835 TODO: This may not be accurate for all implementations, but
5836 we do not model this in the cost tables. */
5837 *cost += extra_cost->fp[mode == DFmode].addsub;
5838 }
5839 return false;
5840
61263118
KT
5841 case UNSPEC:
5842 /* The floating point round to integer frint* instructions. */
5843 if (aarch64_frint_unspec_p (XINT (x, 1)))
5844 {
5845 if (speed)
5846 *cost += extra_cost->fp[mode == DFmode].roundint;
5847
5848 return false;
5849 }
781aeb73
KT
5850
5851 if (XINT (x, 1) == UNSPEC_RBIT)
5852 {
5853 if (speed)
5854 *cost += extra_cost->alu.rev;
5855
5856 return false;
5857 }
61263118
KT
5858 break;
5859
fb620c4a
JG
5860 case TRUNCATE:
5861
5862 /* Decompose <su>muldi3_highpart. */
5863 if (/* (truncate:DI */
5864 mode == DImode
5865 /* (lshiftrt:TI */
5866 && GET_MODE (XEXP (x, 0)) == TImode
5867 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5868 /* (mult:TI */
5869 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5870 /* (ANY_EXTEND:TI (reg:DI))
5871 (ANY_EXTEND:TI (reg:DI))) */
5872 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5873 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
5874 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
5875 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
5876 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
5877 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
5878 /* (const_int 64) */
5879 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5880 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
5881 {
5882 /* UMULH/SMULH. */
5883 if (speed)
5884 *cost += extra_cost->mult[mode == DImode].extend;
5885 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
5886 MULT, 0, speed);
5887 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
5888 MULT, 1, speed);
5889 return true;
5890 }
5891
5892 /* Fall through. */
43e9d192 5893 default:
61263118 5894 break;
43e9d192 5895 }
61263118
KT
5896
5897 if (dump_file && (dump_flags & TDF_DETAILS))
5898 fprintf (dump_file,
5899 "\nFailed to cost RTX. Assuming default cost.\n");
5900
5901 return true;
43e9d192
IB
5902}
5903
0ee859b5
JG
5904/* Wrapper around aarch64_rtx_costs; dumps the partial or total cost
5905 calculated for X. This cost is stored in *COST. Returns true
5906 if the total cost of X was calculated. */
5907static bool
5908aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5909 int param, int *cost, bool speed)
5910{
5911 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5912
5913 if (dump_file && (dump_flags & TDF_DETAILS))
5914 {
5915 print_rtl_single (dump_file, x);
5916 fprintf (dump_file, "\n%s cost: %d (%s)\n",
5917 speed ? "Hot" : "Cold",
5918 *cost, result ? "final" : "partial");
5919 }
5920
5921 return result;
5922}
5923
43e9d192 5924static int
8a3a7e67
RH
5925aarch64_register_move_cost (enum machine_mode mode,
5926 reg_class_t from_i, reg_class_t to_i)
43e9d192 5927{
8a3a7e67
RH
5928 enum reg_class from = (enum reg_class) from_i;
5929 enum reg_class to = (enum reg_class) to_i;
43e9d192
IB
5930 const struct cpu_regmove_cost *regmove_cost
5931 = aarch64_tune_params->regmove_cost;
5932
6ee70f81
AP
5933 /* Moving between GPR and stack cost is the same as GP2GP. */
5934 if ((from == GENERAL_REGS && to == STACK_REG)
5935 || (to == GENERAL_REGS && from == STACK_REG))
5936 return regmove_cost->GP2GP;
5937
5938 /* To/From the stack register, we move via the gprs. */
5939 if (to == STACK_REG || from == STACK_REG)
5940 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5941 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5942
43e9d192
IB
5943 if (from == GENERAL_REGS && to == GENERAL_REGS)
5944 return regmove_cost->GP2GP;
5945 else if (from == GENERAL_REGS)
5946 return regmove_cost->GP2FP;
5947 else if (to == GENERAL_REGS)
5948 return regmove_cost->FP2GP;
5949
5950 /* When AdvSIMD instructions are disabled it is not possible to move
5951 a 128-bit value directly between Q registers. This is handled in
5952 secondary reload. A general register is used as a scratch to move
5953 the upper DI value and the lower DI value is moved directly,
5954 hence the cost is the sum of three moves. */
8a3a7e67 5955 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
43e9d192
IB
5956 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5957
5958 return regmove_cost->FP2FP;
5959}
5960
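
/* A minimal illustrative sketch of the special case above: with AdvSIMD
   disabled, a copy between two 128-bit Q registers is costed as the sum of
   FP->GP, GP->FP and FP->FP moves.  The helper name and the numbers below
   are hypothetical and do not come from any tuning table.  */
static int
example_q_reg_copy_cost_without_simd (void)
{
  int gp2fp = 2, fp2gp = 2, fp2fp = 4;  /* hypothetical per-CPU move costs */
  return fp2gp + gp2fp + fp2fp;         /* 8 here, versus fp2fp == 4 with SIMD */
}
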
5961static int
5962aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5963 reg_class_t rclass ATTRIBUTE_UNUSED,
5964 bool in ATTRIBUTE_UNUSED)
5965{
5966 return aarch64_tune_params->memmov_cost;
5967}
5968
d126a4ae
AP
5969/* Return the number of instructions that can be issued per cycle. */
5970static int
5971aarch64_sched_issue_rate (void)
5972{
5973 return aarch64_tune_params->issue_rate;
5974}
5975
8990e73a
TB
5976/* Vectorizer cost model target hooks. */
5977
5978/* Implement targetm.vectorize.builtin_vectorization_cost. */
5979static int
5980aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5981 tree vectype,
5982 int misalign ATTRIBUTE_UNUSED)
5983{
5984 unsigned elements;
5985
5986 switch (type_of_cost)
5987 {
5988 case scalar_stmt:
5989 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5990
5991 case scalar_load:
5992 return aarch64_tune_params->vec_costs->scalar_load_cost;
5993
5994 case scalar_store:
5995 return aarch64_tune_params->vec_costs->scalar_store_cost;
5996
5997 case vector_stmt:
5998 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5999
6000 case vector_load:
6001 return aarch64_tune_params->vec_costs->vec_align_load_cost;
6002
6003 case vector_store:
6004 return aarch64_tune_params->vec_costs->vec_store_cost;
6005
6006 case vec_to_scalar:
6007 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6008
6009 case scalar_to_vec:
6010 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6011
6012 case unaligned_load:
6013 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6014
6015 case unaligned_store:
6016 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6017
6018 case cond_branch_taken:
6019 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6020
6021 case cond_branch_not_taken:
6022 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6023
6024 case vec_perm:
6025 case vec_promote_demote:
6026 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6027
6028 case vec_construct:
6029 elements = TYPE_VECTOR_SUBPARTS (vectype);
6030 return elements / 2 + 1;
6031
6032 default:
6033 gcc_unreachable ();
6034 }
6035}
6036
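
/* A worked illustration of the vec_construct case above, which charges
   elements / 2 + 1: building a 4-element vector from scalars is costed at
   4 / 2 + 1 = 3 and an 8-element vector at 8 / 2 + 1 = 5.  The helper name
   is hypothetical.  */
static int
example_vec_construct_cost (unsigned int elements)
{
  return elements / 2 + 1;
}
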
6037/* Implement targetm.vectorize.add_stmt_cost. */
6038static unsigned
6039aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6040 struct _stmt_vec_info *stmt_info, int misalign,
6041 enum vect_cost_model_location where)
6042{
6043 unsigned *cost = (unsigned *) data;
6044 unsigned retval = 0;
6045
6046 if (flag_vect_cost_model)
6047 {
6048 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6049 int stmt_cost =
6050 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6051
6052 /* Statements in an inner loop relative to the loop being
6053 vectorized are weighted more heavily. The value here is
6054 a function (linear for now) of the loop nest level. */
6055 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6056 {
6057 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6058 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
6059 unsigned nest_level = loop_depth (loop);
6060
6061 count *= nest_level;
6062 }
6063
6064 retval = (unsigned) (count * stmt_cost);
6065 cost[where] += retval;
6066 }
6067
6068 return retval;
6069}
6070
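
/* A small sketch of the inner-loop weighting applied above: COUNT is scaled
   by the loop depth before being multiplied by the per-statement cost, so a
   statement with count = 2 and stmt_cost = 1 inside a depth-3 loop
   contributes 2 * 3 * 1 = 6.  The helper name is hypothetical.  */
static unsigned int
example_weighted_stmt_cost (unsigned int count, unsigned int nest_level,
                            int stmt_cost)
{
  return (count * nest_level) * stmt_cost;
}
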
43e9d192
IB
6071static void initialize_aarch64_code_model (void);
6072
6073/* Parse the architecture extension string. */
6074
6075static void
6076aarch64_parse_extension (char *str)
6077{
6078 /* The extension string is parsed left to right. */
6079 const struct aarch64_option_extension *opt = NULL;
6080
6081 /* Flag to say whether we are adding or removing an extension. */
6082 int adding_ext = -1;
6083
6084 while (str != NULL && *str != 0)
6085 {
6086 char *ext;
6087 size_t len;
6088
6089 str++;
6090 ext = strchr (str, '+');
6091
6092 if (ext != NULL)
6093 len = ext - str;
6094 else
6095 len = strlen (str);
6096
6097 if (len >= 2 && strncmp (str, "no", 2) == 0)
6098 {
6099 adding_ext = 0;
6100 len -= 2;
6101 str += 2;
6102 }
6103 else if (len > 0)
6104 adding_ext = 1;
6105
6106 if (len == 0)
6107 {
6108 error ("missing feature modifier after %qs", "+no");
6109 return;
6110 }
6111
6112 /* Scan over the extensions table trying to find an exact match. */
6113 for (opt = all_extensions; opt->name != NULL; opt++)
6114 {
6115 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6116 {
6117 /* Add or remove the extension. */
6118 if (adding_ext)
6119 aarch64_isa_flags |= opt->flags_on;
6120 else
6121 aarch64_isa_flags &= ~(opt->flags_off);
6122 break;
6123 }
6124 }
6125
6126 if (opt->name == NULL)
6127 {
6128 /* Extension not found in list. */
6129 error ("unknown feature modifier %qs", str);
6130 return;
6131 }
6132
6133 str = ext;
6134 };
6135
6136 return;
6137}
6138
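
/* A sketch of the extension-string semantics implemented above: the string
   is walked left to right, "+ext" ORs the extension's flags in and "+noext"
   masks them out, so "+crypto+nofp" enables the crypto bits and then clears
   the fp bits.  The flag values and helper name below are invented for the
   example; the real flags come from the all_extensions table.  */
static unsigned long
example_apply_extensions (unsigned long isa_flags)
{
  const unsigned long ex_fl_fp = 1UL << 0;      /* hypothetical flag bits */
  const unsigned long ex_fl_crypto = 1UL << 2;

  isa_flags |= ex_fl_crypto;    /* "+crypto" */
  isa_flags &= ~ex_fl_fp;       /* "+nofp" */
  return isa_flags;
}
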
6139/* Parse the ARCH string. */
6140
6141static void
6142aarch64_parse_arch (void)
6143{
6144 char *ext;
6145 const struct processor *arch;
6146 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6147 size_t len;
6148
6149 strcpy (str, aarch64_arch_string);
6150
6151 ext = strchr (str, '+');
6152
6153 if (ext != NULL)
6154 len = ext - str;
6155 else
6156 len = strlen (str);
6157
6158 if (len == 0)
6159 {
6160 error ("missing arch name in -march=%qs", str);
6161 return;
6162 }
6163
6164 /* Loop through the list of supported ARCHs to find a match. */
6165 for (arch = all_architectures; arch->name != NULL; arch++)
6166 {
6167 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6168 {
6169 selected_arch = arch;
6170 aarch64_isa_flags = selected_arch->flags;
ffee7aa9
JG
6171
6172 if (!selected_cpu)
6173 selected_cpu = &all_cores[selected_arch->core];
43e9d192
IB
6174
6175 if (ext != NULL)
6176 {
6177 /* ARCH string contains at least one extension. */
6178 aarch64_parse_extension (ext);
6179 }
6180
ffee7aa9
JG
6181 if (strcmp (selected_arch->arch, selected_cpu->arch))
6182 {
6183 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6184 selected_cpu->name, selected_arch->name);
6185 }
6186
43e9d192
IB
6187 return;
6188 }
6189 }
6190
6191 /* ARCH name not found in list. */
6192 error ("unknown value %qs for -march", str);
6193 return;
6194}
6195
6196/* Parse the CPU string. */
6197
6198static void
6199aarch64_parse_cpu (void)
6200{
6201 char *ext;
6202 const struct processor *cpu;
6203 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6204 size_t len;
6205
6206 strcpy (str, aarch64_cpu_string);
6207
6208 ext = strchr (str, '+');
6209
6210 if (ext != NULL)
6211 len = ext - str;
6212 else
6213 len = strlen (str);
6214
6215 if (len == 0)
6216 {
6217 error ("missing cpu name in -mcpu=%qs", str);
6218 return;
6219 }
6220
6221 /* Loop through the list of supported CPUs to find a match. */
6222 for (cpu = all_cores; cpu->name != NULL; cpu++)
6223 {
6224 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6225 {
6226 selected_cpu = cpu;
192ed1dd 6227 selected_tune = cpu;
43e9d192
IB
6228 aarch64_isa_flags = selected_cpu->flags;
6229
6230 if (ext != NULL)
6231 {
6232 /* CPU string contains at least one extension. */
6233 aarch64_parse_extension (ext);
6234 }
6235
6236 return;
6237 }
6238 }
6239
6240 /* CPU name not found in list. */
6241 error ("unknown value %qs for -mcpu", str);
6242 return;
6243}
6244
6245/* Parse the TUNE string. */
6246
6247static void
6248aarch64_parse_tune (void)
6249{
6250 const struct processor *cpu;
6251 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6252 strcpy (str, aarch64_tune_string);
6253
6254 /* Loop through the list of supported CPUs to find a match. */
6255 for (cpu = all_cores; cpu->name != NULL; cpu++)
6256 {
6257 if (strcmp (cpu->name, str) == 0)
6258 {
6259 selected_tune = cpu;
6260 return;
6261 }
6262 }
6263
6264 /* CPU name not found in list. */
6265 error ("unknown value %qs for -mtune", str);
6266 return;
6267}
6268
6269
6270/* Implement TARGET_OPTION_OVERRIDE. */
6271
6272static void
6273aarch64_override_options (void)
6274{
ffee7aa9
JG
6275 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6276 If either of -march or -mtune is given, they override their
6277 respective component of -mcpu.
43e9d192 6278
ffee7aa9
JG
 6279 So, first parse AARCH64_CPU_STRING and then the others. Be careful
 6280 with -march: if -mcpu is not present on the command line, -march
 6281 must set a sensible default CPU. */
6282 if (aarch64_cpu_string)
43e9d192 6283 {
ffee7aa9 6284 aarch64_parse_cpu ();
43e9d192
IB
6285 }
6286
ffee7aa9 6287 if (aarch64_arch_string)
43e9d192 6288 {
ffee7aa9 6289 aarch64_parse_arch ();
43e9d192
IB
6290 }
6291
6292 if (aarch64_tune_string)
6293 {
6294 aarch64_parse_tune ();
6295 }
6296
63892fa2
KV
6297#ifndef HAVE_AS_MABI_OPTION
6298 /* The compiler may have been configured with 2.23.* binutils, which does
6299 not have support for ILP32. */
6300 if (TARGET_ILP32)
6301 error ("Assembler does not support -mabi=ilp32");
6302#endif
6303
43e9d192
IB
6304 initialize_aarch64_code_model ();
6305
6306 aarch64_build_bitmask_table ();
6307
6308 /* This target defaults to strict volatile bitfields. */
6309 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6310 flag_strict_volatile_bitfields = 1;
6311
6312 /* If the user did not specify a processor, choose the default
6313 one for them. This will be the CPU set during configuration using
a3cd0246 6314 --with-cpu, otherwise it is "generic". */
43e9d192
IB
6315 if (!selected_cpu)
6316 {
6317 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6318 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6319 }
6320
6321 gcc_assert (selected_cpu);
6322
6323 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
6324 if (!selected_tune)
6325 selected_tune = &all_cores[selected_cpu->core];
6326
6327 aarch64_tune_flags = selected_tune->flags;
6328 aarch64_tune = selected_tune->core;
6329 aarch64_tune_params = selected_tune->tune;
6330
6331 aarch64_override_options_after_change ();
6332}
6333
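
/* A sketch of the option precedence established above: -mcpu provides both
   the architecture and the tuning defaults, and an explicit -march or
   -mtune then overrides its half, e.g. "-mcpu=cortex-a57 -mtune=cortex-a53"
   keeps the Cortex-A57 ISA flags but uses the Cortex-A53 tuning tables.
   The type and helper names are hypothetical.  */
struct example_selection { const char *arch; const char *tune; };

static struct example_selection
example_resolve_options (const char *mcpu, const char *march, const char *mtune)
{
  struct example_selection s = { "armv8-a", "generic" };
  if (mcpu)  { s.arch = mcpu; s.tune = mcpu; }  /* -mcpu sets both halves */
  if (march) s.arch = march;                    /* -march overrides the arch */
  if (mtune) s.tune = mtune;                    /* -mtune overrides the tuning */
  return s;
}
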
6334/* Implement targetm.override_options_after_change. */
6335
6336static void
6337aarch64_override_options_after_change (void)
6338{
0b7f8166
MS
6339 if (flag_omit_frame_pointer)
6340 flag_omit_leaf_frame_pointer = false;
6341 else if (flag_omit_leaf_frame_pointer)
6342 flag_omit_frame_pointer = true;
43e9d192
IB
6343}
6344
6345static struct machine_function *
6346aarch64_init_machine_status (void)
6347{
6348 struct machine_function *machine;
766090c2 6349 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
6350 return machine;
6351}
6352
6353void
6354aarch64_init_expanders (void)
6355{
6356 init_machine_status = aarch64_init_machine_status;
6357}
6358
6359/* A checking mechanism for the implementation of the various code models. */
6360static void
6361initialize_aarch64_code_model (void)
6362{
6363 if (flag_pic)
6364 {
6365 switch (aarch64_cmodel_var)
6366 {
6367 case AARCH64_CMODEL_TINY:
6368 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6369 break;
6370 case AARCH64_CMODEL_SMALL:
6371 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6372 break;
6373 case AARCH64_CMODEL_LARGE:
6374 sorry ("code model %qs with -f%s", "large",
6375 flag_pic > 1 ? "PIC" : "pic");
6376 default:
6377 gcc_unreachable ();
6378 }
6379 }
6380 else
6381 aarch64_cmodel = aarch64_cmodel_var;
6382}
6383
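
/* A sketch of the mapping applied above: -fpic/-fPIC turns the tiny and
   small models into their PIC variants and is rejected for the large model;
   without -fpic the requested model is kept.  The enum and helper names are
   hypothetical.  */
enum example_cmodel { EX_TINY, EX_SMALL, EX_TINY_PIC, EX_SMALL_PIC };

static enum example_cmodel
example_effective_cmodel (enum example_cmodel requested, int pic_level)
{
  if (pic_level == 0)
    return requested;                   /* no PIC: keep the user's choice */
  /* The large model plus -fpic is diagnosed with sorry () above.  */
  return requested == EX_TINY ? EX_TINY_PIC : EX_SMALL_PIC;
}
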
6384/* Return true if SYMBOL_REF X binds locally. */
6385
6386static bool
6387aarch64_symbol_binds_local_p (const_rtx x)
6388{
6389 return (SYMBOL_REF_DECL (x)
6390 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6391 : SYMBOL_REF_LOCAL_P (x));
6392}
6393
6394/* Return true if SYMBOL_REF X is thread local */
6395static bool
6396aarch64_tls_symbol_p (rtx x)
6397{
6398 if (! TARGET_HAVE_TLS)
6399 return false;
6400
6401 if (GET_CODE (x) != SYMBOL_REF)
6402 return false;
6403
6404 return SYMBOL_REF_TLS_MODEL (x) != 0;
6405}
6406
6407/* Classify a TLS symbol into one of the TLS kinds. */
6408enum aarch64_symbol_type
6409aarch64_classify_tls_symbol (rtx x)
6410{
6411 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6412
6413 switch (tls_kind)
6414 {
6415 case TLS_MODEL_GLOBAL_DYNAMIC:
6416 case TLS_MODEL_LOCAL_DYNAMIC:
6417 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6418
6419 case TLS_MODEL_INITIAL_EXEC:
6420 return SYMBOL_SMALL_GOTTPREL;
6421
6422 case TLS_MODEL_LOCAL_EXEC:
6423 return SYMBOL_SMALL_TPREL;
6424
6425 case TLS_MODEL_EMULATED:
6426 case TLS_MODEL_NONE:
6427 return SYMBOL_FORCE_TO_MEM;
6428
6429 default:
6430 gcc_unreachable ();
6431 }
6432}
6433
6434/* Return the method that should be used to access SYMBOL_REF or
6435 LABEL_REF X in context CONTEXT. */
17f4d4bf 6436
43e9d192
IB
6437enum aarch64_symbol_type
6438aarch64_classify_symbol (rtx x,
6439 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6440{
6441 if (GET_CODE (x) == LABEL_REF)
6442 {
6443 switch (aarch64_cmodel)
6444 {
6445 case AARCH64_CMODEL_LARGE:
6446 return SYMBOL_FORCE_TO_MEM;
6447
6448 case AARCH64_CMODEL_TINY_PIC:
6449 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
6450 return SYMBOL_TINY_ABSOLUTE;
6451
43e9d192
IB
6452 case AARCH64_CMODEL_SMALL_PIC:
6453 case AARCH64_CMODEL_SMALL:
6454 return SYMBOL_SMALL_ABSOLUTE;
6455
6456 default:
6457 gcc_unreachable ();
6458 }
6459 }
6460
17f4d4bf 6461 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 6462 {
4a985a37
MS
6463 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6464 return SYMBOL_FORCE_TO_MEM;
43e9d192
IB
6465
6466 if (aarch64_tls_symbol_p (x))
6467 return aarch64_classify_tls_symbol (x);
6468
17f4d4bf
CSS
6469 switch (aarch64_cmodel)
6470 {
6471 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
6472 if (SYMBOL_REF_WEAK (x))
6473 return SYMBOL_FORCE_TO_MEM;
6474 return SYMBOL_TINY_ABSOLUTE;
6475
17f4d4bf
CSS
6476 case AARCH64_CMODEL_SMALL:
6477 if (SYMBOL_REF_WEAK (x))
6478 return SYMBOL_FORCE_TO_MEM;
6479 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 6480
17f4d4bf 6481 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 6482 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 6483 return SYMBOL_TINY_GOT;
38e6c9a6
MS
6484 return SYMBOL_TINY_ABSOLUTE;
6485
17f4d4bf
CSS
6486 case AARCH64_CMODEL_SMALL_PIC:
6487 if (!aarch64_symbol_binds_local_p (x))
6488 return SYMBOL_SMALL_GOT;
6489 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 6490
17f4d4bf
CSS
6491 default:
6492 gcc_unreachable ();
6493 }
43e9d192 6494 }
17f4d4bf 6495
43e9d192
IB
6496 /* By default push everything into the constant pool. */
6497 return SYMBOL_FORCE_TO_MEM;
6498}
6499
43e9d192
IB
6500bool
6501aarch64_constant_address_p (rtx x)
6502{
6503 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6504}
6505
6506bool
6507aarch64_legitimate_pic_operand_p (rtx x)
6508{
6509 if (GET_CODE (x) == SYMBOL_REF
6510 || (GET_CODE (x) == CONST
6511 && GET_CODE (XEXP (x, 0)) == PLUS
6512 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6513 return false;
6514
6515 return true;
6516}
6517
3520f7cc
JG
6518/* Return true if X holds either a quarter-precision immediate
 6519 or the floating-point constant +0.0. */
6520static bool
6521aarch64_valid_floating_const (enum machine_mode mode, rtx x)
6522{
6523 if (!CONST_DOUBLE_P (x))
6524 return false;
6525
6526 /* TODO: We could handle moving 0.0 to a TFmode register,
6527 but first we would like to refactor the movtf_aarch64
6528 to be more amicable to split moves properly and
6529 correctly gate on TARGET_SIMD. For now - reject all
6530 constants which are not to SFmode or DFmode registers. */
6531 if (!(mode == SFmode || mode == DFmode))
6532 return false;
6533
6534 if (aarch64_float_const_zero_rtx_p (x))
6535 return true;
6536 return aarch64_float_const_representable_p (x);
6537}
6538
43e9d192
IB
6539static bool
6540aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6541{
6542 /* Do not allow vector struct mode constants. We could support
6543 0 and -1 easily, but they need support in aarch64-simd.md. */
6544 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6545 return false;
6546
6547 /* This could probably go away because
6548 we now decompose CONST_INTs according to expand_mov_immediate. */
6549 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 6550 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
6551 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6552 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
6553
6554 if (GET_CODE (x) == HIGH
6555 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6556 return true;
6557
6558 return aarch64_constant_address_p (x);
6559}
6560
a5bc806c 6561rtx
43e9d192
IB
6562aarch64_load_tp (rtx target)
6563{
6564 if (!target
6565 || GET_MODE (target) != Pmode
6566 || !register_operand (target, Pmode))
6567 target = gen_reg_rtx (Pmode);
6568
6569 /* Can return in any reg. */
6570 emit_insn (gen_aarch64_load_tp_hard (target));
6571 return target;
6572}
6573
43e9d192
IB
6574/* On AAPCS systems, this is the "struct __va_list". */
6575static GTY(()) tree va_list_type;
6576
6577/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6578 Return the type to use as __builtin_va_list.
6579
6580 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6581
6582 struct __va_list
6583 {
6584 void *__stack;
6585 void *__gr_top;
6586 void *__vr_top;
6587 int __gr_offs;
6588 int __vr_offs;
6589 }; */
6590
6591static tree
6592aarch64_build_builtin_va_list (void)
6593{
6594 tree va_list_name;
6595 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6596
6597 /* Create the type. */
6598 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6599 /* Give it the required name. */
6600 va_list_name = build_decl (BUILTINS_LOCATION,
6601 TYPE_DECL,
6602 get_identifier ("__va_list"),
6603 va_list_type);
6604 DECL_ARTIFICIAL (va_list_name) = 1;
6605 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 6606 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
6607
6608 /* Create the fields. */
6609 f_stack = build_decl (BUILTINS_LOCATION,
6610 FIELD_DECL, get_identifier ("__stack"),
6611 ptr_type_node);
6612 f_grtop = build_decl (BUILTINS_LOCATION,
6613 FIELD_DECL, get_identifier ("__gr_top"),
6614 ptr_type_node);
6615 f_vrtop = build_decl (BUILTINS_LOCATION,
6616 FIELD_DECL, get_identifier ("__vr_top"),
6617 ptr_type_node);
6618 f_groff = build_decl (BUILTINS_LOCATION,
6619 FIELD_DECL, get_identifier ("__gr_offs"),
6620 integer_type_node);
6621 f_vroff = build_decl (BUILTINS_LOCATION,
6622 FIELD_DECL, get_identifier ("__vr_offs"),
6623 integer_type_node);
6624
6625 DECL_ARTIFICIAL (f_stack) = 1;
6626 DECL_ARTIFICIAL (f_grtop) = 1;
6627 DECL_ARTIFICIAL (f_vrtop) = 1;
6628 DECL_ARTIFICIAL (f_groff) = 1;
6629 DECL_ARTIFICIAL (f_vroff) = 1;
6630
6631 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6632 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6633 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6634 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6635 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6636
6637 TYPE_FIELDS (va_list_type) = f_stack;
6638 DECL_CHAIN (f_stack) = f_grtop;
6639 DECL_CHAIN (f_grtop) = f_vrtop;
6640 DECL_CHAIN (f_vrtop) = f_groff;
6641 DECL_CHAIN (f_groff) = f_vroff;
6642
6643 /* Compute its layout. */
6644 layout_type (va_list_type);
6645
6646 return va_list_type;
6647}
6648
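
/* The user-visible layout built above, written out as plain C for an LP64
   target: three 8-byte pointers followed by two ints give
   sizeof (__va_list) == 32 with 8-byte alignment.  The mirror type below is
   only illustrative.  */
struct example_va_list
{
  void *__stack;        /* next stacked (memory) argument */
  void *__gr_top;       /* top of the general-register save area */
  void *__vr_top;       /* top of the FP/SIMD register save area */
  int __gr_offs;        /* negative offset to the next GP argument */
  int __vr_offs;        /* negative offset to the next VR argument */
};
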
6649/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6650static void
6651aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6652{
6653 const CUMULATIVE_ARGS *cum;
6654 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6655 tree stack, grtop, vrtop, groff, vroff;
6656 tree t;
6657 int gr_save_area_size;
6658 int vr_save_area_size;
6659 int vr_offset;
6660
6661 cum = &crtl->args.info;
6662 gr_save_area_size
6663 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6664 vr_save_area_size
6665 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6666
6667 if (TARGET_GENERAL_REGS_ONLY)
6668 {
6669 if (cum->aapcs_nvrn > 0)
6670 sorry ("%qs and floating point or vector arguments",
6671 "-mgeneral-regs-only");
6672 vr_save_area_size = 0;
6673 }
6674
6675 f_stack = TYPE_FIELDS (va_list_type_node);
6676 f_grtop = DECL_CHAIN (f_stack);
6677 f_vrtop = DECL_CHAIN (f_grtop);
6678 f_groff = DECL_CHAIN (f_vrtop);
6679 f_vroff = DECL_CHAIN (f_groff);
6680
6681 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6682 NULL_TREE);
6683 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6684 NULL_TREE);
6685 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6686 NULL_TREE);
6687 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6688 NULL_TREE);
6689 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6690 NULL_TREE);
6691
6692 /* Emit code to initialize STACK, which points to the next varargs stack
6693 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6694 by named arguments. STACK is 8-byte aligned. */
6695 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6696 if (cum->aapcs_stack_size > 0)
6697 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6698 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6699 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6700
6701 /* Emit code to initialize GRTOP, the top of the GR save area.
6702 virtual_incoming_args_rtx should have been 16 byte aligned. */
6703 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6704 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6705 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6706
6707 /* Emit code to initialize VRTOP, the top of the VR save area.
6708 This address is gr_save_area_bytes below GRTOP, rounded
6709 down to the next 16-byte boundary. */
6710 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6711 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6712 STACK_BOUNDARY / BITS_PER_UNIT);
6713
6714 if (vr_offset)
6715 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6716 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6717 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6718
6719 /* Emit code to initialize GROFF, the offset from GRTOP of the
6720 next GPR argument. */
6721 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6722 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6723 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6724
6725 /* Likewise emit code to initialize VROFF, the offset from FTOP
6726 of the next VR argument. */
6727 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6728 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6729 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6730}
6731
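
/* A worked example of the initialisation above, under the AAPCS64 register
   counts used in this file (eight GP and eight FP/SIMD argument registers):
   for "void f (int a, double b, ...)" one GP and one FP register are named,
   so the GR save area is (8 - 1) * 8 = 56 bytes and the VR save area is
   (8 - 1) * 16 = 112 bytes; __gr_offs starts at -56, __vr_offs at -112, and
   __vr_top sits 64 bytes (56 rounded up to 16) below __gr_top.  The helper
   name is hypothetical.  */
static void
example_va_start_offsets (int named_gp, int named_fp,
                          int *gr_offs, int *vr_offs, int *vr_top_delta)
{
  int gr_save = (8 - named_gp) * 8;     /* 8-byte GP registers */
  int vr_save = (8 - named_fp) * 16;    /* 16-byte vector registers */
  *gr_offs = -gr_save;
  *vr_offs = -vr_save;
  *vr_top_delta = -((gr_save + 15) & -16);      /* VR area lies below GR area */
}
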
6732/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6733
6734static tree
6735aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6736 gimple_seq *post_p ATTRIBUTE_UNUSED)
6737{
6738 tree addr;
6739 bool indirect_p;
6740 bool is_ha; /* is HFA or HVA. */
6741 bool dw_align; /* double-word align. */
6742 enum machine_mode ag_mode = VOIDmode;
6743 int nregs;
6744 enum machine_mode mode;
6745
6746 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6747 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6748 HOST_WIDE_INT size, rsize, adjust, align;
6749 tree t, u, cond1, cond2;
6750
6751 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6752 if (indirect_p)
6753 type = build_pointer_type (type);
6754
6755 mode = TYPE_MODE (type);
6756
6757 f_stack = TYPE_FIELDS (va_list_type_node);
6758 f_grtop = DECL_CHAIN (f_stack);
6759 f_vrtop = DECL_CHAIN (f_grtop);
6760 f_groff = DECL_CHAIN (f_vrtop);
6761 f_vroff = DECL_CHAIN (f_groff);
6762
6763 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6764 f_stack, NULL_TREE);
6765 size = int_size_in_bytes (type);
6766 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6767
6768 dw_align = false;
6769 adjust = 0;
6770 if (aarch64_vfp_is_call_or_return_candidate (mode,
6771 type,
6772 &ag_mode,
6773 &nregs,
6774 &is_ha))
6775 {
6776 /* TYPE passed in fp/simd registers. */
6777 if (TARGET_GENERAL_REGS_ONLY)
6778 sorry ("%qs and floating point or vector arguments",
6779 "-mgeneral-regs-only");
6780
6781 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6782 unshare_expr (valist), f_vrtop, NULL_TREE);
6783 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6784 unshare_expr (valist), f_vroff, NULL_TREE);
6785
6786 rsize = nregs * UNITS_PER_VREG;
6787
6788 if (is_ha)
6789 {
6790 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6791 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6792 }
6793 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6794 && size < UNITS_PER_VREG)
6795 {
6796 adjust = UNITS_PER_VREG - size;
6797 }
6798 }
6799 else
6800 {
6801 /* TYPE passed in general registers. */
6802 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6803 unshare_expr (valist), f_grtop, NULL_TREE);
6804 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6805 unshare_expr (valist), f_groff, NULL_TREE);
6806 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6807 nregs = rsize / UNITS_PER_WORD;
6808
6809 if (align > 8)
6810 dw_align = true;
6811
6812 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6813 && size < UNITS_PER_WORD)
6814 {
6815 adjust = UNITS_PER_WORD - size;
6816 }
6817 }
6818
6819 /* Get a local temporary for the field value. */
6820 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6821
6822 /* Emit code to branch if off >= 0. */
6823 t = build2 (GE_EXPR, boolean_type_node, off,
6824 build_int_cst (TREE_TYPE (off), 0));
6825 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6826
6827 if (dw_align)
6828 {
6829 /* Emit: offs = (offs + 15) & -16. */
6830 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6831 build_int_cst (TREE_TYPE (off), 15));
6832 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6833 build_int_cst (TREE_TYPE (off), -16));
6834 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6835 }
6836 else
6837 roundup = NULL;
6838
6839 /* Update ap.__[g|v]r_offs */
6840 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6841 build_int_cst (TREE_TYPE (off), rsize));
6842 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6843
6844 /* String up. */
6845 if (roundup)
6846 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6847
6848 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6849 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6850 build_int_cst (TREE_TYPE (f_off), 0));
6851 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6852
6853 /* String up: make sure the assignment happens before the use. */
6854 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6855 COND_EXPR_ELSE (cond1) = t;
6856
6857 /* Prepare the trees handling the argument that is passed on the stack;
 6858 the top level node will be stored in ON_STACK. */
6859 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6860 if (align > 8)
6861 {
6862 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6863 t = fold_convert (intDI_type_node, arg);
6864 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6865 build_int_cst (TREE_TYPE (t), 15));
6866 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6867 build_int_cst (TREE_TYPE (t), -16));
6868 t = fold_convert (TREE_TYPE (arg), t);
6869 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6870 }
6871 else
6872 roundup = NULL;
6873 /* Advance ap.__stack */
6874 t = fold_convert (intDI_type_node, arg);
6875 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6876 build_int_cst (TREE_TYPE (t), size + 7));
6877 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6878 build_int_cst (TREE_TYPE (t), -8));
6879 t = fold_convert (TREE_TYPE (arg), t);
6880 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6881 /* String up roundup and advance. */
6882 if (roundup)
6883 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6884 /* String up with arg */
6885 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6886 /* Big-endianness related address adjustment. */
6887 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6888 && size < UNITS_PER_WORD)
6889 {
6890 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6891 size_int (UNITS_PER_WORD - size));
6892 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6893 }
6894
6895 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6896 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6897
6898 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6899 t = off;
6900 if (adjust)
6901 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6902 build_int_cst (TREE_TYPE (off), adjust));
6903
6904 t = fold_convert (sizetype, t);
6905 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6906
6907 if (is_ha)
6908 {
6909 /* type ha; // treat as "struct {ftype field[n];}"
6910 ... [computing offs]
6911 for (i = 0; i <nregs; ++i, offs += 16)
6912 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6913 return ha; */
6914 int i;
6915 tree tmp_ha, field_t, field_ptr_t;
6916
6917 /* Declare a local variable. */
6918 tmp_ha = create_tmp_var_raw (type, "ha");
6919 gimple_add_tmp_var (tmp_ha);
6920
6921 /* Establish the base type. */
6922 switch (ag_mode)
6923 {
6924 case SFmode:
6925 field_t = float_type_node;
6926 field_ptr_t = float_ptr_type_node;
6927 break;
6928 case DFmode:
6929 field_t = double_type_node;
6930 field_ptr_t = double_ptr_type_node;
6931 break;
6932 case TFmode:
6933 field_t = long_double_type_node;
6934 field_ptr_t = long_double_ptr_type_node;
6935 break;
6936/* The half precision and quad precision are not fully supported yet. Enable
6937 the following code after the support is complete. Need to find the correct
6938 type node for __fp16 *. */
6939#if 0
6940 case HFmode:
6941 field_t = float_type_node;
6942 field_ptr_t = float_ptr_type_node;
6943 break;
6944#endif
6945 case V2SImode:
6946 case V4SImode:
6947 {
6948 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6949 field_t = build_vector_type_for_mode (innertype, ag_mode);
6950 field_ptr_t = build_pointer_type (field_t);
6951 }
6952 break;
6953 default:
6954 gcc_assert (0);
6955 }
6956
6957 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
6958 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6959 addr = t;
6960 t = fold_convert (field_ptr_t, addr);
6961 t = build2 (MODIFY_EXPR, field_t,
6962 build1 (INDIRECT_REF, field_t, tmp_ha),
6963 build1 (INDIRECT_REF, field_t, t));
6964
6965 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6966 for (i = 1; i < nregs; ++i)
6967 {
6968 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6969 u = fold_convert (field_ptr_t, addr);
6970 u = build2 (MODIFY_EXPR, field_t,
6971 build2 (MEM_REF, field_t, tmp_ha,
6972 build_int_cst (field_ptr_t,
6973 (i *
6974 int_size_in_bytes (field_t)))),
6975 build1 (INDIRECT_REF, field_t, u));
6976 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6977 }
6978
6979 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6980 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6981 }
6982
6983 COND_EXPR_ELSE (cond2) = t;
6984 addr = fold_convert (build_pointer_type (type), cond1);
6985 addr = build_va_arg_indirect_ref (addr);
6986
6987 if (indirect_p)
6988 addr = build_va_arg_indirect_ref (addr);
6989
6990 return addr;
6991}
6992
6993/* Implement TARGET_SETUP_INCOMING_VARARGS. */
6994
6995static void
6996aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6997 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6998 int no_rtl)
6999{
7000 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7001 CUMULATIVE_ARGS local_cum;
7002 int gr_saved, vr_saved;
7003
7004 /* The caller has advanced CUM up to, but not beyond, the last named
7005 argument. Advance a local copy of CUM past the last "real" named
7006 argument, to find out how many registers are left over. */
7007 local_cum = *cum;
7008 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
7009
 7010 /* Find out how many registers we need to save. */
7011 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7012 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7013
7014 if (TARGET_GENERAL_REGS_ONLY)
7015 {
7016 if (local_cum.aapcs_nvrn > 0)
7017 sorry ("%qs and floating point or vector arguments",
7018 "-mgeneral-regs-only");
7019 vr_saved = 0;
7020 }
7021
7022 if (!no_rtl)
7023 {
7024 if (gr_saved > 0)
7025 {
7026 rtx ptr, mem;
7027
7028 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7029 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7030 - gr_saved * UNITS_PER_WORD);
7031 mem = gen_frame_mem (BLKmode, ptr);
7032 set_mem_alias_set (mem, get_varargs_alias_set ());
7033
7034 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7035 mem, gr_saved);
7036 }
7037 if (vr_saved > 0)
7038 {
7039 /* We can't use move_block_from_reg, because it will use
7040 the wrong mode, storing D regs only. */
7041 enum machine_mode mode = TImode;
7042 int off, i;
7043
7044 /* Set OFF to the offset from virtual_incoming_args_rtx of
7045 the first vector register. The VR save area lies below
7046 the GR one, and is aligned to 16 bytes. */
7047 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7048 STACK_BOUNDARY / BITS_PER_UNIT);
7049 off -= vr_saved * UNITS_PER_VREG;
7050
7051 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7052 {
7053 rtx ptr, mem;
7054
7055 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7056 mem = gen_frame_mem (mode, ptr);
7057 set_mem_alias_set (mem, get_varargs_alias_set ());
7058 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7059 off += UNITS_PER_VREG;
7060 }
7061 }
7062 }
7063
7064 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7065 any complication of having crtl->args.pretend_args_size changed. */
8799637a 7066 cfun->machine->frame.saved_varargs_size
43e9d192
IB
7067 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7068 STACK_BOUNDARY / BITS_PER_UNIT)
7069 + vr_saved * UNITS_PER_VREG);
7070}
7071
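
/* A worked example of the size recorded above: with seven unnamed GP
   registers and seven unnamed vector registers left over, the save area is
   round_up (7 * 8, 16) + 7 * 16 = 64 + 112 = 176 bytes.  The helper name is
   hypothetical.  */
static int
example_saved_varargs_size (int gr_saved, int vr_saved)
{
  return ((gr_saved * 8 + 15) & -16) + vr_saved * 16;
}
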
7072static void
7073aarch64_conditional_register_usage (void)
7074{
7075 int i;
7076 if (!TARGET_FLOAT)
7077 {
7078 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7079 {
7080 fixed_regs[i] = 1;
7081 call_used_regs[i] = 1;
7082 }
7083 }
7084}
7085
7086/* Walk down the type tree of TYPE counting consecutive base elements.
7087 If *MODEP is VOIDmode, then set it to the first valid floating point
7088 type. If a non-floating point type is found, or if a floating point
7089 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7090 otherwise return the count in the sub-tree. */
7091static int
7092aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
7093{
7094 enum machine_mode mode;
7095 HOST_WIDE_INT size;
7096
7097 switch (TREE_CODE (type))
7098 {
7099 case REAL_TYPE:
7100 mode = TYPE_MODE (type);
7101 if (mode != DFmode && mode != SFmode && mode != TFmode)
7102 return -1;
7103
7104 if (*modep == VOIDmode)
7105 *modep = mode;
7106
7107 if (*modep == mode)
7108 return 1;
7109
7110 break;
7111
7112 case COMPLEX_TYPE:
7113 mode = TYPE_MODE (TREE_TYPE (type));
7114 if (mode != DFmode && mode != SFmode && mode != TFmode)
7115 return -1;
7116
7117 if (*modep == VOIDmode)
7118 *modep = mode;
7119
7120 if (*modep == mode)
7121 return 2;
7122
7123 break;
7124
7125 case VECTOR_TYPE:
7126 /* Use V2SImode and V4SImode as representatives of all 64-bit
7127 and 128-bit vector types. */
7128 size = int_size_in_bytes (type);
7129 switch (size)
7130 {
7131 case 8:
7132 mode = V2SImode;
7133 break;
7134 case 16:
7135 mode = V4SImode;
7136 break;
7137 default:
7138 return -1;
7139 }
7140
7141 if (*modep == VOIDmode)
7142 *modep = mode;
7143
7144 /* Vector modes are considered to be opaque: two vectors are
7145 equivalent for the purposes of being homogeneous aggregates
7146 if they are the same size. */
7147 if (*modep == mode)
7148 return 1;
7149
7150 break;
7151
7152 case ARRAY_TYPE:
7153 {
7154 int count;
7155 tree index = TYPE_DOMAIN (type);
7156
807e902e
KZ
7157 /* Can't handle incomplete types nor sizes that are not
7158 fixed. */
7159 if (!COMPLETE_TYPE_P (type)
7160 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7161 return -1;
7162
7163 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7164 if (count == -1
7165 || !index
7166 || !TYPE_MAX_VALUE (index)
cc269bb6 7167 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 7168 || !TYPE_MIN_VALUE (index)
cc269bb6 7169 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
7170 || count < 0)
7171 return -1;
7172
ae7e9ddd
RS
7173 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7174 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
7175
7176 /* There must be no padding. */
807e902e 7177 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7178 return -1;
7179
7180 return count;
7181 }
7182
7183 case RECORD_TYPE:
7184 {
7185 int count = 0;
7186 int sub_count;
7187 tree field;
7188
807e902e
KZ
7189 /* Can't handle incomplete types nor sizes that are not
7190 fixed. */
7191 if (!COMPLETE_TYPE_P (type)
7192 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7193 return -1;
7194
7195 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7196 {
7197 if (TREE_CODE (field) != FIELD_DECL)
7198 continue;
7199
7200 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7201 if (sub_count < 0)
7202 return -1;
7203 count += sub_count;
7204 }
7205
7206 /* There must be no padding. */
807e902e 7207 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7208 return -1;
7209
7210 return count;
7211 }
7212
7213 case UNION_TYPE:
7214 case QUAL_UNION_TYPE:
7215 {
7216 /* These aren't very interesting except in a degenerate case. */
7217 int count = 0;
7218 int sub_count;
7219 tree field;
7220
807e902e
KZ
7221 /* Can't handle incomplete types nor sizes that are not
7222 fixed. */
7223 if (!COMPLETE_TYPE_P (type)
7224 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7225 return -1;
7226
7227 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7228 {
7229 if (TREE_CODE (field) != FIELD_DECL)
7230 continue;
7231
7232 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7233 if (sub_count < 0)
7234 return -1;
7235 count = count > sub_count ? count : sub_count;
7236 }
7237
7238 /* There must be no padding. */
807e902e 7239 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7240 return -1;
7241
7242 return count;
7243 }
7244
7245 default:
7246 break;
7247 }
7248
7249 return -1;
7250}
7251
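
/* Worked examples for the walk above: "struct { float x, y, z; }" yields
   three REAL_TYPE leaves of SFmode, so *MODEP becomes SFmode and the count
   is 3 (a homogeneous floating-point aggregate passed in s0-s2).  Mixing
   element modes, e.g. "struct { float x; double y; }", fails the *MODEP
   check on the second field and the walk returns -1.  */
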
38e8f663
YR
7252/* Return true if we use LRA instead of reload pass. */
7253static bool
7254aarch64_lra_p (void)
7255{
7256 return aarch64_lra_flag;
7257}
7258
43e9d192
IB
7259/* Return TRUE if the type, as described by TYPE and MODE, is a composite
7260 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7261 array types. The C99 floating-point complex types are also considered
7262 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7263 types, which are GCC extensions and out of the scope of AAPCS64, are
7264 treated as composite types here as well.
7265
7266 Note that MODE itself is not sufficient in determining whether a type
7267 is such a composite type or not. This is because
7268 stor-layout.c:compute_record_mode may have already changed the MODE
7269 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7270 structure with only one field may have its MODE set to the mode of the
7271 field. Also an integer mode whose size matches the size of the
7272 RECORD_TYPE type may be used to substitute the original mode
7273 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7274 solely relied on. */
7275
7276static bool
7277aarch64_composite_type_p (const_tree type,
7278 enum machine_mode mode)
7279{
7280 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7281 return true;
7282
7283 if (mode == BLKmode
7284 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7285 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7286 return true;
7287
7288 return false;
7289}
7290
7291/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7292 type as described in AAPCS64 \S 4.1.2.
7293
7294 See the comment above aarch64_composite_type_p for the notes on MODE. */
7295
7296static bool
7297aarch64_short_vector_p (const_tree type,
7298 enum machine_mode mode)
7299{
7300 HOST_WIDE_INT size = -1;
7301
7302 if (type && TREE_CODE (type) == VECTOR_TYPE)
7303 size = int_size_in_bytes (type);
7304 else if (!aarch64_composite_type_p (type, mode)
7305 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7306 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7307 size = GET_MODE_SIZE (mode);
7308
 7309 return (size == 8 || size == 16);
7310}
7311
7312/* Return TRUE if an argument, whose type is described by TYPE and MODE,
7313 shall be passed or returned in simd/fp register(s) (providing these
7314 parameter passing registers are available).
7315
7316 Upon successful return, *COUNT returns the number of needed registers,
 7317 *BASE_MODE returns the mode of the individual register and when IS_HA
7318 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7319 floating-point aggregate or a homogeneous short-vector aggregate. */
7320
7321static bool
7322aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
7323 const_tree type,
7324 enum machine_mode *base_mode,
7325 int *count,
7326 bool *is_ha)
7327{
7328 enum machine_mode new_mode = VOIDmode;
7329 bool composite_p = aarch64_composite_type_p (type, mode);
7330
7331 if (is_ha != NULL) *is_ha = false;
7332
7333 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7334 || aarch64_short_vector_p (type, mode))
7335 {
7336 *count = 1;
7337 new_mode = mode;
7338 }
7339 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7340 {
7341 if (is_ha != NULL) *is_ha = true;
7342 *count = 2;
7343 new_mode = GET_MODE_INNER (mode);
7344 }
7345 else if (type && composite_p)
7346 {
7347 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7348
7349 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7350 {
7351 if (is_ha != NULL) *is_ha = true;
7352 *count = ag_count;
7353 }
7354 else
7355 return false;
7356 }
7357 else
7358 return false;
7359
7360 *base_mode = new_mode;
7361 return true;
7362}
7363
7364/* Implement TARGET_STRUCT_VALUE_RTX. */
7365
7366static rtx
7367aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7368 int incoming ATTRIBUTE_UNUSED)
7369{
7370 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7371}
7372
7373/* Implements target hook vector_mode_supported_p. */
7374static bool
7375aarch64_vector_mode_supported_p (enum machine_mode mode)
7376{
7377 if (TARGET_SIMD
7378 && (mode == V4SImode || mode == V8HImode
7379 || mode == V16QImode || mode == V2DImode
7380 || mode == V2SImode || mode == V4HImode
7381 || mode == V8QImode || mode == V2SFmode
ad7d90cc
AL
7382 || mode == V4SFmode || mode == V2DFmode
7383 || mode == V1DFmode))
43e9d192
IB
7384 return true;
7385
7386 return false;
7387}
7388
b7342d25
IB
7389/* Return appropriate SIMD container
7390 for MODE within a vector of WIDTH bits. */
43e9d192 7391static enum machine_mode
b7342d25 7392aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
43e9d192 7393{
b7342d25 7394 gcc_assert (width == 64 || width == 128);
43e9d192 7395 if (TARGET_SIMD)
b7342d25
IB
7396 {
7397 if (width == 128)
7398 switch (mode)
7399 {
7400 case DFmode:
7401 return V2DFmode;
7402 case SFmode:
7403 return V4SFmode;
7404 case SImode:
7405 return V4SImode;
7406 case HImode:
7407 return V8HImode;
7408 case QImode:
7409 return V16QImode;
7410 case DImode:
7411 return V2DImode;
7412 default:
7413 break;
7414 }
7415 else
7416 switch (mode)
7417 {
7418 case SFmode:
7419 return V2SFmode;
7420 case SImode:
7421 return V2SImode;
7422 case HImode:
7423 return V4HImode;
7424 case QImode:
7425 return V8QImode;
7426 default:
7427 break;
7428 }
7429 }
43e9d192
IB
7430 return word_mode;
7431}
7432
b7342d25
IB
7433/* Return 128-bit container as the preferred SIMD mode for MODE. */
7434static enum machine_mode
7435aarch64_preferred_simd_mode (enum machine_mode mode)
7436{
7437 return aarch64_simd_container_mode (mode, 128);
7438}
7439
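
/* The mode mapping performed by aarch64_simd_container_mode above, shown as
   a table; modes with no 64-bit container fall back to word_mode, and
   everything is word_mode when AdvSIMD is disabled:

     scalar mode    width 128    width 64
     QImode         V16QImode    V8QImode
     HImode         V8HImode     V4HImode
     SImode         V4SImode     V2SImode
     DImode         V2DImode     word_mode
     SFmode         V4SFmode     V2SFmode
     DFmode         V2DFmode     word_mode  */
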
3b357264
JG
7440/* Return the bitmask of possible vector sizes for the vectorizer
7441 to iterate over. */
7442static unsigned int
7443aarch64_autovectorize_vector_sizes (void)
7444{
7445 return (16 | 8);
7446}
7447
c6fc9e43
YZ
7448/* A table to help perform AArch64-specific name mangling for AdvSIMD
7449 vector types in order to conform to the AAPCS64 (see "Procedure
7450 Call Standard for the ARM 64-bit Architecture", Appendix A). To
7451 qualify for emission with the mangled names defined in that document,
7452 a vector type must not only be of the correct mode but also be
7453 composed of AdvSIMD vector element types (e.g.
 7454 __builtin_aarch64_simd_qi); these types are registered by
7455 aarch64_init_simd_builtins (). In other words, vector types defined
7456 in other ways e.g. via vector_size attribute will get default
7457 mangled names. */
7458typedef struct
7459{
7460 enum machine_mode mode;
7461 const char *element_type_name;
7462 const char *mangled_name;
7463} aarch64_simd_mangle_map_entry;
7464
7465static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
7466 /* 64-bit containerized types. */
7467 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
7468 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
7469 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
7470 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
7471 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
7472 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
7473 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
096c59be
AL
7474 { DImode, "__builtin_aarch64_simd_di", "11__Int64x1_t" },
7475 { DImode, "__builtin_aarch64_simd_udi", "12__Uint64x1_t" },
c6a29a09 7476 { V1DFmode, "__builtin_aarch64_simd_df", "13__Float64x1_t" },
c6fc9e43
YZ
7477 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
7478 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7479 /* 128-bit containerized types. */
7480 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
7481 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
7482 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
7483 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
7484 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
7485 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
7486 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
7487 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
7488 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
7489 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
7490 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
7491 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7baa225d 7492 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
c6fc9e43
YZ
7493 { VOIDmode, NULL, NULL }
7494};
7495
ac2b960f
YZ
7496/* Implement TARGET_MANGLE_TYPE. */
7497
6f549691 7498static const char *
ac2b960f
YZ
7499aarch64_mangle_type (const_tree type)
7500{
7501 /* The AArch64 ABI documents say that "__va_list" has to be
 7502 mangled as if it is in the "std" namespace. */
7503 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7504 return "St9__va_list";
7505
c6fc9e43
YZ
7506 /* Check the mode of the vector type, and the name of the vector
7507 element type, against the table. */
7508 if (TREE_CODE (type) == VECTOR_TYPE)
7509 {
7510 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7511
7512 while (pos->mode != VOIDmode)
7513 {
7514 tree elt_type = TREE_TYPE (type);
7515
7516 if (pos->mode == TYPE_MODE (type)
7517 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7518 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7519 pos->element_type_name))
7520 return pos->mangled_name;
7521
7522 pos++;
7523 }
7524 }
7525
ac2b960f
YZ
7526 /* Use the default mangling. */
7527 return NULL;
7528}
7529
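
/* A mangling example for the hook above: a C++ declaration
   "void f (int32x4_t)", where int32x4_t is the arm_neon.h type of mode
   V4SImode built from __builtin_aarch64_simd_si, mangles the parameter as
   "11__Int32x4_t", giving "_Z1f11__Int32x4_t"; "__va_list" is always
   emitted as "St9__va_list", i.e. as if it lived in namespace std.  */
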
43e9d192 7530/* Return the equivalent letter for size. */
81c2dfb9 7531static char
43e9d192
IB
7532sizetochar (int size)
7533{
7534 switch (size)
7535 {
7536 case 64: return 'd';
7537 case 32: return 's';
7538 case 16: return 'h';
7539 case 8 : return 'b';
7540 default: gcc_unreachable ();
7541 }
7542}
7543
3520f7cc
JG
7544/* Return true iff x is a uniform vector of floating-point
7545 constants, and the constant can be represented in
7546 quarter-precision form. Note, as aarch64_float_const_representable
7547 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
7548static bool
7549aarch64_vect_float_const_representable_p (rtx x)
7550{
7551 int i = 0;
7552 REAL_VALUE_TYPE r0, ri;
7553 rtx x0, xi;
7554
7555 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7556 return false;
7557
7558 x0 = CONST_VECTOR_ELT (x, 0);
7559 if (!CONST_DOUBLE_P (x0))
7560 return false;
7561
7562 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7563
7564 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7565 {
7566 xi = CONST_VECTOR_ELT (x, i);
7567 if (!CONST_DOUBLE_P (xi))
7568 return false;
7569
7570 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7571 if (!REAL_VALUES_EQUAL (r0, ri))
7572 return false;
7573 }
7574
7575 return aarch64_float_const_representable_p (x0);
7576}
7577
d8edd899 7578/* Return true for valid and false for invalid. */
3ea63f60 7579bool
48063b9d
IB
7580aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7581 struct simd_immediate_info *info)
43e9d192
IB
7582{
7583#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7584 matches = 1; \
7585 for (i = 0; i < idx; i += (STRIDE)) \
7586 if (!(TEST)) \
7587 matches = 0; \
7588 if (matches) \
7589 { \
7590 immtype = (CLASS); \
7591 elsize = (ELSIZE); \
43e9d192
IB
7592 eshift = (SHIFT); \
7593 emvn = (NEG); \
7594 break; \
7595 }
7596
7597 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7598 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7599 unsigned char bytes[16];
43e9d192
IB
7600 int immtype = -1, matches;
7601 unsigned int invmask = inverse ? 0xff : 0;
7602 int eshift, emvn;
7603
43e9d192 7604 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 7605 {
81c2dfb9
IB
7606 if (! (aarch64_simd_imm_zero_p (op, mode)
7607 || aarch64_vect_float_const_representable_p (op)))
d8edd899 7608 return false;
3520f7cc 7609
48063b9d
IB
7610 if (info)
7611 {
7612 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 7613 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
7614 info->mvn = false;
7615 info->shift = 0;
7616 }
3520f7cc 7617
d8edd899 7618 return true;
3520f7cc 7619 }
43e9d192
IB
7620
7621 /* Splat vector constant out into a byte vector. */
7622 for (i = 0; i < n_elts; i++)
7623 {
4b1e108c
AL
7624 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7625 it must be laid out in the vector register in reverse order. */
7626 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192
IB
7627 unsigned HOST_WIDE_INT elpart;
7628 unsigned int part, parts;
7629
7630 if (GET_CODE (el) == CONST_INT)
7631 {
7632 elpart = INTVAL (el);
7633 parts = 1;
7634 }
7635 else if (GET_CODE (el) == CONST_DOUBLE)
7636 {
7637 elpart = CONST_DOUBLE_LOW (el);
7638 parts = 2;
7639 }
7640 else
7641 gcc_unreachable ();
7642
7643 for (part = 0; part < parts; part++)
7644 {
7645 unsigned int byte;
7646 for (byte = 0; byte < innersize; byte++)
7647 {
7648 bytes[idx++] = (elpart & 0xff) ^ invmask;
7649 elpart >>= BITS_PER_UNIT;
7650 }
7651 if (GET_CODE (el) == CONST_DOUBLE)
7652 elpart = CONST_DOUBLE_HIGH (el);
7653 }
7654 }
7655
7656 /* Sanity check. */
7657 gcc_assert (idx == GET_MODE_SIZE (mode));
7658
7659 do
7660 {
7661 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7662 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7663
7664 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7665 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7666
7667 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7668 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7669
7670 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7671 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7672
7673 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7674
7675 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7676
7677 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7678 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7679
7680 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7681 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7682
7683 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7684 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7685
7686 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7687 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7688
7689 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7690
7691 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7692
7693 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 7694 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
7695
7696 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 7697 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
7698
7699 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 7700 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
7701
7702 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 7703 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
7704
7705 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7706
7707 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7708 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7709 }
7710 while (0);
7711
e4f0f84d 7712 if (immtype == -1)
d8edd899 7713 return false;
43e9d192 7714
48063b9d 7715 if (info)
43e9d192 7716 {
48063b9d 7717 info->element_width = elsize;
48063b9d
IB
7718 info->mvn = emvn != 0;
7719 info->shift = eshift;
7720
43e9d192
IB
7721 unsigned HOST_WIDE_INT imm = 0;
7722
e4f0f84d
TB
7723 if (immtype >= 12 && immtype <= 15)
7724 info->msl = true;
7725
43e9d192
IB
7726 /* Un-invert bytes of recognized vector, if necessary. */
7727 if (invmask != 0)
7728 for (i = 0; i < idx; i++)
7729 bytes[i] ^= invmask;
7730
7731 if (immtype == 17)
7732 {
7733 /* FIXME: Broken on 32-bit H_W_I hosts. */
7734 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7735
7736 for (i = 0; i < 8; i++)
7737 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7738 << (i * BITS_PER_UNIT);
7739
43e9d192 7740
48063b9d
IB
7741 info->value = GEN_INT (imm);
7742 }
7743 else
7744 {
7745 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7746 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
7747
7748 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
7749 generic constants. */
7750 if (info->mvn)
43e9d192 7751 imm = ~imm;
48063b9d
IB
7752 imm = (imm >> info->shift) & 0xff;
7753 info->value = GEN_INT (imm);
7754 }
43e9d192
IB
7755 }
7756
48063b9d 7757 return true;
43e9d192
IB
7758#undef CHECK
7759}
7760
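/* Worked example (illustrative): a V4SImode CONST_VECTOR whose elements are
   all 0x0000ab00 splats to the per-element byte pattern { 00, ab, 00, 00 },
   which matches the second 32-bit CHECK above (immtype 1, elsize 32,
   shift 8, no negation).  INFO then ends up with element_width = 32,
   shift = 8, mvn = false and value = 0xab, i.e. the constant is encodable
   as "movi Vd.4s, 0xab, lsl 8".  */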
43e9d192
IB
7761static bool
7762aarch64_const_vec_all_same_int_p (rtx x,
7763 HOST_WIDE_INT minval,
7764 HOST_WIDE_INT maxval)
7765{
7766 HOST_WIDE_INT firstval;
7767 int count, i;
7768
7769 if (GET_CODE (x) != CONST_VECTOR
7770 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7771 return false;
7772
7773 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7774 if (firstval < minval || firstval > maxval)
7775 return false;
7776
7777 count = CONST_VECTOR_NUNITS (x);
7778 for (i = 1; i < count; i++)
7779 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7780 return false;
7781
7782 return true;
7783}
7784
7785/* Check that immediate shift constants are within range. */
7786bool
7787aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7788{
7789 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7790 if (left)
7791 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7792 else
7793 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7794}
7795
3520f7cc
JG
7796/* Return true if X is a uniform vector where all elements
7797 are either the floating-point constant 0.0 or the
7798 integer constant 0. */
43e9d192
IB
7799bool
7800aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7801{
3520f7cc 7802 return x == CONST0_RTX (mode);
43e9d192
IB
7803}
7804
7805bool
7806aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7807{
7808 HOST_WIDE_INT imm = INTVAL (x);
7809 int i;
7810
7811 for (i = 0; i < 8; i++)
7812 {
7813 unsigned int byte = imm & 0xff;
7814 if (byte != 0xff && byte != 0)
7815 return false;
7816 imm >>= 8;
7817 }
7818
7819 return true;
7820}
7821
83f8c414
CSS
7822bool
7823aarch64_mov_operand_p (rtx x,
a5350ddc 7824 enum aarch64_symbol_context context,
83f8c414
CSS
7825 enum machine_mode mode)
7826{
83f8c414
CSS
7827 if (GET_CODE (x) == HIGH
7828 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7829 return true;
7830
7831 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7832 return true;
7833
7834 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7835 return true;
7836
a5350ddc
CSS
7837 return aarch64_classify_symbolic_expression (x, context)
7838 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
7839}
7840
43e9d192
IB
7841/* Return a const_int vector of VAL. */
7842rtx
7843aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7844{
7845 int nunits = GET_MODE_NUNITS (mode);
7846 rtvec v = rtvec_alloc (nunits);
7847 int i;
7848
7849 for (i = 0; i < nunits; i++)
7850 RTVEC_ELT (v, i) = GEN_INT (val);
7851
7852 return gen_rtx_CONST_VECTOR (mode, v);
7853}
7854
051d0e2f
SN
7855/* Check OP is a legal scalar immediate for the MOVI instruction. */
7856
7857bool
7858aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7859{
7860 enum machine_mode vmode;
7861
7862 gcc_assert (!VECTOR_MODE_P (mode));
7863 vmode = aarch64_preferred_simd_mode (mode);
7864 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 7865 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
7866}
7867
43e9d192
IB
7868/* Construct and return a PARALLEL RTX vector. */
7869rtx
7870aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7871{
7872 int nunits = GET_MODE_NUNITS (mode);
7873 rtvec v = rtvec_alloc (nunits / 2);
7874 int base = high ? nunits / 2 : 0;
7875 rtx t1;
7876 int i;
7877
7878 for (i = 0; i < nunits / 2; i++)
7879 RTVEC_ELT (v, i) = GEN_INT (base + i);
7880
7881 t1 = gen_rtx_PARALLEL (mode, v);
7882 return t1;
7883}
7884
7885/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7886 HIGH (exclusive). */
7887void
7888aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7889{
7890 HOST_WIDE_INT lane;
7891 gcc_assert (GET_CODE (operand) == CONST_INT);
7892 lane = INTVAL (operand);
7893
7894 if (lane < low || lane >= high)
7895 error ("lane out of range");
7896}
7897
7898void
7899aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7900{
7901 gcc_assert (GET_CODE (operand) == CONST_INT);
7902 HOST_WIDE_INT lane = INTVAL (operand);
7903
7904 if (lane < low || lane >= high)
7905 error ("constant out of range");
7906}
7907
7908/* Emit code to reinterpret one AdvSIMD type as another,
7909 without altering bits. */
7910void
7911aarch64_simd_reinterpret (rtx dest, rtx src)
7912{
7913 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7914}
7915
7916/* Emit code to place an AdvSIMD pair result in memory locations (with equal
7917 registers). */
7918void
7919aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7920 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7921 rtx op1)
7922{
7923 rtx mem = gen_rtx_MEM (mode, destaddr);
7924 rtx tmp1 = gen_reg_rtx (mode);
7925 rtx tmp2 = gen_reg_rtx (mode);
7926
7927 emit_insn (intfn (tmp1, op1, tmp2));
7928
7929 emit_move_insn (mem, tmp1);
7930 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7931 emit_move_insn (mem, tmp2);
7932}
7933
7934/* Return TRUE if OP is a valid vector addressing mode. */
7935bool
7936aarch64_simd_mem_operand_p (rtx op)
7937{
7938 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7939 || GET_CODE (XEXP (op, 0)) == REG);
7940}
7941
7942/* Set up OPERANDS for a register copy from SRC to DEST, taking care
7943 not to early-clobber SRC registers in the process.
7944
7945 We assume that the operands described by SRC and DEST represent a
7946 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
7947 number of components into which the copy has been decomposed. */
7948void
7949aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7950 rtx *src, unsigned int count)
7951{
7952 unsigned int i;
7953
7954 if (!reg_overlap_mentioned_p (operands[0], operands[1])
7955 || REGNO (operands[0]) < REGNO (operands[1]))
7956 {
7957 for (i = 0; i < count; i++)
7958 {
7959 operands[2 * i] = dest[i];
7960 operands[2 * i + 1] = src[i];
7961 }
7962 }
7963 else
7964 {
7965 for (i = 0; i < count; i++)
7966 {
7967 operands[2 * i] = dest[count - i - 1];
7968 operands[2 * i + 1] = src[count - i - 1];
7969 }
7970 }
7971}
7972
7973/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7974 one of VSTRUCT modes: OI, CI or XI. */
7975int
7976aarch64_simd_attr_length_move (rtx insn)
7977{
43e9d192
IB
7978 enum machine_mode mode;
7979
7980 extract_insn_cached (insn);
7981
7982 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7983 {
7984 mode = GET_MODE (recog_data.operand[0]);
7985 switch (mode)
7986 {
7987 case OImode:
7988 return 8;
7989 case CImode:
7990 return 12;
7991 case XImode:
7992 return 16;
7993 default:
7994 gcc_unreachable ();
7995 }
7996 }
7997 return 4;
7998}
7999
db0253a4
TB
8000/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8001 alignment of a vector to 128 bits. */
8002static HOST_WIDE_INT
8003aarch64_simd_vector_alignment (const_tree type)
8004{
9439e9a1 8005 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
8006 return MIN (align, 128);
8007}
8008
8009/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8010static bool
8011aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8012{
8013 if (is_packed)
8014 return false;
8015
8016 /* We guarantee alignment for vectors up to 128-bits. */
8017 if (tree_int_cst_compare (TYPE_SIZE (type),
8018 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8019 return false;
8020
8021 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8022 return true;
8023}
8024
4369c11e
TB
8025/* If VALS is a vector constant that can be loaded into a register
8026 using DUP, generate instructions to do so and return an RTX to
8027 assign to the register. Otherwise return NULL_RTX. */
8028static rtx
8029aarch64_simd_dup_constant (rtx vals)
8030{
8031 enum machine_mode mode = GET_MODE (vals);
8032 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8033 int n_elts = GET_MODE_NUNITS (mode);
8034 bool all_same = true;
8035 rtx x;
8036 int i;
8037
8038 if (GET_CODE (vals) != CONST_VECTOR)
8039 return NULL_RTX;
8040
8041 for (i = 1; i < n_elts; ++i)
8042 {
8043 x = CONST_VECTOR_ELT (vals, i);
8044 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8045 all_same = false;
8046 }
8047
8048 if (!all_same)
8049 return NULL_RTX;
8050
8051 /* We can load this constant by using DUP and a constant in a
8052 single general-purpose register. This will be cheaper than a vector
8053 load. */
8054 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8055 return gen_rtx_VEC_DUPLICATE (mode, x);
8056}
8057
8058
8059/* Generate code to load VALS, which is a PARALLEL containing only
8060 constants (for vec_init) or CONST_VECTOR, efficiently into a
8061 register. Returns an RTX to copy into the register, or NULL_RTX
8062 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
1df3f464 8063static rtx
4369c11e
TB
8064aarch64_simd_make_constant (rtx vals)
8065{
8066 enum machine_mode mode = GET_MODE (vals);
8067 rtx const_dup;
8068 rtx const_vec = NULL_RTX;
8069 int n_elts = GET_MODE_NUNITS (mode);
8070 int n_const = 0;
8071 int i;
8072
8073 if (GET_CODE (vals) == CONST_VECTOR)
8074 const_vec = vals;
8075 else if (GET_CODE (vals) == PARALLEL)
8076 {
8077 /* A CONST_VECTOR must contain only CONST_INTs and
8078 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8079 Only store valid constants in a CONST_VECTOR. */
8080 for (i = 0; i < n_elts; ++i)
8081 {
8082 rtx x = XVECEXP (vals, 0, i);
8083 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8084 n_const++;
8085 }
8086 if (n_const == n_elts)
8087 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8088 }
8089 else
8090 gcc_unreachable ();
8091
8092 if (const_vec != NULL_RTX
48063b9d 8093 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
8094 /* Load using MOVI/MVNI. */
8095 return const_vec;
8096 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8097 /* Loaded using DUP. */
8098 return const_dup;
8099 else if (const_vec != NULL_RTX)
8100 /* Load from constant pool. We cannot take advantage of single-cycle
8101 LD1 because we need a PC-relative addressing mode. */
8102 return const_vec;
8103 else
8104 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8105 We cannot construct an initializer. */
8106 return NULL_RTX;
8107}
8108
8109void
8110aarch64_expand_vector_init (rtx target, rtx vals)
8111{
8112 enum machine_mode mode = GET_MODE (target);
8113 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8114 int n_elts = GET_MODE_NUNITS (mode);
8115 int n_var = 0, one_var = -1;
8116 bool all_same = true;
8117 rtx x, mem;
8118 int i;
8119
8120 x = XVECEXP (vals, 0, 0);
8121 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8122 n_var = 1, one_var = 0;
8123
8124 for (i = 1; i < n_elts; ++i)
8125 {
8126 x = XVECEXP (vals, 0, i);
8127 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8128 ++n_var, one_var = i;
8129
8130 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8131 all_same = false;
8132 }
8133
8134 if (n_var == 0)
8135 {
8136 rtx constant = aarch64_simd_make_constant (vals);
8137 if (constant != NULL_RTX)
8138 {
8139 emit_move_insn (target, constant);
8140 return;
8141 }
8142 }
8143
8144 /* Splat a single non-constant element if we can. */
8145 if (all_same)
8146 {
8147 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8148 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8149 return;
8150 }
8151
8152 /* One field is non-constant. Load constant then overwrite varying
8153 field. This is more efficient than using the stack. */
8154 if (n_var == 1)
8155 {
8156 rtx copy = copy_rtx (vals);
8157 rtx index = GEN_INT (one_var);
8158 enum insn_code icode;
8159
8160 /* Load constant part of vector, substitute neighboring value for
8161 varying element. */
8162 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8163 aarch64_expand_vector_init (target, copy);
8164
8165 /* Insert variable. */
8166 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8167 icode = optab_handler (vec_set_optab, mode);
8168 gcc_assert (icode != CODE_FOR_nothing);
8169 emit_insn (GEN_FCN (icode) (target, x, index));
8170 return;
8171 }
8172
8173 /* Construct the vector in memory one field at a time
8174 and load the whole vector. */
8175 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8176 for (i = 0; i < n_elts; i++)
8177 emit_move_insn (adjust_address_nv (mem, inner_mode,
8178 i * GET_MODE_SIZE (inner_mode)),
8179 XVECEXP (vals, 0, i));
8180 emit_move_insn (target, mem);
8181
8182}
8183
43e9d192
IB
8184static unsigned HOST_WIDE_INT
8185aarch64_shift_truncation_mask (enum machine_mode mode)
8186{
8187 return
8188 (aarch64_vector_mode_supported_p (mode)
8189 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8190}
8191
8192#ifndef TLS_SECTION_ASM_FLAG
8193#define TLS_SECTION_ASM_FLAG 'T'
8194#endif
8195
8196void
8197aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8198 tree decl ATTRIBUTE_UNUSED)
8199{
8200 char flagchars[10], *f = flagchars;
8201
8202 /* If we have already declared this section, we can use an
8203 abbreviated form to switch back to it -- unless this section is
8204 part of a COMDAT group, in which case GAS requires the full
8205 declaration every time. */
8206 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8207 && (flags & SECTION_DECLARED))
8208 {
8209 fprintf (asm_out_file, "\t.section\t%s\n", name);
8210 return;
8211 }
8212
8213 if (!(flags & SECTION_DEBUG))
8214 *f++ = 'a';
8215 if (flags & SECTION_WRITE)
8216 *f++ = 'w';
8217 if (flags & SECTION_CODE)
8218 *f++ = 'x';
8219 if (flags & SECTION_SMALL)
8220 *f++ = 's';
8221 if (flags & SECTION_MERGE)
8222 *f++ = 'M';
8223 if (flags & SECTION_STRINGS)
8224 *f++ = 'S';
8225 if (flags & SECTION_TLS)
8226 *f++ = TLS_SECTION_ASM_FLAG;
8227 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8228 *f++ = 'G';
8229 *f = '\0';
8230
8231 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8232
8233 if (!(flags & SECTION_NOTYPE))
8234 {
8235 const char *type;
8236 const char *format;
8237
8238 if (flags & SECTION_BSS)
8239 type = "nobits";
8240 else
8241 type = "progbits";
8242
8243#ifdef TYPE_OPERAND_FMT
8244 format = "," TYPE_OPERAND_FMT;
8245#else
8246 format = ",@%s";
8247#endif
8248
8249 fprintf (asm_out_file, format, type);
8250
8251 if (flags & SECTION_ENTSIZE)
8252 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8253 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8254 {
8255 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8256 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8257 else
8258 fprintf (asm_out_file, ",%s,comdat",
8259 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8260 }
8261 }
8262
8263 putc ('\n', asm_out_file);
8264}
8265
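/* Example output (illustrative; the section and symbol names below are
   hypothetical): a writable data section is emitted as

       .section .data.counters,"aw",@progbits

   while a function placed in its own COMDAT group (SECTION_LINKONCE)
   produces

       .section .text._Z3foov,"axG",@progbits,_Z3foov,comdat

   with the "@" prefix subject to TYPE_OPERAND_FMT.  */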
8266/* Select a format to encode pointers in exception handling data. */
8267int
8268aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8269{
8270 int type;
8271 switch (aarch64_cmodel)
8272 {
8273 case AARCH64_CMODEL_TINY:
8274 case AARCH64_CMODEL_TINY_PIC:
8275 case AARCH64_CMODEL_SMALL:
8276 case AARCH64_CMODEL_SMALL_PIC:
8277 /* text+got+data < 4GB. 4-byte signed relocs are sufficient
8278 for everything. */
8279 type = DW_EH_PE_sdata4;
8280 break;
8281 default:
8282 /* No assumptions here. 8-byte relocs required. */
8283 type = DW_EH_PE_sdata8;
8284 break;
8285 }
8286 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8287}
8288
0462169c
SN
8289/* Emit load exclusive. */
8290
8291static void
8292aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
8293 rtx mem, rtx model_rtx)
8294{
8295 rtx (*gen) (rtx, rtx, rtx);
8296
8297 switch (mode)
8298 {
8299 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8300 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8301 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8302 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8303 default:
8304 gcc_unreachable ();
8305 }
8306
8307 emit_insn (gen (rval, mem, model_rtx));
8308}
8309
8310/* Emit store exclusive. */
8311
8312static void
8313aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
8314 rtx rval, rtx mem, rtx model_rtx)
8315{
8316 rtx (*gen) (rtx, rtx, rtx, rtx);
8317
8318 switch (mode)
8319 {
8320 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8321 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8322 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8323 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8324 default:
8325 gcc_unreachable ();
8326 }
8327
8328 emit_insn (gen (bval, rval, mem, model_rtx));
8329}
8330
8331/* Mark the previous jump instruction as unlikely. */
8332
8333static void
8334aarch64_emit_unlikely_jump (rtx insn)
8335{
e5af9ddd 8336 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c
SN
8337
8338 insn = emit_jump_insn (insn);
e5af9ddd 8339 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
0462169c
SN
8340}
8341
8342/* Expand a compare and swap pattern. */
8343
8344void
8345aarch64_expand_compare_and_swap (rtx operands[])
8346{
8347 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8348 enum machine_mode mode, cmp_mode;
8349 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8350
8351 bval = operands[0];
8352 rval = operands[1];
8353 mem = operands[2];
8354 oldval = operands[3];
8355 newval = operands[4];
8356 is_weak = operands[5];
8357 mod_s = operands[6];
8358 mod_f = operands[7];
8359 mode = GET_MODE (mem);
8360 cmp_mode = mode;
8361
8362 /* Normally the succ memory model must be stronger than fail, but in the
8363 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8364 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8365
8366 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8367 && INTVAL (mod_s) == MEMMODEL_RELEASE)
8368 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8369
8370 switch (mode)
8371 {
8372 case QImode:
8373 case HImode:
8374 /* For short modes, we're going to perform the comparison in SImode,
8375 so do the zero-extension now. */
8376 cmp_mode = SImode;
8377 rval = gen_reg_rtx (SImode);
8378 oldval = convert_modes (SImode, mode, oldval, true);
8379 /* Fall through. */
8380
8381 case SImode:
8382 case DImode:
8383 /* Force the value into a register if needed. */
8384 if (!aarch64_plus_operand (oldval, mode))
8385 oldval = force_reg (cmp_mode, oldval);
8386 break;
8387
8388 default:
8389 gcc_unreachable ();
8390 }
8391
8392 switch (mode)
8393 {
8394 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8395 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8396 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8397 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8398 default:
8399 gcc_unreachable ();
8400 }
8401
8402 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8403
8404 if (mode == QImode || mode == HImode)
8405 emit_move_insn (operands[1], gen_lowpart (mode, rval));
8406
8407 x = gen_rtx_REG (CCmode, CC_REGNUM);
8408 x = gen_rtx_EQ (SImode, x, const0_rtx);
8409 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8410}
8411
8412/* Split a compare and swap pattern. */
8413
8414void
8415aarch64_split_compare_and_swap (rtx operands[])
8416{
8417 rtx rval, mem, oldval, newval, scratch;
8418 enum machine_mode mode;
0462169c
SN
8419 bool is_weak;
8420 rtx label1, label2, x, cond;
8421
8422 rval = operands[0];
8423 mem = operands[1];
8424 oldval = operands[2];
8425 newval = operands[3];
8426 is_weak = (operands[4] != const0_rtx);
0462169c
SN
8427 scratch = operands[7];
8428 mode = GET_MODE (mem);
8429
8430 label1 = NULL_RTX;
8431 if (!is_weak)
8432 {
8433 label1 = gen_label_rtx ();
8434 emit_label (label1);
8435 }
8436 label2 = gen_label_rtx ();
8437
8438 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8439
8440 cond = aarch64_gen_compare_reg (NE, rval, oldval);
8441 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8442 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8443 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8444 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8445
8446 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8447
8448 if (!is_weak)
8449 {
8450 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8451 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8452 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8453 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8454 }
8455 else
8456 {
8457 cond = gen_rtx_REG (CCmode, CC_REGNUM);
8458 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8459 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8460 }
8461
8462 emit_label (label2);
8463}
8464
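/* Illustrative sketch (not taken from actual compiler output) of the loop
   the splitter above produces for a strong SImode compare-and-swap with
   sequentially consistent ordering; register numbers are arbitrary:

       .L1: ldaxr w0, [x1]      // load-exclusive, acquire
            cmp   w0, w2        // compare with the expected value
            b.ne  .L2           // mismatch: fail without storing
            stlxr w3, w4, [x1]  // store-exclusive, release
            cbnz  w3, .L1       // retry if the exclusive store failed
       .L2:

   A weak compare-and-swap omits the backward branch and instead leaves the
   store-exclusive status in the condition flags.  */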
8465/* Split an atomic operation. */
8466
8467void
8468aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8469 rtx value, rtx model_rtx, rtx cond)
8470{
8471 enum machine_mode mode = GET_MODE (mem);
8472 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8473 rtx label, x;
8474
8475 label = gen_label_rtx ();
8476 emit_label (label);
8477
8478 if (new_out)
8479 new_out = gen_lowpart (wmode, new_out);
8480 if (old_out)
8481 old_out = gen_lowpart (wmode, old_out);
8482 else
8483 old_out = new_out;
8484 value = simplify_gen_subreg (wmode, value, mode, 0);
8485
8486 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8487
8488 switch (code)
8489 {
8490 case SET:
8491 new_out = value;
8492 break;
8493
8494 case NOT:
8495 x = gen_rtx_AND (wmode, old_out, value);
8496 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8497 x = gen_rtx_NOT (wmode, new_out);
8498 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8499 break;
8500
8501 case MINUS:
8502 if (CONST_INT_P (value))
8503 {
8504 value = GEN_INT (-INTVAL (value));
8505 code = PLUS;
8506 }
8507 /* Fall through. */
8508
8509 default:
8510 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8511 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8512 break;
8513 }
8514
8515 aarch64_emit_store_exclusive (mode, cond, mem,
8516 gen_lowpart (mode, new_out), model_rtx);
8517
8518 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8519 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8520 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8521 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8522}
8523
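/* Illustrative sketch: for a relaxed SImode atomic add the split form above
   is essentially

       .L1: ldxr  w0, [x2]      // old value
            add   w1, w0, w3    // apply the operation
            stxr  w4, w1, [x2]  // try to commit the new value
            cbnz  w4, .L1       // retry if another observer intervened

   with acquire/release variants of the exclusive pair chosen according to
   MODEL_RTX.  */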
95ca411e
YZ
8524static void
8525aarch64_print_extension (void)
8526{
8527 const struct aarch64_option_extension *opt = NULL;
8528
8529 for (opt = all_extensions; opt->name != NULL; opt++)
8530 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8531 asm_fprintf (asm_out_file, "+%s", opt->name);
8532
8533 asm_fprintf (asm_out_file, "\n");
8534}
8535
43e9d192
IB
8536static void
8537aarch64_start_file (void)
8538{
8539 if (selected_arch)
95ca411e
YZ
8540 {
8541 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8542 aarch64_print_extension ();
8543 }
43e9d192 8544 else if (selected_cpu)
95ca411e 8545 {
682287fb
JG
8546 const char *truncated_name
8547 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8548 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
95ca411e
YZ
8549 aarch64_print_extension ();
8550 }
43e9d192
IB
8551 default_file_start ();
8552}
8553
8554/* Target hook for c_mode_for_suffix. */
8555static enum machine_mode
8556aarch64_c_mode_for_suffix (char suffix)
8557{
8558 if (suffix == 'q')
8559 return TFmode;
8560
8561 return VOIDmode;
8562}
8563
3520f7cc
JG
8564/* We can only represent floating point constants which will fit in
8565 "quarter-precision" values. These values are characterised by
8566 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
8567 by:
8568
8569 (-1)^s * (n/16) * 2^r
8570
8571 Where:
8572 's' is the sign bit.
8573 'n' is an integer in the range 16 <= n <= 31.
8574 'r' is an integer in the range -3 <= r <= 4. */
8575
8576/* Return true iff X can be represented by a quarter-precision
8577 floating point immediate operand X. Note, we cannot represent 0.0. */
8578bool
8579aarch64_float_const_representable_p (rtx x)
8580{
8581 /* This represents our current view of how many bits
8582 make up the mantissa. */
8583 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 8584 int exponent;
3520f7cc 8585 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 8586 REAL_VALUE_TYPE r, m;
807e902e 8587 bool fail;
3520f7cc
JG
8588
8589 if (!CONST_DOUBLE_P (x))
8590 return false;
8591
94bfa2da
TV
8592 if (GET_MODE (x) == VOIDmode)
8593 return false;
8594
3520f7cc
JG
8595 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8596
8597 /* We cannot represent infinities, NaNs or +/-zero. We won't
8598 know if we have +zero until we analyse the mantissa, but we
8599 can reject the other invalid values. */
8600 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8601 || REAL_VALUE_MINUS_ZERO (r))
8602 return false;
8603
ba96cdfb 8604 /* Extract exponent. */
3520f7cc
JG
8605 r = real_value_abs (&r);
8606 exponent = REAL_EXP (&r);
8607
8608 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8609 highest (sign) bit, with a fixed binary point at bit point_pos.
8610 m1 holds the low part of the mantissa, m2 the high part.
8611 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8612 bits for the mantissa, this can fail (low bits will be lost). */
8613 real_ldexp (&m, &r, point_pos - exponent);
807e902e 8614 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
8615
8616 /* If the low part of the mantissa has bits set we cannot represent
8617 the value. */
807e902e 8618 if (w.elt (0) != 0)
3520f7cc
JG
8619 return false;
8620 /* We have rejected the lower HOST_WIDE_INT, so update our
8621 understanding of how many bits lie in the mantissa and
8622 look only at the high HOST_WIDE_INT. */
807e902e 8623 mantissa = w.elt (1);
3520f7cc
JG
8624 point_pos -= HOST_BITS_PER_WIDE_INT;
8625
8626 /* We can only represent values with a mantissa of the form 1.xxxx. */
8627 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8628 if ((mantissa & mask) != 0)
8629 return false;
8630
8631 /* Having filtered unrepresentable values, we may now remove all
8632 but the highest 5 bits. */
8633 mantissa >>= point_pos - 5;
8634
8635 /* We cannot represent the value 0.0, so reject it. This is handled
8636 elsewhere. */
8637 if (mantissa == 0)
8638 return false;
8639
8640 /* Then, as bit 4 is always set, we can mask it off, leaving
8641 the mantissa in the range [0, 15]. */
8642 mantissa &= ~(1 << 4);
8643 gcc_assert (mantissa <= 15);
8644
8645 /* GCC internally does not use IEEE754-like encoding (where normalized
8646 significands are in the range [1, 2)). GCC uses [0.5, 1) (see real.c).
8647 Our mantissa values are shifted 4 places to the left relative to
8648 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8649 by 5 places to correct for GCC's representation. */
8650 exponent = 5 - exponent;
8651
8652 return (exponent >= 0 && exponent <= 7);
8653}
8654
8655char*
81c2dfb9 8656aarch64_output_simd_mov_immediate (rtx const_vector,
3520f7cc
JG
8657 enum machine_mode mode,
8658 unsigned width)
8659{
3ea63f60 8660 bool is_valid;
3520f7cc 8661 static char templ[40];
3520f7cc 8662 const char *mnemonic;
e4f0f84d 8663 const char *shift_op;
3520f7cc 8664 unsigned int lane_count = 0;
81c2dfb9 8665 char element_char;
3520f7cc 8666
e4f0f84d 8667 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
8668
8669 /* This will return true if const_vector is legal for use as an AdvSIMD
8670 MOVI instruction (or, implicitly, MVNI) immediate. It will
8671 also update INFO to show how the immediate should be generated. */
81c2dfb9 8672 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
8673 gcc_assert (is_valid);
8674
81c2dfb9 8675 element_char = sizetochar (info.element_width);
48063b9d
IB
8676 lane_count = width / info.element_width;
8677
3520f7cc
JG
8678 mode = GET_MODE_INNER (mode);
8679 if (mode == SFmode || mode == DFmode)
8680 {
48063b9d
IB
8681 gcc_assert (info.shift == 0 && ! info.mvn);
8682 if (aarch64_float_const_zero_rtx_p (info.value))
8683 info.value = GEN_INT (0);
8684 else
8685 {
8686#define buf_size 20
8687 REAL_VALUE_TYPE r;
8688 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8689 char float_buf[buf_size] = {'\0'};
8690 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8691#undef buf_size
8692
8693 if (lane_count == 1)
8694 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8695 else
8696 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 8697 lane_count, element_char, float_buf);
48063b9d
IB
8698 return templ;
8699 }
3520f7cc 8700 }
3520f7cc 8701
48063b9d 8702 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 8703 shift_op = info.msl ? "msl" : "lsl";
3520f7cc
JG
8704
8705 if (lane_count == 1)
48063b9d
IB
8706 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8707 mnemonic, UINTVAL (info.value));
8708 else if (info.shift)
8709 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
8710 ", %s %d", mnemonic, lane_count, element_char,
8711 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 8712 else
48063b9d 8713 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 8714 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
8715 return templ;
8716}
8717
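/* Example outputs (illustrative): a V4SImode splat of 0x0000ab00 yields the
   template "movi\t%0.4s, 0xab, lsl 8", a V16QImode splat of 7 yields
   "movi\t%0.16b, 0x7", and a V2DFmode splat of 1.0 takes the FMOV path as
   roughly "fmov\t%0.2d, 1.0e+0" (the exact decimal spelling comes from
   real_to_decimal_for_mode).  */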
b7342d25
IB
8718char*
8719aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8720 enum machine_mode mode)
8721{
8722 enum machine_mode vmode;
8723
8724 gcc_assert (!VECTOR_MODE_P (mode));
8725 vmode = aarch64_simd_container_mode (mode, 64);
8726 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8727 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8728}
8729
88b08073
JG
8730/* Split operands into moves from op[1] + op[2] into op[0]. */
8731
8732void
8733aarch64_split_combinev16qi (rtx operands[3])
8734{
8735 unsigned int dest = REGNO (operands[0]);
8736 unsigned int src1 = REGNO (operands[1]);
8737 unsigned int src2 = REGNO (operands[2]);
8738 enum machine_mode halfmode = GET_MODE (operands[1]);
8739 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8740 rtx destlo, desthi;
8741
8742 gcc_assert (halfmode == V16QImode);
8743
8744 if (src1 == dest && src2 == dest + halfregs)
8745 {
8746 /* No-op move. Can't split to nothing; emit something. */
8747 emit_note (NOTE_INSN_DELETED);
8748 return;
8749 }
8750
8751 /* Preserve register attributes for variable tracking. */
8752 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8753 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8754 GET_MODE_SIZE (halfmode));
8755
8756 /* Special case of reversed high/low parts. */
8757 if (reg_overlap_mentioned_p (operands[2], destlo)
8758 && reg_overlap_mentioned_p (operands[1], desthi))
8759 {
8760 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8761 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8762 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8763 }
8764 else if (!reg_overlap_mentioned_p (operands[2], destlo))
8765 {
8766 /* Try to avoid unnecessary moves if part of the result
8767 is in the right place already. */
8768 if (src1 != dest)
8769 emit_move_insn (destlo, operands[1]);
8770 if (src2 != dest + halfregs)
8771 emit_move_insn (desthi, operands[2]);
8772 }
8773 else
8774 {
8775 if (src2 != dest + halfregs)
8776 emit_move_insn (desthi, operands[2]);
8777 if (src1 != dest)
8778 emit_move_insn (destlo, operands[1]);
8779 }
8780}
8781
8782/* vec_perm support. */
8783
8784#define MAX_VECT_LEN 16
8785
8786struct expand_vec_perm_d
8787{
8788 rtx target, op0, op1;
8789 unsigned char perm[MAX_VECT_LEN];
8790 enum machine_mode vmode;
8791 unsigned char nelt;
8792 bool one_vector_p;
8793 bool testing_p;
8794};
8795
8796/* Generate a variable permutation. */
8797
8798static void
8799aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8800{
8801 enum machine_mode vmode = GET_MODE (target);
8802 bool one_vector_p = rtx_equal_p (op0, op1);
8803
8804 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8805 gcc_checking_assert (GET_MODE (op0) == vmode);
8806 gcc_checking_assert (GET_MODE (op1) == vmode);
8807 gcc_checking_assert (GET_MODE (sel) == vmode);
8808 gcc_checking_assert (TARGET_SIMD);
8809
8810 if (one_vector_p)
8811 {
8812 if (vmode == V8QImode)
8813 {
8814 /* Expand the argument to a V16QI mode by duplicating it. */
8815 rtx pair = gen_reg_rtx (V16QImode);
8816 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8817 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8818 }
8819 else
8820 {
8821 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8822 }
8823 }
8824 else
8825 {
8826 rtx pair;
8827
8828 if (vmode == V8QImode)
8829 {
8830 pair = gen_reg_rtx (V16QImode);
8831 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8832 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8833 }
8834 else
8835 {
8836 pair = gen_reg_rtx (OImode);
8837 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8838 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8839 }
8840 }
8841}
8842
8843void
8844aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8845{
8846 enum machine_mode vmode = GET_MODE (target);
c9d1a16a 8847 unsigned int nelt = GET_MODE_NUNITS (vmode);
88b08073 8848 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 8849 rtx mask;
88b08073
JG
8850
8851 /* The TBL instruction does not use a modulo index, so we must take care
8852 of that ourselves. */
f7c4e5b8
AL
8853 mask = aarch64_simd_gen_const_vector_dup (vmode,
8854 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
8855 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8856
f7c4e5b8
AL
8857 /* For big-endian, we also need to reverse the index within the vector
8858 (but not which vector). */
8859 if (BYTES_BIG_ENDIAN)
8860 {
8861 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
8862 if (!one_vector_p)
8863 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
8864 sel = expand_simple_binop (vmode, XOR, sel, mask,
8865 NULL, 0, OPTAB_LIB_WIDEN);
8866 }
88b08073
JG
8867 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
8868}
8869
cc4d934f
JG
8870/* Recognize patterns suitable for the TRN instructions. */
8871static bool
8872aarch64_evpc_trn (struct expand_vec_perm_d *d)
8873{
8874 unsigned int i, odd, mask, nelt = d->nelt;
8875 rtx out, in0, in1, x;
8876 rtx (*gen) (rtx, rtx, rtx);
8877 enum machine_mode vmode = d->vmode;
8878
8879 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8880 return false;
8881
8882 /* Note that these are little-endian tests.
8883 We correct for big-endian later. */
8884 if (d->perm[0] == 0)
8885 odd = 0;
8886 else if (d->perm[0] == 1)
8887 odd = 1;
8888 else
8889 return false;
8890 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8891
8892 for (i = 0; i < nelt; i += 2)
8893 {
8894 if (d->perm[i] != i + odd)
8895 return false;
8896 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8897 return false;
8898 }
8899
8900 /* Success! */
8901 if (d->testing_p)
8902 return true;
8903
8904 in0 = d->op0;
8905 in1 = d->op1;
8906 if (BYTES_BIG_ENDIAN)
8907 {
8908 x = in0, in0 = in1, in1 = x;
8909 odd = !odd;
8910 }
8911 out = d->target;
8912
8913 if (odd)
8914 {
8915 switch (vmode)
8916 {
8917 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8918 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8919 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8920 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8921 case V4SImode: gen = gen_aarch64_trn2v4si; break;
8922 case V2SImode: gen = gen_aarch64_trn2v2si; break;
8923 case V2DImode: gen = gen_aarch64_trn2v2di; break;
8924 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8925 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8926 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8927 default:
8928 return false;
8929 }
8930 }
8931 else
8932 {
8933 switch (vmode)
8934 {
8935 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8936 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8937 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8938 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8939 case V4SImode: gen = gen_aarch64_trn1v4si; break;
8940 case V2SImode: gen = gen_aarch64_trn1v2si; break;
8941 case V2DImode: gen = gen_aarch64_trn1v2di; break;
8942 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8943 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8944 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8945 default:
8946 return false;
8947 }
8948 }
8949
8950 emit_insn (gen (out, in0, in1));
8951 return true;
8952}
8953
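/* Worked example (illustrative): for two V4SImode operands A and B, where
   indices 0-3 select from A and 4-7 from B, TRN1 is the permutation
   { 0, 4, 2, 6 } (giving { A0, B0, A2, B2 }) and TRN2 is { 1, 5, 3, 7 }
   (giving { A1, B1, A3, B3 }).  */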
8954/* Recognize patterns suitable for the UZP instructions. */
8955static bool
8956aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8957{
8958 unsigned int i, odd, mask, nelt = d->nelt;
8959 rtx out, in0, in1, x;
8960 rtx (*gen) (rtx, rtx, rtx);
8961 enum machine_mode vmode = d->vmode;
8962
8963 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8964 return false;
8965
8966 /* Note that these are little-endian tests.
8967 We correct for big-endian later. */
8968 if (d->perm[0] == 0)
8969 odd = 0;
8970 else if (d->perm[0] == 1)
8971 odd = 1;
8972 else
8973 return false;
8974 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8975
8976 for (i = 0; i < nelt; i++)
8977 {
8978 unsigned elt = (i * 2 + odd) & mask;
8979 if (d->perm[i] != elt)
8980 return false;
8981 }
8982
8983 /* Success! */
8984 if (d->testing_p)
8985 return true;
8986
8987 in0 = d->op0;
8988 in1 = d->op1;
8989 if (BYTES_BIG_ENDIAN)
8990 {
8991 x = in0, in0 = in1, in1 = x;
8992 odd = !odd;
8993 }
8994 out = d->target;
8995
8996 if (odd)
8997 {
8998 switch (vmode)
8999 {
9000 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9001 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9002 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9003 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9004 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9005 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9006 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9007 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9008 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9009 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9010 default:
9011 return false;
9012 }
9013 }
9014 else
9015 {
9016 switch (vmode)
9017 {
9018 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9019 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9020 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9021 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9022 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9023 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9024 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9025 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9026 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9027 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9028 default:
9029 return false;
9030 }
9031 }
9032
9033 emit_insn (gen (out, in0, in1));
9034 return true;
9035}
9036
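/* Worked example (illustrative): with the same V4SImode A/B numbering,
   UZP1 is { 0, 2, 4, 6 } (the even-indexed elements, { A0, A2, B0, B2 })
   and UZP2 is { 1, 3, 5, 7 } (the odd-indexed elements).  */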
9037/* Recognize patterns suitable for the ZIP instructions. */
9038static bool
9039aarch64_evpc_zip (struct expand_vec_perm_d *d)
9040{
9041 unsigned int i, high, mask, nelt = d->nelt;
9042 rtx out, in0, in1, x;
9043 rtx (*gen) (rtx, rtx, rtx);
9044 enum machine_mode vmode = d->vmode;
9045
9046 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9047 return false;
9048
9049 /* Note that these are little-endian tests.
9050 We correct for big-endian later. */
9051 high = nelt / 2;
9052 if (d->perm[0] == high)
9053 /* Do Nothing. */
9054 ;
9055 else if (d->perm[0] == 0)
9056 high = 0;
9057 else
9058 return false;
9059 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9060
9061 for (i = 0; i < nelt / 2; i++)
9062 {
9063 unsigned elt = (i + high) & mask;
9064 if (d->perm[i * 2] != elt)
9065 return false;
9066 elt = (elt + nelt) & mask;
9067 if (d->perm[i * 2 + 1] != elt)
9068 return false;
9069 }
9070
9071 /* Success! */
9072 if (d->testing_p)
9073 return true;
9074
9075 in0 = d->op0;
9076 in1 = d->op1;
9077 if (BYTES_BIG_ENDIAN)
9078 {
9079 x = in0, in0 = in1, in1 = x;
9080 high = !high;
9081 }
9082 out = d->target;
9083
9084 if (high)
9085 {
9086 switch (vmode)
9087 {
9088 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9089 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9090 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9091 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9092 case V4SImode: gen = gen_aarch64_zip2v4si; break;
9093 case V2SImode: gen = gen_aarch64_zip2v2si; break;
9094 case V2DImode: gen = gen_aarch64_zip2v2di; break;
9095 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9096 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9097 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9098 default:
9099 return false;
9100 }
9101 }
9102 else
9103 {
9104 switch (vmode)
9105 {
9106 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9107 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9108 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9109 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9110 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9111 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9112 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9113 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9114 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9115 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9116 default:
9117 return false;
9118 }
9119 }
9120
9121 emit_insn (gen (out, in0, in1));
9122 return true;
9123}
9124
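/* Worked example (illustrative): again for V4SImode operands A and B,
   ZIP1 is { 0, 4, 1, 5 } (interleaving the low halves, { A0, B0, A1, B1 })
   and ZIP2 is { 2, 6, 3, 7 } (interleaving the high halves).  */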
ae0533da
AL
9125/* Recognize patterns for the EXT insn. */
9126
9127static bool
9128aarch64_evpc_ext (struct expand_vec_perm_d *d)
9129{
9130 unsigned int i, nelt = d->nelt;
9131 rtx (*gen) (rtx, rtx, rtx, rtx);
9132 rtx offset;
9133
9134 unsigned int location = d->perm[0]; /* Always < nelt. */
9135
9136 /* Check if the extracted indices are increasing by one. */
9137 for (i = 1; i < nelt; i++)
9138 {
9139 unsigned int required = location + i;
9140 if (d->one_vector_p)
9141 {
9142 /* We'll pass the same vector in twice, so allow indices to wrap. */
9143 required &= (nelt - 1);
9144 }
9145 if (d->perm[i] != required)
9146 return false;
9147 }
9148
ae0533da
AL
9149 switch (d->vmode)
9150 {
9151 case V16QImode: gen = gen_aarch64_extv16qi; break;
9152 case V8QImode: gen = gen_aarch64_extv8qi; break;
9153 case V4HImode: gen = gen_aarch64_extv4hi; break;
9154 case V8HImode: gen = gen_aarch64_extv8hi; break;
9155 case V2SImode: gen = gen_aarch64_extv2si; break;
9156 case V4SImode: gen = gen_aarch64_extv4si; break;
9157 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9158 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9159 case V2DImode: gen = gen_aarch64_extv2di; break;
9160 case V2DFmode: gen = gen_aarch64_extv2df; break;
9161 default:
9162 return false;
9163 }
9164
9165 /* Success! */
9166 if (d->testing_p)
9167 return true;
9168
b31e65bb
AL
9169 /* The case where (location == 0) is a no-op for both big- and little-endian,
9170 and is removed by the mid-end at optimization levels -O1 and higher. */
9171
9172 if (BYTES_BIG_ENDIAN && (location != 0))
ae0533da
AL
9173 {
9174 /* After setup, we want the high elements of the first vector (stored
9175 at the LSB end of the register), and the low elements of the second
9176 vector (stored at the MSB end of the register). So swap. */
9177 rtx temp = d->op0;
9178 d->op0 = d->op1;
9179 d->op1 = temp;
9180 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9181 location = nelt - location;
9182 }
9183
9184 offset = GEN_INT (location);
9185 emit_insn (gen (d->target, d->op0, d->op1, offset));
9186 return true;
9187}
9188
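/* Worked example (illustrative): for V4SImode the two-operand permutation
   { 1, 2, 3, 4 } has consecutive indices starting at 1, so it is matched
   here with location 1 and becomes an EXT of the two registers at an offset
   of one element (roughly "ext v0.16b, v1.16b, v2.16b, #4", with the byte
   scaling handled by the insn pattern).  */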
923fcec3
AL
9189/* Recognize patterns for the REV insns. */
9190
9191static bool
9192aarch64_evpc_rev (struct expand_vec_perm_d *d)
9193{
9194 unsigned int i, j, diff, nelt = d->nelt;
9195 rtx (*gen) (rtx, rtx);
9196
9197 if (!d->one_vector_p)
9198 return false;
9199
9200 diff = d->perm[0];
9201 switch (diff)
9202 {
9203 case 7:
9204 switch (d->vmode)
9205 {
9206 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9207 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
9208 default:
9209 return false;
9210 }
9211 break;
9212 case 3:
9213 switch (d->vmode)
9214 {
9215 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9216 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
9217 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
9218 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
9219 default:
9220 return false;
9221 }
9222 break;
9223 case 1:
9224 switch (d->vmode)
9225 {
9226 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9227 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
9228 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
9229 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
9230 case V4SImode: gen = gen_aarch64_rev64v4si; break;
9231 case V2SImode: gen = gen_aarch64_rev64v2si; break;
9232 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
9233 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
9234 default:
9235 return false;
9236 }
9237 break;
9238 default:
9239 return false;
9240 }
9241
9242 for (i = 0; i < nelt ; i += diff + 1)
9243 for (j = 0; j <= diff; j += 1)
9244 {
9245 /* This is guaranteed to be true as the value of diff
9246 is 7, 3 or 1 and we should have enough elements in the
9247 queue to generate this. Getting a vector mask with a
9248 value of diff other than these values implies that
9249 something is wrong by the time we get here. */
9250 gcc_assert (i + j < nelt);
9251 if (d->perm[i + j] != i + diff - j)
9252 return false;
9253 }
9254
9255 /* Success! */
9256 if (d->testing_p)
9257 return true;
9258
9259 emit_insn (gen (d->target, d->op0));
9260 return true;
9261}
9262
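/* Worked example (illustrative): the single-operand V4HImode permutation
   { 3, 2, 1, 0 } has d->perm[0] == 3, so it is matched as a 64-bit element
   reversal and emitted as rev64 on the .4h form; likewise { 1, 0, 3, 2 } on
   V4HImode selects rev32, and { 1, 0, 3, 2, 5, 4, 7, 6 } on V8QImode
   selects rev16.  */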
91bd4114
JG
9263static bool
9264aarch64_evpc_dup (struct expand_vec_perm_d *d)
9265{
9266 rtx (*gen) (rtx, rtx, rtx);
9267 rtx out = d->target;
9268 rtx in0;
9269 enum machine_mode vmode = d->vmode;
9270 unsigned int i, elt, nelt = d->nelt;
9271 rtx lane;
9272
9273 /* TODO: This may not be big-endian safe. */
9274 if (BYTES_BIG_ENDIAN)
9275 return false;
9276
9277 elt = d->perm[0];
9278 for (i = 1; i < nelt; i++)
9279 {
9280 if (elt != d->perm[i])
9281 return false;
9282 }
9283
9284 /* The generic preparation in aarch64_expand_vec_perm_const_1
9285 swaps the operand order and the permute indices if it finds
9286 d->perm[0] to be in the second operand. Thus, we can always
9287 use d->op0 and need not do any extra arithmetic to get the
9288 correct lane number. */
9289 in0 = d->op0;
9290 lane = GEN_INT (elt);
9291
9292 switch (vmode)
9293 {
9294 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9295 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9296 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9297 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9298 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9299 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9300 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9301 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9302 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9303 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9304 default:
9305 return false;
9306 }
9307
9308 emit_insn (gen (out, in0, lane));
9309 return true;
9310}
9311
88b08073
JG
9312static bool
9313aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9314{
9315 rtx rperm[MAX_VECT_LEN], sel;
9316 enum machine_mode vmode = d->vmode;
9317 unsigned int i, nelt = d->nelt;
9318
88b08073
JG
9319 if (d->testing_p)
9320 return true;
9321
9322 /* Generic code will try constant permutation twice. Once with the
9323 original mode and again with the elements lowered to QImode.
9324 So wait and don't do the selector expansion ourselves. */
9325 if (vmode != V8QImode && vmode != V16QImode)
9326 return false;
9327
9328 for (i = 0; i < nelt; ++i)
bbcc9c00
TB
9329 {
9330 int nunits = GET_MODE_NUNITS (vmode);
9331
9332 /* If big-endian and two vectors we end up with a weird mixed-endian
9333 mode on NEON. Reverse the index within each word but not the word
9334 itself. */
9335 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9336 : d->perm[i]);
9337 }
88b08073
JG
9338 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9339 sel = force_reg (vmode, sel);
9340
9341 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9342 return true;
9343}
9344
9345static bool
9346aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9347{
9348 /* The pattern matching functions above are written to look for a small
9349 number to begin the sequence (0, 1, N/2). If we begin with an index
9350 from the second operand, we can swap the operands. */
9351 if (d->perm[0] >= d->nelt)
9352 {
9353 unsigned i, nelt = d->nelt;
9354 rtx x;
9355
0696116a 9356 gcc_assert (nelt == (nelt & -nelt));
88b08073 9357 for (i = 0; i < nelt; ++i)
0696116a 9358 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
88b08073
JG
9359
9360 x = d->op0;
9361 d->op0 = d->op1;
9362 d->op1 = x;
9363 }
9364
9365 if (TARGET_SIMD)
cc4d934f 9366 {
923fcec3
AL
9367 if (aarch64_evpc_rev (d))
9368 return true;
9369 else if (aarch64_evpc_ext (d))
ae0533da
AL
9370 return true;
9371 else if (aarch64_evpc_zip (d))
cc4d934f
JG
9372 return true;
9373 else if (aarch64_evpc_uzp (d))
9374 return true;
9375 else if (aarch64_evpc_trn (d))
9376 return true;
91bd4114
JG
9377 else if (aarch64_evpc_dup (d))
9378 return true;
cc4d934f
JG
9379 return aarch64_evpc_tbl (d);
9380 }
88b08073
JG
9381 return false;
9382}
9383
9384/* Expand a vec_perm_const pattern. */
9385
9386bool
9387aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9388{
9389 struct expand_vec_perm_d d;
9390 int i, nelt, which;
9391
9392 d.target = target;
9393 d.op0 = op0;
9394 d.op1 = op1;
9395
9396 d.vmode = GET_MODE (target);
9397 gcc_assert (VECTOR_MODE_P (d.vmode));
9398 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9399 d.testing_p = false;
9400
9401 for (i = which = 0; i < nelt; ++i)
9402 {
9403 rtx e = XVECEXP (sel, 0, i);
9404 int ei = INTVAL (e) & (2 * nelt - 1);
9405 which |= (ei < nelt ? 1 : 2);
9406 d.perm[i] = ei;
9407 }
9408
9409 switch (which)
9410 {
9411 default:
9412 gcc_unreachable ();
9413
9414 case 3:
9415 d.one_vector_p = false;
9416 if (!rtx_equal_p (op0, op1))
9417 break;
9418
9419 /* The elements of PERM do not suggest that only the first operand
9420 is used, but both operands are identical. Allow easier matching
9421 of the permutation by folding the permutation into the single
9422 input vector. */
9423 /* Fall Through. */
9424 case 2:
9425 for (i = 0; i < nelt; ++i)
9426 d.perm[i] &= nelt - 1;
9427 d.op0 = op1;
9428 d.one_vector_p = true;
9429 break;
9430
9431 case 1:
9432 d.op1 = op0;
9433 d.one_vector_p = true;
9434 break;
9435 }
9436
9437 return aarch64_expand_vec_perm_const_1 (&d);
9438}
9439
9440static bool
9441aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
9442 const unsigned char *sel)
9443{
9444 struct expand_vec_perm_d d;
9445 unsigned int i, nelt, which;
9446 bool ret;
9447
9448 d.vmode = vmode;
9449 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9450 d.testing_p = true;
9451 memcpy (d.perm, sel, nelt);
9452
9453 /* Calculate whether all elements are in one vector. */
9454 for (i = which = 0; i < nelt; ++i)
9455 {
9456 unsigned char e = d.perm[i];
9457 gcc_assert (e < 2 * nelt);
9458 which |= (e < nelt ? 1 : 2);
9459 }
9460
9461 /* If all elements are from the second vector, reindex as if from the
9462 first vector. */
9463 if (which == 2)
9464 for (i = 0; i < nelt; ++i)
9465 d.perm[i] -= nelt;
9466
9467 /* Check whether the mask can be applied to a single vector. */
9468 d.one_vector_p = (which != 3);
9469
9470 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
9471 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
9472 if (!d.one_vector_p)
9473 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
9474
9475 start_sequence ();
9476 ret = aarch64_expand_vec_perm_const_1 (&d);
9477 end_sequence ();
9478
9479 return ret;
9480}
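The testing-only path above performs one extra normalization worth illustrating: when every index points into the second vector (which == 2), the mask is reindexed as if it applied to a single vector. A minimal sketch with an invented mask:

#include <stdio.h>

int
main (void)
{
  const unsigned nelt = 4;
  unsigned char sel[4] = { 6, 7, 4, 5 };   /* All indices >= nelt, so which == 2.  */

  for (unsigned i = 0; i < nelt; i++)
    sel[i] -= nelt;                        /* Reindex as if from the first vector.  */

  for (unsigned i = 0; i < nelt; i++)
    printf ("%u ", sel[i]);                /* Prints "2 3 0 1".  */
  printf ("\n");
  return 0;
}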
9481
69675d50
TB
9482/* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
9483bool
9484aarch64_cannot_change_mode_class (enum machine_mode from,
9485 enum machine_mode to,
9486 enum reg_class rclass)
9487{
 9488  /* Full-register subregs are allowed on general regs, and on any class
 9489     when the two modes are the same size.  */
9490 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9491 || !reg_classes_intersect_p (FP_REGS, rclass))
9492 return false;
9493
 9494  /* Only limited combinations of subregs are safe on FP registers.  In particular:
 9495     1. Vector mode to scalar mode, where one unit of the vector is accessed.
 9496     2. Scalar mode to scalar mode, for integer modes or same-size float modes.
97e1ad78
JG
 9497     3. Vector mode to vector mode.
 9498     4. On little-endian only, vector-structure mode to vector mode.  */
69675d50
TB
9499 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
9500 {
9501 if (aarch64_vector_mode_supported_p (from)
9502 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
9503 return false;
9504
9505 if (GET_MODE_NUNITS (from) == 1
9506 && GET_MODE_NUNITS (to) == 1
9507 && (GET_MODE_CLASS (from) == MODE_INT
9508 || from == to))
9509 return false;
9510
9511 if (aarch64_vector_mode_supported_p (from)
9512 && aarch64_vector_mode_supported_p (to))
9513 return false;
97e1ad78
JG
9514
 9515  /* Within a vector structure straddling multiple vector registers
9516 we are in a mixed-endian representation. As such, we can't
9517 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
9518 switch between vectors and vector structures cheaply. */
9519 if (!BYTES_BIG_ENDIAN)
9520 if ((aarch64_vector_mode_supported_p (from)
9521 && aarch64_vect_struct_mode_p (to))
9522 || (aarch64_vector_mode_supported_p (to)
9523 && aarch64_vect_struct_mode_p (from)))
9524 return false;
69675d50
TB
9525 }
9526
9527 return true;
9528}
9529
97e1ad78
JG
9530/* Implement MODES_TIEABLE_P. */
9531
9532bool
9533aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9534{
9535 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
9536 return true;
9537
9538 /* We specifically want to allow elements of "structure" modes to
9539 be tieable to the structure. This more general condition allows
9540 other rarer situations too. */
9541 if (TARGET_SIMD
9542 && aarch64_vector_mode_p (mode1)
9543 && aarch64_vector_mode_p (mode2))
9544 return true;
9545
9546 return false;
9547}
9548
e2c75eea
JG
9549/* Return a new RTX holding the result of moving POINTER forward by
9550 AMOUNT bytes. */
9551
9552static rtx
9553aarch64_move_pointer (rtx pointer, int amount)
9554{
9555 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
9556
9557 return adjust_automodify_address (pointer, GET_MODE (pointer),
9558 next, amount);
9559}
9560
9561/* Return a new RTX holding the result of moving POINTER forward by the
9562 size of the mode it points to. */
9563
9564static rtx
9565aarch64_progress_pointer (rtx pointer)
9566{
9567 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
9568
9569 return aarch64_move_pointer (pointer, amount);
9570}
9571
9572/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
9573 MODE bytes. */
9574
9575static void
9576aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
9577 enum machine_mode mode)
9578{
9579 rtx reg = gen_reg_rtx (mode);
9580
9581 /* "Cast" the pointers to the correct mode. */
9582 *src = adjust_address (*src, mode, 0);
9583 *dst = adjust_address (*dst, mode, 0);
9584 /* Emit the memcpy. */
9585 emit_move_insn (reg, *src);
9586 emit_move_insn (*dst, reg);
9587 /* Move the pointers forward. */
9588 *src = aarch64_progress_pointer (*src);
9589 *dst = aarch64_progress_pointer (*dst);
9590}
9591
9592/* Expand movmem, as if from a __builtin_memcpy. Return true if
9593 we succeed, otherwise return false. */
9594
9595bool
9596aarch64_expand_movmem (rtx *operands)
9597{
9598 unsigned int n;
9599 rtx dst = operands[0];
9600 rtx src = operands[1];
9601 rtx base;
9602 bool speed_p = !optimize_function_for_size_p (cfun);
9603
9604 /* When optimizing for size, give a better estimate of the length of a
9605 memcpy call, but use the default otherwise. */
9606 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
9607
9608 /* We can't do anything smart if the amount to copy is not constant. */
9609 if (!CONST_INT_P (operands[2]))
9610 return false;
9611
9612 n = UINTVAL (operands[2]);
9613
 9614  /* Try to keep the number of instructions low.  For cases below 16 bytes we
 9615     need at most two moves.  For cases above 16 bytes it will be one
 9616     move for each 16-byte chunk, then at most two additional moves.  */
9617 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
9618 return false;
9619
9620 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9621 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
9622
9623 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
9624 src = adjust_automodify_address (src, VOIDmode, base, 0);
9625
 9626  /* Simple cases.  Copy 0-3 bytes: first (if applicable) a 2-byte chunk,
 9627     then a 1-byte chunk.  */
9628 if (n < 4)
9629 {
9630 if (n >= 2)
9631 {
9632 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9633 n -= 2;
9634 }
9635
9636 if (n == 1)
9637 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9638
9639 return true;
9640 }
9641
 9642  /* Copy 4-7 bytes.  First a 4-byte chunk, then (if applicable) a second
 9643     4-byte chunk, partially overlapping with the previously copied chunk.  */
9644 if (n < 8)
9645 {
9646 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9647 n -= 4;
9648 if (n > 0)
9649 {
9650 int move = n - 4;
9651
9652 src = aarch64_move_pointer (src, move);
9653 dst = aarch64_move_pointer (dst, move);
9654 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9655 }
9656 return true;
9657 }
9658
 9659  /* Copy 8 or more bytes.  Copy chunks of 16 bytes until we run out of
 9660     them, then (if applicable) an 8-byte chunk.  */
9661 while (n >= 8)
9662 {
9663 if (n / 16)
9664 {
9665 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
9666 n -= 16;
9667 }
9668 else
9669 {
9670 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9671 n -= 8;
9672 }
9673 }
9674
 9675  /* Copy the final bytes.  We can always do this in one instruction:
 9676     either copy exactly the amount we need, or copy 8 bytes that
 9677     partially overlap the previously copied chunk.  */
9678 if (n == 0)
9679 return true;
9680 else if (n == 1)
9681 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9682 else if (n == 2)
9683 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9684 else if (n == 4)
9685 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9686 else
9687 {
9688 if (n == 3)
9689 {
9690 src = aarch64_move_pointer (src, -1);
9691 dst = aarch64_move_pointer (dst, -1);
9692 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9693 }
9694 else
9695 {
9696 int move = n - 8;
9697
9698 src = aarch64_move_pointer (src, move);
9699 dst = aarch64_move_pointer (dst, move);
9700 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9701 }
9702 }
9703
9704 return true;
9705}
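To make the chunking strategy above concrete, here is a standalone model (not part of this file, and ignoring the max_instructions cut-off) that prints the chunk sizes and byte offsets the expansion emits for a constant length. Note how the tail cases deliberately re-copy a few bytes rather than issue extra small moves.

#include <stdio.h>

/* Illustration helpers, not GCC code.  */
static void
emit_chunk (unsigned offset, unsigned size)
{
  printf ("copy %u byte(s) at offset %u\n", size, offset);
}

static void
model_movmem (unsigned n)
{
  unsigned offset = 0;

  if (n < 4)
    {
      if (n >= 2) { emit_chunk (offset, 2); offset += 2; n -= 2; }
      if (n == 1) emit_chunk (offset, 1);
      return;
    }
  if (n < 8)
    {
      emit_chunk (offset, 4);
      if (n > 4)
        emit_chunk (offset + n - 4, 4);     /* Overlaps the first chunk.  */
      return;
    }
  while (n >= 8)
    {
      if (n / 16) { emit_chunk (offset, 16); offset += 16; n -= 16; }
      else        { emit_chunk (offset, 8);  offset += 8;  n -= 8;  }
    }
  if (n == 0)
    return;
  if (n == 1 || n == 2 || n == 4)
    emit_chunk (offset, n);
  else if (n == 3)
    emit_chunk (offset - 1, 4);             /* Overlap by one byte.  */
  else
    emit_chunk (offset + n - 8, 8);         /* n is 5, 6 or 7: overlapping 8 bytes.  */
}

int
main (void)
{
  model_movmem (23);   /* 16 bytes at offset 0, then 8 bytes at offset 15.  */
  return 0;
}

For 23 bytes, for example, the model (like the expander) emits one 16-byte copy followed by an 8-byte copy that overlaps the first by one byte, instead of an 8-, 4-, 2- and 1-byte sequence.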
9706
43e9d192
IB
9707#undef TARGET_ADDRESS_COST
9708#define TARGET_ADDRESS_COST aarch64_address_cost
9709
 9710/* This hook determines whether unnamed bitfields affect the alignment
 9711   of the containing structure.  It returns true if the structure
 9712   should inherit the alignment requirements of an unnamed bitfield's
 9713   type.  */
9714#undef TARGET_ALIGN_ANON_BITFIELD
9715#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
9716
9717#undef TARGET_ASM_ALIGNED_DI_OP
9718#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
9719
9720#undef TARGET_ASM_ALIGNED_HI_OP
9721#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
9722
9723#undef TARGET_ASM_ALIGNED_SI_OP
9724#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
9725
9726#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9727#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
9728 hook_bool_const_tree_hwi_hwi_const_tree_true
9729
9730#undef TARGET_ASM_FILE_START
9731#define TARGET_ASM_FILE_START aarch64_start_file
9732
9733#undef TARGET_ASM_OUTPUT_MI_THUNK
9734#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
9735
9736#undef TARGET_ASM_SELECT_RTX_SECTION
9737#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
9738
9739#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
9740#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
9741
9742#undef TARGET_BUILD_BUILTIN_VA_LIST
9743#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
9744
9745#undef TARGET_CALLEE_COPIES
9746#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
9747
9748#undef TARGET_CAN_ELIMINATE
9749#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
9750
9751#undef TARGET_CANNOT_FORCE_CONST_MEM
9752#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
9753
9754#undef TARGET_CONDITIONAL_REGISTER_USAGE
9755#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
9756
9757/* Only the least significant bit is used for initialization guard
9758 variables. */
9759#undef TARGET_CXX_GUARD_MASK_BIT
9760#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
9761
9762#undef TARGET_C_MODE_FOR_SUFFIX
9763#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
9764
9765#ifdef TARGET_BIG_ENDIAN_DEFAULT
9766#undef TARGET_DEFAULT_TARGET_FLAGS
9767#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
9768#endif
9769
9770#undef TARGET_CLASS_MAX_NREGS
9771#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9772
119103ca
JG
9773#undef TARGET_BUILTIN_DECL
9774#define TARGET_BUILTIN_DECL aarch64_builtin_decl
9775
43e9d192
IB
9776#undef TARGET_EXPAND_BUILTIN
9777#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9778
9779#undef TARGET_EXPAND_BUILTIN_VA_START
9780#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9781
9697e620
JG
9782#undef TARGET_FOLD_BUILTIN
9783#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9784
43e9d192
IB
9785#undef TARGET_FUNCTION_ARG
9786#define TARGET_FUNCTION_ARG aarch64_function_arg
9787
9788#undef TARGET_FUNCTION_ARG_ADVANCE
9789#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9790
9791#undef TARGET_FUNCTION_ARG_BOUNDARY
9792#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9793
9794#undef TARGET_FUNCTION_OK_FOR_SIBCALL
9795#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9796
9797#undef TARGET_FUNCTION_VALUE
9798#define TARGET_FUNCTION_VALUE aarch64_function_value
9799
9800#undef TARGET_FUNCTION_VALUE_REGNO_P
9801#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9802
9803#undef TARGET_FRAME_POINTER_REQUIRED
9804#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9805
0ac198d3
JG
9806#undef TARGET_GIMPLE_FOLD_BUILTIN
9807#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9808
43e9d192
IB
9809#undef TARGET_GIMPLIFY_VA_ARG_EXPR
9810#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9811
9812#undef TARGET_INIT_BUILTINS
9813#define TARGET_INIT_BUILTINS aarch64_init_builtins
9814
9815#undef TARGET_LEGITIMATE_ADDRESS_P
9816#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9817
9818#undef TARGET_LEGITIMATE_CONSTANT_P
9819#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9820
9821#undef TARGET_LIBGCC_CMP_RETURN_MODE
9822#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9823
38e8f663
YR
9824#undef TARGET_LRA_P
9825#define TARGET_LRA_P aarch64_lra_p
9826
ac2b960f
YZ
9827#undef TARGET_MANGLE_TYPE
9828#define TARGET_MANGLE_TYPE aarch64_mangle_type
9829
43e9d192
IB
9830#undef TARGET_MEMORY_MOVE_COST
9831#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9832
9833#undef TARGET_MUST_PASS_IN_STACK
9834#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9835
9836/* This target hook should return true if accesses to volatile bitfields
9837 should use the narrowest mode possible. It should return false if these
9838 accesses should use the bitfield container type. */
9839#undef TARGET_NARROW_VOLATILE_BITFIELD
9840#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9841
9842#undef TARGET_OPTION_OVERRIDE
9843#define TARGET_OPTION_OVERRIDE aarch64_override_options
9844
9845#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9846#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9847 aarch64_override_options_after_change
9848
9849#undef TARGET_PASS_BY_REFERENCE
9850#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9851
9852#undef TARGET_PREFERRED_RELOAD_CLASS
9853#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9854
9855#undef TARGET_SECONDARY_RELOAD
9856#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9857
9858#undef TARGET_SHIFT_TRUNCATION_MASK
9859#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9860
9861#undef TARGET_SETUP_INCOMING_VARARGS
9862#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9863
9864#undef TARGET_STRUCT_VALUE_RTX
9865#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
9866
9867#undef TARGET_REGISTER_MOVE_COST
9868#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9869
9870#undef TARGET_RETURN_IN_MEMORY
9871#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9872
9873#undef TARGET_RETURN_IN_MSB
9874#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9875
9876#undef TARGET_RTX_COSTS
7cc2145f 9877#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 9878
d126a4ae
AP
9879#undef TARGET_SCHED_ISSUE_RATE
9880#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9881
43e9d192
IB
9882#undef TARGET_TRAMPOLINE_INIT
9883#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9884
9885#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9886#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9887
9888#undef TARGET_VECTOR_MODE_SUPPORTED_P
9889#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9890
9891#undef TARGET_ARRAY_MODE_SUPPORTED_P
9892#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9893
8990e73a
TB
9894#undef TARGET_VECTORIZE_ADD_STMT_COST
9895#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9896
9897#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9898#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9899 aarch64_builtin_vectorization_cost
9900
43e9d192
IB
9901#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9902#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9903
42fc9a7f
JG
9904#undef TARGET_VECTORIZE_BUILTINS
9905#define TARGET_VECTORIZE_BUILTINS
9906
9907#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9908#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9909 aarch64_builtin_vectorized_function
9910
3b357264
JG
9911#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9912#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9913 aarch64_autovectorize_vector_sizes
9914
aa87aced
KV
9915#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
9916#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
9917 aarch64_atomic_assign_expand_fenv
9918
43e9d192
IB
9919/* Section anchor support. */
9920
9921#undef TARGET_MIN_ANCHOR_OFFSET
9922#define TARGET_MIN_ANCHOR_OFFSET -256
9923
9924/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9925 byte offset; we can do much more for larger data types, but have no way
9926 to determine the size of the access. We assume accesses are aligned. */
9927#undef TARGET_MAX_ANCHOR_OFFSET
9928#define TARGET_MAX_ANCHOR_OFFSET 4095
9929
db0253a4
TB
9930#undef TARGET_VECTOR_ALIGNMENT
9931#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9932
9933#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9934#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9935 aarch64_simd_vector_alignment_reachable
9936
88b08073
JG
9937/* vec_perm support. */
9938
9939#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9940#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9941 aarch64_vectorize_vec_perm_const_ok
9942
70f09188 9943
706b2314 9944#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
9945#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9946
5cb74e90
RR
9947#undef TARGET_FLAGS_REGNUM
9948#define TARGET_FLAGS_REGNUM CC_REGNUM
9949
78607708
TV
9950#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
9951#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
9952
43e9d192
IB
9953struct gcc_target targetm = TARGET_INITIALIZER;
9954
9955#include "gt-aarch64.h"