/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2014 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-codes.h"
#include "rtl.h"
#include "insn-attr.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "regs.h"
#include "df.h"
#include "hard-reg-set.h"
#include "output.h"
#include "expr.h"
#include "reload.h"
#include "toplev.h"
#include "target.h"
#include "target-def.h"
#include "targhooks.h"
#include "ggc.h"
#include "function.h"
#include "tm_p.h"
#include "recog.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "vec.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "optabs.h"
#include "dwarf2.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "config/arm/aarch-cost-tables.h"
#include "dumpfile.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC:
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};
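
/* As a rough illustration, assembly addresses such as [x0, 16] would fall
   into ADDRESS_REG_IMM, [x0], 16 (post-index writeback) into ADDRESS_REG_WB,
   [x0, x1, lsl 3] into ADDRESS_REG_REG, [x0, w1, uxtw 2] into
   ADDRESS_REG_UXTW, [x0, w1, sxtw 2] into ADDRESS_REG_SXTW,
   [x0, #:lo12:sym] into ADDRESS_LO_SUM, and a pc-relative literal-pool
   reference such as ldr x0, .LC0 into ADDRESS_SYMBOLIC.  */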
struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_lra_p (void);
static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
						     const_tree,
						     enum machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
					      HOST_WIDE_INT, HOST_WIDE_INT);

static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
						 const unsigned char *sel);
static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
#if HAVE_DESIGNATED_INITIALIZERS
  .addr_scale_costs =
#endif
    {
      NAMED_PARAM (qi, 0),
      NAMED_PARAM (hi, 0),
      NAMED_PARAM (si, 0),
      NAMED_PARAM (ti, 0),
    },
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
#if HAVE_DESIGNATED_INITIALIZERS
  .addr_scale_costs =
#endif
    {
      NAMED_PARAM (qi, 0),
      NAMED_PARAM (hi, 1),
      NAMED_PARAM (si, 0),
      NAMED_PARAM (ti, 1),
    },
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0),
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  /* We currently do not provide direct support for TFmode Q->Q move.
     Therefore we need to raise the cost above 2 in order to have
     reload handle the situation.  */
  NAMED_PARAM (FP2FP, 4)
};

/* Generic costs for vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost generic_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 1),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 1),
  NAMED_PARAM (vec_to_scalar_cost, 1),
  NAMED_PARAM (scalar_to_vec_cost, 1),
  NAMED_PARAM (vec_align_load_cost, 1),
  NAMED_PARAM (vec_unalign_load_cost, 1),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 3),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};

/* Costs for vector insn classes on Cortex-A57.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 4),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 3),
  NAMED_PARAM (vec_to_scalar_cost, 8),
  NAMED_PARAM (scalar_to_vec_cost, 8),
  NAMED_PARAM (vec_align_load_cost, 5),
  NAMED_PARAM (vec_unalign_load_cost, 5),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 1),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 2)
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 2)
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &generic_regmove_cost,
  &cortexa57_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 3)
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, NULL, 0, NULL}
};

/* Target specification.  These are populated as commandline arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};

/* Used to track the size of an address when generating a pre/post
   increment address.  */
static enum machine_mode aarch64_memory_reference_mode;

/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
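
/* For example, 0x00ff00ff00ff00ff is a valid bitmask immediate (a run of
   eight set bits replicated in every 16-bit chunk), whereas an arbitrary
   value such as 0x12345678 is not; the table above is populated during
   target initialization with all AARCH64_NUM_BITMASKS encodable values.  */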
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
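
/* The codes above are laid out in complementary pairs, so flipping the low
   bit yields the inverse condition; e.g. AARCH64_EQ (0) maps to
   AARCH64_NE (1) and AARCH64_GE (10) maps to AARCH64_LT (11).  */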
/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (enum machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
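
/* For instance, a 16-byte TImode value occupies a single vector register
   when REGNO is an FP/SIMD register, but two X registers when it lives in
   the general register file.  */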
/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
enum machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
				     enum machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
	return mode;
      else
	return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}

/* Return true if calls to DECL should be treated as
   long-calls (ie called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (ie called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
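
/* As a worked example (in DImode): EXTRACT_IMM == 34 and MULT_IMM == 4
   satisfy the test above, since 34 & ~7 == 32 is a power of two and the low
   bits (2) match a multiply by 1 << 2; such an extract is roughly equivalent
   to a 32-bit zero/sign extend combined with a left shift by 2.  */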
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add   dest, x1, :lo_12:foo
                                     nop

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                     bl   __tls_get_addr
                                     nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp
   mrs  tp, tpidr_el0
   add  dest, dest, tp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm
   add  t0, #:tprel_lo12_nc:imm
*/

static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode.  */
	rtx tmp_reg = dest;
	enum machine_mode mode = GET_MODE (dest);

	gcc_assert (mode == Pmode || mode == ptr_mode);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (Pmode, dest, imm));
      return;

    case SYMBOL_SMALL_GOT:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */
	rtx tmp_reg = dest;
	enum machine_mode mode = GET_MODE (dest);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
	    else
	      emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
	  }

	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx insns;
	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

	start_sequence ();
	emit_call_insn (gen_tlsgd_small (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	enum machine_mode mode = GET_MODE (dest);
	rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
	rtx tp;

	gcc_assert (mode == Pmode || mode == ptr_mode);

	/* In ILP32, the got entry is always of SImode size.  Unlike
	   small GOT, the dest is fixed at reg 0.  */
	if (TARGET_ILP32)
	  emit_insn (gen_tlsdesc_small_si (imm));
	else
	  emit_insn (gen_tlsdesc_small_di (imm));
	tp = aarch64_load_tp (NULL);

	if (mode != Pmode)
	  tp = gen_lowpart (mode, tp);

	emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different tlsie_small
	   patterns here (two patterns for ILP32).  */
	enum machine_mode mode = GET_MODE (dest);
	rtx tmp_reg = gen_reg_rtx (mode);
	rtx tp = aarch64_load_tp (NULL);

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_tlsie_small_di (tmp_reg, imm));
	    else
	      {
		emit_insn (gen_tlsie_small_si (tmp_reg, imm));
		tp = gen_lowpart (mode, tp);
	      }
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
	  }

	emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TPREL:
      {
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsle_small (dest, tp, imm));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    default:
      gcc_unreachable ();
    }
}

/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}

/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  enum machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  src_lo = gen_lowpart (word_mode, src);
	  src_hi = gen_highpart (word_mode, src);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
	    }
	  return;
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  dst_lo = gen_lowpart (word_mode, dst);
	  dst_hi = gen_highpart (word_mode, dst);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
	    }
	  return;
	}
    }

  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
}
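
/* For instance, copying a TImode value held in {x1, x2} into {x2, x3} would
   clobber the source high part if the low halves were moved first; the
   overlap check above therefore moves x2 into x3 before x1 into x2 in that
   case.  */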
bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}

/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  enum machine_mode src_mode = GET_MODE (src1);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
	{
	case V8QImode:
	  gen = gen_aarch64_simd_combinev8qi;
	  break;
	case V4HImode:
	  gen = gen_aarch64_simd_combinev4hi;
	  break;
	case V2SImode:
	  gen = gen_aarch64_simd_combinev2si;
	  break;
	case V2SFmode:
	  gen = gen_aarch64_simd_combinev2sf;
	  break;
	case DImode:
	  gen = gen_aarch64_simd_combinedi;
	  break;
	case DFmode:
	  gen = gen_aarch64_simd_combinedf;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src1, src2));
      return;
    }
}

/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
      return;
    }
}

static rtx
aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}


static rtx
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
	 might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}

void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  enum machine_mode mode = GET_MODE (dest);
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match;

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
	 before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
      switch (sty)
	{
	case SYMBOL_FORCE_TO_MEM:
	  if (offset != const0_rtx
	      && targetm.cannot_force_const_mem (mode, imm))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  mem = force_const_mem (ptr_mode, imm);
	  gcc_assert (mem);
	  if (mode != ptr_mode)
	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	  return;

	case SYMBOL_SMALL_TLSGD:
	case SYMBOL_SMALL_TLSDESC:
	case SYMBOL_SMALL_GOTTPREL:
	case SYMBOL_SMALL_GOT:
	case SYMBOL_TINY_GOT:
	  if (offset != const0_rtx)
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  /* FALLTHRU */

	case SYMBOL_SMALL_TPREL:
	case SYMBOL_SMALL_ABSOLUTE:
	case SYMBOL_TINY_ABSOLUTE:
	  aarch64_load_symref_appropriately (dest, imm, sty);
	  return;

	default:
	  gcc_unreachable ();
	}
    }

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      return;
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      else
	{
	  rtx mem = force_const_mem (mode, imm);
	  gcc_assert (mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	}

      return;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
	 in two; so don't mess around looking for sequences that don't buy
	 us anything.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
				 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
      return;
    }

  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == 0)
	zero_match++;
      else if ((val & mask) == mask)
	one_match++;
    }
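
  /* For example, 0x0000123400005678 has two zero halfwords, so the code
     below jumps straight to simple_sequence and would emit roughly
	 mov  dest, #0x5678
	 movk dest, #0x1234, lsl 32
     rather than searching for an arithmetic or logical combination.  */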
  if (one_match == 2)
    {
      mask = 0xffff;
      for (i = 0; i < 64; i += 16, mask <<= 16)
	{
	  if ((val & mask) != mask)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					 GEN_INT ((val >> i) & 0xffff)));
	      return;
	    }
	}
      gcc_unreachable ();
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val + comp) & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val + comp) & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val - comp) | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val - comp) | ~mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (val | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val | ~mask))));
	  return;
	}
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (aarch64_bitmasks[i])));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - aarch64_bitmasks[i])));
	  return;
	}

      for (j = 0; j < 64; j += 16, mask <<= 16)
	{
	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (aarch64_bitmasks[i])));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (j),
					 GEN_INT ((val >> j) & 0xffff)));
	      return;
	    }
	}
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[i])));
		emit_insn (gen_iordi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[j])));
		return;
	      }
	}
      else if ((val & aarch64_bitmasks[i]) == val)
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
	      {

		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[j])));
		emit_insn (gen_anddi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[i])));
		return;
	      }
	}
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
	{
	  if (first)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (val & mask)));
	      first = false;
	    }
	  else
	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
				       GEN_INT ((val >> i) & 0xffff)));
	}
    }
}

static bool
aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Indirect calls are not currently supported.  */
  if (decl == NULL)
    return false;

  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
  if (aarch64_decl_is_long_call_p (decl))
    return false;

  return true;
}

/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
			   enum machine_mode mode,
			   const_tree type,
			   bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
    ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  /* Aggregates are passed by reference based on their size.  */
  if (type && AGGREGATE_TYPE_P (type))
    {
      size = int_size_in_bytes (type);
    }

  /* Variable sized arguments are always passed by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &dummymode, &nregs,
					       NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogenous floating point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}

/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  enum machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
					       &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}

/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp;
  int count;
  enum machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
	{
	  gcc_assert (count == 1 && mode == ag_mode);
	  return gen_rtx_REG (mode, V0_REGNUM);
	}
      else
	{
	  int i;
	  rtx par;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
	  for (i = 0; i < count; i++)
	    {
	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  return par;
	}
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}
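
/* As an illustration: a struct of four floats is an HFA, so it would come
   back in s0-s3 via the PARALLEL built above, while a 16-byte struct of two
   longs is returned in x0/x1 and a plain int is promoted and returned in
   w0/x0.  */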
/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return !TARGET_GENERAL_REGS_ONLY;

  return false;
}

/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
					       type,
					       &ag_mode,
					       &count,
					       NULL))
    return false;

  /* Types larger than 2 registers returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}

static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
			       const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
						  type,
						  &pcum->aapcs_vfp_rmode,
						  nregs,
						  NULL);
}

/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
	{
	  if (TYPE_MODE (type) == mode)
	    alignment = TYPE_ALIGN (type);
	  else
	    alignment = GET_MODE_ALIGNMENT (mode);
	}
      else
	alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}

/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		    const_tree type,
		    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
						 mode,
						 type,
						 &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
     and homogenous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (nvrn + nregs <= NUM_FP_ARG_REGS)
	{
	  pcum->aapcs_nextnvrn = nvrn + nregs;
	  if (!aarch64_composite_type_p (type, mode))
	    {
	      gcc_assert (nregs == 1);
	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
	    }
	  else
	    {
	      rtx par;
	      int i;
	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	      for (i = 0; i < nregs; i++)
		{
		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
					 V0_REGNUM + nvrn + i);
		  tmp = gen_rtx_EXPR_LIST
		    (VOIDmode, tmp,
		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
		  XVECEXP (par, 0, i) = tmp;
		}
	      pcum->aapcs_reg = par;
	    }
	  return;
	}
      else
	{
	  /* C.3 NSRN is set to 8.  */
	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
	  goto on_stack;
	}
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
	   + UNITS_PER_WORD - 1) / UNITS_PER_WORD;


  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
	 rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
	{
	  ++ncrn;
	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
	}
      /* NREGS can be 0 when e.g. an empty structure is to be passed.
	 A reg is still generated for it, but the caller should be smart
	 enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
	{
	  pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
	}
      else
	{
	  rtx par;
	  int i;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	  for (i = 0; i < nregs; i++)
	    {
	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * UNITS_PER_WORD));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  pcum->aapcs_reg = par;
	}

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  /* C.11  */
  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument (we can re-use NREGS) and align the total size if
     necessary.  */
on_stack:
  pcum->aapcs_stack_words = nregs;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
					       16 / UNITS_PER_WORD) + 1;
  return;
}
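
/* Example of the C.8 rule above: for a call such as f (int a, __int128 b),
   A is assigned w0 (NGRN becomes 1); B needs two registers with 16-byte
   alignment, so the NGRN is rounded up to 2 and B is passed in x2/x3,
   leaving x1 unused.  */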
1632/* Implement TARGET_FUNCTION_ARG. */
1633
1634static rtx
1635aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1636 const_tree type, bool named)
1637{
1638 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1639 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1640
1641 if (mode == VOIDmode)
1642 return NULL_RTX;
1643
1644 aarch64_layout_arg (pcum_v, mode, type, named);
1645 return pcum->aapcs_reg;
1646}
1647
1648void
1649aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1650 const_tree fntype ATTRIBUTE_UNUSED,
1651 rtx libname ATTRIBUTE_UNUSED,
1652 const_tree fndecl ATTRIBUTE_UNUSED,
1653 unsigned n_named ATTRIBUTE_UNUSED)
1654{
1655 pcum->aapcs_ncrn = 0;
1656 pcum->aapcs_nvrn = 0;
1657 pcum->aapcs_nextncrn = 0;
1658 pcum->aapcs_nextnvrn = 0;
1659 pcum->pcs_variant = ARM_PCS_AAPCS64;
1660 pcum->aapcs_reg = NULL_RTX;
1661 pcum->aapcs_arg_processed = false;
1662 pcum->aapcs_stack_words = 0;
1663 pcum->aapcs_stack_size = 0;
1664
1665 return;
1666}
1667
1668static void
1669aarch64_function_arg_advance (cumulative_args_t pcum_v,
1670 enum machine_mode mode,
1671 const_tree type,
1672 bool named)
1673{
1674 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1675 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1676 {
1677 aarch64_layout_arg (pcum_v, mode, type, named);
1678 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1679 != (pcum->aapcs_stack_words != 0));
1680 pcum->aapcs_arg_processed = false;
1681 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1682 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1683 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1684 pcum->aapcs_stack_words = 0;
1685 pcum->aapcs_reg = NULL_RTX;
1686 }
1687}
1688
1689bool
1690aarch64_function_arg_regno_p (unsigned regno)
1691{
1692 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1693 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1694}
1695
1696/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1697 PARM_BOUNDARY bits of alignment, but will be given anything up
1698 to STACK_BOUNDARY bits if the type requires it. This makes sure
1699 that both before and after the layout of each argument, the Next
1700 Stacked Argument Address (NSAA) will have a minimum alignment of
1701 8 bytes. */
1702
1703static unsigned int
1704aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1705{
1706 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1707
1708 if (alignment < PARM_BOUNDARY)
1709 alignment = PARM_BOUNDARY;
1710 if (alignment > STACK_BOUNDARY)
1711 alignment = STACK_BOUNDARY;
1712 return alignment;
1713}
1714
1715/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1716
1717 Return true if an argument passed on the stack should be padded upwards,
1718 i.e. if the least-significant byte of the stack slot has useful data.
1719
1720 Small aggregate types are placed in the lowest memory address.
1721
1722 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1723
1724bool
1725aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1726{
1727 /* On little-endian targets, the least significant byte of every stack
1728 argument is passed at the lowest byte address of the stack slot. */
1729 if (!BYTES_BIG_ENDIAN)
1730 return true;
1731
00edcfbe 1732 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
1733 the least significant byte of a stack argument is passed at the highest
1734 byte address of the stack slot. */
1735 if (type
00edcfbe
YZ
1736 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1737 || POINTER_TYPE_P (type))
43e9d192
IB
1738 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1739 return false;
1740
1741 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1742 return true;
1743}
1744
1745/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1746
1747 It specifies padding for the last (may also be the only)
1748 element of a block move between registers and memory. If
1749 assuming the block is in the memory, padding upward means that
1750 the last element is padded after its highest significant byte,
1751 while in downward padding, the last element is padded at the
1752 its least significant byte side.
1753
1754 Small aggregates and small complex types are always padded
1755 upwards.
1756
1757 We don't need to worry about homogeneous floating-point or
1758 short-vector aggregates; their move is not affected by the
1759 padding direction determined here. Regardless of endianness,
1760 each element of such an aggregate is put in the least
1761 significant bits of a fp/simd register.
1762
1763 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1764 register has useful data, and return the opposite if the most
1765 significant byte does. */
1766
1767bool
1768aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1769 bool first ATTRIBUTE_UNUSED)
1770{
1771
1772 /* Small composite types are always padded upward. */
1773 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1774 {
1775 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1776 : GET_MODE_SIZE (mode));
1777 if (size < 2 * UNITS_PER_WORD)
1778 return true;
1779 }
1780
1781 /* Otherwise, use the default padding. */
1782 return !BYTES_BIG_ENDIAN;
1783}
1784
1785static enum machine_mode
1786aarch64_libgcc_cmp_return_mode (void)
1787{
1788 return SImode;
1789}
1790
1791static bool
1792aarch64_frame_pointer_required (void)
1793{
1794 /* If the function contains dynamic stack allocations, we need to
1795 use the frame pointer to access the static parts of the frame. */
1796 if (cfun->calls_alloca)
1797 return true;
1798
0b7f8166
MS
1799 /* In aarch64_override_options_after_change
1800 flag_omit_leaf_frame_pointer turns off the frame pointer by
1801 default. Turn it back on now if we've not got a leaf
1802 function. */
1803 if (flag_omit_leaf_frame_pointer
1804 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1805 return true;
43e9d192 1806
0b7f8166 1807 return false;
43e9d192
IB
1808}
1809
1810/* Mark the registers that need to be saved by the callee and calculate
1811 the size of the callee-saved registers area and frame record (both FP
1812 and LR may be omitted). */
1813static void
1814aarch64_layout_frame (void)
1815{
1816 HOST_WIDE_INT offset = 0;
1817 int regno;
1818
1819 if (reload_completed && cfun->machine->frame.laid_out)
1820 return;
1821
43e9d192
IB
1822 /* First mark all the registers that really need to be saved... */
1823 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1824 cfun->machine->frame.reg_offset[regno] = -1;
1825
1826 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1827 cfun->machine->frame.reg_offset[regno] = -1;
1828
1829 /* ... that includes the eh data registers (if needed)... */
1830 if (crtl->calls_eh_return)
1831 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1832 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1833
1834 /* ... and any callee saved register that dataflow says is live. */
1835 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1836 if (df_regs_ever_live_p (regno)
1837 && !call_used_regs[regno])
1838 cfun->machine->frame.reg_offset[regno] = 0;
1839
1840 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1841 if (df_regs_ever_live_p (regno)
1842 && !call_used_regs[regno])
1843 cfun->machine->frame.reg_offset[regno] = 0;
1844
1845 if (frame_pointer_needed)
1846 {
1847 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1848 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1849 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1850 }
1851
1852 /* Now assign stack slots for them. */
1853 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1854 if (cfun->machine->frame.reg_offset[regno] != -1)
1855 {
1856 cfun->machine->frame.reg_offset[regno] = offset;
1857 offset += UNITS_PER_WORD;
1858 }
1859
1860 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1861 if (cfun->machine->frame.reg_offset[regno] != -1)
1862 {
1863 cfun->machine->frame.reg_offset[regno] = offset;
1864 offset += UNITS_PER_WORD;
1865 }
1866
1867 if (frame_pointer_needed)
1868 {
1869 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1870 offset += UNITS_PER_WORD;
43e9d192
IB
1871 }
1872
1873 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1874 {
1875 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1876 offset += UNITS_PER_WORD;
43e9d192
IB
1877 }
1878
1879 cfun->machine->frame.padding0 =
1880 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1881 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1882
1883 cfun->machine->frame.saved_regs_size = offset;
1884 cfun->machine->frame.laid_out = true;
1885}
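/* Illustration (not part of this file): a minimal standalone sketch of the
   slot assignment above.  Registers still marked -1 are skipped; the rest get
   consecutive UNITS_PER_WORD slots, and the total is rounded up to the
   16-byte stack boundary, the slack becoming padding0.  The register count
   and the particular live set below are made-up example values.  */

#include <stdio.h>

#define WORD 8              /* UNITS_PER_WORD on AArch64.  */
#define STACK_ALIGN 16      /* STACK_BOUNDARY / BITS_PER_UNIT.  */
#define NREGS 8             /* Illustrative number of candidate registers.  */

int
main (void)
{
  /* -1 means "not saved", mirroring reg_offset[] above.  */
  long offsets[NREGS] = { 0, -1, 0, 0, 0, -1, 0, -1 };
  long offset = 0;
  int i;

  for (i = 0; i < NREGS; i++)
    if (offsets[i] != -1)
      {
	offsets[i] = offset;
	offset += WORD;
      }

  long padding0 = (offset + STACK_ALIGN - 1) / STACK_ALIGN * STACK_ALIGN - offset;
  long saved_regs_size = offset + padding0;

  /* Five saved registers -> 40 bytes used -> rounded to 48, padding 8.  */
  printf ("saved_regs_size = %ld, padding0 = %ld\n", saved_regs_size, padding0);
  return 0;
}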
1886
1887/* Make the last instruction frame-related and note that it performs
1888 the operation described by FRAME_PATTERN. */
1889
1890static void
1891aarch64_set_frame_expr (rtx frame_pattern)
1892{
1893 rtx insn;
1894
1895 insn = get_last_insn ();
1896 RTX_FRAME_RELATED_P (insn) = 1;
1897 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1898 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1899 frame_pattern,
1900 REG_NOTES (insn));
1901}
1902
1903static bool
1904aarch64_register_saved_on_entry (int regno)
1905{
1906 return cfun->machine->frame.reg_offset[regno] != -1;
1907}
1908
1909
1910static void
1911aarch64_save_or_restore_fprs (int start_offset, int increment,
1912 bool restore, rtx base_rtx)
1913
1914{
1915 unsigned regno;
1916 unsigned regno2;
1917 rtx insn;
e0f396bc
MS
1918 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1919 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
43e9d192
IB
1920
1921
1922 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1923 {
1924 if (aarch64_register_saved_on_entry (regno))
1925 {
1926 rtx mem;
1927 mem = gen_mem_ref (DFmode,
1928 plus_constant (Pmode,
1929 base_rtx,
1930 start_offset));
1931
1932 for (regno2 = regno + 1;
1933 regno2 <= V31_REGNUM
1934 && !aarch64_register_saved_on_entry (regno2);
1935 regno2++)
1936 {
1937 /* Empty loop. */
1938 }
1939 if (regno2 <= V31_REGNUM &&
1940 aarch64_register_saved_on_entry (regno2))
1941 {
1942 rtx mem2;
1943 /* Next highest register to be saved. */
1944 mem2 = gen_mem_ref (DFmode,
1945 plus_constant
1946 (Pmode,
1947 base_rtx,
1948 start_offset + increment));
1949 if (restore == false)
1950 {
1951 insn = emit_insn
1952 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1953 mem2, gen_rtx_REG (DFmode, regno2)));
1954
1955 }
1956 else
1957 {
1958 insn = emit_insn
1959 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1960 gen_rtx_REG (DFmode, regno2), mem2));
1961
e0f396bc
MS
1962 add_reg_note (insn, REG_CFA_RESTORE,
1963 gen_rtx_REG (DFmode, regno));
1964 add_reg_note (insn, REG_CFA_RESTORE,
1965 gen_rtx_REG (DFmode, regno2));
43e9d192
IB
1966 }
1967
1968 /* The first part of a frame-related parallel insn
1969 is always assumed to be relevant to the frame
1970 		 calculations; subsequent parts are only
1971 frame-related if explicitly marked. */
e0f396bc 1972 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
43e9d192
IB
1973 regno = regno2;
1974 start_offset += increment * 2;
1975 }
1976 else
1977 {
1978 if (restore == false)
1979 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1980 else
1981 {
1982 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
e0f396bc
MS
1983 add_reg_note (insn, REG_CFA_RESTORE,
1984 gen_rtx_REG (DImode, regno));
43e9d192
IB
1985 }
1986 start_offset += increment;
1987 }
1988 RTX_FRAME_RELATED_P (insn) = 1;
1989 }
1990 }
1991
1992}
1993
1994
1995/* Offset from the stack pointer of where the saves and
1996   restores have to happen.  */
1997static void
1998aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1999 bool restore)
2000{
2001 rtx insn;
2002 rtx base_rtx = stack_pointer_rtx;
2003 HOST_WIDE_INT start_offset = offset;
2004 HOST_WIDE_INT increment = UNITS_PER_WORD;
2005 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
2006 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
2007 unsigned regno;
2008 unsigned regno2;
2009
2010 for (regno = R0_REGNUM; regno <= limit; regno++)
2011 {
2012 if (aarch64_register_saved_on_entry (regno))
2013 {
2014 rtx mem;
2015 mem = gen_mem_ref (Pmode,
2016 plus_constant (Pmode,
2017 base_rtx,
2018 start_offset));
2019
2020 for (regno2 = regno + 1;
2021 regno2 <= limit
2022 && !aarch64_register_saved_on_entry (regno2);
2023 regno2++)
2024 {
2025 /* Empty loop. */
2026 }
2027 if (regno2 <= limit &&
2028 aarch64_register_saved_on_entry (regno2))
2029 {
2030 rtx mem2;
2031 /* Next highest register to be saved. */
2032 mem2 = gen_mem_ref (Pmode,
2033 plus_constant
2034 (Pmode,
2035 base_rtx,
2036 start_offset + increment));
2037 if (restore == false)
2038 {
2039 insn = emit_insn
2040 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
2041 mem2, gen_rtx_REG (DImode, regno2)));
2042
2043 }
2044 else
2045 {
2046 insn = emit_insn
2047 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
2048 gen_rtx_REG (DImode, regno2), mem2));
2049
2050 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2051 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
2052 }
2053
2054 /* The first part of a frame-related parallel insn
2055 is always assumed to be relevant to the frame
2056 	     calculations; subsequent parts are only
2057 frame-related if explicitly marked. */
2058 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
2059 1)) = 1;
2060 regno = regno2;
2061 start_offset += increment * 2;
2062 }
2063 else
2064 {
2065 if (restore == false)
2066 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
2067 else
2068 {
2069 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
2070 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2071 }
2072 start_offset += increment;
2073 }
2074 RTX_FRAME_RELATED_P (insn) = 1;
2075 }
2076 }
2077
2078 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
2079
2080}
2081
2082/* AArch64 stack frames generated by this compiler look like:
2083
2084 +-------------------------------+
2085 | |
2086 | incoming stack arguments |
2087 | |
2088 +-------------------------------+ <-- arg_pointer_rtx
2089 | |
2090 | callee-allocated save area |
2091 | for register varargs |
2092 | |
43e9d192
IB
2093 +-------------------------------+ <-- frame_pointer_rtx
2094 | |
454fdba9 2095 | local variables |
43e9d192
IB
2096 | |
2097 +-------------------------------+
454fdba9
RL
2098 | padding0 | \
2099 +-------------------------------+ |
2100 | | |
2101 | | |
2102 | callee-saved registers | | frame.saved_regs_size
2103 | | |
2104 +-------------------------------+ |
2105 | LR' | |
2106 +-------------------------------+ |
2107 | FP' | /
43e9d192
IB
2108 P +-------------------------------+ <-- hard_frame_pointer_rtx
2109 | dynamic allocation |
2110 +-------------------------------+
2111 | |
2112 | outgoing stack arguments |
2113 | |
2114 +-------------------------------+ <-- stack_pointer_rtx
2115
2116 Dynamic stack allocations such as alloca insert data at point P.
2117 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2118 hard_frame_pointer_rtx unchanged. */
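/* Illustration (not part of this file): the size bookkeeping used by the
   prologue and epilogue below, for assumed example sizes.  frame_size is the
   16-byte-aligned total; in the small-frame path, fp_offset is where the
   frame record (and hence the new FP) ends up relative to SP after the single
   initial adjustment.  */

#include <stdio.h>

#define STACK_ALIGN 16

static long
round_up (long x, long align)
{
  return (x + align - 1) / align * align;
}

int
main (void)
{
  /* Illustrative assumptions only.  */
  long locals_and_varargs = 40;   /* get_frame_size () + saved_varargs_size.  */
  long saved_regs_size    = 48;   /* Callee-saved area incl. frame record.  */
  long outgoing_args      = 16;   /* crtl->outgoing_args_size.  */

  long frame_size = round_up (locals_and_varargs + saved_regs_size
			      + outgoing_args, STACK_ALIGN);
  long fp_offset  = frame_size - locals_and_varargs - saved_regs_size;

  /* sub sp, sp, #frame_size; the frame record then sits at sp + fp_offset.  */
  printf ("frame_size = %ld, fp_offset = %ld\n", frame_size, fp_offset);
  return 0;
}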
2119
2120/* Generate the prologue instructions for entry into a function.
2121 Establish the stack frame by decreasing the stack pointer with a
2122 properly calculated size and, if necessary, create a frame record
2123 filled with the values of LR and previous frame pointer. The
6991c977 2124 current FP is also set up if it is in use. */
43e9d192
IB
2125
2126void
2127aarch64_expand_prologue (void)
2128{
2129 /* sub sp, sp, #<frame_size>
2130 stp {fp, lr}, [sp, #<frame_size> - 16]
2131 add fp, sp, #<frame_size> - hardfp_offset
2132 stp {cs_reg}, [fp, #-16] etc.
2133
2134 sub sp, sp, <final_adjustment_if_any>
2135 */
2136 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2137 HOST_WIDE_INT frame_size, offset;
2138 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2139 rtx insn;
2140
2141 aarch64_layout_frame ();
2142 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2143 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2144 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2145 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2146 + crtl->outgoing_args_size);
2147 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2148 STACK_BOUNDARY / BITS_PER_UNIT);
2149
2150 if (flag_stack_usage_info)
2151 current_function_static_stack_size = frame_size;
2152
2153 fp_offset = (offset
2154 - original_frame_size
2155 - cfun->machine->frame.saved_regs_size);
2156
44c0e7b9 2157 /* Store pairs and load pairs have a range only -512 to 504. */
43e9d192
IB
2158 if (offset >= 512)
2159 {
2160 /* When the frame has a large size, an initial decrease is done on
2161 the stack pointer to jump over the callee-allocated save area for
2162 register varargs, the local variable area and/or the callee-saved
2163 register area. This will allow the pre-index write-back
2164 store pair instructions to be used for setting up the stack frame
2165 efficiently. */
2166 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2167 if (offset >= 512)
2168 offset = cfun->machine->frame.saved_regs_size;
2169
2170 frame_size -= (offset + crtl->outgoing_args_size);
2171 fp_offset = 0;
2172
2173 if (frame_size >= 0x1000000)
2174 {
2175 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2176 emit_move_insn (op0, GEN_INT (-frame_size));
2177 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2178 aarch64_set_frame_expr (gen_rtx_SET
2179 (Pmode, stack_pointer_rtx,
f6fe771a
RL
2180 plus_constant (Pmode,
2181 stack_pointer_rtx,
2182 -frame_size)));
43e9d192
IB
2183 }
2184 else if (frame_size > 0)
2185 {
2186 if ((frame_size & 0xfff) != frame_size)
2187 {
2188 insn = emit_insn (gen_add2_insn
2189 (stack_pointer_rtx,
2190 GEN_INT (-(frame_size
2191 & ~(HOST_WIDE_INT)0xfff))));
2192 RTX_FRAME_RELATED_P (insn) = 1;
2193 }
2194 if ((frame_size & 0xfff) != 0)
2195 {
2196 insn = emit_insn (gen_add2_insn
2197 (stack_pointer_rtx,
2198 GEN_INT (-(frame_size
2199 & (HOST_WIDE_INT)0xfff))));
2200 RTX_FRAME_RELATED_P (insn) = 1;
2201 }
2202 }
2203 }
2204 else
2205 frame_size = -1;
2206
2207 if (offset > 0)
2208 {
2209       /* If the frame pointer is needed, save the frame pointer and lr
2210 	 first.  Make the frame pointer point to the location of the
2211 old frame pointer on the stack. */
2212 if (frame_pointer_needed)
2213 {
2214 rtx mem_fp, mem_lr;
2215
2216 if (fp_offset)
2217 {
2218 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2219 GEN_INT (-offset)));
2220 RTX_FRAME_RELATED_P (insn) = 1;
2221 aarch64_set_frame_expr (gen_rtx_SET
2222 (Pmode, stack_pointer_rtx,
2223 gen_rtx_MINUS (Pmode,
2224 stack_pointer_rtx,
2225 GEN_INT (offset))));
2226 mem_fp = gen_frame_mem (DImode,
2227 plus_constant (Pmode,
2228 stack_pointer_rtx,
2229 fp_offset));
2230 mem_lr = gen_frame_mem (DImode,
2231 plus_constant (Pmode,
2232 stack_pointer_rtx,
2233 fp_offset
2234 + UNITS_PER_WORD));
2235 insn = emit_insn (gen_store_pairdi (mem_fp,
2236 hard_frame_pointer_rtx,
2237 mem_lr,
2238 gen_rtx_REG (DImode,
2239 LR_REGNUM)));
2240 }
2241 else
2242 {
2243 insn = emit_insn (gen_storewb_pairdi_di
2244 (stack_pointer_rtx, stack_pointer_rtx,
2245 hard_frame_pointer_rtx,
2246 gen_rtx_REG (DImode, LR_REGNUM),
2247 GEN_INT (-offset),
2248 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2249 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2250 }
2251
2252 /* The first part of a frame-related parallel insn is always
2253 assumed to be relevant to the frame calculations;
2254 	     subsequent parts are only frame-related if explicitly
2255 marked. */
2256 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2257 RTX_FRAME_RELATED_P (insn) = 1;
2258
2259 /* Set up frame pointer to point to the location of the
2260 previous frame pointer on the stack. */
2261 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2262 stack_pointer_rtx,
2263 GEN_INT (fp_offset)));
2264 aarch64_set_frame_expr (gen_rtx_SET
2265 (Pmode, hard_frame_pointer_rtx,
f6fe771a
RL
2266 plus_constant (Pmode,
2267 stack_pointer_rtx,
2268 fp_offset)));
43e9d192
IB
2269 RTX_FRAME_RELATED_P (insn) = 1;
2270 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2271 hard_frame_pointer_rtx));
2272 }
2273 else
2274 {
2275 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2276 GEN_INT (-offset)));
2277 RTX_FRAME_RELATED_P (insn) = 1;
2278 }
2279
2280 aarch64_save_or_restore_callee_save_registers
2281 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2282 }
2283
2284   /* When offset >= 512,
2285 sub sp, sp, #<outgoing_args_size> */
2286 if (frame_size > -1)
2287 {
2288 if (crtl->outgoing_args_size > 0)
2289 {
2290 insn = emit_insn (gen_add2_insn
2291 (stack_pointer_rtx,
2292 GEN_INT (- crtl->outgoing_args_size)));
2293 RTX_FRAME_RELATED_P (insn) = 1;
2294 }
2295 }
2296}
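/* Illustration (not part of this file): when the remaining adjustment fits in
   24 bits but not in 12, the code above splits it into a 12-bit part shifted
   left by 12 and a plain 12-bit part, each handled by one add/sub with an
   immediate.  The value below is an arbitrary example; anything of 0x1000000
   or more takes the scratch-register path instead.  */

#include <stdio.h>

int
main (void)
{
  long frame_size = 0x12345;       /* Assumed example; must be < 0x1000000.  */
  long hi = frame_size & ~0xfffL;  /* "sub sp, sp, #hi" (imm12, LSL #12).  */
  long lo = frame_size & 0xfffL;   /* "sub sp, sp, #lo" (imm12).  */

  printf ("0x%lx = 0x%lx + 0x%lx\n", frame_size, hi, lo);
  return 0;
}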
2297
2298/* Generate the epilogue instructions for returning from a function. */
2299void
2300aarch64_expand_epilogue (bool for_sibcall)
2301{
2302 HOST_WIDE_INT original_frame_size, frame_size, offset;
2303 HOST_WIDE_INT fp_offset;
2304 rtx insn;
44c0e7b9 2305 rtx cfa_reg;
43e9d192
IB
2306
2307 aarch64_layout_frame ();
2308 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2309 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2310 + crtl->outgoing_args_size);
2311 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2312 STACK_BOUNDARY / BITS_PER_UNIT);
2313
2314 fp_offset = (offset
2315 - original_frame_size
2316 - cfun->machine->frame.saved_regs_size);
2317
44c0e7b9
YZ
2318 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2319
2320 /* Store pairs and load pairs have a range only -512 to 504. */
43e9d192
IB
2321 if (offset >= 512)
2322 {
2323 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2324 if (offset >= 512)
2325 offset = cfun->machine->frame.saved_regs_size;
2326
2327 frame_size -= (offset + crtl->outgoing_args_size);
2328 fp_offset = 0;
2329 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2330 {
2331 insn = emit_insn (gen_add2_insn
2332 (stack_pointer_rtx,
2333 GEN_INT (crtl->outgoing_args_size)));
2334 RTX_FRAME_RELATED_P (insn) = 1;
2335 }
2336 }
2337 else
2338 frame_size = -1;
2339
2340 /* If there were outgoing arguments or we've done dynamic stack
2341 allocation, then restore the stack pointer from the frame
2342 pointer. This is at most one insn and more efficient than using
2343 GCC's internal mechanism. */
2344 if (frame_pointer_needed
2345 && (crtl->outgoing_args_size || cfun->calls_alloca))
2346 {
2347 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2348 hard_frame_pointer_rtx,
2349 GEN_INT (- fp_offset)));
2350 RTX_FRAME_RELATED_P (insn) = 1;
44c0e7b9
YZ
2351 /* As SP is set to (FP - fp_offset), according to the rules in
2352 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2353 from the value of SP from now on. */
2354 cfa_reg = stack_pointer_rtx;
43e9d192
IB
2355 }
2356
2357 aarch64_save_or_restore_callee_save_registers
2358 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2359
2360 /* Restore the frame pointer and lr if the frame pointer is needed. */
2361 if (offset > 0)
2362 {
2363 if (frame_pointer_needed)
2364 {
2365 rtx mem_fp, mem_lr;
2366
2367 if (fp_offset)
2368 {
2369 mem_fp = gen_frame_mem (DImode,
2370 plus_constant (Pmode,
2371 stack_pointer_rtx,
2372 fp_offset));
2373 mem_lr = gen_frame_mem (DImode,
2374 plus_constant (Pmode,
2375 stack_pointer_rtx,
2376 fp_offset
2377 + UNITS_PER_WORD));
2378 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2379 mem_fp,
2380 gen_rtx_REG (DImode,
2381 LR_REGNUM),
2382 mem_lr));
2383 }
2384 else
2385 {
2386 insn = emit_insn (gen_loadwb_pairdi_di
2387 (stack_pointer_rtx,
2388 stack_pointer_rtx,
2389 hard_frame_pointer_rtx,
2390 gen_rtx_REG (DImode, LR_REGNUM),
2391 GEN_INT (offset),
2392 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2393 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
44c0e7b9
YZ
2394 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2395 (gen_rtx_SET (Pmode, stack_pointer_rtx,
dc2d3c67
YZ
2396 plus_constant (Pmode, cfa_reg,
2397 offset))));
43e9d192
IB
2398 }
2399
2400 /* The first part of a frame-related parallel insn
2401 is always assumed to be relevant to the frame
2402 	     calculations; subsequent parts are only
2403 frame-related if explicitly marked. */
2404 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2405 RTX_FRAME_RELATED_P (insn) = 1;
2406 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2407 add_reg_note (insn, REG_CFA_RESTORE,
2408 gen_rtx_REG (DImode, LR_REGNUM));
2409
2410 if (fp_offset)
2411 {
2412 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2413 GEN_INT (offset)));
2414 RTX_FRAME_RELATED_P (insn) = 1;
2415 }
2416 }
43e9d192
IB
2417 else
2418 {
2419 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2420 GEN_INT (offset)));
2421 RTX_FRAME_RELATED_P (insn) = 1;
2422 }
2423 }
2424
2425 /* Stack adjustment for exception handler. */
2426 if (crtl->calls_eh_return)
2427 {
2428 /* We need to unwind the stack by the offset computed by
2429 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2430 based on SP. Ideally we would update the SP and define the
2431 CFA along the lines of:
2432
2433 SP = SP + EH_RETURN_STACKADJ_RTX
2434 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2435
2436 However the dwarf emitter only understands a constant
2437 register offset.
2438
631b20a7 2439 The solution chosen here is to use the otherwise unused IP0
43e9d192
IB
2440 as a temporary register to hold the current SP value. The
2441     CFA is described using IP0, then SP is modified.  */
2442
2443 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2444
2445 insn = emit_move_insn (ip0, stack_pointer_rtx);
2446 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2447 RTX_FRAME_RELATED_P (insn) = 1;
2448
2449 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2450
2451 /* Ensure the assignment to IP0 does not get optimized away. */
2452 emit_use (ip0);
2453 }
2454
2455 if (frame_size > -1)
2456 {
2457 if (frame_size >= 0x1000000)
2458 {
2459 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2460 emit_move_insn (op0, GEN_INT (frame_size));
2461 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2462 aarch64_set_frame_expr (gen_rtx_SET
2463 (Pmode, stack_pointer_rtx,
f6fe771a
RL
2464 plus_constant (Pmode,
2465 stack_pointer_rtx,
2466 frame_size)));
43e9d192
IB
2467 }
2468 else if (frame_size > 0)
2469 {
2470 if ((frame_size & 0xfff) != 0)
2471 {
2472 insn = emit_insn (gen_add2_insn
2473 (stack_pointer_rtx,
2474 GEN_INT ((frame_size
2475 & (HOST_WIDE_INT) 0xfff))));
2476 RTX_FRAME_RELATED_P (insn) = 1;
2477 }
2478 if ((frame_size & 0xfff) != frame_size)
2479 {
2480 insn = emit_insn (gen_add2_insn
2481 (stack_pointer_rtx,
2482 GEN_INT ((frame_size
2483 & ~ (HOST_WIDE_INT) 0xfff))));
2484 RTX_FRAME_RELATED_P (insn) = 1;
2485 }
2486 }
2487
f6fe771a
RL
2488 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2489 plus_constant (Pmode,
2490 stack_pointer_rtx,
2491 offset)));
43e9d192
IB
2492 }
2493
2494 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2495 if (!for_sibcall)
2496 emit_jump_insn (ret_rtx);
2497}
2498
2499/* Return the place to copy the exception unwinding return address to.
2500   This will probably be a stack slot, but could (in theory) be the
2501   return register.  */
2502rtx
2503aarch64_final_eh_return_addr (void)
2504{
2505 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2506 aarch64_layout_frame ();
2507 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2508 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2509 + crtl->outgoing_args_size);
2510 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2511 STACK_BOUNDARY / BITS_PER_UNIT);
2512 fp_offset = offset
2513 - original_frame_size
2514 - cfun->machine->frame.saved_regs_size;
2515
2516 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2517 return gen_rtx_REG (DImode, LR_REGNUM);
2518
2519 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2520 result in a store to save LR introduced by builtin_eh_return () being
2521 incorrectly deleted because the alias is not detected.
2522 So in the calculation of the address to copy the exception unwinding
2523     return address to, we distinguish two cases.
2524     If FP is needed and fp_offset is 0, it means that SP = FP and hence
2525     we return an SP-relative location since all the addresses are SP-relative
2526     in this case.  This prevents the store from being optimized away.
2527     If fp_offset is not 0, then the addresses will be FP-relative and
2528     therefore we return an FP-relative location.  */
2529
2530 if (frame_pointer_needed)
2531 {
2532 if (fp_offset)
2533 return gen_frame_mem (DImode,
2534 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2535 else
2536 return gen_frame_mem (DImode,
2537 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2538 }
2539
2540 /* If FP is not needed, we calculate the location of LR, which would be
2541 at the top of the saved registers block. */
2542
2543 return gen_frame_mem (DImode,
2544 plus_constant (Pmode,
2545 stack_pointer_rtx,
2546 fp_offset
2547 + cfun->machine->frame.saved_regs_size
2548 - 2 * UNITS_PER_WORD));
2549}
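/* Illustration (not part of this file): the three memory cases described in
   the comment above, with made-up frame values; the early return of LR itself
   is not modelled.  frame_pointer_needed_p, fp_offset and saved_regs_size are
   plain stand-ins for the compiler state used above.  */

#include <stdio.h>

static int frame_pointer_needed_p = 0;
static long fp_offset = 24;
static long saved_regs_size = 48;

int
main (void)
{
  if (frame_pointer_needed_p)
    /* SP == FP when fp_offset is 0, so either base reaches the slot.  */
    printf ("%s + 8\n", fp_offset ? "FP" : "SP");
  else
    /* LR sits at the top of the saved-register block.  */
    printf ("SP + %ld\n", fp_offset + saved_regs_size - 2 * 8);
  return 0;
}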
2550
9dfc162c
JG
2551/* Possibly output code to build up a constant in a register. For
2552 the benefit of the costs infrastructure, returns the number of
2553 instructions which would be emitted. GENERATE inhibits or
2554 enables code generation. */
2555
2556static int
2557aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
43e9d192 2558{
9dfc162c
JG
2559 int insns = 0;
2560
43e9d192 2561 if (aarch64_bitmask_imm (val, DImode))
9dfc162c
JG
2562 {
2563 if (generate)
2564 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2565 insns = 1;
2566 }
43e9d192
IB
2567 else
2568 {
2569 int i;
2570 int ncount = 0;
2571 int zcount = 0;
2572 HOST_WIDE_INT valp = val >> 16;
2573 HOST_WIDE_INT valm;
2574 HOST_WIDE_INT tval;
2575
2576 for (i = 16; i < 64; i += 16)
2577 {
2578 valm = (valp & 0xffff);
2579
2580 if (valm != 0)
2581 ++ zcount;
2582
2583 if (valm != 0xffff)
2584 ++ ncount;
2585
2586 valp >>= 16;
2587 }
2588
2589 /* zcount contains the number of additional MOVK instructions
2590 required if the constant is built up with an initial MOVZ instruction,
2591 while ncount is the number of MOVK instructions required if starting
2592   with a MOVN instruction.  Choose the sequence that yields the
2593   fewer instructions, preferring MOVZ when both counts are
2594   the same.  */
2595 if (ncount < zcount)
2596 {
9dfc162c
JG
2597 if (generate)
2598 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2599 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
43e9d192 2600 tval = 0xffff;
9dfc162c 2601 insns++;
43e9d192
IB
2602 }
2603 else
2604 {
9dfc162c
JG
2605 if (generate)
2606 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2607 GEN_INT (val & 0xffff));
43e9d192 2608 tval = 0;
9dfc162c 2609 insns++;
43e9d192
IB
2610 }
2611
2612 val >>= 16;
2613
2614 for (i = 16; i < 64; i += 16)
2615 {
2616 if ((val & 0xffff) != tval)
9dfc162c
JG
2617 {
2618 if (generate)
2619 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2620 GEN_INT (i),
2621 GEN_INT (val & 0xffff)));
2622 insns++;
2623 }
43e9d192
IB
2624 val >>= 16;
2625 }
2626 }
9dfc162c 2627 return insns;
43e9d192
IB
2628}
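/* Illustration (not part of this file): the MOVZ-versus-MOVN choice above,
   ignoring the bitmask-immediate shortcut.  Count the upper 16-bit chunks
   that would still need a MOVK after each possible starting instruction and
   pick the cheaper start; ties go to MOVZ.  */

#include <stdio.h>
#include <stdint.h>

static int
mov_sequence_length (uint64_t val)
{
  int zcount = 0, ncount = 0;
  for (int i = 16; i < 64; i += 16)
    {
      uint64_t chunk = (val >> i) & 0xffff;
      if (chunk != 0)
	zcount++;             /* Extra MOVK after a MOVZ start.  */
      if (chunk != 0xffff)
	ncount++;             /* Extra MOVK after a MOVN start.  */
    }
  return 1 + (ncount < zcount ? ncount : zcount);
}

int
main (void)
{
  printf ("%d\n", mov_sequence_length (0x0000000000001234ull)); /* 1: MOVZ.  */
  printf ("%d\n", mov_sequence_length (0xffffffffffff1234ull)); /* 1: MOVN.  */
  printf ("%d\n", mov_sequence_length (0x1234567890abcdefull)); /* 4.        */
  return 0;
}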
2629
2630static void
d9600ae5 2631aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
43e9d192
IB
2632{
2633 HOST_WIDE_INT mdelta = delta;
d9600ae5
SN
2634 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2635 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
43e9d192
IB
2636
2637 if (mdelta < 0)
2638 mdelta = -mdelta;
2639
2640 if (mdelta >= 4096 * 4096)
2641 {
9dfc162c 2642 (void) aarch64_build_constant (scratchreg, delta, true);
d9600ae5 2643 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
43e9d192
IB
2644 }
2645 else if (mdelta > 0)
2646 {
43e9d192 2647 if (mdelta >= 4096)
d9600ae5
SN
2648 {
2649 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2650 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2651 if (delta < 0)
2652 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2653 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2654 else
2655 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2656 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2657 }
43e9d192 2658 if (mdelta % 4096 != 0)
d9600ae5
SN
2659 {
2660 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2661 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2662 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2663 }
43e9d192
IB
2664 }
2665}
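/* Illustration (not part of this file): the path selection in
   aarch64_add_constant above, shown for a few arbitrary deltas.  */

#include <stdio.h>

static const char *
add_constant_strategy (long delta)
{
  long mdelta = delta < 0 ? -delta : delta;

  if (mdelta >= 4096L * 4096L)
    return "build the constant in the scratch register, then add it";
  if (mdelta >= 4096)
    return "add (mdelta / 4096) << 12, then the remainder if any";
  if (mdelta > 0)
    return "single 12-bit add";
  return "nothing to emit";
}

int
main (void)
{
  long samples[] = { 0, 100, 5000, 20000000 };
  for (unsigned i = 0; i < sizeof samples / sizeof samples[0]; i++)
    printf ("%ld -> %s\n", samples[i], add_constant_strategy (samples[i]));
  return 0;
}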
2666
2667/* Output code to add DELTA to the first argument, and then jump
2668 to FUNCTION. Used for C++ multiple inheritance. */
2669static void
2670aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2671 HOST_WIDE_INT delta,
2672 HOST_WIDE_INT vcall_offset,
2673 tree function)
2674{
2675 /* The this pointer is always in x0. Note that this differs from
2676     Arm where the this pointer may be bumped to r1 if r0 is required
2677 to return a pointer to an aggregate. On AArch64 a result value
2678 pointer will be in x8. */
2679 int this_regno = R0_REGNUM;
75f1d6fc 2680 rtx this_rtx, temp0, temp1, addr, insn, funexp;
43e9d192 2681
75f1d6fc
SN
2682 reload_completed = 1;
2683 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
2684
2685 if (vcall_offset == 0)
d9600ae5 2686 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2687 else
2688 {
28514dda 2689 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 2690
75f1d6fc
SN
2691 this_rtx = gen_rtx_REG (Pmode, this_regno);
2692 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2693 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 2694
75f1d6fc
SN
2695 addr = this_rtx;
2696 if (delta != 0)
2697 {
2698 if (delta >= -256 && delta < 256)
2699 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2700 plus_constant (Pmode, this_rtx, delta));
2701 else
d9600ae5 2702 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2703 }
2704
28514dda
YZ
2705 if (Pmode == ptr_mode)
2706 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2707 else
2708 aarch64_emit_move (temp0,
2709 gen_rtx_ZERO_EXTEND (Pmode,
2710 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 2711
28514dda 2712 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 2713 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
2714 else
2715 {
9dfc162c 2716 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
75f1d6fc 2717 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
2718 }
2719
28514dda
YZ
2720 if (Pmode == ptr_mode)
2721 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2722 else
2723 aarch64_emit_move (temp1,
2724 gen_rtx_SIGN_EXTEND (Pmode,
2725 gen_rtx_MEM (ptr_mode, addr)));
2726
75f1d6fc 2727 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
2728 }
2729
75f1d6fc
SN
2730 /* Generate a tail call to the target function. */
2731 if (!TREE_USED (function))
2732 {
2733 assemble_external (function);
2734 TREE_USED (function) = 1;
2735 }
2736 funexp = XEXP (DECL_RTL (function), 0);
2737 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2738 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2739 SIBLING_CALL_P (insn) = 1;
2740
2741 insn = get_insns ();
2742 shorten_branches (insn);
2743 final_start_function (insn, file, 1);
2744 final (insn, file, 1);
43e9d192 2745 final_end_function ();
75f1d6fc
SN
2746
2747 /* Stop pretending to be a post-reload pass. */
2748 reload_completed = 0;
43e9d192
IB
2749}
2750
43e9d192
IB
2751static int
2752aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2753{
2754 if (GET_CODE (*x) == SYMBOL_REF)
2755 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2756
2757 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2758 TLS offsets, not real symbol references. */
2759 if (GET_CODE (*x) == UNSPEC
2760 && XINT (*x, 1) == UNSPEC_TLS)
2761 return -1;
2762
2763 return 0;
2764}
2765
2766static bool
2767aarch64_tls_referenced_p (rtx x)
2768{
2769 if (!TARGET_HAVE_TLS)
2770 return false;
2771
2772 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2773}
2774
2775
2776static int
2777aarch64_bitmasks_cmp (const void *i1, const void *i2)
2778{
2779 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2780 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2781
2782 if (*imm1 < *imm2)
2783 return -1;
2784 if (*imm1 > *imm2)
2785 return +1;
2786 return 0;
2787}
2788
2789
2790static void
2791aarch64_build_bitmask_table (void)
2792{
2793 unsigned HOST_WIDE_INT mask, imm;
2794 unsigned int log_e, e, s, r;
2795 unsigned int nimms = 0;
2796
2797 for (log_e = 1; log_e <= 6; log_e++)
2798 {
2799 e = 1 << log_e;
2800 if (e == 64)
2801 mask = ~(HOST_WIDE_INT) 0;
2802 else
2803 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2804 for (s = 1; s < e; s++)
2805 {
2806 for (r = 0; r < e; r++)
2807 {
2808 		  /* Set s consecutive bits to 1 (s < 64).  */
2809 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2810 		  /* Rotate right by r.  */
2811 if (r != 0)
2812 imm = ((imm >> r) | (imm << (e - r))) & mask;
2813 		  /* Replicate the constant depending on the element size.  */
2814 switch (log_e) {
2815 case 1: imm |= (imm << 2);
2816 case 2: imm |= (imm << 4);
2817 case 3: imm |= (imm << 8);
2818 case 4: imm |= (imm << 16);
2819 case 5: imm |= (imm << 32);
2820 case 6:
2821 break;
2822 default:
2823 gcc_unreachable ();
2824 }
2825 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2826 aarch64_bitmasks[nimms++] = imm;
2827 }
2828 }
2829 }
2830
2831 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2832 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2833 aarch64_bitmasks_cmp);
2834}
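/* Illustration (not part of this file): the same enumeration as above,
   written standalone with fixed-width types.  For each element size e in
   {2, 4, ..., 64} it takes a run of s ones, rotates it right by r within the
   element and replicates it across 64 bits; the loop bounds give
   2*1 + 4*3 + 8*7 + 16*15 + 32*31 + 64*63 = 5334 entries, which is what the
   assert above checks against AARCH64_NUM_BITMASKS.  */

#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  unsigned count = 0;

  for (unsigned log_e = 1; log_e <= 6; log_e++)
    {
      unsigned e = 1u << log_e;
      uint64_t mask = (e == 64) ? ~(uint64_t) 0 : (((uint64_t) 1 << e) - 1);

      for (unsigned s = 1; s < e; s++)
	for (unsigned r = 0; r < e; r++)
	  {
	    /* Run of s ones, rotated right by r within an e-bit element.  */
	    uint64_t imm = ((uint64_t) 1 << s) - 1;
	    if (r != 0)
	      imm = ((imm >> r) | (imm << (e - r))) & mask;
	    /* Replicate the e-bit element across all 64 bits.  */
	    for (unsigned w = e; w < 64; w *= 2)
	      imm |= imm << w;
	    (void) imm;   /* A real table would store and sort these.  */
	    count++;
	  }
    }

  printf ("%u bitmask immediates generated\n", count);   /* 5334.  */
  return 0;
}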
2835
2836
2837/* Return true if val can be encoded as a 12-bit unsigned immediate with
2838 a left shift of 0 or 12 bits. */
2839bool
2840aarch64_uimm12_shift (HOST_WIDE_INT val)
2841{
2842 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2843 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2844 );
2845}
2846
2847
2848/* Return true if val is an immediate that can be loaded into a
2849 register by a MOVZ instruction. */
2850static bool
2851aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2852{
2853 if (GET_MODE_SIZE (mode) > 4)
2854 {
2855 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2856 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2857 return 1;
2858 }
2859 else
2860 {
2861 /* Ignore sign extension. */
2862 val &= (HOST_WIDE_INT) 0xffffffff;
2863 }
2864 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2865 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2866}
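/* Illustration (not part of this file): the two immediate forms tested above,
   checked on a few arbitrary 64-bit values.  uimm12_shift mirrors
   aarch64_uimm12_shift; movz_imm64 mirrors the 64-bit half of
   aarch64_movw_imm (the 32-bit sign-extension handling is left out).  */

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static bool
uimm12_shift (uint64_t val)
{
  return (val & 0xfffull) == val || (val & (0xfffull << 12)) == val;
}

static bool
movz_imm64 (uint64_t val)
{
  for (int i = 0; i < 64; i += 16)
    if ((val & (0xffffull << i)) == val)
      return true;
  return false;
}

int
main (void)
{
  uint64_t samples[] = { 0xabc, 0xabc000, 0xabc00,
			 0xffff0000, 0xbeef000000000000ull };
  for (unsigned i = 0; i < sizeof samples / sizeof samples[0]; i++)
    printf ("0x%llx: uimm12 %d, movz %d\n",
	    (unsigned long long) samples[i],
	    uimm12_shift (samples[i]), movz_imm64 (samples[i]));
  return 0;
}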
2867
2868
2869/* Return true if val is a valid bitmask immediate. */
2870bool
2871aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2872{
2873 if (GET_MODE_SIZE (mode) < 8)
2874 {
2875 /* Replicate bit pattern. */
2876 val &= (HOST_WIDE_INT) 0xffffffff;
2877 val |= val << 32;
2878 }
2879 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2880 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2881}
2882
2883
2884/* Return true if val is an immediate that can be loaded into a
2885 register in a single instruction. */
2886bool
2887aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2888{
2889 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2890 return 1;
2891 return aarch64_bitmask_imm (val, mode);
2892}
2893
2894static bool
2895aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2896{
2897 rtx base, offset;
7eda14e1 2898
43e9d192
IB
2899 if (GET_CODE (x) == HIGH)
2900 return true;
2901
2902 split_const (x, &base, &offset);
2903 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda
YZ
2904 {
2905 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2906 != SYMBOL_FORCE_TO_MEM)
2907 return true;
2908 else
2909 /* Avoid generating a 64-bit relocation in ILP32; leave
2910 to aarch64_expand_mov_immediate to handle it properly. */
2911 return mode != ptr_mode;
2912 }
43e9d192
IB
2913
2914 return aarch64_tls_referenced_p (x);
2915}
2916
2917/* Return true if register REGNO is a valid index register.
2918 STRICT_P is true if REG_OK_STRICT is in effect. */
2919
2920bool
2921aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2922{
2923 if (!HARD_REGISTER_NUM_P (regno))
2924 {
2925 if (!strict_p)
2926 return true;
2927
2928 if (!reg_renumber)
2929 return false;
2930
2931 regno = reg_renumber[regno];
2932 }
2933 return GP_REGNUM_P (regno);
2934}
2935
2936/* Return true if register REGNO is a valid base register for mode MODE.
2937 STRICT_P is true if REG_OK_STRICT is in effect. */
2938
2939bool
2940aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2941{
2942 if (!HARD_REGISTER_NUM_P (regno))
2943 {
2944 if (!strict_p)
2945 return true;
2946
2947 if (!reg_renumber)
2948 return false;
2949
2950 regno = reg_renumber[regno];
2951 }
2952
2953 /* The fake registers will be eliminated to either the stack or
2954 hard frame pointer, both of which are usually valid base registers.
2955 Reload deals with the cases where the eliminated form isn't valid. */
2956 return (GP_REGNUM_P (regno)
2957 || regno == SP_REGNUM
2958 || regno == FRAME_POINTER_REGNUM
2959 || regno == ARG_POINTER_REGNUM);
2960}
2961
2962/* Return true if X is a valid base register for mode MODE.
2963 STRICT_P is true if REG_OK_STRICT is in effect. */
2964
2965static bool
2966aarch64_base_register_rtx_p (rtx x, bool strict_p)
2967{
2968 if (!strict_p && GET_CODE (x) == SUBREG)
2969 x = SUBREG_REG (x);
2970
2971 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2972}
2973
2974/* Return true if address offset is a valid index. If it is, fill in INFO
2975 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2976
2977static bool
2978aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2979 enum machine_mode mode, bool strict_p)
2980{
2981 enum aarch64_address_type type;
2982 rtx index;
2983 int shift;
2984
2985 /* (reg:P) */
2986 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2987 && GET_MODE (x) == Pmode)
2988 {
2989 type = ADDRESS_REG_REG;
2990 index = x;
2991 shift = 0;
2992 }
2993 /* (sign_extend:DI (reg:SI)) */
2994 else if ((GET_CODE (x) == SIGN_EXTEND
2995 || GET_CODE (x) == ZERO_EXTEND)
2996 && GET_MODE (x) == DImode
2997 && GET_MODE (XEXP (x, 0)) == SImode)
2998 {
2999 type = (GET_CODE (x) == SIGN_EXTEND)
3000 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3001 index = XEXP (x, 0);
3002 shift = 0;
3003 }
3004 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3005 else if (GET_CODE (x) == MULT
3006 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3007 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3008 && GET_MODE (XEXP (x, 0)) == DImode
3009 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3010 && CONST_INT_P (XEXP (x, 1)))
3011 {
3012 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3013 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3014 index = XEXP (XEXP (x, 0), 0);
3015 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3016 }
3017 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3018 else if (GET_CODE (x) == ASHIFT
3019 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3020 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3021 && GET_MODE (XEXP (x, 0)) == DImode
3022 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3023 && CONST_INT_P (XEXP (x, 1)))
3024 {
3025 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3026 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3027 index = XEXP (XEXP (x, 0), 0);
3028 shift = INTVAL (XEXP (x, 1));
3029 }
3030 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3031 else if ((GET_CODE (x) == SIGN_EXTRACT
3032 || GET_CODE (x) == ZERO_EXTRACT)
3033 && GET_MODE (x) == DImode
3034 && GET_CODE (XEXP (x, 0)) == MULT
3035 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3036 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3037 {
3038 type = (GET_CODE (x) == SIGN_EXTRACT)
3039 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3040 index = XEXP (XEXP (x, 0), 0);
3041 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3042 if (INTVAL (XEXP (x, 1)) != 32 + shift
3043 || INTVAL (XEXP (x, 2)) != 0)
3044 shift = -1;
3045 }
3046 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3047 (const_int 0xffffffff<<shift)) */
3048 else if (GET_CODE (x) == AND
3049 && GET_MODE (x) == DImode
3050 && GET_CODE (XEXP (x, 0)) == MULT
3051 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3052 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3053 && CONST_INT_P (XEXP (x, 1)))
3054 {
3055 type = ADDRESS_REG_UXTW;
3056 index = XEXP (XEXP (x, 0), 0);
3057 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3058 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3059 shift = -1;
3060 }
3061 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3062 else if ((GET_CODE (x) == SIGN_EXTRACT
3063 || GET_CODE (x) == ZERO_EXTRACT)
3064 && GET_MODE (x) == DImode
3065 && GET_CODE (XEXP (x, 0)) == ASHIFT
3066 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3067 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3068 {
3069 type = (GET_CODE (x) == SIGN_EXTRACT)
3070 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3071 index = XEXP (XEXP (x, 0), 0);
3072 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3073 if (INTVAL (XEXP (x, 1)) != 32 + shift
3074 || INTVAL (XEXP (x, 2)) != 0)
3075 shift = -1;
3076 }
3077 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3078 (const_int 0xffffffff<<shift)) */
3079 else if (GET_CODE (x) == AND
3080 && GET_MODE (x) == DImode
3081 && GET_CODE (XEXP (x, 0)) == ASHIFT
3082 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3083 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3084 && CONST_INT_P (XEXP (x, 1)))
3085 {
3086 type = ADDRESS_REG_UXTW;
3087 index = XEXP (XEXP (x, 0), 0);
3088 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3089 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3090 shift = -1;
3091 }
3092 /* (mult:P (reg:P) (const_int scale)) */
3093 else if (GET_CODE (x) == MULT
3094 && GET_MODE (x) == Pmode
3095 && GET_MODE (XEXP (x, 0)) == Pmode
3096 && CONST_INT_P (XEXP (x, 1)))
3097 {
3098 type = ADDRESS_REG_REG;
3099 index = XEXP (x, 0);
3100 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3101 }
3102 /* (ashift:P (reg:P) (const_int shift)) */
3103 else if (GET_CODE (x) == ASHIFT
3104 && GET_MODE (x) == Pmode
3105 && GET_MODE (XEXP (x, 0)) == Pmode
3106 && CONST_INT_P (XEXP (x, 1)))
3107 {
3108 type = ADDRESS_REG_REG;
3109 index = XEXP (x, 0);
3110 shift = INTVAL (XEXP (x, 1));
3111 }
3112 else
3113 return false;
3114
3115 if (GET_CODE (index) == SUBREG)
3116 index = SUBREG_REG (index);
3117
3118 if ((shift == 0 ||
3119 (shift > 0 && shift <= 3
3120 && (1 << shift) == GET_MODE_SIZE (mode)))
3121 && REG_P (index)
3122 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3123 {
3124 info->type = type;
3125 info->offset = index;
3126 info->shift = shift;
3127 return true;
3128 }
3129
3130 return false;
3131}
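/* Illustration (not part of this file): the acceptance test at the end of
   aarch64_classify_index.  An index may be unscaled, or scaled by exactly the
   access size with a shift of 1 to 3.  */

#include <stdio.h>
#include <stdbool.h>

static bool
index_shift_ok (int shift, int mode_size)
{
  return shift == 0
	 || (shift > 0 && shift <= 3 && (1 << shift) == mode_size);
}

int
main (void)
{
  printf ("%d\n", index_shift_ok (0, 4));   /* 1: unscaled index, any size.  */
  printf ("%d\n", index_shift_ok (2, 4));   /* 1: x4 scaling, 4-byte access.  */
  printf ("%d\n", index_shift_ok (3, 4));   /* 0: x8 scaling, 4-byte access.  */
  printf ("%d\n", index_shift_ok (4, 16));  /* 0: shift 4 never allowed here.  */
  return 0;
}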
3132
3133static inline bool
3134offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3135{
3136 return (offset >= -64 * GET_MODE_SIZE (mode)
3137 && offset < 64 * GET_MODE_SIZE (mode)
3138 && offset % GET_MODE_SIZE (mode) == 0);
3139}
3140
3141static inline bool
3142offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3143 HOST_WIDE_INT offset)
3144{
3145 return offset >= -256 && offset < 256;
3146}
3147
3148static inline bool
3149offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3150{
3151 return (offset >= 0
3152 && offset < 4096 * GET_MODE_SIZE (mode)
3153 && offset % GET_MODE_SIZE (mode) == 0);
3154}
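/* Illustration (not part of this file): the three offset predicates above,
   evaluated for an 8-byte access.  The first column is the load/store-pair
   range, i.e. the "-512 to 504" mentioned in the prologue and epilogue
   comments.  */

#include <stdio.h>
#include <stdbool.h>

static bool
offset_7bit_signed_scaled (long size, long offset)
{
  return offset >= -64 * size && offset < 64 * size && offset % size == 0;
}

static bool
offset_9bit_signed_unscaled (long offset)
{
  return offset >= -256 && offset < 256;
}

static bool
offset_12bit_unsigned_scaled (long size, long offset)
{
  return offset >= 0 && offset < 4096 * size && offset % size == 0;
}

int
main (void)
{
  long size = 8;               /* GET_MODE_SIZE (DImode), for example.  */
  long samples[] = { -520, -512, 504, 508, 4088, 32760, 32768 };

  for (unsigned i = 0; i < sizeof samples / sizeof samples[0]; i++)
    printf ("%6ld: pair %d, unscaled %d, scaled %d\n", samples[i],
	    offset_7bit_signed_scaled (size, samples[i]),
	    offset_9bit_signed_unscaled (samples[i]),
	    offset_12bit_unsigned_scaled (size, samples[i]));
  return 0;
}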
3155
3156/* Return true if X is a valid address for machine mode MODE. If it is,
3157 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3158 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3159
3160static bool
3161aarch64_classify_address (struct aarch64_address_info *info,
3162 rtx x, enum machine_mode mode,
3163 RTX_CODE outer_code, bool strict_p)
3164{
3165 enum rtx_code code = GET_CODE (x);
3166 rtx op0, op1;
3167 bool allow_reg_index_p =
3168 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3169
3170 /* Don't support anything other than POST_INC or REG addressing for
3171 AdvSIMD. */
3172 if (aarch64_vector_mode_p (mode)
3173 && (code != POST_INC && code != REG))
3174 return false;
3175
3176 switch (code)
3177 {
3178 case REG:
3179 case SUBREG:
3180 info->type = ADDRESS_REG_IMM;
3181 info->base = x;
3182 info->offset = const0_rtx;
3183 return aarch64_base_register_rtx_p (x, strict_p);
3184
3185 case PLUS:
3186 op0 = XEXP (x, 0);
3187 op1 = XEXP (x, 1);
3188 if (GET_MODE_SIZE (mode) != 0
3189 && CONST_INT_P (op1)
3190 && aarch64_base_register_rtx_p (op0, strict_p))
3191 {
3192 HOST_WIDE_INT offset = INTVAL (op1);
3193
3194 info->type = ADDRESS_REG_IMM;
3195 info->base = op0;
3196 info->offset = op1;
3197
3198 /* TImode and TFmode values are allowed in both pairs of X
3199 registers and individual Q registers. The available
3200 address modes are:
3201 X,X: 7-bit signed scaled offset
3202 Q: 9-bit signed offset
3203 	     We conservatively require an offset representable in both modes.
3204 */
3205 if (mode == TImode || mode == TFmode)
3206 return (offset_7bit_signed_scaled_p (mode, offset)
3207 && offset_9bit_signed_unscaled_p (mode, offset));
3208
3209 if (outer_code == PARALLEL)
3210 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3211 && offset_7bit_signed_scaled_p (mode, offset));
3212 else
3213 return (offset_9bit_signed_unscaled_p (mode, offset)
3214 || offset_12bit_unsigned_scaled_p (mode, offset));
3215 }
3216
3217 if (allow_reg_index_p)
3218 {
3219 /* Look for base + (scaled/extended) index register. */
3220 if (aarch64_base_register_rtx_p (op0, strict_p)
3221 && aarch64_classify_index (info, op1, mode, strict_p))
3222 {
3223 info->base = op0;
3224 return true;
3225 }
3226 if (aarch64_base_register_rtx_p (op1, strict_p)
3227 && aarch64_classify_index (info, op0, mode, strict_p))
3228 {
3229 info->base = op1;
3230 return true;
3231 }
3232 }
3233
3234 return false;
3235
3236 case POST_INC:
3237 case POST_DEC:
3238 case PRE_INC:
3239 case PRE_DEC:
3240 info->type = ADDRESS_REG_WB;
3241 info->base = XEXP (x, 0);
3242 info->offset = NULL_RTX;
3243 return aarch64_base_register_rtx_p (info->base, strict_p);
3244
3245 case POST_MODIFY:
3246 case PRE_MODIFY:
3247 info->type = ADDRESS_REG_WB;
3248 info->base = XEXP (x, 0);
3249 if (GET_CODE (XEXP (x, 1)) == PLUS
3250 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3251 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3252 && aarch64_base_register_rtx_p (info->base, strict_p))
3253 {
3254 HOST_WIDE_INT offset;
3255 info->offset = XEXP (XEXP (x, 1), 1);
3256 offset = INTVAL (info->offset);
3257
3258 /* TImode and TFmode values are allowed in both pairs of X
3259 registers and individual Q registers. The available
3260 address modes are:
3261 X,X: 7-bit signed scaled offset
3262 Q: 9-bit signed offset
3263 	     We conservatively require an offset representable in both modes.
3264 */
3265 if (mode == TImode || mode == TFmode)
3266 return (offset_7bit_signed_scaled_p (mode, offset)
3267 && offset_9bit_signed_unscaled_p (mode, offset));
3268
3269 if (outer_code == PARALLEL)
3270 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3271 && offset_7bit_signed_scaled_p (mode, offset));
3272 else
3273 return offset_9bit_signed_unscaled_p (mode, offset);
3274 }
3275 return false;
3276
3277 case CONST:
3278 case SYMBOL_REF:
3279 case LABEL_REF:
79517551
SN
3280      /* Load literal: pc-relative constant pool entry.  Only supported
3281 for SI mode or larger. */
43e9d192 3282 info->type = ADDRESS_SYMBOLIC;
79517551 3283 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
3284 {
3285 rtx sym, addend;
3286
3287 split_const (x, &sym, &addend);
3288 return (GET_CODE (sym) == LABEL_REF
3289 || (GET_CODE (sym) == SYMBOL_REF
3290 && CONSTANT_POOL_ADDRESS_P (sym)));
3291 }
3292 return false;
3293
3294 case LO_SUM:
3295 info->type = ADDRESS_LO_SUM;
3296 info->base = XEXP (x, 0);
3297 info->offset = XEXP (x, 1);
3298 if (allow_reg_index_p
3299 && aarch64_base_register_rtx_p (info->base, strict_p))
3300 {
3301 rtx sym, offs;
3302 split_const (info->offset, &sym, &offs);
3303 if (GET_CODE (sym) == SYMBOL_REF
3304 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3305 == SYMBOL_SMALL_ABSOLUTE))
3306 {
3307 /* The symbol and offset must be aligned to the access size. */
3308 unsigned int align;
3309 unsigned int ref_size;
3310
3311 if (CONSTANT_POOL_ADDRESS_P (sym))
3312 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3313 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3314 {
3315 tree exp = SYMBOL_REF_DECL (sym);
3316 align = TYPE_ALIGN (TREE_TYPE (exp));
3317 align = CONSTANT_ALIGNMENT (exp, align);
3318 }
3319 else if (SYMBOL_REF_DECL (sym))
3320 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
3321 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3322 && SYMBOL_REF_BLOCK (sym) != NULL)
3323 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
3324 else
3325 align = BITS_PER_UNIT;
3326
3327 ref_size = GET_MODE_SIZE (mode);
3328 if (ref_size == 0)
3329 ref_size = GET_MODE_SIZE (DImode);
3330
3331 return ((INTVAL (offs) & (ref_size - 1)) == 0
3332 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3333 }
3334 }
3335 return false;
3336
3337 default:
3338 return false;
3339 }
3340}
3341
3342bool
3343aarch64_symbolic_address_p (rtx x)
3344{
3345 rtx offset;
3346
3347 split_const (x, &x, &offset);
3348 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3349}
3350
3351/* Classify the base of symbolic expression X, given that X appears in
3352 context CONTEXT. */
da4f13a4
MS
3353
3354enum aarch64_symbol_type
3355aarch64_classify_symbolic_expression (rtx x,
3356 enum aarch64_symbol_context context)
43e9d192
IB
3357{
3358 rtx offset;
da4f13a4 3359
43e9d192
IB
3360 split_const (x, &x, &offset);
3361 return aarch64_classify_symbol (x, context);
3362}
3363
3364
3365/* Return TRUE if X is a legitimate address for accessing memory in
3366 mode MODE. */
3367static bool
3368aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3369{
3370 struct aarch64_address_info addr;
3371
3372 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3373}
3374
3375/* Return TRUE if X is a legitimate address for accessing memory in
3376 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3377 pair operation. */
3378bool
3379aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
aef66c94 3380 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
3381{
3382 struct aarch64_address_info addr;
3383
3384 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3385}
3386
3387/* Return TRUE if rtx X is immediate constant 0.0 */
3388bool
3520f7cc 3389aarch64_float_const_zero_rtx_p (rtx x)
43e9d192
IB
3390{
3391 REAL_VALUE_TYPE r;
3392
3393 if (GET_MODE (x) == VOIDmode)
3394 return false;
3395
3396 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3397 if (REAL_VALUE_MINUS_ZERO (r))
3398 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3399 return REAL_VALUES_EQUAL (r, dconst0);
3400}
3401
70f09188
AP
3402/* Return the fixed registers used for condition codes. */
3403
3404static bool
3405aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3406{
3407 *p1 = CC_REGNUM;
3408 *p2 = INVALID_REGNUM;
3409 return true;
3410}
3411
43e9d192
IB
3412enum machine_mode
3413aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3414{
3415 /* All floating point compares return CCFP if it is an equality
3416 comparison, and CCFPE otherwise. */
3417 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3418 {
3419 switch (code)
3420 {
3421 case EQ:
3422 case NE:
3423 case UNORDERED:
3424 case ORDERED:
3425 case UNLT:
3426 case UNLE:
3427 case UNGT:
3428 case UNGE:
3429 case UNEQ:
3430 case LTGT:
3431 return CCFPmode;
3432
3433 case LT:
3434 case LE:
3435 case GT:
3436 case GE:
3437 return CCFPEmode;
3438
3439 default:
3440 gcc_unreachable ();
3441 }
3442 }
3443
3444 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3445 && y == const0_rtx
3446 && (code == EQ || code == NE || code == LT || code == GE)
b056c910
N
3447 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3448 || GET_CODE (x) == NEG))
43e9d192
IB
3449 return CC_NZmode;
3450
1c992d1e 3451 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
3452 the comparison will have to be swapped when we emit the assembly
3453 code. */
3454 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3455 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3456 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3457 || GET_CODE (x) == LSHIFTRT
1c992d1e 3458 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
3459 return CC_SWPmode;
3460
1c992d1e
RE
3461 /* Similarly for a negated operand, but we can only do this for
3462 equalities. */
3463 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3464 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3465 && (code == EQ || code == NE)
3466 && GET_CODE (x) == NEG)
3467 return CC_Zmode;
3468
43e9d192
IB
3469 /* A compare of a mode narrower than SI mode against zero can be done
3470 by extending the value in the comparison. */
3471 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3472 && y == const0_rtx)
3473 /* Only use sign-extension if we really need it. */
3474 return ((code == GT || code == GE || code == LE || code == LT)
3475 ? CC_SESWPmode : CC_ZESWPmode);
3476
3477 /* For everything else, return CCmode. */
3478 return CCmode;
3479}
3480
3481static unsigned
3482aarch64_get_condition_code (rtx x)
3483{
3484 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3485 enum rtx_code comp_code = GET_CODE (x);
3486
3487 if (GET_MODE_CLASS (mode) != MODE_CC)
3488 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3489
3490 switch (mode)
3491 {
3492 case CCFPmode:
3493 case CCFPEmode:
3494 switch (comp_code)
3495 {
3496 case GE: return AARCH64_GE;
3497 case GT: return AARCH64_GT;
3498 case LE: return AARCH64_LS;
3499 case LT: return AARCH64_MI;
3500 case NE: return AARCH64_NE;
3501 case EQ: return AARCH64_EQ;
3502 case ORDERED: return AARCH64_VC;
3503 case UNORDERED: return AARCH64_VS;
3504 case UNLT: return AARCH64_LT;
3505 case UNLE: return AARCH64_LE;
3506 case UNGT: return AARCH64_HI;
3507 case UNGE: return AARCH64_PL;
3508 default: gcc_unreachable ();
3509 }
3510 break;
3511
3512 case CCmode:
3513 switch (comp_code)
3514 {
3515 case NE: return AARCH64_NE;
3516 case EQ: return AARCH64_EQ;
3517 case GE: return AARCH64_GE;
3518 case GT: return AARCH64_GT;
3519 case LE: return AARCH64_LE;
3520 case LT: return AARCH64_LT;
3521 case GEU: return AARCH64_CS;
3522 case GTU: return AARCH64_HI;
3523 case LEU: return AARCH64_LS;
3524 case LTU: return AARCH64_CC;
3525 default: gcc_unreachable ();
3526 }
3527 break;
3528
3529 case CC_SWPmode:
3530 case CC_ZESWPmode:
3531 case CC_SESWPmode:
3532 switch (comp_code)
3533 {
3534 case NE: return AARCH64_NE;
3535 case EQ: return AARCH64_EQ;
3536 case GE: return AARCH64_LE;
3537 case GT: return AARCH64_LT;
3538 case LE: return AARCH64_GE;
3539 case LT: return AARCH64_GT;
3540 case GEU: return AARCH64_LS;
3541 case GTU: return AARCH64_CC;
3542 case LEU: return AARCH64_CS;
3543 case LTU: return AARCH64_HI;
3544 default: gcc_unreachable ();
3545 }
3546 break;
3547
3548 case CC_NZmode:
3549 switch (comp_code)
3550 {
3551 case NE: return AARCH64_NE;
3552 case EQ: return AARCH64_EQ;
3553 case GE: return AARCH64_PL;
3554 case LT: return AARCH64_MI;
3555 default: gcc_unreachable ();
3556 }
3557 break;
3558
1c992d1e
RE
3559 case CC_Zmode:
3560 switch (comp_code)
3561 {
3562 case NE: return AARCH64_NE;
3563 case EQ: return AARCH64_EQ;
3564 default: gcc_unreachable ();
3565 }
3566 break;
3567
43e9d192
IB
3568 default:
3569 gcc_unreachable ();
3570 break;
3571 }
3572}
3573
3574static unsigned
3575bit_count (unsigned HOST_WIDE_INT value)
3576{
3577 unsigned count = 0;
3578
3579 while (value)
3580 {
3581 count++;
3582 value &= value - 1;
3583 }
3584
3585 return count;
3586}
3587
3588void
3589aarch64_print_operand (FILE *f, rtx x, char code)
3590{
3591 switch (code)
3592 {
f541a481
KT
3593 /* An integer or symbol address without a preceding # sign. */
3594 case 'c':
3595 switch (GET_CODE (x))
3596 {
3597 case CONST_INT:
3598 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3599 break;
3600
3601 case SYMBOL_REF:
3602 output_addr_const (f, x);
3603 break;
3604
3605 case CONST:
3606 if (GET_CODE (XEXP (x, 0)) == PLUS
3607 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3608 {
3609 output_addr_const (f, x);
3610 break;
3611 }
3612 /* Fall through. */
3613
3614 default:
3615 output_operand_lossage ("Unsupported operand for code '%c'", code);
3616 }
3617 break;
3618
43e9d192
IB
3619 case 'e':
3620 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3621 {
3622 int n;
3623
3624 if (GET_CODE (x) != CONST_INT
3625 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3626 {
3627 output_operand_lossage ("invalid operand for '%%%c'", code);
3628 return;
3629 }
3630
3631 switch (n)
3632 {
3633 case 3:
3634 fputc ('b', f);
3635 break;
3636 case 4:
3637 fputc ('h', f);
3638 break;
3639 case 5:
3640 fputc ('w', f);
3641 break;
3642 default:
3643 output_operand_lossage ("invalid operand for '%%%c'", code);
3644 return;
3645 }
3646 }
3647 break;
3648
3649 case 'p':
3650 {
3651 int n;
3652
3653 /* Print N such that 2^N == X. */
3654 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3655 {
3656 output_operand_lossage ("invalid operand for '%%%c'", code);
3657 return;
3658 }
3659
3660 asm_fprintf (f, "%d", n);
3661 }
3662 break;
3663
3664 case 'P':
3665 /* Print the number of non-zero bits in X (a const_int). */
3666 if (GET_CODE (x) != CONST_INT)
3667 {
3668 output_operand_lossage ("invalid operand for '%%%c'", code);
3669 return;
3670 }
3671
3672 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3673 break;
3674
3675 case 'H':
3676 /* Print the higher numbered register of a pair (TImode) of regs. */
3677 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3678 {
3679 output_operand_lossage ("invalid operand for '%%%c'", code);
3680 return;
3681 }
3682
01a3a324 3683 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
3684 break;
3685
43e9d192
IB
3686 case 'm':
3687 /* Print a condition (eq, ne, etc). */
3688
3689 /* CONST_TRUE_RTX means always -- that's the default. */
3690 if (x == const_true_rtx)
3691 return;
3692
3693 if (!COMPARISON_P (x))
3694 {
3695 output_operand_lossage ("invalid operand for '%%%c'", code);
3696 return;
3697 }
3698
3699 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3700 break;
3701
3702 case 'M':
3703 /* Print the inverse of a condition (eq <-> ne, etc). */
3704
3705 /* CONST_TRUE_RTX means never -- that's the default. */
3706 if (x == const_true_rtx)
3707 {
3708 fputs ("nv", f);
3709 return;
3710 }
3711
3712 if (!COMPARISON_P (x))
3713 {
3714 output_operand_lossage ("invalid operand for '%%%c'", code);
3715 return;
3716 }
3717
3718 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3719 (aarch64_get_condition_code (x))], f);
3720 break;
3721
3722 case 'b':
3723 case 'h':
3724 case 's':
3725 case 'd':
3726 case 'q':
3727 /* Print a scalar FP/SIMD register name. */
3728 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3729 {
3730 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3731 return;
3732 }
50ce6f88 3733 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
3734 break;
3735
3736 case 'S':
3737 case 'T':
3738 case 'U':
3739 case 'V':
3740 /* Print the first FP/SIMD register name in a list. */
3741 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3742 {
3743 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3744 return;
3745 }
50ce6f88 3746 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
3747 break;
3748
a05c0ddf 3749 case 'X':
50d38551 3750 /* Print bottom 16 bits of integer constant in hex. */
a05c0ddf
IB
3751 if (GET_CODE (x) != CONST_INT)
3752 {
3753 output_operand_lossage ("invalid operand for '%%%c'", code);
3754 return;
3755 }
50d38551 3756 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
3757 break;
3758
43e9d192
IB
3759 case 'w':
3760 case 'x':
3761 /* Print a general register name or the zero register (32-bit or
3762 64-bit). */
3520f7cc
JG
3763 if (x == const0_rtx
3764 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 3765 {
50ce6f88 3766 asm_fprintf (f, "%czr", code);
43e9d192
IB
3767 break;
3768 }
3769
3770 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3771 {
50ce6f88 3772 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
3773 break;
3774 }
3775
3776 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3777 {
50ce6f88 3778 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
3779 break;
3780 }
3781
3782 /* Fall through */
3783
3784 case 0:
3785 /* Print a normal operand, if it's a general register, then we
3786 assume DImode. */
3787 if (x == NULL)
3788 {
3789 output_operand_lossage ("missing operand");
3790 return;
3791 }
3792
3793 switch (GET_CODE (x))
3794 {
3795 case REG:
01a3a324 3796 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
3797 break;
3798
3799 case MEM:
3800 aarch64_memory_reference_mode = GET_MODE (x);
3801 output_address (XEXP (x, 0));
3802 break;
3803
3804 case LABEL_REF:
3805 case SYMBOL_REF:
3806 output_addr_const (asm_out_file, x);
3807 break;
3808
3809 case CONST_INT:
3810 asm_fprintf (f, "%wd", INTVAL (x));
3811 break;
3812
3813 case CONST_VECTOR:
3520f7cc
JG
3814 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3815 {
3816 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3817 HOST_WIDE_INT_MIN,
3818 HOST_WIDE_INT_MAX));
3819 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3820 }
3821 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3822 {
3823 fputc ('0', f);
3824 }
3825 else
3826 gcc_unreachable ();
43e9d192
IB
3827 break;
3828
3520f7cc
JG
3829 case CONST_DOUBLE:
3830 /* CONST_DOUBLE can represent a double-width integer.
3831 In this case, the mode of x is VOIDmode. */
3832 if (GET_MODE (x) == VOIDmode)
3833 ; /* Do Nothing. */
3834 else if (aarch64_float_const_zero_rtx_p (x))
3835 {
3836 fputc ('0', f);
3837 break;
3838 }
3839 else if (aarch64_float_const_representable_p (x))
3840 {
3841#define buf_size 20
3842 char float_buf[buf_size] = {'\0'};
3843 REAL_VALUE_TYPE r;
3844 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3845 real_to_decimal_for_mode (float_buf, &r,
3846 buf_size, buf_size,
3847 1, GET_MODE (x));
3848 asm_fprintf (asm_out_file, "%s", float_buf);
3849 break;
3850#undef buf_size
3851 }
3852 output_operand_lossage ("invalid constant");
3853 return;
43e9d192
IB
3854 default:
3855 output_operand_lossage ("invalid operand");
3856 return;
3857 }
3858 break;
3859
3860 case 'A':
3861 if (GET_CODE (x) == HIGH)
3862 x = XEXP (x, 0);
3863
3864 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3865 {
3866 case SYMBOL_SMALL_GOT:
3867 asm_fprintf (asm_out_file, ":got:");
3868 break;
3869
3870 case SYMBOL_SMALL_TLSGD:
3871 asm_fprintf (asm_out_file, ":tlsgd:");
3872 break;
3873
3874 case SYMBOL_SMALL_TLSDESC:
3875 asm_fprintf (asm_out_file, ":tlsdesc:");
3876 break;
3877
3878 case SYMBOL_SMALL_GOTTPREL:
3879 asm_fprintf (asm_out_file, ":gottprel:");
3880 break;
3881
3882 case SYMBOL_SMALL_TPREL:
3883 asm_fprintf (asm_out_file, ":tprel:");
3884 break;
3885
87dd8ab0
MS
3886 case SYMBOL_TINY_GOT:
3887 gcc_unreachable ();
3888 break;
3889
43e9d192
IB
3890 default:
3891 break;
3892 }
3893 output_addr_const (asm_out_file, x);
3894 break;
3895
3896 case 'L':
3897 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3898 {
3899 case SYMBOL_SMALL_GOT:
3900 asm_fprintf (asm_out_file, ":lo12:");
3901 break;
3902
3903 case SYMBOL_SMALL_TLSGD:
3904 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3905 break;
3906
3907 case SYMBOL_SMALL_TLSDESC:
3908 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3909 break;
3910
3911 case SYMBOL_SMALL_GOTTPREL:
3912 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3913 break;
3914
3915 case SYMBOL_SMALL_TPREL:
3916 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3917 break;
3918
87dd8ab0
MS
3919 case SYMBOL_TINY_GOT:
3920 asm_fprintf (asm_out_file, ":got:");
3921 break;
3922
43e9d192
IB
3923 default:
3924 break;
3925 }
3926 output_addr_const (asm_out_file, x);
3927 break;
3928
3929 case 'G':
3930
3931 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3932 {
3933 case SYMBOL_SMALL_TPREL:
3934 asm_fprintf (asm_out_file, ":tprel_hi12:");
3935 break;
3936 default:
3937 break;
3938 }
3939 output_addr_const (asm_out_file, x);
3940 break;
3941
3942 default:
3943 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3944 return;
3945 }
3946}
3947
3948void
3949aarch64_print_operand_address (FILE *f, rtx x)
3950{
3951 struct aarch64_address_info addr;
3952
3953 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3954 MEM, true))
3955 switch (addr.type)
3956 {
3957 case ADDRESS_REG_IMM:
3958 if (addr.offset == const0_rtx)
01a3a324 3959 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 3960 else
16a3246f 3961 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
3962 INTVAL (addr.offset));
3963 return;
3964
3965 case ADDRESS_REG_REG:
3966 if (addr.shift == 0)
16a3246f 3967 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 3968 reg_names [REGNO (addr.offset)]);
43e9d192 3969 else
16a3246f 3970 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 3971 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
3972 return;
3973
3974 case ADDRESS_REG_UXTW:
3975 if (addr.shift == 0)
16a3246f 3976 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
3977 REGNO (addr.offset) - R0_REGNUM);
3978 else
16a3246f 3979 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
3980 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3981 return;
3982
3983 case ADDRESS_REG_SXTW:
3984 if (addr.shift == 0)
16a3246f 3985 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
3986 REGNO (addr.offset) - R0_REGNUM);
3987 else
16a3246f 3988 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
3989 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3990 return;
3991
3992 case ADDRESS_REG_WB:
3993 switch (GET_CODE (x))
3994 {
3995 case PRE_INC:
16a3246f 3996 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
3997 GET_MODE_SIZE (aarch64_memory_reference_mode));
3998 return;
3999 case POST_INC:
16a3246f 4000 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
43e9d192
IB
4001 GET_MODE_SIZE (aarch64_memory_reference_mode));
4002 return;
4003 case PRE_DEC:
16a3246f 4004 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4005 GET_MODE_SIZE (aarch64_memory_reference_mode));
4006 return;
4007 case POST_DEC:
16a3246f 4008 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
43e9d192
IB
4009 GET_MODE_SIZE (aarch64_memory_reference_mode));
4010 return;
4011 case PRE_MODIFY:
16a3246f 4012 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4013 INTVAL (addr.offset));
4014 return;
4015 case POST_MODIFY:
16a3246f 4016 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
4017 INTVAL (addr.offset));
4018 return;
4019 default:
4020 break;
4021 }
4022 break;
4023
4024 case ADDRESS_LO_SUM:
16a3246f 4025 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
4026 output_addr_const (f, addr.offset);
4027 asm_fprintf (f, "]");
4028 return;
4029
4030 case ADDRESS_SYMBOLIC:
4031 break;
4032 }
4033
4034 output_addr_const (f, x);
4035}
4036
43e9d192
IB
4037bool
4038aarch64_label_mentioned_p (rtx x)
4039{
4040 const char *fmt;
4041 int i;
4042
4043 if (GET_CODE (x) == LABEL_REF)
4044 return true;
4045
4046 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4047 referencing instruction, but they are constant offsets, not
4048 symbols. */
4049 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4050 return false;
4051
4052 fmt = GET_RTX_FORMAT (GET_CODE (x));
4053 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4054 {
4055 if (fmt[i] == 'E')
4056 {
4057 int j;
4058
4059 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4060 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4061 return 1;
4062 }
4063 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4064 return 1;
4065 }
4066
4067 return 0;
4068}
4069
4070/* Implement REGNO_REG_CLASS. */
4071
4072enum reg_class
4073aarch64_regno_regclass (unsigned regno)
4074{
4075 if (GP_REGNUM_P (regno))
a4a182c6 4076 return GENERAL_REGS;
43e9d192
IB
4077
4078 if (regno == SP_REGNUM)
4079 return STACK_REG;
4080
4081 if (regno == FRAME_POINTER_REGNUM
4082 || regno == ARG_POINTER_REGNUM)
f24bb080 4083 return POINTER_REGS;
43e9d192
IB
4084
4085 if (FP_REGNUM_P (regno))
4086 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4087
4088 return NO_REGS;
4089}
4090
4091/* Try a machine-dependent way of reloading an illegitimate address
4092 operand. If we find one, push the reload and return the new rtx. */
4093
4094rtx
4095aarch64_legitimize_reload_address (rtx *x_p,
4096 enum machine_mode mode,
4097 int opnum, int type,
4098 int ind_levels ATTRIBUTE_UNUSED)
4099{
4100 rtx x = *x_p;
4101
4102 /* Do not allow mem (plus (reg, const)) if vector mode. */
4103 if (aarch64_vector_mode_p (mode)
4104 && GET_CODE (x) == PLUS
4105 && REG_P (XEXP (x, 0))
4106 && CONST_INT_P (XEXP (x, 1)))
4107 {
4108 rtx orig_rtx = x;
4109 x = copy_rtx (x);
4110 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4111 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4112 opnum, (enum reload_type) type);
4113 return x;
4114 }
4115
4116 /* We must recognize output that we have already generated ourselves. */
4117 if (GET_CODE (x) == PLUS
4118 && GET_CODE (XEXP (x, 0)) == PLUS
4119 && REG_P (XEXP (XEXP (x, 0), 0))
4120 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4121 && CONST_INT_P (XEXP (x, 1)))
4122 {
4123 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4124 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4125 opnum, (enum reload_type) type);
4126 return x;
4127 }
4128
 4129 /* We wish to handle large displacements off a base register by splitting
 4130 the addend across an add and the mem insn. This can cut the number of
 4131 extra insns needed from 3 to 1. It is only useful for load/store of a
 4132 single register with a 12-bit offset field. */
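 /* Illustrative annotation (not in the original source), assuming DImode
 and an address of the form (plus (reg x1) (const_int 0x12008)):
 low = 0x008 and high = 0x12000, and 0x12000 is a valid shifted 12-bit
 immediate, so reload can emit roughly
     add  xscratch, x1, #0x12, lsl #12
     ldr  x0, [xscratch, #8]
 rather than materialising the whole offset into a register first.
 The register names here are only for illustration. */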
4133 if (GET_CODE (x) == PLUS
4134 && REG_P (XEXP (x, 0))
4135 && CONST_INT_P (XEXP (x, 1))
4136 && HARD_REGISTER_P (XEXP (x, 0))
4137 && mode != TImode
4138 && mode != TFmode
4139 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4140 {
4141 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4142 HOST_WIDE_INT low = val & 0xfff;
4143 HOST_WIDE_INT high = val - low;
4144 HOST_WIDE_INT offs;
4145 rtx cst;
28514dda
YZ
4146 enum machine_mode xmode = GET_MODE (x);
4147
4148 /* In ILP32, xmode can be either DImode or SImode. */
4149 gcc_assert (xmode == DImode || xmode == SImode);
43e9d192
IB
4150
4151 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4152 BLKmode alignment. */
4153 if (GET_MODE_SIZE (mode) == 0)
4154 return NULL_RTX;
4155
4156 offs = low % GET_MODE_SIZE (mode);
4157
4158 /* Align misaligned offset by adjusting high part to compensate. */
4159 if (offs != 0)
4160 {
4161 if (aarch64_uimm12_shift (high + offs))
4162 {
4163 /* Align down. */
4164 low = low - offs;
4165 high = high + offs;
4166 }
4167 else
4168 {
4169 /* Align up. */
4170 offs = GET_MODE_SIZE (mode) - offs;
4171 low = low + offs;
4172 high = high + (low & 0x1000) - offs;
4173 low &= 0xfff;
4174 }
4175 }
4176
4177 /* Check for overflow. */
4178 if (high + low != val)
4179 return NULL_RTX;
4180
4181 cst = GEN_INT (high);
4182 if (!aarch64_uimm12_shift (high))
28514dda 4183 cst = force_const_mem (xmode, cst);
43e9d192
IB
4184
4185 /* Reload high part into base reg, leaving the low part
acf67b8e
KT
4186 in the mem instruction.
4187 Note that replacing this gen_rtx_PLUS with plus_constant is
4188 wrong in this case because we rely on the
4189 (plus (plus reg c1) c2) structure being preserved so that
4190 XEXP (*p, 0) in push_reload below uses the correct term. */
4d0e5b54
JG
4191 x = gen_rtx_PLUS (xmode,
4192 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4193 GEN_INT (low));
43e9d192
IB
4194
4195 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
28514dda 4196 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
43e9d192
IB
4197 opnum, (enum reload_type) type);
4198 return x;
4199 }
4200
4201 return NULL_RTX;
4202}
4203
4204
4205static reg_class_t
4206aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4207 reg_class_t rclass,
4208 enum machine_mode mode,
4209 secondary_reload_info *sri)
4210{
43e9d192
IB
4211 /* Without the TARGET_SIMD instructions we cannot move a Q register
4212 to a Q register directly. We need a scratch. */
4213 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4214 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4215 && reg_class_subset_p (rclass, FP_REGS))
4216 {
4217 if (mode == TFmode)
4218 sri->icode = CODE_FOR_aarch64_reload_movtf;
4219 else if (mode == TImode)
4220 sri->icode = CODE_FOR_aarch64_reload_movti;
4221 return NO_REGS;
4222 }
4223
 4224 /* A TFmode or TImode memory access should be handled via FP_REGS
 4225 because AArch64 has richer addressing modes for LDR/STR instructions
 4226 than for LDP/STP instructions. */
a4a182c6 4227 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
43e9d192
IB
4228 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4229 return FP_REGS;
4230
4231 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 4232 return GENERAL_REGS;
43e9d192
IB
4233
4234 return NO_REGS;
4235}
4236
4237static bool
4238aarch64_can_eliminate (const int from, const int to)
4239{
4240 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4241 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4242
4243 if (frame_pointer_needed)
4244 {
4245 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4246 return true;
4247 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4248 return false;
4249 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4250 && !cfun->calls_alloca)
4251 return true;
4252 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4253 return true;
0b7f8166
MS
4254
4255 return false;
43e9d192 4256 }
777e6976 4257
43e9d192
IB
4258 return true;
4259}
4260
4261HOST_WIDE_INT
4262aarch64_initial_elimination_offset (unsigned from, unsigned to)
4263{
4264 HOST_WIDE_INT frame_size;
4265 HOST_WIDE_INT offset;
4266
4267 aarch64_layout_frame ();
4268 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4269 + crtl->outgoing_args_size
4270 + cfun->machine->saved_varargs_size);
4271
78c29983
MS
4272 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4273 offset = frame_size;
4274
4275 if (to == HARD_FRAME_POINTER_REGNUM)
4276 {
4277 if (from == ARG_POINTER_REGNUM)
4278 return offset - crtl->outgoing_args_size;
4279
4280 if (from == FRAME_POINTER_REGNUM)
4281 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4282 }
4283
4284 if (to == STACK_POINTER_REGNUM)
4285 {
4286 if (from == FRAME_POINTER_REGNUM)
4287 {
4288 HOST_WIDE_INT elim = crtl->outgoing_args_size
4289 + cfun->machine->frame.saved_regs_size
53e5ace2 4290 + get_frame_size ();
78c29983
MS
4291 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4292 return elim;
4293 }
4294 }
4295
4296 return offset;
43e9d192
IB
4297}
4298
4299
4300/* Implement RETURN_ADDR_RTX. We do not support moving back to a
4301 previous frame. */
4302
4303rtx
4304aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4305{
4306 if (count != 0)
4307 return const0_rtx;
4308 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4309}
4310
4311
4312static void
4313aarch64_asm_trampoline_template (FILE *f)
4314{
28514dda
YZ
4315 if (TARGET_ILP32)
4316 {
4317 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4318 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4319 }
4320 else
4321 {
4322 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4323 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4324 }
01a3a324 4325 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 4326 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
4327 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4328 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
4329}
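/* Annotation (not in the original source): the template above is 16 bytes of
 code (two PC-relative loads, a BR and padding) followed by two
 pointer-sized data slots; aarch64_trampoline_init below writes the target
 function address into the first slot and the static chain value into the
 second. */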
4330
4331static void
4332aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4333{
4334 rtx fnaddr, mem, a_tramp;
28514dda 4335 const int tramp_code_sz = 16;
43e9d192
IB
4336
4337 /* Don't need to copy the trailing D-words, we fill those in below. */
4338 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
4339 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4340 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 4341 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
4342 if (GET_MODE (fnaddr) != ptr_mode)
4343 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
4344 emit_move_insn (mem, fnaddr);
4345
28514dda 4346 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
4347 emit_move_insn (mem, chain_value);
4348
4349 /* XXX We should really define a "clear_cache" pattern and use
4350 gen_clear_cache(). */
4351 a_tramp = XEXP (m_tramp, 0);
4352 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
4353 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4354 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4355 ptr_mode);
43e9d192
IB
4356}
4357
4358static unsigned char
4359aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4360{
4361 switch (regclass)
4362 {
43e9d192
IB
4363 case POINTER_REGS:
4364 case GENERAL_REGS:
4365 case ALL_REGS:
4366 case FP_REGS:
4367 case FP_LO_REGS:
4368 return
4369 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
aef66c94 4370 (GET_MODE_SIZE (mode) + 7) / 8;
43e9d192
IB
4371 case STACK_REG:
4372 return 1;
4373
4374 case NO_REGS:
4375 return 0;
4376
4377 default:
4378 break;
4379 }
4380 gcc_unreachable ();
4381}
4382
4383static reg_class_t
78d8b9f0 4384aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 4385{
51bb310d 4386 if (regclass == POINTER_REGS)
78d8b9f0
IB
4387 return GENERAL_REGS;
4388
51bb310d
MS
4389 if (regclass == STACK_REG)
4390 {
4391 if (REG_P(x)
4392 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4393 return regclass;
4394
4395 return NO_REGS;
4396 }
4397
78d8b9f0
IB
4398 /* If it's an integer immediate that MOVI can't handle, then
4399 FP_REGS is not an option, so we return NO_REGS instead. */
4400 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4401 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4402 return NO_REGS;
4403
27bd251b
IB
 4404 /* Register elimination can result in a request for
 4405 SP+constant->FP_REGS. We cannot support such operations which
 4406 use SP as source and an FP_REG as destination, so reject them
 4407 outright here. */
4408 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4409 {
4410 rtx lhs = XEXP (x, 0);
4411
4412 /* Look through a possible SUBREG introduced by ILP32. */
4413 if (GET_CODE (lhs) == SUBREG)
4414 lhs = SUBREG_REG (lhs);
4415
4416 gcc_assert (REG_P (lhs));
4417 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4418 POINTER_REGS));
4419 return NO_REGS;
4420 }
4421
78d8b9f0 4422 return regclass;
43e9d192
IB
4423}
4424
4425void
4426aarch64_asm_output_labelref (FILE* f, const char *name)
4427{
4428 asm_fprintf (f, "%U%s", name);
4429}
4430
4431static void
4432aarch64_elf_asm_constructor (rtx symbol, int priority)
4433{
4434 if (priority == DEFAULT_INIT_PRIORITY)
4435 default_ctor_section_asm_out_constructor (symbol, priority);
4436 else
4437 {
4438 section *s;
4439 char buf[18];
4440 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4441 s = get_section (buf, SECTION_WRITE, NULL);
4442 switch_to_section (s);
4443 assemble_align (POINTER_SIZE);
28514dda 4444 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4445 }
4446}
4447
4448static void
4449aarch64_elf_asm_destructor (rtx symbol, int priority)
4450{
4451 if (priority == DEFAULT_INIT_PRIORITY)
4452 default_dtor_section_asm_out_destructor (symbol, priority);
4453 else
4454 {
4455 section *s;
4456 char buf[18];
4457 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4458 s = get_section (buf, SECTION_WRITE, NULL);
4459 switch_to_section (s);
4460 assemble_align (POINTER_SIZE);
28514dda 4461 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4462 }
4463}
4464
4465const char*
4466aarch64_output_casesi (rtx *operands)
4467{
4468 char buf[100];
4469 char label[100];
592a16fc 4470 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
43e9d192
IB
4471 int index;
4472 static const char *const patterns[4][2] =
4473 {
4474 {
4475 "ldrb\t%w3, [%0,%w1,uxtw]",
4476 "add\t%3, %4, %w3, sxtb #2"
4477 },
4478 {
4479 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4480 "add\t%3, %4, %w3, sxth #2"
4481 },
4482 {
4483 "ldr\t%w3, [%0,%w1,uxtw #2]",
4484 "add\t%3, %4, %w3, sxtw #2"
4485 },
4486 /* We assume that DImode is only generated when not optimizing and
4487 that we don't really need 64-bit address offsets. That would
4488 imply an object file with 8GB of code in a single function! */
4489 {
4490 "ldr\t%w3, [%0,%w1,uxtw #2]",
4491 "add\t%3, %4, %w3, sxtw #2"
4492 }
4493 };
4494
4495 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4496
4497 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4498
4499 gcc_assert (index >= 0 && index <= 3);
4500
 4501 /* Need to implement table size reduction, by changing the code below. */
4502 output_asm_insn (patterns[index][0], operands);
4503 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4504 snprintf (buf, sizeof (buf),
4505 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4506 output_asm_insn (buf, operands);
4507 output_asm_insn (patterns[index][1], operands);
4508 output_asm_insn ("br\t%3", operands);
4509 assemble_label (asm_out_file, label);
4510 return "";
4511}
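/* Illustrative annotation (not in the original source): for a byte-sized
 dispatch table, and assuming the table base in x0, the index in w1 and
 scratch registers x3/x4, the sequence emitted above is roughly

     ldrb  w3, [x0, w1, uxtw]
     adr   x4, .Lrtx<N>
     add   x3, x4, w3, sxtb #2
     br    x3
   .Lrtx<N>:

 i.e. each table entry is a label-relative offset divided by 4; the label
 name is illustrative. */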
4512
4513
4514/* Return size in bits of an arithmetic operand which is shifted/scaled and
4515 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4516 operator. */
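/* Worked example (annotation, not in the original source): for shift == 2
 and mask == 0x3fc (0xff << 2) the loop below matches the 8-bit pattern
 and returns 8, i.e. the operand fits a UXTB combined with an LSL of 2.
 A mask such as 0x3ff matches none of the 8/16/32-bit patterns and
 yields 0. */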
4517
4518int
4519aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4520{
4521 if (shift >= 0 && shift <= 3)
4522 {
4523 int size;
4524 for (size = 8; size <= 32; size *= 2)
4525 {
4526 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4527 if (mask == bits << shift)
4528 return size;
4529 }
4530 }
4531 return 0;
4532}
4533
4534static bool
4535aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4536 const_rtx x ATTRIBUTE_UNUSED)
4537{
4538 /* We can't use blocks for constants when we're using a per-function
4539 constant pool. */
4540 return false;
4541}
4542
4543static section *
4544aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4545 rtx x ATTRIBUTE_UNUSED,
4546 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4547{
4548 /* Force all constant pool entries into the current function section. */
4549 return function_section (current_function_decl);
4550}
4551
4552
4553/* Costs. */
4554
4555/* Helper function for rtx cost calculation. Strip a shift expression
4556 from X. Returns the inner operand if successful, or the original
4557 expression on failure. */
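/* Illustrative annotation (not in the original source):
     (ashift (reg) (const_int 4))   -> (reg)
     (mult (reg) (const_int 8))     -> (reg)     ; power-of-two multiply
     (ashift (reg) (reg))           -> unchanged ; shift amount not constant  */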
4558static rtx
4559aarch64_strip_shift (rtx x)
4560{
4561 rtx op = x;
4562
57b77d46
RE
4563 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4564 we can convert both to ROR during final output. */
43e9d192
IB
4565 if ((GET_CODE (op) == ASHIFT
4566 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
4567 || GET_CODE (op) == LSHIFTRT
4568 || GET_CODE (op) == ROTATERT
4569 || GET_CODE (op) == ROTATE)
43e9d192
IB
4570 && CONST_INT_P (XEXP (op, 1)))
4571 return XEXP (op, 0);
4572
4573 if (GET_CODE (op) == MULT
4574 && CONST_INT_P (XEXP (op, 1))
4575 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4576 return XEXP (op, 0);
4577
4578 return x;
4579}
4580
4745e701 4581/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
4582 expression from X. Returns the inner operand if successful, or the
4583 original expression on failure. We deal with a number of possible
4584 canonicalization variations here. */
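/* Illustrative annotation (not in the original source):
     (zero_extend (reg))                                  -> (reg)
     (ashift (sign_extend (reg)) (const_int 2))           -> (reg)
     (and (mult (reg) (const_int 4)) (const_int 0x3fc))   -> (reg)
 the last form being how a zero-extended, scaled value can appear after
 canonicalization. */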
4585static rtx
4745e701 4586aarch64_strip_extend (rtx x)
43e9d192
IB
4587{
4588 rtx op = x;
4589
4590 /* Zero and sign extraction of a widened value. */
4591 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4592 && XEXP (op, 2) == const0_rtx
4745e701 4593 && GET_CODE (XEXP (op, 0)) == MULT
43e9d192
IB
4594 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4595 XEXP (op, 1)))
4596 return XEXP (XEXP (op, 0), 0);
4597
4598 /* It can also be represented (for zero-extend) as an AND with an
4599 immediate. */
4600 if (GET_CODE (op) == AND
4601 && GET_CODE (XEXP (op, 0)) == MULT
4602 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4603 && CONST_INT_P (XEXP (op, 1))
4604 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4605 INTVAL (XEXP (op, 1))) != 0)
4606 return XEXP (XEXP (op, 0), 0);
4607
4608 /* Now handle extended register, as this may also have an optional
4609 left shift by 1..4. */
4610 if (GET_CODE (op) == ASHIFT
4611 && CONST_INT_P (XEXP (op, 1))
4612 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4613 op = XEXP (op, 0);
4614
4615 if (GET_CODE (op) == ZERO_EXTEND
4616 || GET_CODE (op) == SIGN_EXTEND)
4617 op = XEXP (op, 0);
4618
4619 if (op != x)
4620 return op;
4621
4745e701
JG
4622 return x;
4623}
4624
4625/* Helper function for rtx cost calculation. Calculate the cost of
4626 a MULT, which may be part of a multiply-accumulate rtx. Return
 4627 the calculated cost of the expression, recursing manually into
4628 operands where needed. */
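/* Annotation (not in the original source): for example, when OUTER is PLUS
 and X is (mult (reg) (const_int 8)), the multiply is costed as the
 shifted-register form of an ADD (e.g. "add x0, x1, x2, lsl #3") rather
 than as a separate MUL, which is why the caller passes the outer code in. */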
4629
4630static int
4631aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4632{
4633 rtx op0, op1;
4634 const struct cpu_cost_table *extra_cost
4635 = aarch64_tune_params->insn_extra_cost;
4636 int cost = 0;
4637 bool maybe_fma = (outer == PLUS || outer == MINUS);
4638 enum machine_mode mode = GET_MODE (x);
4639
4640 gcc_checking_assert (code == MULT);
4641
4642 op0 = XEXP (x, 0);
4643 op1 = XEXP (x, 1);
4644
4645 if (VECTOR_MODE_P (mode))
4646 mode = GET_MODE_INNER (mode);
4647
4648 /* Integer multiply/fma. */
4649 if (GET_MODE_CLASS (mode) == MODE_INT)
4650 {
4651 /* The multiply will be canonicalized as a shift, cost it as such. */
4652 if (CONST_INT_P (op1)
4653 && exact_log2 (INTVAL (op1)) > 0)
4654 {
4655 if (speed)
4656 {
4657 if (maybe_fma)
4658 /* ADD (shifted register). */
4659 cost += extra_cost->alu.arith_shift;
4660 else
4661 /* LSL (immediate). */
4662 cost += extra_cost->alu.shift;
4663 }
4664
4665 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4666
4667 return cost;
4668 }
4669
4670 /* Integer multiplies or FMAs have zero/sign extending variants. */
4671 if ((GET_CODE (op0) == ZERO_EXTEND
4672 && GET_CODE (op1) == ZERO_EXTEND)
4673 || (GET_CODE (op0) == SIGN_EXTEND
4674 && GET_CODE (op1) == SIGN_EXTEND))
4675 {
4676 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4677 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4678
4679 if (speed)
4680 {
4681 if (maybe_fma)
4682 /* MADD/SMADDL/UMADDL. */
4683 cost += extra_cost->mult[0].extend_add;
4684 else
4685 /* MUL/SMULL/UMULL. */
4686 cost += extra_cost->mult[0].extend;
4687 }
4688
4689 return cost;
4690 }
4691
4692 /* This is either an integer multiply or an FMA. In both cases
4693 we want to recurse and cost the operands. */
4694 cost += rtx_cost (op0, MULT, 0, speed)
4695 + rtx_cost (op1, MULT, 1, speed);
4696
4697 if (speed)
4698 {
4699 if (maybe_fma)
4700 /* MADD. */
4701 cost += extra_cost->mult[mode == DImode].add;
4702 else
4703 /* MUL. */
4704 cost += extra_cost->mult[mode == DImode].simple;
4705 }
4706
4707 return cost;
4708 }
4709 else
4710 {
4711 if (speed)
4712 {
3d840f7d 4713 /* Floating-point FMA/FMUL can also support negations of the
4745e701
JG
4714 operands. */
4715 if (GET_CODE (op0) == NEG)
3d840f7d 4716 op0 = XEXP (op0, 0);
4745e701 4717 if (GET_CODE (op1) == NEG)
3d840f7d 4718 op1 = XEXP (op1, 0);
4745e701
JG
4719
4720 if (maybe_fma)
4721 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4722 cost += extra_cost->fp[mode == DFmode].fma;
4723 else
3d840f7d 4724 /* FMUL/FNMUL. */
4745e701
JG
4725 cost += extra_cost->fp[mode == DFmode].mult;
4726 }
4727
4728 cost += rtx_cost (op0, MULT, 0, speed)
4729 + rtx_cost (op1, MULT, 1, speed);
4730 return cost;
4731 }
43e9d192
IB
4732}
4733
67747367
JG
4734static int
4735aarch64_address_cost (rtx x,
4736 enum machine_mode mode,
4737 addr_space_t as ATTRIBUTE_UNUSED,
4738 bool speed)
4739{
4740 enum rtx_code c = GET_CODE (x);
4741 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4742 struct aarch64_address_info info;
4743 int cost = 0;
4744 info.shift = 0;
4745
4746 if (!aarch64_classify_address (&info, x, mode, c, false))
4747 {
4748 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4749 {
4750 /* This is a CONST or SYMBOL ref which will be split
4751 in a different way depending on the code model in use.
4752 Cost it through the generic infrastructure. */
4753 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4754 /* Divide through by the cost of one instruction to
4755 bring it to the same units as the address costs. */
4756 cost_symbol_ref /= COSTS_N_INSNS (1);
4757 /* The cost is then the cost of preparing the address,
4758 followed by an immediate (possibly 0) offset. */
4759 return cost_symbol_ref + addr_cost->imm_offset;
4760 }
4761 else
4762 {
4763 /* This is most likely a jump table from a case
4764 statement. */
4765 return addr_cost->register_offset;
4766 }
4767 }
4768
4769 switch (info.type)
4770 {
4771 case ADDRESS_LO_SUM:
4772 case ADDRESS_SYMBOLIC:
4773 case ADDRESS_REG_IMM:
4774 cost += addr_cost->imm_offset;
4775 break;
4776
4777 case ADDRESS_REG_WB:
4778 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4779 cost += addr_cost->pre_modify;
4780 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4781 cost += addr_cost->post_modify;
4782 else
4783 gcc_unreachable ();
4784
4785 break;
4786
4787 case ADDRESS_REG_REG:
4788 cost += addr_cost->register_offset;
4789 break;
4790
4791 case ADDRESS_REG_UXTW:
4792 case ADDRESS_REG_SXTW:
4793 cost += addr_cost->register_extend;
4794 break;
4795
4796 default:
4797 gcc_unreachable ();
4798 }
4799
4800
4801 if (info.shift > 0)
4802 {
4803 /* For the sake of calculating the cost of the shifted register
4804 component, we can treat same sized modes in the same way. */
4805 switch (GET_MODE_BITSIZE (mode))
4806 {
4807 case 16:
4808 cost += addr_cost->addr_scale_costs.hi;
4809 break;
4810
4811 case 32:
4812 cost += addr_cost->addr_scale_costs.si;
4813 break;
4814
4815 case 64:
4816 cost += addr_cost->addr_scale_costs.di;
4817 break;
4818
4819 /* We can't tell, or this is a 128-bit vector. */
4820 default:
4821 cost += addr_cost->addr_scale_costs.ti;
4822 break;
4823 }
4824 }
4825
4826 return cost;
4827}
4828
7cc2145f
JG
4829/* Return true if the RTX X in mode MODE is a zero or sign extract
4830 usable in an ADD or SUB (extended register) instruction. */
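/* Illustrative annotation (not in the original source): this matches the
 extract-of-a-MULT form used for operands such as "w1, sxtw #2" in
 "add x0, x2, w1, sxtw #2", i.e. a 32-bit value sign-extended and scaled
 by 4 inside an ADD/SUB (extended register). */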
4831static bool
4832aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
4833{
4834 /* Catch add with a sign extract.
4835 This is add_<optab><mode>_multp2. */
4836 if (GET_CODE (x) == SIGN_EXTRACT
4837 || GET_CODE (x) == ZERO_EXTRACT)
4838 {
4839 rtx op0 = XEXP (x, 0);
4840 rtx op1 = XEXP (x, 1);
4841 rtx op2 = XEXP (x, 2);
4842
4843 if (GET_CODE (op0) == MULT
4844 && CONST_INT_P (op1)
4845 && op2 == const0_rtx
4846 && CONST_INT_P (XEXP (op0, 1))
4847 && aarch64_is_extend_from_extract (mode,
4848 XEXP (op0, 1),
4849 op1))
4850 {
4851 return true;
4852 }
4853 }
4854
4855 return false;
4856}
4857
43e9d192
IB
4858/* Calculate the cost of calculating X, storing it in *COST. Result
4859 is true if the total cost of the operation has now been calculated. */
4860static bool
4861aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4862 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4863{
a8eecd00 4864 rtx op0, op1, op2;
73250c4c 4865 const struct cpu_cost_table *extra_cost
43e9d192 4866 = aarch64_tune_params->insn_extra_cost;
9dfc162c 4867 enum machine_mode mode = GET_MODE (x);
43e9d192 4868
7fc5ef02
JG
4869 /* By default, assume that everything has equivalent cost to the
4870 cheapest instruction. Any additional costs are applied as a delta
4871 above this default. */
4872 *cost = COSTS_N_INSNS (1);
4873
4874 /* TODO: The cost infrastructure currently does not handle
4875 vector operations. Assume that all vector operations
4876 are equally expensive. */
4877 if (VECTOR_MODE_P (mode))
4878 {
4879 if (speed)
4880 *cost += extra_cost->vect.alu;
4881 return true;
4882 }
4883
43e9d192
IB
4884 switch (code)
4885 {
4886 case SET:
ba123b0d
JG
4887 /* The cost depends entirely on the operands to SET. */
4888 *cost = 0;
43e9d192
IB
4889 op0 = SET_DEST (x);
4890 op1 = SET_SRC (x);
4891
4892 switch (GET_CODE (op0))
4893 {
4894 case MEM:
4895 if (speed)
2961177e
JG
4896 {
4897 rtx address = XEXP (op0, 0);
4898 if (GET_MODE_CLASS (mode) == MODE_INT)
4899 *cost += extra_cost->ldst.store;
4900 else if (mode == SFmode)
4901 *cost += extra_cost->ldst.storef;
4902 else if (mode == DFmode)
4903 *cost += extra_cost->ldst.stored;
4904
4905 *cost +=
4906 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4907 0, speed));
4908 }
43e9d192 4909
ba123b0d 4910 *cost += rtx_cost (op1, SET, 1, speed);
43e9d192
IB
4911 return true;
4912
4913 case SUBREG:
4914 if (! REG_P (SUBREG_REG (op0)))
4915 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
ba123b0d 4916
43e9d192
IB
4917 /* Fall through. */
4918 case REG:
ba123b0d
JG
4919 /* const0_rtx is in general free, but we will use an
4920 instruction to set a register to 0. */
4921 if (REG_P (op1) || op1 == const0_rtx)
4922 {
4923 /* The cost is 1 per register copied. */
4924 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
4925 / UNITS_PER_WORD;
4926 *cost = COSTS_N_INSNS (n_minus_1 + 1);
4927 }
4928 else
4929 /* Cost is just the cost of the RHS of the set. */
4930 *cost += rtx_cost (op1, SET, 1, speed);
43e9d192
IB
4931 return true;
4932
ba123b0d 4933 case ZERO_EXTRACT:
43e9d192 4934 case SIGN_EXTRACT:
ba123b0d
JG
4935 /* Bit-field insertion. Strip any redundant widening of
4936 the RHS to meet the width of the target. */
43e9d192
IB
4937 if (GET_CODE (op1) == SUBREG)
4938 op1 = SUBREG_REG (op1);
4939 if ((GET_CODE (op1) == ZERO_EXTEND
4940 || GET_CODE (op1) == SIGN_EXTEND)
4941 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4942 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4943 >= INTVAL (XEXP (op0, 1))))
4944 op1 = XEXP (op1, 0);
ba123b0d
JG
4945
4946 if (CONST_INT_P (op1))
4947 {
4948 /* MOV immediate is assumed to always be cheap. */
4949 *cost = COSTS_N_INSNS (1);
4950 }
4951 else
4952 {
4953 /* BFM. */
4954 if (speed)
4955 *cost += extra_cost->alu.bfi;
4956 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
4957 }
4958
43e9d192
IB
4959 return true;
4960
4961 default:
ba123b0d
JG
 4962 /* We can't make sense of this; assume the default cost. */
4963 *cost = COSTS_N_INSNS (1);
43e9d192
IB
4964 break;
4965 }
4966 return false;
4967
9dfc162c
JG
4968 case CONST_INT:
4969 /* If an instruction can incorporate a constant within the
4970 instruction, the instruction's expression avoids calling
4971 rtx_cost() on the constant. If rtx_cost() is called on a
4972 constant, then it is usually because the constant must be
4973 moved into a register by one or more instructions.
4974
4975 The exception is constant 0, which can be expressed
4976 as XZR/WZR and is therefore free. The exception to this is
4977 if we have (set (reg) (const0_rtx)) in which case we must cost
4978 the move. However, we can catch that when we cost the SET, so
4979 we don't need to consider that here. */
4980 if (x == const0_rtx)
4981 *cost = 0;
4982 else
4983 {
 4984 /* To an approximation, the cost of building any other constant is
 4985 proportional to the number of instructions required to build that
 4986 constant. This is true whether we are compiling for SPEED or
 4987 otherwise. */
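 /* Annotation (not in the original source): e.g. 0x1234 is typically a
 single MOVZ, while an arbitrary 64-bit constant may need a MOVZ plus
 up to three MOVKs, so its cost is correspondingly higher. */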
4988 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
4989 INTVAL (x),
4990 false));
4991 }
4992 return true;
4993
4994 case CONST_DOUBLE:
4995 if (speed)
4996 {
4997 /* mov[df,sf]_aarch64. */
4998 if (aarch64_float_const_representable_p (x))
4999 /* FMOV (scalar immediate). */
5000 *cost += extra_cost->fp[mode == DFmode].fpconst;
5001 else if (!aarch64_float_const_zero_rtx_p (x))
5002 {
5003 /* This will be a load from memory. */
5004 if (mode == DFmode)
5005 *cost += extra_cost->ldst.loadd;
5006 else
5007 *cost += extra_cost->ldst.loadf;
5008 }
5009 else
5010 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5011 or MOV v0.s[0], wzr - neither of which are modeled by the
5012 cost tables. Just use the default cost. */
5013 {
5014 }
5015 }
5016
5017 return true;
5018
43e9d192
IB
5019 case MEM:
5020 if (speed)
2961177e
JG
5021 {
5022 /* For loads we want the base cost of a load, plus an
5023 approximation for the additional cost of the addressing
5024 mode. */
5025 rtx address = XEXP (x, 0);
5026 if (GET_MODE_CLASS (mode) == MODE_INT)
5027 *cost += extra_cost->ldst.load;
5028 else if (mode == SFmode)
5029 *cost += extra_cost->ldst.loadf;
5030 else if (mode == DFmode)
5031 *cost += extra_cost->ldst.loadd;
5032
5033 *cost +=
5034 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5035 0, speed));
5036 }
43e9d192
IB
5037
5038 return true;
5039
5040 case NEG:
4745e701
JG
5041 op0 = XEXP (x, 0);
5042
5043 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5044 {
5045 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5046 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5047 {
5048 /* CSETM. */
5049 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5050 return true;
5051 }
5052
5053 /* Cost this as SUB wzr, X. */
5054 op0 = CONST0_RTX (GET_MODE (x));
5055 op1 = XEXP (x, 0);
5056 goto cost_minus;
5057 }
5058
5059 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5060 {
5061 /* Support (neg(fma...)) as a single instruction only if
5062 sign of zeros is unimportant. This matches the decision
5063 making in aarch64.md. */
5064 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5065 {
5066 /* FNMADD. */
5067 *cost = rtx_cost (op0, NEG, 0, speed);
5068 return true;
5069 }
5070 if (speed)
5071 /* FNEG. */
5072 *cost += extra_cost->fp[mode == DFmode].neg;
5073 return false;
5074 }
5075
5076 return false;
43e9d192
IB
5077
5078 case COMPARE:
5079 op0 = XEXP (x, 0);
5080 op1 = XEXP (x, 1);
5081
5082 if (op1 == const0_rtx
5083 && GET_CODE (op0) == AND)
5084 {
5085 x = op0;
5086 goto cost_logic;
5087 }
5088
a8eecd00
JG
5089 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5090 {
5091 /* TODO: A write to the CC flags possibly costs extra, this
5092 needs encoding in the cost tables. */
5093
5094 /* CC_ZESWPmode supports zero extend for free. */
5095 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5096 op0 = XEXP (op0, 0);
5097
5098 /* ANDS. */
5099 if (GET_CODE (op0) == AND)
5100 {
5101 x = op0;
5102 goto cost_logic;
5103 }
5104
5105 if (GET_CODE (op0) == PLUS)
5106 {
5107 /* ADDS (and CMN alias). */
5108 x = op0;
5109 goto cost_plus;
5110 }
5111
5112 if (GET_CODE (op0) == MINUS)
5113 {
5114 /* SUBS. */
5115 x = op0;
5116 goto cost_minus;
5117 }
5118
5119 if (GET_CODE (op1) == NEG)
5120 {
5121 /* CMN. */
5122 if (speed)
5123 *cost += extra_cost->alu.arith;
5124
5125 *cost += rtx_cost (op0, COMPARE, 0, speed);
5126 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5127 return true;
5128 }
5129
5130 /* CMP.
5131
5132 Compare can freely swap the order of operands, and
5133 canonicalization puts the more complex operation first.
5134 But the integer MINUS logic expects the shift/extend
5135 operation in op1. */
5136 if (! (REG_P (op0)
5137 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5138 {
5139 op0 = XEXP (x, 1);
5140 op1 = XEXP (x, 0);
5141 }
5142 goto cost_minus;
5143 }
5144
5145 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5146 {
5147 /* FCMP. */
5148 if (speed)
5149 *cost += extra_cost->fp[mode == DFmode].compare;
5150
5151 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5152 {
5153 /* FCMP supports constant 0.0 for no extra cost. */
5154 return true;
5155 }
5156 return false;
5157 }
5158
5159 return false;
43e9d192
IB
5160
5161 case MINUS:
4745e701
JG
5162 {
5163 op0 = XEXP (x, 0);
5164 op1 = XEXP (x, 1);
5165
5166cost_minus:
5167 /* Detect valid immediates. */
5168 if ((GET_MODE_CLASS (mode) == MODE_INT
5169 || (GET_MODE_CLASS (mode) == MODE_CC
5170 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5171 && CONST_INT_P (op1)
5172 && aarch64_uimm12_shift (INTVAL (op1)))
5173 {
5174 *cost += rtx_cost (op0, MINUS, 0, speed);
43e9d192 5175
4745e701
JG
5176 if (speed)
5177 /* SUB(S) (immediate). */
5178 *cost += extra_cost->alu.arith;
5179 return true;
5180
5181 }
5182
7cc2145f
JG
5183 /* Look for SUB (extended register). */
5184 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5185 {
5186 if (speed)
5187 *cost += extra_cost->alu.arith_shift;
5188
5189 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5190 (enum rtx_code) GET_CODE (op1),
5191 0, speed);
5192 return true;
5193 }
5194
4745e701
JG
5195 rtx new_op1 = aarch64_strip_extend (op1);
5196
5197 /* Cost this as an FMA-alike operation. */
5198 if ((GET_CODE (new_op1) == MULT
5199 || GET_CODE (new_op1) == ASHIFT)
5200 && code != COMPARE)
5201 {
5202 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5203 (enum rtx_code) code,
5204 speed);
43e9d192 5205 *cost += rtx_cost (op0, MINUS, 0, speed);
4745e701
JG
5206 return true;
5207 }
43e9d192 5208
4745e701 5209 *cost += rtx_cost (new_op1, MINUS, 1, speed);
43e9d192 5210
4745e701
JG
5211 if (speed)
5212 {
5213 if (GET_MODE_CLASS (mode) == MODE_INT)
5214 /* SUB(S). */
5215 *cost += extra_cost->alu.arith;
5216 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5217 /* FSUB. */
5218 *cost += extra_cost->fp[mode == DFmode].addsub;
5219 }
5220 return true;
5221 }
43e9d192
IB
5222
5223 case PLUS:
4745e701
JG
5224 {
5225 rtx new_op0;
43e9d192 5226
4745e701
JG
5227 op0 = XEXP (x, 0);
5228 op1 = XEXP (x, 1);
43e9d192 5229
a8eecd00 5230cost_plus:
4745e701
JG
5231 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5232 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5233 {
5234 /* CSINC. */
5235 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5236 *cost += rtx_cost (op1, PLUS, 1, speed);
5237 return true;
5238 }
43e9d192 5239
4745e701
JG
5240 if (GET_MODE_CLASS (mode) == MODE_INT
5241 && CONST_INT_P (op1)
5242 && aarch64_uimm12_shift (INTVAL (op1)))
5243 {
5244 *cost += rtx_cost (op0, PLUS, 0, speed);
43e9d192 5245
4745e701
JG
5246 if (speed)
5247 /* ADD (immediate). */
5248 *cost += extra_cost->alu.arith;
5249 return true;
5250 }
5251
7cc2145f
JG
5252 /* Look for ADD (extended register). */
5253 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5254 {
5255 if (speed)
5256 *cost += extra_cost->alu.arith_shift;
5257
5258 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5259 (enum rtx_code) GET_CODE (op0),
5260 0, speed);
5261 return true;
5262 }
5263
4745e701
JG
5264 /* Strip any extend, leave shifts behind as we will
5265 cost them through mult_cost. */
5266 new_op0 = aarch64_strip_extend (op0);
5267
5268 if (GET_CODE (new_op0) == MULT
5269 || GET_CODE (new_op0) == ASHIFT)
5270 {
5271 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5272 speed);
5273 *cost += rtx_cost (op1, PLUS, 1, speed);
5274 return true;
5275 }
5276
5277 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5278 + rtx_cost (op1, PLUS, 1, speed));
5279
5280 if (speed)
5281 {
5282 if (GET_MODE_CLASS (mode) == MODE_INT)
5283 /* ADD. */
5284 *cost += extra_cost->alu.arith;
5285 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5286 /* FADD. */
5287 *cost += extra_cost->fp[mode == DFmode].addsub;
5288 }
5289 return true;
5290 }
43e9d192 5291
18b42b2a
KT
5292 case BSWAP:
5293 *cost = COSTS_N_INSNS (1);
5294
5295 if (speed)
5296 *cost += extra_cost->alu.rev;
5297
5298 return false;
5299
43e9d192 5300 case IOR:
f7d5cf8d
KT
5301 if (aarch_rev16_p (x))
5302 {
5303 *cost = COSTS_N_INSNS (1);
5304
5305 if (speed)
5306 *cost += extra_cost->alu.rev;
5307
5308 return true;
5309 }
5310 /* Fall through. */
43e9d192
IB
5311 case XOR:
5312 case AND:
5313 cost_logic:
5314 op0 = XEXP (x, 0);
5315 op1 = XEXP (x, 1);
5316
268c3b47
JG
5317 if (code == AND
5318 && GET_CODE (op0) == MULT
5319 && CONST_INT_P (XEXP (op0, 1))
5320 && CONST_INT_P (op1)
5321 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5322 INTVAL (op1)) != 0)
5323 {
5324 /* This is a UBFM/SBFM. */
5325 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5326 if (speed)
5327 *cost += extra_cost->alu.bfx;
5328 return true;
5329 }
5330
43e9d192
IB
5331 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5332 {
268c3b47
JG
5333 /* We possibly get the immediate for free, this is not
5334 modelled. */
43e9d192
IB
5335 if (CONST_INT_P (op1)
5336 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5337 {
268c3b47
JG
5338 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5339
5340 if (speed)
5341 *cost += extra_cost->alu.logical;
5342
5343 return true;
43e9d192
IB
5344 }
5345 else
5346 {
268c3b47
JG
5347 rtx new_op0 = op0;
5348
5349 /* Handle ORN, EON, or BIC. */
43e9d192
IB
5350 if (GET_CODE (op0) == NOT)
5351 op0 = XEXP (op0, 0);
268c3b47
JG
5352
5353 new_op0 = aarch64_strip_shift (op0);
5354
5355 /* If we had a shift on op0 then this is a logical-shift-
5356 by-register/immediate operation. Otherwise, this is just
5357 a logical operation. */
5358 if (speed)
5359 {
5360 if (new_op0 != op0)
5361 {
5362 /* Shift by immediate. */
5363 if (CONST_INT_P (XEXP (op0, 1)))
5364 *cost += extra_cost->alu.log_shift;
5365 else
5366 *cost += extra_cost->alu.log_shift_reg;
5367 }
5368 else
5369 *cost += extra_cost->alu.logical;
5370 }
5371
5372 /* In both cases we want to cost both operands. */
5373 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5374 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5375
5376 return true;
43e9d192 5377 }
43e9d192
IB
5378 }
5379 return false;
5380
268c3b47
JG
5381 case NOT:
5382 /* MVN. */
5383 if (speed)
5384 *cost += extra_cost->alu.logical;
5385
5386 /* The logical instruction could have the shifted register form,
5387 but the cost is the same if the shift is processed as a separate
5388 instruction, so we don't bother with it here. */
5389 return false;
5390
43e9d192 5391 case ZERO_EXTEND:
b1685e62
JG
5392
5393 op0 = XEXP (x, 0);
5394 /* If a value is written in SI mode, then zero extended to DI
5395 mode, the operation will in general be free as a write to
5396 a 'w' register implicitly zeroes the upper bits of an 'x'
5397 register. However, if this is
5398
5399 (set (reg) (zero_extend (reg)))
5400
5401 we must cost the explicit register move. */
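 /* Annotation (not in the original source): e.g. the result of
 "add w0, w1, w2" is already zero-extended into x0, so a zero_extend
 wrapped around such an operation costs nothing extra, whereas a bare
 register-to-register zero-extend becomes an explicit "mov w0, w1". */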
5402 if (mode == DImode
5403 && GET_MODE (op0) == SImode
5404 && outer == SET)
5405 {
5406 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5407
5408 if (!op_cost && speed)
5409 /* MOV. */
5410 *cost += extra_cost->alu.extend;
5411 else
5412 /* Free, the cost is that of the SI mode operation. */
5413 *cost = op_cost;
5414
5415 return true;
5416 }
5417 else if (MEM_P (XEXP (x, 0)))
43e9d192 5418 {
b1685e62
JG
5419 /* All loads can zero extend to any size for free. */
5420 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
43e9d192
IB
5421 return true;
5422 }
b1685e62
JG
5423
5424 /* UXTB/UXTH. */
5425 if (speed)
5426 *cost += extra_cost->alu.extend;
5427
43e9d192
IB
5428 return false;
5429
5430 case SIGN_EXTEND:
b1685e62 5431 if (MEM_P (XEXP (x, 0)))
43e9d192 5432 {
b1685e62
JG
5433 /* LDRSH. */
5434 if (speed)
5435 {
5436 rtx address = XEXP (XEXP (x, 0), 0);
5437 *cost += extra_cost->ldst.load_sign_extend;
5438
5439 *cost +=
5440 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5441 0, speed));
5442 }
43e9d192
IB
5443 return true;
5444 }
b1685e62
JG
5445
5446 if (speed)
5447 *cost += extra_cost->alu.extend;
43e9d192
IB
5448 return false;
5449
ba0cfa17
JG
5450 case ASHIFT:
5451 op0 = XEXP (x, 0);
5452 op1 = XEXP (x, 1);
5453
5454 if (CONST_INT_P (op1))
5455 {
 5456 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
 5457 aliases. */
5458 if (speed)
5459 *cost += extra_cost->alu.shift;
5460
5461 /* We can incorporate zero/sign extend for free. */
5462 if (GET_CODE (op0) == ZERO_EXTEND
5463 || GET_CODE (op0) == SIGN_EXTEND)
5464 op0 = XEXP (op0, 0);
5465
5466 *cost += rtx_cost (op0, ASHIFT, 0, speed);
5467 return true;
5468 }
5469 else
5470 {
5471 /* LSLV. */
5472 if (speed)
5473 *cost += extra_cost->alu.shift_reg;
5474
5475 return false; /* All arguments need to be in registers. */
5476 }
5477
43e9d192 5478 case ROTATE:
43e9d192
IB
5479 case ROTATERT:
5480 case LSHIFTRT:
43e9d192 5481 case ASHIFTRT:
ba0cfa17
JG
5482 op0 = XEXP (x, 0);
5483 op1 = XEXP (x, 1);
43e9d192 5484
ba0cfa17
JG
5485 if (CONST_INT_P (op1))
5486 {
5487 /* ASR (immediate) and friends. */
5488 if (speed)
5489 *cost += extra_cost->alu.shift;
43e9d192 5490
ba0cfa17
JG
5491 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5492 return true;
5493 }
5494 else
5495 {
5496
5497 /* ASR (register) and friends. */
5498 if (speed)
5499 *cost += extra_cost->alu.shift_reg;
5500
5501 return false; /* All arguments need to be in registers. */
5502 }
43e9d192 5503
909734be
JG
5504 case SYMBOL_REF:
5505
5506 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5507 {
5508 /* LDR. */
5509 if (speed)
5510 *cost += extra_cost->ldst.load;
5511 }
5512 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
5513 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
5514 {
5515 /* ADRP, followed by ADD. */
5516 *cost += COSTS_N_INSNS (1);
5517 if (speed)
5518 *cost += 2 * extra_cost->alu.arith;
5519 }
5520 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
5521 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
5522 {
5523 /* ADR. */
5524 if (speed)
5525 *cost += extra_cost->alu.arith;
5526 }
5527
5528 if (flag_pic)
5529 {
5530 /* One extra load instruction, after accessing the GOT. */
5531 *cost += COSTS_N_INSNS (1);
5532 if (speed)
5533 *cost += extra_cost->ldst.load;
5534 }
43e9d192
IB
5535 return true;
5536
909734be 5537 case HIGH:
43e9d192 5538 case LO_SUM:
909734be
JG
5539 /* ADRP/ADD (immediate). */
5540 if (speed)
5541 *cost += extra_cost->alu.arith;
43e9d192
IB
5542 return true;
5543
5544 case ZERO_EXTRACT:
5545 case SIGN_EXTRACT:
7cc2145f
JG
5546 /* UBFX/SBFX. */
5547 if (speed)
5548 *cost += extra_cost->alu.bfx;
5549
5550 /* We can trust that the immediates used will be correct (there
5551 are no by-register forms), so we need only cost op0. */
5552 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
43e9d192
IB
5553 return true;
5554
5555 case MULT:
4745e701
JG
5556 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5557 /* aarch64_rtx_mult_cost always handles recursion to its
5558 operands. */
5559 return true;
43e9d192
IB
5560
5561 case MOD:
5562 case UMOD:
43e9d192
IB
5563 if (speed)
5564 {
5565 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
73250c4c
KT
5566 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5567 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
43e9d192 5568 else if (GET_MODE (x) == DFmode)
73250c4c
KT
5569 *cost += (extra_cost->fp[1].mult
5570 + extra_cost->fp[1].div);
43e9d192 5571 else if (GET_MODE (x) == SFmode)
73250c4c
KT
5572 *cost += (extra_cost->fp[0].mult
5573 + extra_cost->fp[0].div);
43e9d192
IB
5574 }
5575 return false; /* All arguments need to be in registers. */
5576
5577 case DIV:
5578 case UDIV:
4105fe38 5579 case SQRT:
43e9d192
IB
5580 if (speed)
5581 {
4105fe38
JG
5582 if (GET_MODE_CLASS (mode) == MODE_INT)
5583 /* There is no integer SQRT, so only DIV and UDIV can get
5584 here. */
5585 *cost += extra_cost->mult[mode == DImode].idiv;
5586 else
5587 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
5588 }
5589 return false; /* All arguments need to be in registers. */
5590
a8eecd00
JG
5591 case IF_THEN_ELSE:
5592 op2 = XEXP (x, 2);
5593 op0 = XEXP (x, 0);
5594 op1 = XEXP (x, 1);
5595
5596 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5597 {
5598 /* Conditional branch. */
5599 if (GET_MODE_CLASS (GET_MODE (XEXP (op0, 0))) == MODE_CC)
5600 return true;
5601 else
5602 {
5603 if (GET_CODE (op0) == NE
5604 || GET_CODE (op0) == EQ)
5605 {
5606 rtx inner = XEXP (op0, 0);
5607 rtx comparator = XEXP (op0, 1);
5608
5609 if (comparator == const0_rtx)
5610 {
5611 /* TBZ/TBNZ/CBZ/CBNZ. */
5612 if (GET_CODE (inner) == ZERO_EXTRACT)
5613 /* TBZ/TBNZ. */
5614 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
5615 0, speed);
5616 else
5617 /* CBZ/CBNZ. */
5618 *cost += rtx_cost (inner, GET_CODE (op0), 0, speed);
5619
5620 return true;
5621 }
5622 }
5623 else if (GET_CODE (op0) == LT
5624 || GET_CODE (op0) == GE)
5625 {
5626 rtx comparator = XEXP (op0, 1);
5627
5628 /* TBZ/TBNZ. */
5629 if (comparator == const0_rtx)
5630 return true;
5631 }
5632 }
5633 }
5634 else if (GET_MODE_CLASS (GET_MODE (XEXP (op0, 0))) == MODE_CC)
5635 {
5636 /* It's a conditional operation based on the status flags,
5637 so it must be some flavor of CSEL. */
5638
5639 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
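 /* Annotation (not in the original source): e.g.
 (if_then_else (ne cc 0) (plus (reg x1) (const_int 1)) (reg x2))
 is a single CSINC (roughly "csinc x0, x2, x1, eq"), so we strip the
 PLUS/NEG/NOT wrapper and only cost the underlying operands. */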
5640 if (GET_CODE (op1) == NEG
5641 || GET_CODE (op1) == NOT
5642 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5643 op1 = XEXP (op1, 0);
5644
5645 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
5646 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
5647 return true;
5648 }
5649
5650 /* We don't know what this is, cost all operands. */
5651 return false;
5652
5653 case EQ:
5654 case NE:
5655 case GT:
5656 case GTU:
5657 case LT:
5658 case LTU:
5659 case GE:
5660 case GEU:
5661 case LE:
5662 case LEU:
5663
5664 return false; /* All arguments must be in registers. */
5665
b292109f
JG
5666 case FMA:
5667 op0 = XEXP (x, 0);
5668 op1 = XEXP (x, 1);
5669 op2 = XEXP (x, 2);
5670
5671 if (speed)
5672 *cost += extra_cost->fp[mode == DFmode].fma;
5673
5674 /* FMSUB, FNMADD, and FNMSUB are free. */
5675 if (GET_CODE (op0) == NEG)
5676 op0 = XEXP (op0, 0);
5677
5678 if (GET_CODE (op2) == NEG)
5679 op2 = XEXP (op2, 0);
5680
5681 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5682 and the by-element operand as operand 0. */
5683 if (GET_CODE (op1) == NEG)
5684 op1 = XEXP (op1, 0);
5685
5686 /* Catch vector-by-element operations. The by-element operand can
5687 either be (vec_duplicate (vec_select (x))) or just
5688 (vec_select (x)), depending on whether we are multiplying by
5689 a vector or a scalar.
5690
 5691 Canonicalization is not very good in these cases: FMA4 will put the
 5692 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
5693 if (GET_CODE (op0) == VEC_DUPLICATE)
5694 op0 = XEXP (op0, 0);
5695 else if (GET_CODE (op1) == VEC_DUPLICATE)
5696 op1 = XEXP (op1, 0);
5697
5698 if (GET_CODE (op0) == VEC_SELECT)
5699 op0 = XEXP (op0, 0);
5700 else if (GET_CODE (op1) == VEC_SELECT)
5701 op1 = XEXP (op1, 0);
5702
5703 /* If the remaining parameters are not registers,
5704 get the cost to put them into registers. */
5705 *cost += rtx_cost (op0, FMA, 0, speed);
5706 *cost += rtx_cost (op1, FMA, 1, speed);
5707 *cost += rtx_cost (op2, FMA, 2, speed);
5708 return true;
5709
5710 case FLOAT_EXTEND:
5711 if (speed)
5712 *cost += extra_cost->fp[mode == DFmode].widen;
5713 return false;
5714
5715 case FLOAT_TRUNCATE:
5716 if (speed)
5717 *cost += extra_cost->fp[mode == DFmode].narrow;
5718 return false;
5719
5720 case ABS:
5721 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5722 {
5723 /* FABS and FNEG are analogous. */
5724 if (speed)
5725 *cost += extra_cost->fp[mode == DFmode].neg;
5726 }
5727 else
5728 {
 5729 /* Integer ABS will either be split into
 5730 two arithmetic instructions, or will be an ABS
 5731 (scalar), which we don't model. */
5732 *cost = COSTS_N_INSNS (2);
5733 if (speed)
5734 *cost += 2 * extra_cost->alu.arith;
5735 }
5736 return false;
5737
5738 case SMAX:
5739 case SMIN:
5740 if (speed)
5741 {
5742 /* FMAXNM/FMINNM/FMAX/FMIN.
5743 TODO: This may not be accurate for all implementations, but
5744 we do not model this in the cost tables. */
5745 *cost += extra_cost->fp[mode == DFmode].addsub;
5746 }
5747 return false;
5748
fb620c4a
JG
5749 case TRUNCATE:
5750
5751 /* Decompose <su>muldi3_highpart. */
5752 if (/* (truncate:DI */
5753 mode == DImode
5754 /* (lshiftrt:TI */
5755 && GET_MODE (XEXP (x, 0)) == TImode
5756 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5757 /* (mult:TI */
5758 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5759 /* (ANY_EXTEND:TI (reg:DI))
5760 (ANY_EXTEND:TI (reg:DI))) */
5761 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5762 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
5763 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
5764 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
5765 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
5766 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
5767 /* (const_int 64) */
5768 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5769 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
5770 {
5771 /* UMULH/SMULH. */
5772 if (speed)
5773 *cost += extra_cost->mult[mode == DImode].extend;
5774 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
5775 MULT, 0, speed);
5776 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
5777 MULT, 1, speed);
5778 return true;
5779 }
5780
5781 /* Fall through. */
43e9d192 5782 default:
88d4fbcf
JG
5783 if (dump_file && (dump_flags & TDF_DETAILS))
5784 fprintf (dump_file,
5785 "\nFailed to cost RTX. Assuming default cost.\n");
5786
5787 return true;
43e9d192
IB
5788 }
5789 return false;
5790}
5791
0ee859b5
JG
 5792/* Wrapper around aarch64_rtx_costs; dumps the partial or total cost
5793 calculated for X. This cost is stored in *COST. Returns true
5794 if the total cost of X was calculated. */
5795static bool
5796aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5797 int param, int *cost, bool speed)
5798{
5799 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5800
5801 if (dump_file && (dump_flags & TDF_DETAILS))
5802 {
5803 print_rtl_single (dump_file, x);
5804 fprintf (dump_file, "\n%s cost: %d (%s)\n",
5805 speed ? "Hot" : "Cold",
5806 *cost, result ? "final" : "partial");
5807 }
5808
5809 return result;
5810}
5811
43e9d192 5812static int
8a3a7e67
RH
5813aarch64_register_move_cost (enum machine_mode mode,
5814 reg_class_t from_i, reg_class_t to_i)
43e9d192 5815{
8a3a7e67
RH
5816 enum reg_class from = (enum reg_class) from_i;
5817 enum reg_class to = (enum reg_class) to_i;
43e9d192
IB
5818 const struct cpu_regmove_cost *regmove_cost
5819 = aarch64_tune_params->regmove_cost;
5820
6ee70f81
AP
 5821  /* Moving between a GPR and the stack register costs the same as GP2GP.  */
5822 if ((from == GENERAL_REGS && to == STACK_REG)
5823 || (to == GENERAL_REGS && from == STACK_REG))
5824 return regmove_cost->GP2GP;
5825
5826 /* To/From the stack register, we move via the gprs. */
5827 if (to == STACK_REG || from == STACK_REG)
5828 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5829 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5830
43e9d192
IB
5831 if (from == GENERAL_REGS && to == GENERAL_REGS)
5832 return regmove_cost->GP2GP;
5833 else if (from == GENERAL_REGS)
5834 return regmove_cost->GP2FP;
5835 else if (to == GENERAL_REGS)
5836 return regmove_cost->FP2GP;
5837
5838 /* When AdvSIMD instructions are disabled it is not possible to move
5839 a 128-bit value directly between Q registers. This is handled in
5840 secondary reload. A general register is used as a scratch to move
5841 the upper DI value and the lower DI value is moved directly,
5842 hence the cost is the sum of three moves. */
8a3a7e67 5843 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
43e9d192
IB
5844 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5845
5846 return regmove_cost->FP2FP;
5847}
5848
5849static int
5850aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5851 reg_class_t rclass ATTRIBUTE_UNUSED,
5852 bool in ATTRIBUTE_UNUSED)
5853{
5854 return aarch64_tune_params->memmov_cost;
5855}
5856
d126a4ae
AP
5857/* Return the number of instructions that can be issued per cycle. */
5858static int
5859aarch64_sched_issue_rate (void)
5860{
5861 return aarch64_tune_params->issue_rate;
5862}
5863
8990e73a
TB
5864/* Vectorizer cost model target hooks. */
5865
5866/* Implement targetm.vectorize.builtin_vectorization_cost. */
5867static int
5868aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5869 tree vectype,
5870 int misalign ATTRIBUTE_UNUSED)
5871{
5872 unsigned elements;
5873
5874 switch (type_of_cost)
5875 {
5876 case scalar_stmt:
5877 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5878
5879 case scalar_load:
5880 return aarch64_tune_params->vec_costs->scalar_load_cost;
5881
5882 case scalar_store:
5883 return aarch64_tune_params->vec_costs->scalar_store_cost;
5884
5885 case vector_stmt:
5886 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5887
5888 case vector_load:
5889 return aarch64_tune_params->vec_costs->vec_align_load_cost;
5890
5891 case vector_store:
5892 return aarch64_tune_params->vec_costs->vec_store_cost;
5893
5894 case vec_to_scalar:
5895 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
5896
5897 case scalar_to_vec:
5898 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
5899
5900 case unaligned_load:
5901 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
5902
5903 case unaligned_store:
5904 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
5905
5906 case cond_branch_taken:
5907 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
5908
5909 case cond_branch_not_taken:
5910 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
5911
5912 case vec_perm:
5913 case vec_promote_demote:
5914 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5915
5916 case vec_construct:
5917 elements = TYPE_VECTOR_SUBPARTS (vectype);
5918 return elements / 2 + 1;
5919
5920 default:
5921 gcc_unreachable ();
5922 }
5923}
5924
5925/* Implement targetm.vectorize.add_stmt_cost. */
5926static unsigned
5927aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5928 struct _stmt_vec_info *stmt_info, int misalign,
5929 enum vect_cost_model_location where)
5930{
5931 unsigned *cost = (unsigned *) data;
5932 unsigned retval = 0;
5933
5934 if (flag_vect_cost_model)
5935 {
5936 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5937 int stmt_cost =
5938 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
5939
5940 /* Statements in an inner loop relative to the loop being
5941 vectorized are weighted more heavily. The value here is
5942 a function (linear for now) of the loop nest level. */
5943 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5944 {
5945 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5946 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
5947 unsigned nest_level = loop_depth (loop);
5948
5949 count *= nest_level;
5950 }
5951
5952 retval = (unsigned) (count * stmt_cost);
5953 cost[where] += retval;
5954 }
5955
5956 return retval;
5957}
5958
43e9d192
IB
5959static void initialize_aarch64_code_model (void);
5960
5961/* Parse the architecture extension string. */
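/* For example, given "-march=armv8-a+crc+nocrypto" this function is handed
   the substring "+crc+nocrypto": the "+crc" token sets the CRC feature bits
   in aarch64_isa_flags, while the "+nocrypto" token clears the crypto bits.  */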
5962
5963static void
5964aarch64_parse_extension (char *str)
5965{
5966 /* The extension string is parsed left to right. */
5967 const struct aarch64_option_extension *opt = NULL;
5968
5969 /* Flag to say whether we are adding or removing an extension. */
5970 int adding_ext = -1;
5971
5972 while (str != NULL && *str != 0)
5973 {
5974 char *ext;
5975 size_t len;
5976
5977 str++;
5978 ext = strchr (str, '+');
5979
5980 if (ext != NULL)
5981 len = ext - str;
5982 else
5983 len = strlen (str);
5984
5985 if (len >= 2 && strncmp (str, "no", 2) == 0)
5986 {
5987 adding_ext = 0;
5988 len -= 2;
5989 str += 2;
5990 }
5991 else if (len > 0)
5992 adding_ext = 1;
5993
5994 if (len == 0)
5995 {
5996 error ("missing feature modifier after %qs", "+no");
5997 return;
5998 }
5999
6000 /* Scan over the extensions table trying to find an exact match. */
6001 for (opt = all_extensions; opt->name != NULL; opt++)
6002 {
6003 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6004 {
6005 /* Add or remove the extension. */
6006 if (adding_ext)
6007 aarch64_isa_flags |= opt->flags_on;
6008 else
6009 aarch64_isa_flags &= ~(opt->flags_off);
6010 break;
6011 }
6012 }
6013
6014 if (opt->name == NULL)
6015 {
6016 /* Extension not found in list. */
6017 error ("unknown feature modifier %qs", str);
6018 return;
6019 }
6020
6021 str = ext;
6022 };
6023
6024 return;
6025}
6026
6027/* Parse the ARCH string. */
6028
6029static void
6030aarch64_parse_arch (void)
6031{
6032 char *ext;
6033 const struct processor *arch;
6034 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6035 size_t len;
6036
6037 strcpy (str, aarch64_arch_string);
6038
6039 ext = strchr (str, '+');
6040
6041 if (ext != NULL)
6042 len = ext - str;
6043 else
6044 len = strlen (str);
6045
6046 if (len == 0)
6047 {
6048 error ("missing arch name in -march=%qs", str);
6049 return;
6050 }
6051
6052 /* Loop through the list of supported ARCHs to find a match. */
6053 for (arch = all_architectures; arch->name != NULL; arch++)
6054 {
6055 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6056 {
6057 selected_arch = arch;
6058 aarch64_isa_flags = selected_arch->flags;
ffee7aa9
JG
6059
6060 if (!selected_cpu)
6061 selected_cpu = &all_cores[selected_arch->core];
43e9d192
IB
6062
6063 if (ext != NULL)
6064 {
6065 /* ARCH string contains at least one extension. */
6066 aarch64_parse_extension (ext);
6067 }
6068
ffee7aa9
JG
6069 if (strcmp (selected_arch->arch, selected_cpu->arch))
6070 {
6071 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6072 selected_cpu->name, selected_arch->name);
6073 }
6074
43e9d192
IB
6075 return;
6076 }
6077 }
6078
6079 /* ARCH name not found in list. */
6080 error ("unknown value %qs for -march", str);
6081 return;
6082}
6083
6084/* Parse the CPU string. */
6085
6086static void
6087aarch64_parse_cpu (void)
6088{
6089 char *ext;
6090 const struct processor *cpu;
6091 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6092 size_t len;
6093
6094 strcpy (str, aarch64_cpu_string);
6095
6096 ext = strchr (str, '+');
6097
6098 if (ext != NULL)
6099 len = ext - str;
6100 else
6101 len = strlen (str);
6102
6103 if (len == 0)
6104 {
6105 error ("missing cpu name in -mcpu=%qs", str);
6106 return;
6107 }
6108
6109 /* Loop through the list of supported CPUs to find a match. */
6110 for (cpu = all_cores; cpu->name != NULL; cpu++)
6111 {
6112 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6113 {
6114 selected_cpu = cpu;
192ed1dd 6115 selected_tune = cpu;
43e9d192
IB
6116 aarch64_isa_flags = selected_cpu->flags;
6117
6118 if (ext != NULL)
6119 {
6120 /* CPU string contains at least one extension. */
6121 aarch64_parse_extension (ext);
6122 }
6123
6124 return;
6125 }
6126 }
6127
6128 /* CPU name not found in list. */
6129 error ("unknown value %qs for -mcpu", str);
6130 return;
6131}
6132
6133/* Parse the TUNE string. */
6134
6135static void
6136aarch64_parse_tune (void)
6137{
6138 const struct processor *cpu;
6139 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6140 strcpy (str, aarch64_tune_string);
6141
6142 /* Loop through the list of supported CPUs to find a match. */
6143 for (cpu = all_cores; cpu->name != NULL; cpu++)
6144 {
6145 if (strcmp (cpu->name, str) == 0)
6146 {
6147 selected_tune = cpu;
6148 return;
6149 }
6150 }
6151
6152 /* CPU name not found in list. */
6153 error ("unknown value %qs for -mtune", str);
6154 return;
6155}
6156
6157
6158/* Implement TARGET_OPTION_OVERRIDE. */
6159
6160static void
6161aarch64_override_options (void)
6162{
ffee7aa9
JG
6163 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6164 If either of -march or -mtune is given, they override their
6165 respective component of -mcpu.
43e9d192 6166
ffee7aa9
JG
 6167     So, first parse AARCH64_CPU_STRING, then the others; be careful
 6168     with -march because, if -mcpu is not present on the command line,
 6169     -march must set a sensible default CPU.  */
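  /* For example, "-mcpu=cortex-a57 -mtune=cortex-a53" selects the
     Cortex-A57's architecture and ISA flags but tunes code generation for
     the Cortex-A53, since -mtune overrides the tuning component of -mcpu.  */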
6170 if (aarch64_cpu_string)
43e9d192 6171 {
ffee7aa9 6172 aarch64_parse_cpu ();
43e9d192
IB
6173 }
6174
ffee7aa9 6175 if (aarch64_arch_string)
43e9d192 6176 {
ffee7aa9 6177 aarch64_parse_arch ();
43e9d192
IB
6178 }
6179
6180 if (aarch64_tune_string)
6181 {
6182 aarch64_parse_tune ();
6183 }
6184
63892fa2
KV
6185#ifndef HAVE_AS_MABI_OPTION
6186 /* The compiler may have been configured with 2.23.* binutils, which does
6187 not have support for ILP32. */
6188 if (TARGET_ILP32)
6189 error ("Assembler does not support -mabi=ilp32");
6190#endif
6191
43e9d192
IB
6192 initialize_aarch64_code_model ();
6193
6194 aarch64_build_bitmask_table ();
6195
6196 /* This target defaults to strict volatile bitfields. */
6197 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6198 flag_strict_volatile_bitfields = 1;
6199
6200 /* If the user did not specify a processor, choose the default
6201 one for them. This will be the CPU set during configuration using
a3cd0246 6202 --with-cpu, otherwise it is "generic". */
43e9d192
IB
6203 if (!selected_cpu)
6204 {
6205 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6206 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6207 }
6208
6209 gcc_assert (selected_cpu);
6210
6211 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
6212 if (!selected_tune)
6213 selected_tune = &all_cores[selected_cpu->core];
6214
6215 aarch64_tune_flags = selected_tune->flags;
6216 aarch64_tune = selected_tune->core;
6217 aarch64_tune_params = selected_tune->tune;
6218
6219 aarch64_override_options_after_change ();
6220}
6221
6222/* Implement targetm.override_options_after_change. */
6223
6224static void
6225aarch64_override_options_after_change (void)
6226{
0b7f8166
MS
6227 if (flag_omit_frame_pointer)
6228 flag_omit_leaf_frame_pointer = false;
6229 else if (flag_omit_leaf_frame_pointer)
6230 flag_omit_frame_pointer = true;
43e9d192
IB
6231}
6232
6233static struct machine_function *
6234aarch64_init_machine_status (void)
6235{
6236 struct machine_function *machine;
766090c2 6237 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
6238 return machine;
6239}
6240
6241void
6242aarch64_init_expanders (void)
6243{
6244 init_machine_status = aarch64_init_machine_status;
6245}
6246
 6247/* Initialize the code model, adjusting it for PIC when -fpic/-fPIC is given.  */
6248static void
6249initialize_aarch64_code_model (void)
6250{
6251 if (flag_pic)
6252 {
6253 switch (aarch64_cmodel_var)
6254 {
6255 case AARCH64_CMODEL_TINY:
6256 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6257 break;
6258 case AARCH64_CMODEL_SMALL:
6259 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6260 break;
6261 case AARCH64_CMODEL_LARGE:
6262 sorry ("code model %qs with -f%s", "large",
6263 flag_pic > 1 ? "PIC" : "pic");
6264 default:
6265 gcc_unreachable ();
6266 }
6267 }
6268 else
6269 aarch64_cmodel = aarch64_cmodel_var;
6270}
6271
6272/* Return true if SYMBOL_REF X binds locally. */
6273
6274static bool
6275aarch64_symbol_binds_local_p (const_rtx x)
6276{
6277 return (SYMBOL_REF_DECL (x)
6278 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6279 : SYMBOL_REF_LOCAL_P (x));
6280}
6281
 6282/* Return true if SYMBOL_REF X is thread-local.  */
6283static bool
6284aarch64_tls_symbol_p (rtx x)
6285{
6286 if (! TARGET_HAVE_TLS)
6287 return false;
6288
6289 if (GET_CODE (x) != SYMBOL_REF)
6290 return false;
6291
6292 return SYMBOL_REF_TLS_MODEL (x) != 0;
6293}
6294
6295/* Classify a TLS symbol into one of the TLS kinds. */
6296enum aarch64_symbol_type
6297aarch64_classify_tls_symbol (rtx x)
6298{
6299 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6300
6301 switch (tls_kind)
6302 {
6303 case TLS_MODEL_GLOBAL_DYNAMIC:
6304 case TLS_MODEL_LOCAL_DYNAMIC:
6305 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6306
6307 case TLS_MODEL_INITIAL_EXEC:
6308 return SYMBOL_SMALL_GOTTPREL;
6309
6310 case TLS_MODEL_LOCAL_EXEC:
6311 return SYMBOL_SMALL_TPREL;
6312
6313 case TLS_MODEL_EMULATED:
6314 case TLS_MODEL_NONE:
6315 return SYMBOL_FORCE_TO_MEM;
6316
6317 default:
6318 gcc_unreachable ();
6319 }
6320}
6321
6322/* Return the method that should be used to access SYMBOL_REF or
6323 LABEL_REF X in context CONTEXT. */
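/* Roughly speaking, SYMBOL_SMALL_ABSOLUTE is materialised with an ADRP/ADD
   pair, SYMBOL_SMALL_GOT with ADRP plus a load from the GOT,
   SYMBOL_TINY_ABSOLUTE with a single ADR, and SYMBOL_FORCE_TO_MEM by
   placing the address in the literal pool and loading it from there.  */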
17f4d4bf 6324
43e9d192
IB
6325enum aarch64_symbol_type
6326aarch64_classify_symbol (rtx x,
6327 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6328{
6329 if (GET_CODE (x) == LABEL_REF)
6330 {
6331 switch (aarch64_cmodel)
6332 {
6333 case AARCH64_CMODEL_LARGE:
6334 return SYMBOL_FORCE_TO_MEM;
6335
6336 case AARCH64_CMODEL_TINY_PIC:
6337 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
6338 return SYMBOL_TINY_ABSOLUTE;
6339
43e9d192
IB
6340 case AARCH64_CMODEL_SMALL_PIC:
6341 case AARCH64_CMODEL_SMALL:
6342 return SYMBOL_SMALL_ABSOLUTE;
6343
6344 default:
6345 gcc_unreachable ();
6346 }
6347 }
6348
17f4d4bf 6349 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 6350 {
4a985a37
MS
6351 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6352 return SYMBOL_FORCE_TO_MEM;
43e9d192
IB
6353
6354 if (aarch64_tls_symbol_p (x))
6355 return aarch64_classify_tls_symbol (x);
6356
17f4d4bf
CSS
6357 switch (aarch64_cmodel)
6358 {
6359 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
6360 if (SYMBOL_REF_WEAK (x))
6361 return SYMBOL_FORCE_TO_MEM;
6362 return SYMBOL_TINY_ABSOLUTE;
6363
17f4d4bf
CSS
6364 case AARCH64_CMODEL_SMALL:
6365 if (SYMBOL_REF_WEAK (x))
6366 return SYMBOL_FORCE_TO_MEM;
6367 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 6368
17f4d4bf 6369 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 6370 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 6371 return SYMBOL_TINY_GOT;
38e6c9a6
MS
6372 return SYMBOL_TINY_ABSOLUTE;
6373
17f4d4bf
CSS
6374 case AARCH64_CMODEL_SMALL_PIC:
6375 if (!aarch64_symbol_binds_local_p (x))
6376 return SYMBOL_SMALL_GOT;
6377 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 6378
17f4d4bf
CSS
6379 default:
6380 gcc_unreachable ();
6381 }
43e9d192 6382 }
17f4d4bf 6383
43e9d192
IB
6384 /* By default push everything into the constant pool. */
6385 return SYMBOL_FORCE_TO_MEM;
6386}
6387
43e9d192
IB
6388bool
6389aarch64_constant_address_p (rtx x)
6390{
6391 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6392}
6393
6394bool
6395aarch64_legitimate_pic_operand_p (rtx x)
6396{
6397 if (GET_CODE (x) == SYMBOL_REF
6398 || (GET_CODE (x) == CONST
6399 && GET_CODE (XEXP (x, 0)) == PLUS
6400 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6401 return false;
6402
6403 return true;
6404}
6405
3520f7cc
JG
 6406/* Return true if X holds either a quarter-precision floating-point
 6407   constant or the floating-point constant +0.0.  */
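/* Here "quarter-precision" refers to the 8-bit FMOV immediate encoding:
   values of the form (-1)^s * (16 + m) / 16 * 2^e with 0 <= m <= 15 and
   -3 <= e <= 4, e.g. 0.25, 1.0, 2.5 or 31.0.  */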
6408static bool
6409aarch64_valid_floating_const (enum machine_mode mode, rtx x)
6410{
6411 if (!CONST_DOUBLE_P (x))
6412 return false;
6413
 6414  /* TODO: We could handle moving 0.0 to a TFmode register,
 6415     but first we would like to refactor the movtf_aarch64
 6416     pattern to be more amenable to splitting moves properly
 6417     and to gating correctly on TARGET_SIMD.  For now, reject all
 6418     constants that are not destined for SFmode or DFmode registers. */
6419 if (!(mode == SFmode || mode == DFmode))
6420 return false;
6421
6422 if (aarch64_float_const_zero_rtx_p (x))
6423 return true;
6424 return aarch64_float_const_representable_p (x);
6425}
6426
43e9d192
IB
6427static bool
6428aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6429{
6430 /* Do not allow vector struct mode constants. We could support
6431 0 and -1 easily, but they need support in aarch64-simd.md. */
6432 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6433 return false;
6434
6435 /* This could probably go away because
6436 we now decompose CONST_INTs according to expand_mov_immediate. */
6437 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 6438 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
6439 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6440 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
6441
6442 if (GET_CODE (x) == HIGH
6443 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6444 return true;
6445
6446 return aarch64_constant_address_p (x);
6447}
6448
a5bc806c 6449rtx
43e9d192
IB
6450aarch64_load_tp (rtx target)
6451{
6452 if (!target
6453 || GET_MODE (target) != Pmode
6454 || !register_operand (target, Pmode))
6455 target = gen_reg_rtx (Pmode);
6456
6457 /* Can return in any reg. */
6458 emit_insn (gen_aarch64_load_tp_hard (target));
6459 return target;
6460}
6461
43e9d192
IB
6462/* On AAPCS systems, this is the "struct __va_list". */
6463static GTY(()) tree va_list_type;
6464
6465/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6466 Return the type to use as __builtin_va_list.
6467
6468 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6469
6470 struct __va_list
6471 {
6472 void *__stack;
6473 void *__gr_top;
6474 void *__vr_top;
6475 int __gr_offs;
6476 int __vr_offs;
6477 }; */
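/* For example, in a function declared as "int f (int fmt, ...)" the
   registers x1-x7 are spilled to the GR save area and (when FP/SIMD is
   enabled) q0-q7 to the VR save area on entry.  __gr_offs and __vr_offs
   start out negative; va_arg walks them towards zero and falls back to
   __stack once the corresponding save area is exhausted.  */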
6478
6479static tree
6480aarch64_build_builtin_va_list (void)
6481{
6482 tree va_list_name;
6483 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6484
6485 /* Create the type. */
6486 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6487 /* Give it the required name. */
6488 va_list_name = build_decl (BUILTINS_LOCATION,
6489 TYPE_DECL,
6490 get_identifier ("__va_list"),
6491 va_list_type);
6492 DECL_ARTIFICIAL (va_list_name) = 1;
6493 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 6494 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
6495
6496 /* Create the fields. */
6497 f_stack = build_decl (BUILTINS_LOCATION,
6498 FIELD_DECL, get_identifier ("__stack"),
6499 ptr_type_node);
6500 f_grtop = build_decl (BUILTINS_LOCATION,
6501 FIELD_DECL, get_identifier ("__gr_top"),
6502 ptr_type_node);
6503 f_vrtop = build_decl (BUILTINS_LOCATION,
6504 FIELD_DECL, get_identifier ("__vr_top"),
6505 ptr_type_node);
6506 f_groff = build_decl (BUILTINS_LOCATION,
6507 FIELD_DECL, get_identifier ("__gr_offs"),
6508 integer_type_node);
6509 f_vroff = build_decl (BUILTINS_LOCATION,
6510 FIELD_DECL, get_identifier ("__vr_offs"),
6511 integer_type_node);
6512
6513 DECL_ARTIFICIAL (f_stack) = 1;
6514 DECL_ARTIFICIAL (f_grtop) = 1;
6515 DECL_ARTIFICIAL (f_vrtop) = 1;
6516 DECL_ARTIFICIAL (f_groff) = 1;
6517 DECL_ARTIFICIAL (f_vroff) = 1;
6518
6519 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6520 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6521 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6522 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6523 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6524
6525 TYPE_FIELDS (va_list_type) = f_stack;
6526 DECL_CHAIN (f_stack) = f_grtop;
6527 DECL_CHAIN (f_grtop) = f_vrtop;
6528 DECL_CHAIN (f_vrtop) = f_groff;
6529 DECL_CHAIN (f_groff) = f_vroff;
6530
6531 /* Compute its layout. */
6532 layout_type (va_list_type);
6533
6534 return va_list_type;
6535}
6536
6537/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6538static void
6539aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6540{
6541 const CUMULATIVE_ARGS *cum;
6542 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6543 tree stack, grtop, vrtop, groff, vroff;
6544 tree t;
6545 int gr_save_area_size;
6546 int vr_save_area_size;
6547 int vr_offset;
6548
6549 cum = &crtl->args.info;
6550 gr_save_area_size
6551 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6552 vr_save_area_size
6553 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6554
6555 if (TARGET_GENERAL_REGS_ONLY)
6556 {
6557 if (cum->aapcs_nvrn > 0)
6558 sorry ("%qs and floating point or vector arguments",
6559 "-mgeneral-regs-only");
6560 vr_save_area_size = 0;
6561 }
6562
6563 f_stack = TYPE_FIELDS (va_list_type_node);
6564 f_grtop = DECL_CHAIN (f_stack);
6565 f_vrtop = DECL_CHAIN (f_grtop);
6566 f_groff = DECL_CHAIN (f_vrtop);
6567 f_vroff = DECL_CHAIN (f_groff);
6568
6569 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6570 NULL_TREE);
6571 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6572 NULL_TREE);
6573 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6574 NULL_TREE);
6575 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6576 NULL_TREE);
6577 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6578 NULL_TREE);
6579
6580 /* Emit code to initialize STACK, which points to the next varargs stack
6581 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6582 by named arguments. STACK is 8-byte aligned. */
6583 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6584 if (cum->aapcs_stack_size > 0)
6585 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6586 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6587 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6588
6589 /* Emit code to initialize GRTOP, the top of the GR save area.
 6590     virtual_incoming_args_rtx should have been 16-byte aligned.  */
6591 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6592 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6593 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6594
6595 /* Emit code to initialize VRTOP, the top of the VR save area.
6596 This address is gr_save_area_bytes below GRTOP, rounded
6597 down to the next 16-byte boundary. */
6598 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6599 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6600 STACK_BOUNDARY / BITS_PER_UNIT);
6601
6602 if (vr_offset)
6603 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6604 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6605 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6606
6607 /* Emit code to initialize GROFF, the offset from GRTOP of the
6608 next GPR argument. */
6609 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6610 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6611 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6612
6613 /* Likewise emit code to initialize VROFF, the offset from FTOP
6614 of the next VR argument. */
6615 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6616 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6617 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6618}
6619
6620/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6621
6622static tree
6623aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6624 gimple_seq *post_p ATTRIBUTE_UNUSED)
6625{
6626 tree addr;
6627 bool indirect_p;
6628 bool is_ha; /* is HFA or HVA. */
6629 bool dw_align; /* double-word align. */
6630 enum machine_mode ag_mode = VOIDmode;
6631 int nregs;
6632 enum machine_mode mode;
6633
6634 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6635 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6636 HOST_WIDE_INT size, rsize, adjust, align;
6637 tree t, u, cond1, cond2;
6638
6639 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6640 if (indirect_p)
6641 type = build_pointer_type (type);
6642
6643 mode = TYPE_MODE (type);
6644
6645 f_stack = TYPE_FIELDS (va_list_type_node);
6646 f_grtop = DECL_CHAIN (f_stack);
6647 f_vrtop = DECL_CHAIN (f_grtop);
6648 f_groff = DECL_CHAIN (f_vrtop);
6649 f_vroff = DECL_CHAIN (f_groff);
6650
6651 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6652 f_stack, NULL_TREE);
6653 size = int_size_in_bytes (type);
6654 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6655
6656 dw_align = false;
6657 adjust = 0;
6658 if (aarch64_vfp_is_call_or_return_candidate (mode,
6659 type,
6660 &ag_mode,
6661 &nregs,
6662 &is_ha))
6663 {
6664 /* TYPE passed in fp/simd registers. */
6665 if (TARGET_GENERAL_REGS_ONLY)
6666 sorry ("%qs and floating point or vector arguments",
6667 "-mgeneral-regs-only");
6668
6669 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6670 unshare_expr (valist), f_vrtop, NULL_TREE);
6671 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6672 unshare_expr (valist), f_vroff, NULL_TREE);
6673
6674 rsize = nregs * UNITS_PER_VREG;
6675
6676 if (is_ha)
6677 {
6678 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6679 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6680 }
6681 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6682 && size < UNITS_PER_VREG)
6683 {
6684 adjust = UNITS_PER_VREG - size;
6685 }
6686 }
6687 else
6688 {
6689 /* TYPE passed in general registers. */
6690 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6691 unshare_expr (valist), f_grtop, NULL_TREE);
6692 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6693 unshare_expr (valist), f_groff, NULL_TREE);
6694 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6695 nregs = rsize / UNITS_PER_WORD;
6696
6697 if (align > 8)
6698 dw_align = true;
6699
6700 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6701 && size < UNITS_PER_WORD)
6702 {
6703 adjust = UNITS_PER_WORD - size;
6704 }
6705 }
6706
6707 /* Get a local temporary for the field value. */
6708 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6709
6710 /* Emit code to branch if off >= 0. */
6711 t = build2 (GE_EXPR, boolean_type_node, off,
6712 build_int_cst (TREE_TYPE (off), 0));
6713 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6714
6715 if (dw_align)
6716 {
6717 /* Emit: offs = (offs + 15) & -16. */
6718 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6719 build_int_cst (TREE_TYPE (off), 15));
6720 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6721 build_int_cst (TREE_TYPE (off), -16));
6722 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6723 }
6724 else
6725 roundup = NULL;
6726
6727 /* Update ap.__[g|v]r_offs */
6728 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6729 build_int_cst (TREE_TYPE (off), rsize));
6730 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6731
6732 /* String up. */
6733 if (roundup)
6734 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6735
6736 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6737 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6738 build_int_cst (TREE_TYPE (f_off), 0));
6739 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6740
6741 /* String up: make sure the assignment happens before the use. */
6742 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6743 COND_EXPR_ELSE (cond1) = t;
6744
6745 /* Prepare the trees handling the argument that is passed on the stack;
 6746     the top-level node will be stored in ON_STACK.  */
6747 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6748 if (align > 8)
6749 {
6750 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6751 t = fold_convert (intDI_type_node, arg);
6752 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6753 build_int_cst (TREE_TYPE (t), 15));
6754 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6755 build_int_cst (TREE_TYPE (t), -16));
6756 t = fold_convert (TREE_TYPE (arg), t);
6757 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6758 }
6759 else
6760 roundup = NULL;
6761 /* Advance ap.__stack */
6762 t = fold_convert (intDI_type_node, arg);
6763 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6764 build_int_cst (TREE_TYPE (t), size + 7));
6765 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6766 build_int_cst (TREE_TYPE (t), -8));
6767 t = fold_convert (TREE_TYPE (arg), t);
6768 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6769 /* String up roundup and advance. */
6770 if (roundup)
6771 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6772 /* String up with arg */
6773 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6774 /* Big-endianness related address adjustment. */
6775 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6776 && size < UNITS_PER_WORD)
6777 {
6778 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6779 size_int (UNITS_PER_WORD - size));
6780 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6781 }
6782
6783 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6784 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6785
6786 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6787 t = off;
6788 if (adjust)
6789 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6790 build_int_cst (TREE_TYPE (off), adjust));
6791
6792 t = fold_convert (sizetype, t);
6793 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6794
6795 if (is_ha)
6796 {
6797 /* type ha; // treat as "struct {ftype field[n];}"
6798 ... [computing offs]
6799 for (i = 0; i <nregs; ++i, offs += 16)
6800 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6801 return ha; */
6802 int i;
6803 tree tmp_ha, field_t, field_ptr_t;
6804
6805 /* Declare a local variable. */
6806 tmp_ha = create_tmp_var_raw (type, "ha");
6807 gimple_add_tmp_var (tmp_ha);
6808
6809 /* Establish the base type. */
6810 switch (ag_mode)
6811 {
6812 case SFmode:
6813 field_t = float_type_node;
6814 field_ptr_t = float_ptr_type_node;
6815 break;
6816 case DFmode:
6817 field_t = double_type_node;
6818 field_ptr_t = double_ptr_type_node;
6819 break;
6820 case TFmode:
6821 field_t = long_double_type_node;
6822 field_ptr_t = long_double_ptr_type_node;
6823 break;
 6824/* Half-precision and quad-precision floats are not fully supported yet.
 6825   Enable the following code once that support is complete; we still need
 6826   to find the correct type node for __fp16 *.  */
6827#if 0
6828 case HFmode:
6829 field_t = float_type_node;
6830 field_ptr_t = float_ptr_type_node;
6831 break;
6832#endif
6833 case V2SImode:
6834 case V4SImode:
6835 {
6836 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6837 field_t = build_vector_type_for_mode (innertype, ag_mode);
6838 field_ptr_t = build_pointer_type (field_t);
6839 }
6840 break;
6841 default:
6842 gcc_assert (0);
6843 }
6844
 6845      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area)  */
6846 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6847 addr = t;
6848 t = fold_convert (field_ptr_t, addr);
6849 t = build2 (MODIFY_EXPR, field_t,
6850 build1 (INDIRECT_REF, field_t, tmp_ha),
6851 build1 (INDIRECT_REF, field_t, t));
6852
6853 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6854 for (i = 1; i < nregs; ++i)
6855 {
6856 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6857 u = fold_convert (field_ptr_t, addr);
6858 u = build2 (MODIFY_EXPR, field_t,
6859 build2 (MEM_REF, field_t, tmp_ha,
6860 build_int_cst (field_ptr_t,
6861 (i *
6862 int_size_in_bytes (field_t)))),
6863 build1 (INDIRECT_REF, field_t, u));
6864 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6865 }
6866
6867 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6868 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6869 }
6870
6871 COND_EXPR_ELSE (cond2) = t;
6872 addr = fold_convert (build_pointer_type (type), cond1);
6873 addr = build_va_arg_indirect_ref (addr);
6874
6875 if (indirect_p)
6876 addr = build_va_arg_indirect_ref (addr);
6877
6878 return addr;
6879}
6880
6881/* Implement TARGET_SETUP_INCOMING_VARARGS. */
6882
6883static void
6884aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6885 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6886 int no_rtl)
6887{
6888 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6889 CUMULATIVE_ARGS local_cum;
6890 int gr_saved, vr_saved;
6891
6892 /* The caller has advanced CUM up to, but not beyond, the last named
6893 argument. Advance a local copy of CUM past the last "real" named
6894 argument, to find out how many registers are left over. */
6895 local_cum = *cum;
6896 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
6897
 6898  /* Find out how many registers we need to save.  */
6899 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
6900 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
6901
6902 if (TARGET_GENERAL_REGS_ONLY)
6903 {
6904 if (local_cum.aapcs_nvrn > 0)
6905 sorry ("%qs and floating point or vector arguments",
6906 "-mgeneral-regs-only");
6907 vr_saved = 0;
6908 }
6909
6910 if (!no_rtl)
6911 {
6912 if (gr_saved > 0)
6913 {
6914 rtx ptr, mem;
6915
6916 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
6917 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6918 - gr_saved * UNITS_PER_WORD);
6919 mem = gen_frame_mem (BLKmode, ptr);
6920 set_mem_alias_set (mem, get_varargs_alias_set ());
6921
6922 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
6923 mem, gr_saved);
6924 }
6925 if (vr_saved > 0)
6926 {
6927 /* We can't use move_block_from_reg, because it will use
6928 the wrong mode, storing D regs only. */
6929 enum machine_mode mode = TImode;
6930 int off, i;
6931
6932 /* Set OFF to the offset from virtual_incoming_args_rtx of
6933 the first vector register. The VR save area lies below
6934 the GR one, and is aligned to 16 bytes. */
6935 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6936 STACK_BOUNDARY / BITS_PER_UNIT);
6937 off -= vr_saved * UNITS_PER_VREG;
6938
6939 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
6940 {
6941 rtx ptr, mem;
6942
6943 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
6944 mem = gen_frame_mem (mode, ptr);
6945 set_mem_alias_set (mem, get_varargs_alias_set ());
6946 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6947 off += UNITS_PER_VREG;
6948 }
6949 }
6950 }
6951
6952 /* We don't save the size into *PRETEND_SIZE because we want to avoid
6953 any complication of having crtl->args.pretend_args_size changed. */
6954 cfun->machine->saved_varargs_size
6955 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6956 STACK_BOUNDARY / BITS_PER_UNIT)
6957 + vr_saved * UNITS_PER_VREG);
6958}
6959
6960static void
6961aarch64_conditional_register_usage (void)
6962{
6963 int i;
6964 if (!TARGET_FLOAT)
6965 {
6966 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6967 {
6968 fixed_regs[i] = 1;
6969 call_used_regs[i] = 1;
6970 }
6971 }
6972}
6973
6974/* Walk down the type tree of TYPE counting consecutive base elements.
6975 If *MODEP is VOIDmode, then set it to the first valid floating point
6976 type. If a non-floating point type is found, or if a floating point
 6977   type that doesn't match a non-VOIDmode *MODEP is found, then return -1;
6978 otherwise return the count in the sub-tree. */
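/* For example, "struct { double x; double y; }" yields 2 with *MODEP set to
   DFmode, whereas "struct { float f; double d; }" yields -1 because the
   element types differ.  */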
6979static int
6980aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6981{
6982 enum machine_mode mode;
6983 HOST_WIDE_INT size;
6984
6985 switch (TREE_CODE (type))
6986 {
6987 case REAL_TYPE:
6988 mode = TYPE_MODE (type);
6989 if (mode != DFmode && mode != SFmode && mode != TFmode)
6990 return -1;
6991
6992 if (*modep == VOIDmode)
6993 *modep = mode;
6994
6995 if (*modep == mode)
6996 return 1;
6997
6998 break;
6999
7000 case COMPLEX_TYPE:
7001 mode = TYPE_MODE (TREE_TYPE (type));
7002 if (mode != DFmode && mode != SFmode && mode != TFmode)
7003 return -1;
7004
7005 if (*modep == VOIDmode)
7006 *modep = mode;
7007
7008 if (*modep == mode)
7009 return 2;
7010
7011 break;
7012
7013 case VECTOR_TYPE:
7014 /* Use V2SImode and V4SImode as representatives of all 64-bit
7015 and 128-bit vector types. */
7016 size = int_size_in_bytes (type);
7017 switch (size)
7018 {
7019 case 8:
7020 mode = V2SImode;
7021 break;
7022 case 16:
7023 mode = V4SImode;
7024 break;
7025 default:
7026 return -1;
7027 }
7028
7029 if (*modep == VOIDmode)
7030 *modep = mode;
7031
7032 /* Vector modes are considered to be opaque: two vectors are
7033 equivalent for the purposes of being homogeneous aggregates
7034 if they are the same size. */
7035 if (*modep == mode)
7036 return 1;
7037
7038 break;
7039
7040 case ARRAY_TYPE:
7041 {
7042 int count;
7043 tree index = TYPE_DOMAIN (type);
7044
807e902e
KZ
7045 /* Can't handle incomplete types nor sizes that are not
7046 fixed. */
7047 if (!COMPLETE_TYPE_P (type)
7048 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7049 return -1;
7050
7051 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7052 if (count == -1
7053 || !index
7054 || !TYPE_MAX_VALUE (index)
cc269bb6 7055 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 7056 || !TYPE_MIN_VALUE (index)
cc269bb6 7057 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
7058 || count < 0)
7059 return -1;
7060
ae7e9ddd
RS
7061 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7062 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
7063
7064 /* There must be no padding. */
807e902e 7065 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7066 return -1;
7067
7068 return count;
7069 }
7070
7071 case RECORD_TYPE:
7072 {
7073 int count = 0;
7074 int sub_count;
7075 tree field;
7076
807e902e
KZ
7077 /* Can't handle incomplete types nor sizes that are not
7078 fixed. */
7079 if (!COMPLETE_TYPE_P (type)
7080 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7081 return -1;
7082
7083 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7084 {
7085 if (TREE_CODE (field) != FIELD_DECL)
7086 continue;
7087
7088 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7089 if (sub_count < 0)
7090 return -1;
7091 count += sub_count;
7092 }
7093
7094 /* There must be no padding. */
807e902e 7095 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7096 return -1;
7097
7098 return count;
7099 }
7100
7101 case UNION_TYPE:
7102 case QUAL_UNION_TYPE:
7103 {
7104 /* These aren't very interesting except in a degenerate case. */
7105 int count = 0;
7106 int sub_count;
7107 tree field;
7108
807e902e
KZ
7109 /* Can't handle incomplete types nor sizes that are not
7110 fixed. */
7111 if (!COMPLETE_TYPE_P (type)
7112 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7113 return -1;
7114
7115 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7116 {
7117 if (TREE_CODE (field) != FIELD_DECL)
7118 continue;
7119
7120 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7121 if (sub_count < 0)
7122 return -1;
7123 count = count > sub_count ? count : sub_count;
7124 }
7125
7126 /* There must be no padding. */
807e902e 7127 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7128 return -1;
7129
7130 return count;
7131 }
7132
7133 default:
7134 break;
7135 }
7136
7137 return -1;
7138}
7139
38e8f663
YR
 7140/* Return true if we use LRA instead of the reload pass.  */
7141static bool
7142aarch64_lra_p (void)
7143{
7144 return aarch64_lra_flag;
7145}
7146
43e9d192
IB
7147/* Return TRUE if the type, as described by TYPE and MODE, is a composite
7148 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7149 array types. The C99 floating-point complex types are also considered
7150 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7151 types, which are GCC extensions and out of the scope of AAPCS64, are
7152 treated as composite types here as well.
7153
7154 Note that MODE itself is not sufficient in determining whether a type
7155 is such a composite type or not. This is because
7156 stor-layout.c:compute_record_mode may have already changed the MODE
7157 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7158 structure with only one field may have its MODE set to the mode of the
7159 field. Also an integer mode whose size matches the size of the
7160 RECORD_TYPE type may be used to substitute the original mode
7161 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7162 solely relied on. */
7163
7164static bool
7165aarch64_composite_type_p (const_tree type,
7166 enum machine_mode mode)
7167{
7168 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7169 return true;
7170
7171 if (mode == BLKmode
7172 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7173 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7174 return true;
7175
7176 return false;
7177}
7178
7179/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7180 type as described in AAPCS64 \S 4.1.2.
7181
7182 See the comment above aarch64_composite_type_p for the notes on MODE. */
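/* Under AAPCS64 a short vector is an 8- or 16-byte vector, e.g. the AdvSIMD
   types int32x2_t and int32x4_t.  */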
7183
7184static bool
7185aarch64_short_vector_p (const_tree type,
7186 enum machine_mode mode)
7187{
7188 HOST_WIDE_INT size = -1;
7189
7190 if (type && TREE_CODE (type) == VECTOR_TYPE)
7191 size = int_size_in_bytes (type);
7192 else if (!aarch64_composite_type_p (type, mode)
7193 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7194 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7195 size = GET_MODE_SIZE (mode);
7196
7197 return (size == 8 || size == 16) ? true : false;
7198}
7199
7200/* Return TRUE if an argument, whose type is described by TYPE and MODE,
7201 shall be passed or returned in simd/fp register(s) (providing these
7202 parameter passing registers are available).
7203
7204 Upon successful return, *COUNT returns the number of needed registers,
 7205   *BASE_MODE returns the mode of the individual register and, when IS_HA
7206 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7207 floating-point aggregate or a homogeneous short-vector aggregate. */
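/* For example, "struct { float x, y, z; }" is a homogeneous floating-point
   aggregate: *BASE_MODE is set to SFmode, *COUNT to 3 and *IS_HA to true,
   so the argument can be passed in three consecutive FP/SIMD registers
   when enough of them remain available.  */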
7208
7209static bool
7210aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
7211 const_tree type,
7212 enum machine_mode *base_mode,
7213 int *count,
7214 bool *is_ha)
7215{
7216 enum machine_mode new_mode = VOIDmode;
7217 bool composite_p = aarch64_composite_type_p (type, mode);
7218
7219 if (is_ha != NULL) *is_ha = false;
7220
7221 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7222 || aarch64_short_vector_p (type, mode))
7223 {
7224 *count = 1;
7225 new_mode = mode;
7226 }
7227 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7228 {
7229 if (is_ha != NULL) *is_ha = true;
7230 *count = 2;
7231 new_mode = GET_MODE_INNER (mode);
7232 }
7233 else if (type && composite_p)
7234 {
7235 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7236
7237 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7238 {
7239 if (is_ha != NULL) *is_ha = true;
7240 *count = ag_count;
7241 }
7242 else
7243 return false;
7244 }
7245 else
7246 return false;
7247
7248 *base_mode = new_mode;
7249 return true;
7250}
7251
7252/* Implement TARGET_STRUCT_VALUE_RTX. */
7253
7254static rtx
7255aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7256 int incoming ATTRIBUTE_UNUSED)
7257{
7258 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7259}
7260
7261/* Implements target hook vector_mode_supported_p. */
7262static bool
7263aarch64_vector_mode_supported_p (enum machine_mode mode)
7264{
7265 if (TARGET_SIMD
7266 && (mode == V4SImode || mode == V8HImode
7267 || mode == V16QImode || mode == V2DImode
7268 || mode == V2SImode || mode == V4HImode
7269 || mode == V8QImode || mode == V2SFmode
7270 || mode == V4SFmode || mode == V2DFmode))
7271 return true;
7272
7273 return false;
7274}
7275
b7342d25
IB
7276/* Return appropriate SIMD container
7277 for MODE within a vector of WIDTH bits. */
43e9d192 7278static enum machine_mode
b7342d25 7279aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
43e9d192 7280{
b7342d25 7281 gcc_assert (width == 64 || width == 128);
43e9d192 7282 if (TARGET_SIMD)
b7342d25
IB
7283 {
7284 if (width == 128)
7285 switch (mode)
7286 {
7287 case DFmode:
7288 return V2DFmode;
7289 case SFmode:
7290 return V4SFmode;
7291 case SImode:
7292 return V4SImode;
7293 case HImode:
7294 return V8HImode;
7295 case QImode:
7296 return V16QImode;
7297 case DImode:
7298 return V2DImode;
7299 default:
7300 break;
7301 }
7302 else
7303 switch (mode)
7304 {
7305 case SFmode:
7306 return V2SFmode;
7307 case SImode:
7308 return V2SImode;
7309 case HImode:
7310 return V4HImode;
7311 case QImode:
7312 return V8QImode;
7313 default:
7314 break;
7315 }
7316 }
43e9d192
IB
7317 return word_mode;
7318}
7319
b7342d25
IB
7320/* Return 128-bit container as the preferred SIMD mode for MODE. */
7321static enum machine_mode
7322aarch64_preferred_simd_mode (enum machine_mode mode)
7323{
7324 return aarch64_simd_container_mode (mode, 128);
7325}
7326
3b357264
JG
7327/* Return the bitmask of possible vector sizes for the vectorizer
7328 to iterate over. */
7329static unsigned int
7330aarch64_autovectorize_vector_sizes (void)
7331{
7332 return (16 | 8);
7333}
7334
c6fc9e43
YZ
7335/* A table to help perform AArch64-specific name mangling for AdvSIMD
7336 vector types in order to conform to the AAPCS64 (see "Procedure
7337 Call Standard for the ARM 64-bit Architecture", Appendix A). To
7338 qualify for emission with the mangled names defined in that document,
7339 a vector type must not only be of the correct mode but also be
7340 composed of AdvSIMD vector element types (e.g.
 7341   __builtin_aarch64_simd_qi); these types are registered by
7342 aarch64_init_simd_builtins (). In other words, vector types defined
7343 in other ways e.g. via vector_size attribute will get default
7344 mangled names. */
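/* For example, float32x4_t (mode V4SFmode, element type
   __builtin_aarch64_simd_sf) is mangled as "13__Float32x4_t", so a C++
   function "void f (float32x4_t)" mangles to "_Z1f13__Float32x4_t".  */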
7345typedef struct
7346{
7347 enum machine_mode mode;
7348 const char *element_type_name;
7349 const char *mangled_name;
7350} aarch64_simd_mangle_map_entry;
7351
7352static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
7353 /* 64-bit containerized types. */
7354 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
7355 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
7356 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
7357 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
7358 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
7359 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
7360 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
7361 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
7362 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7363 /* 128-bit containerized types. */
7364 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
7365 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
7366 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
7367 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
7368 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
7369 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
7370 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
7371 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
7372 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
7373 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
7374 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
7375 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7baa225d 7376 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
c6fc9e43
YZ
7377 { VOIDmode, NULL, NULL }
7378};
7379
ac2b960f
YZ
7380/* Implement TARGET_MANGLE_TYPE. */
7381
6f549691 7382static const char *
ac2b960f
YZ
7383aarch64_mangle_type (const_tree type)
7384{
7385 /* The AArch64 ABI documents say that "__va_list" has to be
 7386   mangled as if it is in the "std" namespace.  */
7387 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7388 return "St9__va_list";
7389
c6fc9e43
YZ
7390 /* Check the mode of the vector type, and the name of the vector
7391 element type, against the table. */
7392 if (TREE_CODE (type) == VECTOR_TYPE)
7393 {
7394 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7395
7396 while (pos->mode != VOIDmode)
7397 {
7398 tree elt_type = TREE_TYPE (type);
7399
7400 if (pos->mode == TYPE_MODE (type)
7401 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7402 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7403 pos->element_type_name))
7404 return pos->mangled_name;
7405
7406 pos++;
7407 }
7408 }
7409
ac2b960f
YZ
7410 /* Use the default mangling. */
7411 return NULL;
7412}
7413
43e9d192 7414/* Return the equivalent letter for size. */
81c2dfb9 7415static char
43e9d192
IB
7416sizetochar (int size)
7417{
7418 switch (size)
7419 {
7420 case 64: return 'd';
7421 case 32: return 's';
7422 case 16: return 'h';
7423 case 8 : return 'b';
7424 default: gcc_unreachable ();
7425 }
7426}
7427
3520f7cc
JG
7428/* Return true iff x is a uniform vector of floating-point
7429 constants, and the constant can be represented in
 7430   quarter-precision form.  Note that, as aarch64_float_const_representable_p
 7431   rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
7432static bool
7433aarch64_vect_float_const_representable_p (rtx x)
7434{
7435 int i = 0;
7436 REAL_VALUE_TYPE r0, ri;
7437 rtx x0, xi;
7438
7439 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7440 return false;
7441
7442 x0 = CONST_VECTOR_ELT (x, 0);
7443 if (!CONST_DOUBLE_P (x0))
7444 return false;
7445
7446 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7447
7448 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7449 {
7450 xi = CONST_VECTOR_ELT (x, i);
7451 if (!CONST_DOUBLE_P (xi))
7452 return false;
7453
7454 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7455 if (!REAL_VALUES_EQUAL (r0, ri))
7456 return false;
7457 }
7458
7459 return aarch64_float_const_representable_p (x0);
7460}
7461
d8edd899 7462/* Return true for valid and false for invalid. */
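/* For example, the V4SImode vector {0xff0000, 0xff0000, 0xff0000, 0xff0000}
   is recognised as "MOVI Vd.4S, #0xff, LSL #16"; *INFO is filled in with
   value 0xff, shift 16 and mvn false.  */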
3ea63f60 7463bool
48063b9d
IB
7464aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7465 struct simd_immediate_info *info)
43e9d192
IB
7466{
7467#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7468 matches = 1; \
7469 for (i = 0; i < idx; i += (STRIDE)) \
7470 if (!(TEST)) \
7471 matches = 0; \
7472 if (matches) \
7473 { \
7474 immtype = (CLASS); \
7475 elsize = (ELSIZE); \
43e9d192
IB
7476 eshift = (SHIFT); \
7477 emvn = (NEG); \
7478 break; \
7479 }
7480
7481 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7482 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7483 unsigned char bytes[16];
43e9d192
IB
7484 int immtype = -1, matches;
7485 unsigned int invmask = inverse ? 0xff : 0;
7486 int eshift, emvn;
7487
43e9d192 7488 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 7489 {
81c2dfb9
IB
7490 if (! (aarch64_simd_imm_zero_p (op, mode)
7491 || aarch64_vect_float_const_representable_p (op)))
d8edd899 7492 return false;
3520f7cc 7493
48063b9d
IB
7494 if (info)
7495 {
7496 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 7497 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
7498 info->mvn = false;
7499 info->shift = 0;
7500 }
3520f7cc 7501
d8edd899 7502 return true;
3520f7cc 7503 }
43e9d192
IB
7504
7505 /* Splat vector constant out into a byte vector. */
7506 for (i = 0; i < n_elts; i++)
7507 {
4b1e108c
AL
7508 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7509 it must be laid out in the vector register in reverse order. */
7510 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192
IB
7511 unsigned HOST_WIDE_INT elpart;
7512 unsigned int part, parts;
7513
7514 if (GET_CODE (el) == CONST_INT)
7515 {
7516 elpart = INTVAL (el);
7517 parts = 1;
7518 }
7519 else if (GET_CODE (el) == CONST_DOUBLE)
7520 {
7521 elpart = CONST_DOUBLE_LOW (el);
7522 parts = 2;
7523 }
7524 else
7525 gcc_unreachable ();
7526
7527 for (part = 0; part < parts; part++)
7528 {
7529 unsigned int byte;
7530 for (byte = 0; byte < innersize; byte++)
7531 {
7532 bytes[idx++] = (elpart & 0xff) ^ invmask;
7533 elpart >>= BITS_PER_UNIT;
7534 }
7535 if (GET_CODE (el) == CONST_DOUBLE)
7536 elpart = CONST_DOUBLE_HIGH (el);
7537 }
7538 }
7539
7540 /* Sanity check. */
7541 gcc_assert (idx == GET_MODE_SIZE (mode));
7542
7543 do
7544 {
7545 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7546 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7547
7548 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7549 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7550
7551 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7552 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7553
7554 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7555 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7556
7557 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7558
7559 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7560
7561 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7562 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7563
7564 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7565 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7566
7567 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7568 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7569
7570 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7571 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7572
7573 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7574
7575 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7576
7577 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 7578 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
7579
7580 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 7581 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
7582
7583 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 7584 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
7585
7586 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 7587 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
7588
7589 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7590
7591 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7592 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7593 }
7594 while (0);
7595
e4f0f84d 7596 if (immtype == -1)
d8edd899 7597 return false;
43e9d192 7598
48063b9d 7599 if (info)
43e9d192 7600 {
48063b9d 7601 info->element_width = elsize;
48063b9d
IB
7602 info->mvn = emvn != 0;
7603 info->shift = eshift;
7604
43e9d192
IB
7605 unsigned HOST_WIDE_INT imm = 0;
7606
e4f0f84d
TB
7607 if (immtype >= 12 && immtype <= 15)
7608 info->msl = true;
7609
43e9d192
IB
7610 /* Un-invert bytes of recognized vector, if necessary. */
7611 if (invmask != 0)
7612 for (i = 0; i < idx; i++)
7613 bytes[i] ^= invmask;
7614
7615 if (immtype == 17)
7616 {
7617 /* FIXME: Broken on 32-bit H_W_I hosts. */
7618 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7619
7620 for (i = 0; i < 8; i++)
7621 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7622 << (i * BITS_PER_UNIT);
7623
43e9d192 7624
48063b9d
IB
7625 info->value = GEN_INT (imm);
7626 }
7627 else
7628 {
7629 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7630 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7631
7632 /* Construct 'abcdefgh' because the assembler cannot handle
7633 generic constants. */
7634 if (info->mvn)
43e9d192 7635 imm = ~imm;
7636 imm = (imm >> info->shift) & 0xff;
7637 info->value = GEN_INT (imm);
7638 }
7639 }
7640
48063b9d 7641 return true;
7642#undef CHECK
7643}
7644
7645static bool
7646aarch64_const_vec_all_same_int_p (rtx x,
7647 HOST_WIDE_INT minval,
7648 HOST_WIDE_INT maxval)
7649{
7650 HOST_WIDE_INT firstval;
7651 int count, i;
7652
7653 if (GET_CODE (x) != CONST_VECTOR
7654 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7655 return false;
7656
7657 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7658 if (firstval < minval || firstval > maxval)
7659 return false;
7660
7661 count = CONST_VECTOR_NUNITS (x);
7662 for (i = 1; i < count; i++)
7663 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7664 return false;
7665
7666 return true;
7667}
7668
7669/* Check whether immediate shift constants are within range. */
7670bool
7671aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7672{
7673 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7674 if (left)
7675 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7676 else
7677 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7678}
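/* Illustrative example (added; not part of the original source): for a
   V4SImode shift the element width is 32 bits, so a vector of left-shift
   immediates is accepted when every element is the same value in [0, 31],
   while right-shift immediates must all lie in [1, 32], matching the
   AdvSIMD SHL/SSHR immediate ranges.  A hypothetical caller might check

     aarch64_simd_shift_imm_p (operands[2], V4SImode, false)

   where operands[2] is a CONST_VECTOR such as {3, 3, 3, 3}.  */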
7679
7680/* Return true if X is a uniform vector where all elements
7681 are either the floating-point constant 0.0 or the
7682 integer constant 0. */
7683bool
7684aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7685{
3520f7cc 7686 return x == CONST0_RTX (mode);
7687}
7688
7689bool
7690aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7691{
7692 HOST_WIDE_INT imm = INTVAL (x);
7693 int i;
7694
7695 for (i = 0; i < 8; i++)
7696 {
7697 unsigned int byte = imm & 0xff;
7698 if (byte != 0xff && byte != 0)
7699 return false;
7700 imm >>= 8;
7701 }
7702
7703 return true;
7704}
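/* Worked example (added for illustration): the test above accepts any
   64-bit value whose bytes are each 0x00 or 0xff, e.g.
   0xff00ff00ff00ff00 or 0x00000000ffffffff, since such values can be
   materialised with a single MOVI Dd, #imm.  A value like
   0x0123456789abcdef is rejected because most of its bytes are neither
   0x00 nor 0xff.  */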
7705
7706bool
7707aarch64_mov_operand_p (rtx x,
a5350ddc 7708 enum aarch64_symbol_context context,
7709 enum machine_mode mode)
7710{
7711 if (GET_CODE (x) == HIGH
7712 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7713 return true;
7714
7715 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7716 return true;
7717
7718 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7719 return true;
7720
7721 return aarch64_classify_symbolic_expression (x, context)
7722 == SYMBOL_TINY_ABSOLUTE;
7723}
7724
7725/* Return a const_int vector of VAL. */
7726rtx
7727aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7728{
7729 int nunits = GET_MODE_NUNITS (mode);
7730 rtvec v = rtvec_alloc (nunits);
7731 int i;
7732
7733 for (i=0; i < nunits; i++)
7734 RTVEC_ELT (v, i) = GEN_INT (val);
7735
7736 return gen_rtx_CONST_VECTOR (mode, v);
7737}
7738
7739/* Check OP is a legal scalar immediate for the MOVI instruction. */
7740
7741bool
7742aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7743{
7744 enum machine_mode vmode;
7745
7746 gcc_assert (!VECTOR_MODE_P (mode));
7747 vmode = aarch64_preferred_simd_mode (mode);
7748 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 7749 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7750}
7751
7752/* Construct and return a PARALLEL RTX vector. */
7753rtx
7754aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7755{
7756 int nunits = GET_MODE_NUNITS (mode);
7757 rtvec v = rtvec_alloc (nunits / 2);
7758 int base = high ? nunits / 2 : 0;
7759 rtx t1;
7760 int i;
7761
7762 for (i=0; i < nunits / 2; i++)
7763 RTVEC_ELT (v, i) = GEN_INT (base + i);
7764
7765 t1 = gen_rtx_PARALLEL (mode, v);
7766 return t1;
7767}
7768
7769/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7770 HIGH (exclusive). */
7771void
7772aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7773{
7774 HOST_WIDE_INT lane;
7775 gcc_assert (GET_CODE (operand) == CONST_INT);
7776 lane = INTVAL (operand);
7777
7778 if (lane < low || lane >= high)
7779 error ("lane out of range");
7780}
7781
7782void
7783aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7784{
7785 gcc_assert (GET_CODE (operand) == CONST_INT);
7786 HOST_WIDE_INT lane = INTVAL (operand);
7787
7788 if (lane < low || lane >= high)
7789 error ("constant out of range");
7790}
7791
7792/* Emit code to reinterpret one AdvSIMD type as another,
7793 without altering bits. */
7794void
7795aarch64_simd_reinterpret (rtx dest, rtx src)
7796{
7797 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7798}
7799
7800/* Emit code to place an AdvSIMD pair result in memory locations (with equal
7801 registers). */
7802void
7803aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7804 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7805 rtx op1)
7806{
7807 rtx mem = gen_rtx_MEM (mode, destaddr);
7808 rtx tmp1 = gen_reg_rtx (mode);
7809 rtx tmp2 = gen_reg_rtx (mode);
7810
7811 emit_insn (intfn (tmp1, op1, tmp2));
7812
7813 emit_move_insn (mem, tmp1);
7814 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7815 emit_move_insn (mem, tmp2);
7816}
7817
7818/* Return TRUE if OP is a valid vector addressing mode. */
7819bool
7820aarch64_simd_mem_operand_p (rtx op)
7821{
7822 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7823 || GET_CODE (XEXP (op, 0)) == REG);
7824}
7825
7826/* Set up OPERANDS for a register copy from SRC to DEST, taking care
7827 not to early-clobber SRC registers in the process.
7828
7829 We assume that the operands described by SRC and DEST represent a
7830 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
7831 number of components into which the copy has been decomposed. */
7832void
7833aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7834 rtx *src, unsigned int count)
7835{
7836 unsigned int i;
7837
7838 if (!reg_overlap_mentioned_p (operands[0], operands[1])
7839 || REGNO (operands[0]) < REGNO (operands[1]))
7840 {
7841 for (i = 0; i < count; i++)
7842 {
7843 operands[2 * i] = dest[i];
7844 operands[2 * i + 1] = src[i];
7845 }
7846 }
7847 else
7848 {
7849 for (i = 0; i < count; i++)
7850 {
7851 operands[2 * i] = dest[count - i - 1];
7852 operands[2 * i + 1] = src[count - i - 1];
7853 }
7854 }
7855}
7856
7857/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7858 one of VSTRUCT modes: OI, CI or XI. */
7859int
7860aarch64_simd_attr_length_move (rtx insn)
7861{
7862 enum machine_mode mode;
7863
7864 extract_insn_cached (insn);
7865
7866 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7867 {
7868 mode = GET_MODE (recog_data.operand[0]);
7869 switch (mode)
7870 {
7871 case OImode:
7872 return 8;
7873 case CImode:
7874 return 12;
7875 case XImode:
7876 return 16;
7877 default:
7878 gcc_unreachable ();
7879 }
7880 }
7881 return 4;
7882}
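/* Worked example (added for clarity): the returned attribute is a length
   in bytes, so an OImode, CImode or XImode register-to-register copy is
   assumed to split into 2, 3 or 4 four-byte instructions (8, 12 or 16
   bytes), while every other alternative of aarch64_simd_mov<mode> is a
   single 4-byte instruction.  */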
7883
7884/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
7885 alignment of a vector to 128 bits. */
7886static HOST_WIDE_INT
7887aarch64_simd_vector_alignment (const_tree type)
7888{
9439e9a1 7889 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
7890 return MIN (align, 128);
7891}
7892
7893/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
7894static bool
7895aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
7896{
7897 if (is_packed)
7898 return false;
7899
7900 /* We guarantee alignment for vectors up to 128-bits. */
7901 if (tree_int_cst_compare (TYPE_SIZE (type),
7902 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
7903 return false;
7904
7905 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
7906 return true;
7907}
7908
7909/* If VALS is a vector constant that can be loaded into a register
7910 using DUP, generate instructions to do so and return an RTX to
7911 assign to the register. Otherwise return NULL_RTX. */
7912static rtx
7913aarch64_simd_dup_constant (rtx vals)
7914{
7915 enum machine_mode mode = GET_MODE (vals);
7916 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7917 int n_elts = GET_MODE_NUNITS (mode);
7918 bool all_same = true;
7919 rtx x;
7920 int i;
7921
7922 if (GET_CODE (vals) != CONST_VECTOR)
7923 return NULL_RTX;
7924
7925 for (i = 1; i < n_elts; ++i)
7926 {
7927 x = CONST_VECTOR_ELT (vals, i);
7928 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
7929 all_same = false;
7930 }
7931
7932 if (!all_same)
7933 return NULL_RTX;
7934
7935 /* We can load this constant by using DUP and a constant in a
7936 single ARM register. This will be cheaper than a vector
7937 load. */
7938 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
7939 return gen_rtx_VEC_DUPLICATE (mode, x);
7940}
7941
7942
7943/* Generate code to load VALS, which is a PARALLEL containing only
7944 constants (for vec_init) or CONST_VECTOR, efficiently into a
7945 register. Returns an RTX to copy into the register, or NULL_RTX
7946 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
1df3f464 7947static rtx
7948aarch64_simd_make_constant (rtx vals)
7949{
7950 enum machine_mode mode = GET_MODE (vals);
7951 rtx const_dup;
7952 rtx const_vec = NULL_RTX;
7953 int n_elts = GET_MODE_NUNITS (mode);
7954 int n_const = 0;
7955 int i;
7956
7957 if (GET_CODE (vals) == CONST_VECTOR)
7958 const_vec = vals;
7959 else if (GET_CODE (vals) == PARALLEL)
7960 {
7961 /* A CONST_VECTOR must contain only CONST_INTs and
7962 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7963 Only store valid constants in a CONST_VECTOR. */
7964 for (i = 0; i < n_elts; ++i)
7965 {
7966 rtx x = XVECEXP (vals, 0, i);
7967 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7968 n_const++;
7969 }
7970 if (n_const == n_elts)
7971 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7972 }
7973 else
7974 gcc_unreachable ();
7975
7976 if (const_vec != NULL_RTX
48063b9d 7977 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7978 /* Load using MOVI/MVNI. */
7979 return const_vec;
7980 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7981 /* Loaded using DUP. */
7982 return const_dup;
7983 else if (const_vec != NULL_RTX)
7984 /* Load from constant pool. We cannot take advantage of single-cycle
7985 LD1 because we need a PC-relative addressing mode. */
7986 return const_vec;
7987 else
7988 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7989 We cannot construct an initializer. */
7990 return NULL_RTX;
7991}
7992
7993void
7994aarch64_expand_vector_init (rtx target, rtx vals)
7995{
7996 enum machine_mode mode = GET_MODE (target);
7997 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7998 int n_elts = GET_MODE_NUNITS (mode);
7999 int n_var = 0, one_var = -1;
8000 bool all_same = true;
8001 rtx x, mem;
8002 int i;
8003
8004 x = XVECEXP (vals, 0, 0);
8005 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8006 n_var = 1, one_var = 0;
8007
8008 for (i = 1; i < n_elts; ++i)
8009 {
8010 x = XVECEXP (vals, 0, i);
8011 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8012 ++n_var, one_var = i;
8013
8014 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8015 all_same = false;
8016 }
8017
8018 if (n_var == 0)
8019 {
8020 rtx constant = aarch64_simd_make_constant (vals);
8021 if (constant != NULL_RTX)
8022 {
8023 emit_move_insn (target, constant);
8024 return;
8025 }
8026 }
8027
8028 /* Splat a single non-constant element if we can. */
8029 if (all_same)
8030 {
8031 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8032 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8033 return;
8034 }
8035
8036 /* One field is non-constant. Load constant then overwrite varying
8037 field. This is more efficient than using the stack. */
8038 if (n_var == 1)
8039 {
8040 rtx copy = copy_rtx (vals);
8041 rtx index = GEN_INT (one_var);
8042 enum insn_code icode;
8043
8044 /* Load constant part of vector, substitute neighboring value for
8045 varying element. */
8046 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8047 aarch64_expand_vector_init (target, copy);
8048
8049 /* Insert variable. */
8050 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8051 icode = optab_handler (vec_set_optab, mode);
8052 gcc_assert (icode != CODE_FOR_nothing);
8053 emit_insn (GEN_FCN (icode) (target, x, index));
8054 return;
8055 }
8056
8057 /* Construct the vector in memory one field at a time
8058 and load the whole vector. */
8059 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8060 for (i = 0; i < n_elts; i++)
8061 emit_move_insn (adjust_address_nv (mem, inner_mode,
8062 i * GET_MODE_SIZE (inner_mode)),
8063 XVECEXP (vals, 0, i));
8064 emit_move_insn (target, mem);
8065
8066}
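/* Worked example (illustrative, not taken from the original source): for
   a V4SImode initialiser {x, 1, 2, 3} with a single variable element,
   the n_var == 1 path above first materialises the constant vector
   {1, 1, 2, 3} (lane 0 borrows its neighbour's value) and then
   overwrites lane 0 through the vec_set pattern, e.g.

     ldr   q0, .LCconst      // or a movi, if the constant allows it
     ins   v0.s[0], w0

   which avoids building the whole initialiser on the stack.  */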
8067
8068static unsigned HOST_WIDE_INT
8069aarch64_shift_truncation_mask (enum machine_mode mode)
8070{
8071 return
8072 (aarch64_vector_mode_supported_p (mode)
8073 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8074}
8075
8076#ifndef TLS_SECTION_ASM_FLAG
8077#define TLS_SECTION_ASM_FLAG 'T'
8078#endif
8079
8080void
8081aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8082 tree decl ATTRIBUTE_UNUSED)
8083{
8084 char flagchars[10], *f = flagchars;
8085
8086 /* If we have already declared this section, we can use an
8087 abbreviated form to switch back to it -- unless this section is
8088 part of a COMDAT group, in which case GAS requires the full
8089 declaration every time. */
8090 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8091 && (flags & SECTION_DECLARED))
8092 {
8093 fprintf (asm_out_file, "\t.section\t%s\n", name);
8094 return;
8095 }
8096
8097 if (!(flags & SECTION_DEBUG))
8098 *f++ = 'a';
8099 if (flags & SECTION_WRITE)
8100 *f++ = 'w';
8101 if (flags & SECTION_CODE)
8102 *f++ = 'x';
8103 if (flags & SECTION_SMALL)
8104 *f++ = 's';
8105 if (flags & SECTION_MERGE)
8106 *f++ = 'M';
8107 if (flags & SECTION_STRINGS)
8108 *f++ = 'S';
8109 if (flags & SECTION_TLS)
8110 *f++ = TLS_SECTION_ASM_FLAG;
8111 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8112 *f++ = 'G';
8113 *f = '\0';
8114
8115 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8116
8117 if (!(flags & SECTION_NOTYPE))
8118 {
8119 const char *type;
8120 const char *format;
8121
8122 if (flags & SECTION_BSS)
8123 type = "nobits";
8124 else
8125 type = "progbits";
8126
8127#ifdef TYPE_OPERAND_FMT
8128 format = "," TYPE_OPERAND_FMT;
8129#else
8130 format = ",@%s";
8131#endif
8132
8133 fprintf (asm_out_file, format, type);
8134
8135 if (flags & SECTION_ENTSIZE)
8136 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8137 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8138 {
8139 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8140 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8141 else
8142 fprintf (asm_out_file, ",%s,comdat",
8143 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8144 }
8145 }
8146
8147 putc ('\n', asm_out_file);
8148}
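/* Example output (added for illustration, assuming a writable, allocated
   data section that belongs to a COMDAT group named "foo"):

     .section .data.foo,"awG",@progbits,foo,comdat

   Re-entering an already-declared, non-COMDAT section instead emits only
   the abbreviated "\t.section\t.data.foo" form.  */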
8149
8150/* Select a format to encode pointers in exception handling data. */
8151int
8152aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8153{
8154 int type;
8155 switch (aarch64_cmodel)
8156 {
8157 case AARCH64_CMODEL_TINY:
8158 case AARCH64_CMODEL_TINY_PIC:
8159 case AARCH64_CMODEL_SMALL:
8160 case AARCH64_CMODEL_SMALL_PIC:
8161 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8162 for everything. */
8163 type = DW_EH_PE_sdata4;
8164 break;
8165 default:
8166 /* No assumptions here. 8-byte relocs required. */
8167 type = DW_EH_PE_sdata8;
8168 break;
8169 }
8170 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8171}
8172
8173/* Emit load exclusive. */
8174
8175static void
8176aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
8177 rtx mem, rtx model_rtx)
8178{
8179 rtx (*gen) (rtx, rtx, rtx);
8180
8181 switch (mode)
8182 {
8183 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8184 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8185 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8186 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8187 default:
8188 gcc_unreachable ();
8189 }
8190
8191 emit_insn (gen (rval, mem, model_rtx));
8192}
8193
8194/* Emit store exclusive. */
8195
8196static void
8197aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
8198 rtx rval, rtx mem, rtx model_rtx)
8199{
8200 rtx (*gen) (rtx, rtx, rtx, rtx);
8201
8202 switch (mode)
8203 {
8204 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8205 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8206 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8207 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8208 default:
8209 gcc_unreachable ();
8210 }
8211
8212 emit_insn (gen (bval, rval, mem, model_rtx));
8213}
8214
8215/* Mark the previous jump instruction as unlikely. */
8216
8217static void
8218aarch64_emit_unlikely_jump (rtx insn)
8219{
e5af9ddd 8220 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
8221
8222 insn = emit_jump_insn (insn);
e5af9ddd 8223 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
8224}
8225
8226/* Expand a compare and swap pattern. */
8227
8228void
8229aarch64_expand_compare_and_swap (rtx operands[])
8230{
8231 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8232 enum machine_mode mode, cmp_mode;
8233 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8234
8235 bval = operands[0];
8236 rval = operands[1];
8237 mem = operands[2];
8238 oldval = operands[3];
8239 newval = operands[4];
8240 is_weak = operands[5];
8241 mod_s = operands[6];
8242 mod_f = operands[7];
8243 mode = GET_MODE (mem);
8244 cmp_mode = mode;
8245
8246 /* Normally the succ memory model must be stronger than fail, but in the
8247 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8248 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8249
8250 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8251 && INTVAL (mod_s) == MEMMODEL_RELEASE)
8252 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8253
8254 switch (mode)
8255 {
8256 case QImode:
8257 case HImode:
8258 /* For short modes, we're going to perform the comparison in SImode,
8259 so do the zero-extension now. */
8260 cmp_mode = SImode;
8261 rval = gen_reg_rtx (SImode);
8262 oldval = convert_modes (SImode, mode, oldval, true);
8263 /* Fall through. */
8264
8265 case SImode:
8266 case DImode:
8267 /* Force the value into a register if needed. */
8268 if (!aarch64_plus_operand (oldval, mode))
8269 oldval = force_reg (cmp_mode, oldval);
8270 break;
8271
8272 default:
8273 gcc_unreachable ();
8274 }
8275
8276 switch (mode)
8277 {
8278 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8279 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8280 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8281 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8282 default:
8283 gcc_unreachable ();
8284 }
8285
8286 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8287
8288 if (mode == QImode || mode == HImode)
8289 emit_move_insn (operands[1], gen_lowpart (mode, rval));
8290
8291 x = gen_rtx_REG (CCmode, CC_REGNUM);
8292 x = gen_rtx_EQ (SImode, x, const0_rtx);
8293 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8294}
8295
8296/* Split a compare and swap pattern. */
8297
8298void
8299aarch64_split_compare_and_swap (rtx operands[])
8300{
8301 rtx rval, mem, oldval, newval, scratch;
8302 enum machine_mode mode;
8303 bool is_weak;
8304 rtx label1, label2, x, cond;
8305
8306 rval = operands[0];
8307 mem = operands[1];
8308 oldval = operands[2];
8309 newval = operands[3];
8310 is_weak = (operands[4] != const0_rtx);
8311 scratch = operands[7];
8312 mode = GET_MODE (mem);
8313
8314 label1 = NULL_RTX;
8315 if (!is_weak)
8316 {
8317 label1 = gen_label_rtx ();
8318 emit_label (label1);
8319 }
8320 label2 = gen_label_rtx ();
8321
8322 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8323
8324 cond = aarch64_gen_compare_reg (NE, rval, oldval);
8325 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8326 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8327 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8328 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8329
8330 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8331
8332 if (!is_weak)
8333 {
8334 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8335 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8336 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8337 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8338 }
8339 else
8340 {
8341 cond = gen_rtx_REG (CCmode, CC_REGNUM);
8342 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8343 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8344 }
8345
8346 emit_label (label2);
8347}
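/* Illustrative sketch (an assumed shape, not captured compiler output):
   a strong, sequentially consistent 32-bit compare-and-swap split by the
   routine above becomes a loop of roughly this form:

     .Lretry:
       ldaxr  w1, [x0]        // load-exclusive with acquire
       cmp    w1, w2          // compare against the expected value
       bne    .Ldone          // mismatch: fail without storing
       stlxr  w3, w4, [x0]    // store-exclusive with release
       cbnz   w3, .Lretry     // reservation lost: try again
     .Ldone:

   The weak form omits the backward branch and simply reports the outcome
   of the single store-exclusive attempt.  */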
8348
8349/* Split an atomic operation. */
8350
8351void
8352aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8353 rtx value, rtx model_rtx, rtx cond)
8354{
8355 enum machine_mode mode = GET_MODE (mem);
8356 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8357 rtx label, x;
8358
8359 label = gen_label_rtx ();
8360 emit_label (label);
8361
8362 if (new_out)
8363 new_out = gen_lowpart (wmode, new_out);
8364 if (old_out)
8365 old_out = gen_lowpart (wmode, old_out);
8366 else
8367 old_out = new_out;
8368 value = simplify_gen_subreg (wmode, value, mode, 0);
8369
8370 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8371
8372 switch (code)
8373 {
8374 case SET:
8375 new_out = value;
8376 break;
8377
8378 case NOT:
8379 x = gen_rtx_AND (wmode, old_out, value);
8380 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8381 x = gen_rtx_NOT (wmode, new_out);
8382 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8383 break;
8384
8385 case MINUS:
8386 if (CONST_INT_P (value))
8387 {
8388 value = GEN_INT (-INTVAL (value));
8389 code = PLUS;
8390 }
8391 /* Fall through. */
8392
8393 default:
8394 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8395 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8396 break;
8397 }
8398
8399 aarch64_emit_store_exclusive (mode, cond, mem,
8400 gen_lowpart (mode, new_out), model_rtx);
8401
8402 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8403 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8404 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8405 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8406}
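/* Illustrative sketch (assumed shape): splitting a 32-bit atomic
   fetch-and-add with the routine above yields the classic
   load-exclusive/store-exclusive retry loop:

     .Lloop:
       ldxr   w1, [x0]        // old value (old_out)
       add    w2, w1, w3      // new value (new_out)
       stxr   w4, w2, [x0]    // w4 = 0 on success
       cbnz   w4, .Lloop      // retry if the exclusive store failed

   with acquire/release variants of the exclusives chosen from the memory
   model operand.  */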
8407
8408static void
8409aarch64_print_extension (void)
8410{
8411 const struct aarch64_option_extension *opt = NULL;
8412
8413 for (opt = all_extensions; opt->name != NULL; opt++)
8414 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8415 asm_fprintf (asm_out_file, "+%s", opt->name);
8416
8417 asm_fprintf (asm_out_file, "\n");
8418}
8419
8420static void
8421aarch64_start_file (void)
8422{
8423 if (selected_arch)
8424 {
8425 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8426 aarch64_print_extension ();
8427 }
43e9d192 8428 else if (selected_cpu)
95ca411e 8429 {
8430 const char *truncated_name
8431 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8432 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8433 aarch64_print_extension ();
8434 }
8435 default_file_start();
8436}
8437
8438/* Target hook for c_mode_for_suffix. */
8439static enum machine_mode
8440aarch64_c_mode_for_suffix (char suffix)
8441{
8442 if (suffix == 'q')
8443 return TFmode;
8444
8445 return VOIDmode;
8446}
8447
8448/* We can only represent floating point constants which will fit in
8449 "quarter-precision" values. These values are characterised by
8450 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
8451 by:
8452
8453 (-1)^s * (n/16) * 2^r
8454
8455 Where:
8456 's' is the sign bit.
8457 'n' is an integer in the range 16 <= n <= 31.
8458 'r' is an integer in the range -3 <= r <= 4. */
8459
8460/* Return true iff X can be represented as a quarter-precision
8461 floating point immediate operand. Note, we cannot represent 0.0. */
8462bool
8463aarch64_float_const_representable_p (rtx x)
8464{
8465 /* This represents our current view of how many bits
8466 make up the mantissa. */
8467 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 8468 int exponent;
3520f7cc 8469 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 8470 REAL_VALUE_TYPE r, m;
807e902e 8471 bool fail;
8472
8473 if (!CONST_DOUBLE_P (x))
8474 return false;
8475
8476 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8477
8478 /* We cannot represent infinities, NaNs or +/-zero. We won't
8479 know if we have +zero until we analyse the mantissa, but we
8480 can reject the other invalid values. */
8481 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8482 || REAL_VALUE_MINUS_ZERO (r))
8483 return false;
8484
ba96cdfb 8485 /* Extract exponent. */
8486 r = real_value_abs (&r);
8487 exponent = REAL_EXP (&r);
8488
8489 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8490 highest (sign) bit, with a fixed binary point at bit point_pos.
8491 m1 holds the low part of the mantissa, m2 the high part.
8492 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8493 bits for the mantissa, this can fail (low bits will be lost). */
8494 real_ldexp (&m, &r, point_pos - exponent);
807e902e 8495 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
8496
8497 /* If the low part of the mantissa has bits set we cannot represent
8498 the value. */
807e902e 8499 if (w.elt (0) != 0)
8500 return false;
8501 /* We have rejected the lower HOST_WIDE_INT, so update our
8502 understanding of how many bits lie in the mantissa and
8503 look only at the high HOST_WIDE_INT. */
807e902e 8504 mantissa = w.elt (1);
8505 point_pos -= HOST_BITS_PER_WIDE_INT;
8506
8507 /* We can only represent values with a mantissa of the form 1.xxxx. */
8508 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8509 if ((mantissa & mask) != 0)
8510 return false;
8511
8512 /* Having filtered unrepresentable values, we may now remove all
8513 but the highest 5 bits. */
8514 mantissa >>= point_pos - 5;
8515
8516 /* We cannot represent the value 0.0, so reject it. This is handled
8517 elsewhere. */
8518 if (mantissa == 0)
8519 return false;
8520
8521 /* Then, as bit 4 is always set, we can mask it off, leaving
8522 the mantissa in the range [0, 15]. */
8523 mantissa &= ~(1 << 4);
8524 gcc_assert (mantissa <= 15);
8525
8526 /* GCC internally does not use IEEE754-like encoding (where normalized
8527 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8528 Our mantissa values are shifted 4 places to the left relative to
8529 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8530 by 5 places to correct for GCC's representation. */
8531 exponent = 5 - exponent;
8532
8533 return (exponent >= 0 && exponent <= 7);
8534}
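/* Worked examples (added for illustration) of the (-1)^s * (n/16) * 2^r
   form accepted above, with 16 <= n <= 31 and -3 <= r <= 4:

     0.5   = (16/16) * 2^-1    representable
     -1.75 = -(28/16) * 2^0    representable
     31.0  = (31/16) * 2^4     representable (largest magnitude)
     0.1                       not representable (needs more mantissa bits)

   so the FMOV immediate form covers magnitudes from 0.125 up to 31.0.  */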
8535
8536char*
81c2dfb9 8537aarch64_output_simd_mov_immediate (rtx const_vector,
8538 enum machine_mode mode,
8539 unsigned width)
8540{
3ea63f60 8541 bool is_valid;
3520f7cc 8542 static char templ[40];
3520f7cc 8543 const char *mnemonic;
e4f0f84d 8544 const char *shift_op;
3520f7cc 8545 unsigned int lane_count = 0;
81c2dfb9 8546 char element_char;
3520f7cc 8547
e4f0f84d 8548 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8549
8550 /* This will return true to show const_vector is legal for use as either
8551 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
8552 also update INFO to show how the immediate should be generated. */
81c2dfb9 8553 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
8554 gcc_assert (is_valid);
8555
81c2dfb9 8556 element_char = sizetochar (info.element_width);
8557 lane_count = width / info.element_width;
8558
8559 mode = GET_MODE_INNER (mode);
8560 if (mode == SFmode || mode == DFmode)
8561 {
8562 gcc_assert (info.shift == 0 && ! info.mvn);
8563 if (aarch64_float_const_zero_rtx_p (info.value))
8564 info.value = GEN_INT (0);
8565 else
8566 {
8567#define buf_size 20
8568 REAL_VALUE_TYPE r;
8569 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8570 char float_buf[buf_size] = {'\0'};
8571 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8572#undef buf_size
8573
8574 if (lane_count == 1)
8575 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8576 else
8577 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 8578 lane_count, element_char, float_buf);
8579 return templ;
8580 }
3520f7cc 8581 }
3520f7cc 8582
48063b9d 8583 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 8584 shift_op = info.msl ? "msl" : "lsl";
8585
8586 if (lane_count == 1)
8587 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8588 mnemonic, UINTVAL (info.value));
8589 else if (info.shift)
8590 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8591 ", %s %d", mnemonic, lane_count, element_char,
8592 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 8593 else
48063b9d 8594 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 8595 mnemonic, lane_count, element_char, UINTVAL (info.value));
8596 return templ;
8597}
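/* Illustrative templates (assumed typical results of the code above, not
   verified output):

     "movi\t%d0, 0xff00ff00ff00ff00"    64-bit scalar form
     "movi\t%0.4s, 0x12, lsl 8"         shifted MOVI
     "mvni\t%0.4s, 0x12, msl 8"         inverted value, MSL form
     "fmov\t%0.2d, 1.0e+0"              floating-point splat

   The exact spelling of the immediates depends on the host's hex
   formatting and on real_to_decimal_for_mode for the FP case.  */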
8598
8599char*
8600aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8601 enum machine_mode mode)
8602{
8603 enum machine_mode vmode;
8604
8605 gcc_assert (!VECTOR_MODE_P (mode));
8606 vmode = aarch64_simd_container_mode (mode, 64);
8607 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8608 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8609}
8610
8611/* Split operands into moves from op[1] + op[2] into op[0]. */
8612
8613void
8614aarch64_split_combinev16qi (rtx operands[3])
8615{
8616 unsigned int dest = REGNO (operands[0]);
8617 unsigned int src1 = REGNO (operands[1]);
8618 unsigned int src2 = REGNO (operands[2]);
8619 enum machine_mode halfmode = GET_MODE (operands[1]);
8620 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8621 rtx destlo, desthi;
8622
8623 gcc_assert (halfmode == V16QImode);
8624
8625 if (src1 == dest && src2 == dest + halfregs)
8626 {
8627 /* No-op move. Can't split to nothing; emit something. */
8628 emit_note (NOTE_INSN_DELETED);
8629 return;
8630 }
8631
8632 /* Preserve register attributes for variable tracking. */
8633 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8634 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8635 GET_MODE_SIZE (halfmode));
8636
8637 /* Special case of reversed high/low parts. */
8638 if (reg_overlap_mentioned_p (operands[2], destlo)
8639 && reg_overlap_mentioned_p (operands[1], desthi))
8640 {
8641 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8642 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8643 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8644 }
8645 else if (!reg_overlap_mentioned_p (operands[2], destlo))
8646 {
8647 /* Try to avoid unnecessary moves if part of the result
8648 is in the right place already. */
8649 if (src1 != dest)
8650 emit_move_insn (destlo, operands[1]);
8651 if (src2 != dest + halfregs)
8652 emit_move_insn (desthi, operands[2]);
8653 }
8654 else
8655 {
8656 if (src2 != dest + halfregs)
8657 emit_move_insn (desthi, operands[2]);
8658 if (src1 != dest)
8659 emit_move_insn (destlo, operands[1]);
8660 }
8661}
8662
8663/* vec_perm support. */
8664
8665#define MAX_VECT_LEN 16
8666
8667struct expand_vec_perm_d
8668{
8669 rtx target, op0, op1;
8670 unsigned char perm[MAX_VECT_LEN];
8671 enum machine_mode vmode;
8672 unsigned char nelt;
8673 bool one_vector_p;
8674 bool testing_p;
8675};
8676
8677/* Generate a variable permutation. */
8678
8679static void
8680aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8681{
8682 enum machine_mode vmode = GET_MODE (target);
8683 bool one_vector_p = rtx_equal_p (op0, op1);
8684
8685 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8686 gcc_checking_assert (GET_MODE (op0) == vmode);
8687 gcc_checking_assert (GET_MODE (op1) == vmode);
8688 gcc_checking_assert (GET_MODE (sel) == vmode);
8689 gcc_checking_assert (TARGET_SIMD);
8690
8691 if (one_vector_p)
8692 {
8693 if (vmode == V8QImode)
8694 {
8695 /* Expand the argument to a V16QI mode by duplicating it. */
8696 rtx pair = gen_reg_rtx (V16QImode);
8697 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8698 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8699 }
8700 else
8701 {
8702 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8703 }
8704 }
8705 else
8706 {
8707 rtx pair;
8708
8709 if (vmode == V8QImode)
8710 {
8711 pair = gen_reg_rtx (V16QImode);
8712 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8713 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8714 }
8715 else
8716 {
8717 pair = gen_reg_rtx (OImode);
8718 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8719 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8720 }
8721 }
8722}
8723
8724void
8725aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8726{
8727 enum machine_mode vmode = GET_MODE (target);
8728 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
8729 bool one_vector_p = rtx_equal_p (op0, op1);
8730 rtx rmask[MAX_VECT_LEN], mask;
8731
8732 gcc_checking_assert (!BYTES_BIG_ENDIAN);
8733
8734 /* The TBL instruction does not use a modulo index, so we must take care
8735 of that ourselves. */
8736 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
8737 for (i = 0; i < nelt; ++i)
8738 rmask[i] = mask;
8739 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
8740 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8741
8742 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
8743}
8744
8745/* Recognize patterns suitable for the TRN instructions. */
8746static bool
8747aarch64_evpc_trn (struct expand_vec_perm_d *d)
8748{
8749 unsigned int i, odd, mask, nelt = d->nelt;
8750 rtx out, in0, in1, x;
8751 rtx (*gen) (rtx, rtx, rtx);
8752 enum machine_mode vmode = d->vmode;
8753
8754 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8755 return false;
8756
8757 /* Note that these are little-endian tests.
8758 We correct for big-endian later. */
8759 if (d->perm[0] == 0)
8760 odd = 0;
8761 else if (d->perm[0] == 1)
8762 odd = 1;
8763 else
8764 return false;
8765 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8766
8767 for (i = 0; i < nelt; i += 2)
8768 {
8769 if (d->perm[i] != i + odd)
8770 return false;
8771 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8772 return false;
8773 }
8774
8775 /* Success! */
8776 if (d->testing_p)
8777 return true;
8778
8779 in0 = d->op0;
8780 in1 = d->op1;
8781 if (BYTES_BIG_ENDIAN)
8782 {
8783 x = in0, in0 = in1, in1 = x;
8784 odd = !odd;
8785 }
8786 out = d->target;
8787
8788 if (odd)
8789 {
8790 switch (vmode)
8791 {
8792 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8793 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8794 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8795 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8796 case V4SImode: gen = gen_aarch64_trn2v4si; break;
8797 case V2SImode: gen = gen_aarch64_trn2v2si; break;
8798 case V2DImode: gen = gen_aarch64_trn2v2di; break;
8799 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8800 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8801 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8802 default:
8803 return false;
8804 }
8805 }
8806 else
8807 {
8808 switch (vmode)
8809 {
8810 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8811 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8812 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8813 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8814 case V4SImode: gen = gen_aarch64_trn1v4si; break;
8815 case V2SImode: gen = gen_aarch64_trn1v2si; break;
8816 case V2DImode: gen = gen_aarch64_trn1v2di; break;
8817 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8818 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8819 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8820 default:
8821 return false;
8822 }
8823 }
8824
8825 emit_insn (gen (out, in0, in1));
8826 return true;
8827}
8828
8829/* Recognize patterns suitable for the UZP instructions. */
8830static bool
8831aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8832{
8833 unsigned int i, odd, mask, nelt = d->nelt;
8834 rtx out, in0, in1, x;
8835 rtx (*gen) (rtx, rtx, rtx);
8836 enum machine_mode vmode = d->vmode;
8837
8838 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8839 return false;
8840
8841 /* Note that these are little-endian tests.
8842 We correct for big-endian later. */
8843 if (d->perm[0] == 0)
8844 odd = 0;
8845 else if (d->perm[0] == 1)
8846 odd = 1;
8847 else
8848 return false;
8849 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8850
8851 for (i = 0; i < nelt; i++)
8852 {
8853 unsigned elt = (i * 2 + odd) & mask;
8854 if (d->perm[i] != elt)
8855 return false;
8856 }
8857
8858 /* Success! */
8859 if (d->testing_p)
8860 return true;
8861
8862 in0 = d->op0;
8863 in1 = d->op1;
8864 if (BYTES_BIG_ENDIAN)
8865 {
8866 x = in0, in0 = in1, in1 = x;
8867 odd = !odd;
8868 }
8869 out = d->target;
8870
8871 if (odd)
8872 {
8873 switch (vmode)
8874 {
8875 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
8876 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
8877 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
8878 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
8879 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
8880 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
8881 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
8882 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
8883 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
8884 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
8885 default:
8886 return false;
8887 }
8888 }
8889 else
8890 {
8891 switch (vmode)
8892 {
8893 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
8894 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
8895 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
8896 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
8897 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
8898 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
8899 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
8900 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
8901 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
8902 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
8903 default:
8904 return false;
8905 }
8906 }
8907
8908 emit_insn (gen (out, in0, in1));
8909 return true;
8910}
8911
8912/* Recognize patterns suitable for the ZIP instructions. */
8913static bool
8914aarch64_evpc_zip (struct expand_vec_perm_d *d)
8915{
8916 unsigned int i, high, mask, nelt = d->nelt;
8917 rtx out, in0, in1, x;
8918 rtx (*gen) (rtx, rtx, rtx);
8919 enum machine_mode vmode = d->vmode;
8920
8921 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8922 return false;
8923
8924 /* Note that these are little-endian tests.
8925 We correct for big-endian later. */
8926 high = nelt / 2;
8927 if (d->perm[0] == high)
8928 /* Do Nothing. */
8929 ;
8930 else if (d->perm[0] == 0)
8931 high = 0;
8932 else
8933 return false;
8934 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8935
8936 for (i = 0; i < nelt / 2; i++)
8937 {
8938 unsigned elt = (i + high) & mask;
8939 if (d->perm[i * 2] != elt)
8940 return false;
8941 elt = (elt + nelt) & mask;
8942 if (d->perm[i * 2 + 1] != elt)
8943 return false;
8944 }
8945
8946 /* Success! */
8947 if (d->testing_p)
8948 return true;
8949
8950 in0 = d->op0;
8951 in1 = d->op1;
8952 if (BYTES_BIG_ENDIAN)
8953 {
8954 x = in0, in0 = in1, in1 = x;
8955 high = !high;
8956 }
8957 out = d->target;
8958
8959 if (high)
8960 {
8961 switch (vmode)
8962 {
8963 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8964 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8965 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8966 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8967 case V4SImode: gen = gen_aarch64_zip2v4si; break;
8968 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8969 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8970 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8971 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8972 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8973 default:
8974 return false;
8975 }
8976 }
8977 else
8978 {
8979 switch (vmode)
8980 {
8981 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8982 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8983 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8984 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8985 case V4SImode: gen = gen_aarch64_zip1v4si; break;
8986 case V2SImode: gen = gen_aarch64_zip1v2si; break;
8987 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8988 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8989 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8990 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8991 default:
8992 return false;
8993 }
8994 }
8995
8996 emit_insn (gen (out, in0, in1));
8997 return true;
8998}
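/* Reference example (added for clarity): with two little-endian V4SI
   inputs a = {a0..a3} and b = {b0..b3}, the selectors matched by the
   three recognisers above are

     ZIP1 -> {0, 4, 1, 5}    ZIP2 -> {2, 6, 3, 7}
     UZP1 -> {0, 2, 4, 6}    UZP2 -> {1, 3, 5, 7}
     TRN1 -> {0, 4, 2, 6}    TRN2 -> {1, 5, 3, 7}

   where indices 0-3 select from the first operand and 4-7 from the
   second; anything else falls back to DUP or a generic TBL expansion.  */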
8999
9000static bool
9001aarch64_evpc_dup (struct expand_vec_perm_d *d)
9002{
9003 rtx (*gen) (rtx, rtx, rtx);
9004 rtx out = d->target;
9005 rtx in0;
9006 enum machine_mode vmode = d->vmode;
9007 unsigned int i, elt, nelt = d->nelt;
9008 rtx lane;
9009
9010 /* TODO: This may not be big-endian safe. */
9011 if (BYTES_BIG_ENDIAN)
9012 return false;
9013
9014 elt = d->perm[0];
9015 for (i = 1; i < nelt; i++)
9016 {
9017 if (elt != d->perm[i])
9018 return false;
9019 }
9020
9021 /* The generic preparation in aarch64_expand_vec_perm_const_1
9022 swaps the operand order and the permute indices if it finds
9023 d->perm[0] to be in the second operand. Thus, we can always
9024 use d->op0 and need not do any extra arithmetic to get the
9025 correct lane number. */
9026 in0 = d->op0;
9027 lane = GEN_INT (elt);
9028
9029 switch (vmode)
9030 {
9031 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9032 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9033 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9034 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9035 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9036 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9037 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9038 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9039 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9040 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9041 default:
9042 return false;
9043 }
9044
9045 emit_insn (gen (out, in0, lane));
9046 return true;
9047}
9048
9049static bool
9050aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9051{
9052 rtx rperm[MAX_VECT_LEN], sel;
9053 enum machine_mode vmode = d->vmode;
9054 unsigned int i, nelt = d->nelt;
9055
9056 if (d->testing_p)
9057 return true;
9058
9059 /* Generic code will try constant permutation twice. Once with the
9060 original mode and again with the elements lowered to QImode.
9061 So wait and don't do the selector expansion ourselves. */
9062 if (vmode != V8QImode && vmode != V16QImode)
9063 return false;
9064
9065 for (i = 0; i < nelt; ++i)
9066 {
9067 int nunits = GET_MODE_NUNITS (vmode);
9068
9069 /* If big-endian and two vectors, we end up with a weird mixed-endian
9070 mode on NEON. Reverse the index within each word but not the word
9071 itself. */
9072 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9073 : d->perm[i]);
9074 }
9075 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9076 sel = force_reg (vmode, sel);
9077
9078 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9079 return true;
9080}
9081
9082static bool
9083aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9084{
9085 /* The pattern matching functions above are written to look for a small
9086 number to begin the sequence (0, 1, N/2). If we begin with an index
9087 from the second operand, we can swap the operands. */
9088 if (d->perm[0] >= d->nelt)
9089 {
9090 unsigned i, nelt = d->nelt;
9091 rtx x;
9092
0696116a 9093 gcc_assert (nelt == (nelt & -nelt));
88b08073 9094 for (i = 0; i < nelt; ++i)
0696116a 9095 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
9096
9097 x = d->op0;
9098 d->op0 = d->op1;
9099 d->op1 = x;
9100 }
9101
9102 if (TARGET_SIMD)
9103 {
9104 if (aarch64_evpc_zip (d))
9105 return true;
9106 else if (aarch64_evpc_uzp (d))
9107 return true;
9108 else if (aarch64_evpc_trn (d))
9109 return true;
9110 else if (aarch64_evpc_dup (d))
9111 return true;
9112 return aarch64_evpc_tbl (d);
9113 }
9114 return false;
9115}
9116
9117/* Expand a vec_perm_const pattern. */
9118
9119bool
9120aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9121{
9122 struct expand_vec_perm_d d;
9123 int i, nelt, which;
9124
9125 d.target = target;
9126 d.op0 = op0;
9127 d.op1 = op1;
9128
9129 d.vmode = GET_MODE (target);
9130 gcc_assert (VECTOR_MODE_P (d.vmode));
9131 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9132 d.testing_p = false;
9133
9134 for (i = which = 0; i < nelt; ++i)
9135 {
9136 rtx e = XVECEXP (sel, 0, i);
9137 int ei = INTVAL (e) & (2 * nelt - 1);
9138 which |= (ei < nelt ? 1 : 2);
9139 d.perm[i] = ei;
9140 }
9141
9142 switch (which)
9143 {
9144 default:
9145 gcc_unreachable ();
9146
9147 case 3:
9148 d.one_vector_p = false;
9149 if (!rtx_equal_p (op0, op1))
9150 break;
9151
9152 /* The elements of PERM do not suggest that only the first operand
9153 is used, but both operands are identical. Allow easier matching
9154 of the permutation by folding the permutation into the single
9155 input vector. */
9156 /* Fall Through. */
9157 case 2:
9158 for (i = 0; i < nelt; ++i)
9159 d.perm[i] &= nelt - 1;
9160 d.op0 = op1;
9161 d.one_vector_p = true;
9162 break;
9163
9164 case 1:
9165 d.op1 = op0;
9166 d.one_vector_p = true;
9167 break;
9168 }
9169
9170 return aarch64_expand_vec_perm_const_1 (&d);
9171}
9172
9173static bool
9174aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
9175 const unsigned char *sel)
9176{
9177 struct expand_vec_perm_d d;
9178 unsigned int i, nelt, which;
9179 bool ret;
9180
9181 d.vmode = vmode;
9182 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9183 d.testing_p = true;
9184 memcpy (d.perm, sel, nelt);
9185
9186 /* Calculate whether all elements are in one vector. */
9187 for (i = which = 0; i < nelt; ++i)
9188 {
9189 unsigned char e = d.perm[i];
9190 gcc_assert (e < 2 * nelt);
9191 which |= (e < nelt ? 1 : 2);
9192 }
9193
9194 /* If all elements are from the second vector, reindex as if from the
9195 first vector. */
9196 if (which == 2)
9197 for (i = 0; i < nelt; ++i)
9198 d.perm[i] -= nelt;
9199
9200 /* Check whether the mask can be applied to a single vector. */
9201 d.one_vector_p = (which != 3);
9202
9203 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
9204 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
9205 if (!d.one_vector_p)
9206 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
9207
9208 start_sequence ();
9209 ret = aarch64_expand_vec_perm_const_1 (&d);
9210 end_sequence ();
9211
9212 return ret;
9213}
9214
9215/* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
9216bool
9217aarch64_cannot_change_mode_class (enum machine_mode from,
9218 enum machine_mode to,
9219 enum reg_class rclass)
9220{
9221 /* Full-reg subregs are allowed on general regs or any class if they are
9222 the same size. */
9223 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9224 || !reg_classes_intersect_p (FP_REGS, rclass))
9225 return false;
9226
9227 /* Limited combinations of subregs are safe on FPREGs. Particularly,
9228 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9229 2. Scalar to Scalar for integer modes or same size float modes.
9230 3. Vector to Vector modes.
9231 4. On little-endian only, Vector-Structure to Vector modes. */
9232 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
9233 {
9234 if (aarch64_vector_mode_supported_p (from)
9235 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
9236 return false;
9237
9238 if (GET_MODE_NUNITS (from) == 1
9239 && GET_MODE_NUNITS (to) == 1
9240 && (GET_MODE_CLASS (from) == MODE_INT
9241 || from == to))
9242 return false;
9243
9244 if (aarch64_vector_mode_supported_p (from)
9245 && aarch64_vector_mode_supported_p (to))
9246 return false;
9247
9248 /* Within a vector structure straddling multiple vector registers
9249 we are in a mixed-endian representation. As such, we can't
9250 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
9251 switch between vectors and vector structures cheaply. */
9252 if (!BYTES_BIG_ENDIAN)
9253 if ((aarch64_vector_mode_supported_p (from)
9254 && aarch64_vect_struct_mode_p (to))
9255 || (aarch64_vector_mode_supported_p (to)
9256 && aarch64_vect_struct_mode_p (from)))
9257 return false;
9258 }
9259
9260 return true;
9261}
9262
9263/* Implement MODES_TIEABLE_P. */
9264
9265bool
9266aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9267{
9268 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
9269 return true;
9270
9271 /* We specifically want to allow elements of "structure" modes to
9272 be tieable to the structure. This more general condition allows
9273 other rarer situations too. */
9274 if (TARGET_SIMD
9275 && aarch64_vector_mode_p (mode1)
9276 && aarch64_vector_mode_p (mode2))
9277 return true;
9278
9279 return false;
9280}
9281
9282#undef TARGET_ADDRESS_COST
9283#define TARGET_ADDRESS_COST aarch64_address_cost
9284
9285/* This hook determines whether unnamed bitfields affect the alignment
9286 of the containing structure. The hook returns true if the structure
9287 should inherit the alignment requirements of an unnamed bitfield's
9288 type. */
9289#undef TARGET_ALIGN_ANON_BITFIELD
9290#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
9291
9292#undef TARGET_ASM_ALIGNED_DI_OP
9293#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
9294
9295#undef TARGET_ASM_ALIGNED_HI_OP
9296#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
9297
9298#undef TARGET_ASM_ALIGNED_SI_OP
9299#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
9300
9301#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9302#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
9303 hook_bool_const_tree_hwi_hwi_const_tree_true
9304
9305#undef TARGET_ASM_FILE_START
9306#define TARGET_ASM_FILE_START aarch64_start_file
9307
9308#undef TARGET_ASM_OUTPUT_MI_THUNK
9309#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
9310
9311#undef TARGET_ASM_SELECT_RTX_SECTION
9312#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
9313
9314#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
9315#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
9316
9317#undef TARGET_BUILD_BUILTIN_VA_LIST
9318#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
9319
9320#undef TARGET_CALLEE_COPIES
9321#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
9322
9323#undef TARGET_CAN_ELIMINATE
9324#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
9325
9326#undef TARGET_CANNOT_FORCE_CONST_MEM
9327#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
9328
9329#undef TARGET_CONDITIONAL_REGISTER_USAGE
9330#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
9331
9332/* Only the least significant bit is used for initialization guard
9333 variables. */
9334#undef TARGET_CXX_GUARD_MASK_BIT
9335#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
9336
9337#undef TARGET_C_MODE_FOR_SUFFIX
9338#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
9339
9340#ifdef TARGET_BIG_ENDIAN_DEFAULT
9341#undef TARGET_DEFAULT_TARGET_FLAGS
9342#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
9343#endif
9344
9345#undef TARGET_CLASS_MAX_NREGS
9346#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9347
9348#undef TARGET_BUILTIN_DECL
9349#define TARGET_BUILTIN_DECL aarch64_builtin_decl
9350
9351#undef TARGET_EXPAND_BUILTIN
9352#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9353
9354#undef TARGET_EXPAND_BUILTIN_VA_START
9355#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9356
9357#undef TARGET_FOLD_BUILTIN
9358#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9359
9360#undef TARGET_FUNCTION_ARG
9361#define TARGET_FUNCTION_ARG aarch64_function_arg
9362
9363#undef TARGET_FUNCTION_ARG_ADVANCE
9364#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9365
9366#undef TARGET_FUNCTION_ARG_BOUNDARY
9367#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9368
9369#undef TARGET_FUNCTION_OK_FOR_SIBCALL
9370#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9371
9372#undef TARGET_FUNCTION_VALUE
9373#define TARGET_FUNCTION_VALUE aarch64_function_value
9374
9375#undef TARGET_FUNCTION_VALUE_REGNO_P
9376#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9377
9378#undef TARGET_FRAME_POINTER_REQUIRED
9379#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9380
9381#undef TARGET_GIMPLE_FOLD_BUILTIN
9382#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9383
9384#undef TARGET_GIMPLIFY_VA_ARG_EXPR
9385#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9386
9387#undef TARGET_INIT_BUILTINS
9388#define TARGET_INIT_BUILTINS aarch64_init_builtins
9389
9390#undef TARGET_LEGITIMATE_ADDRESS_P
9391#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9392
9393#undef TARGET_LEGITIMATE_CONSTANT_P
9394#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9395
9396#undef TARGET_LIBGCC_CMP_RETURN_MODE
9397#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9398
9399#undef TARGET_LRA_P
9400#define TARGET_LRA_P aarch64_lra_p
9401
9402#undef TARGET_MANGLE_TYPE
9403#define TARGET_MANGLE_TYPE aarch64_mangle_type
9404
9405#undef TARGET_MEMORY_MOVE_COST
9406#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9407
9408#undef TARGET_MUST_PASS_IN_STACK
9409#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9410
9411/* This target hook should return true if accesses to volatile bitfields
9412 should use the narrowest mode possible. It should return false if these
9413 accesses should use the bitfield container type. */
9414#undef TARGET_NARROW_VOLATILE_BITFIELD
9415#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9416
9417#undef TARGET_OPTION_OVERRIDE
9418#define TARGET_OPTION_OVERRIDE aarch64_override_options
9419
9420#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9421#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9422 aarch64_override_options_after_change
9423
9424#undef TARGET_PASS_BY_REFERENCE
9425#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9426
9427#undef TARGET_PREFERRED_RELOAD_CLASS
9428#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9429
9430#undef TARGET_SECONDARY_RELOAD
9431#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9432
9433#undef TARGET_SHIFT_TRUNCATION_MASK
9434#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9435
9436#undef TARGET_SETUP_INCOMING_VARARGS
9437#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9438
9439#undef TARGET_STRUCT_VALUE_RTX
9440#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
9441
9442#undef TARGET_REGISTER_MOVE_COST
9443#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9444
9445#undef TARGET_RETURN_IN_MEMORY
9446#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9447
9448#undef TARGET_RETURN_IN_MSB
9449#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9450
9451#undef TARGET_RTX_COSTS
7cc2145f 9452#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 9453
9454#undef TARGET_SCHED_ISSUE_RATE
9455#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9456
9457#undef TARGET_TRAMPOLINE_INIT
9458#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9459
9460#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9461#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9462
9463#undef TARGET_VECTOR_MODE_SUPPORTED_P
9464#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9465
9466#undef TARGET_ARRAY_MODE_SUPPORTED_P
9467#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9468
9469#undef TARGET_VECTORIZE_ADD_STMT_COST
9470#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9471
9472#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9473#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9474 aarch64_builtin_vectorization_cost
9475
9476#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9477#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9478
9479#undef TARGET_VECTORIZE_BUILTINS
9480#define TARGET_VECTORIZE_BUILTINS
9481
9482#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9483#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9484 aarch64_builtin_vectorized_function
9485
9486#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9487#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9488 aarch64_autovectorize_vector_sizes
9489
9490#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
9491#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
9492 aarch64_atomic_assign_expand_fenv
9493
9494/* Section anchor support. */
9495
9496#undef TARGET_MIN_ANCHOR_OFFSET
9497#define TARGET_MIN_ANCHOR_OFFSET -256
9498
9499/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9500 byte offset; we can do much more for larger data types, but have no way
9501 to determine the size of the access. We assume accesses are aligned. */
9502#undef TARGET_MAX_ANCHOR_OFFSET
9503#define TARGET_MAX_ANCHOR_OFFSET 4095
9504
9505#undef TARGET_VECTOR_ALIGNMENT
9506#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9507
9508#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9509#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9510 aarch64_simd_vector_alignment_reachable
9511
9512/* vec_perm support. */
9513
9514#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9515#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9516 aarch64_vectorize_vec_perm_const_ok
9517
70f09188 9518
706b2314 9519#undef TARGET_FIXED_CONDITION_CODE_REGS
9520#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9521
9522#undef TARGET_FLAGS_REGNUM
9523#define TARGET_FLAGS_REGNUM CC_REGNUM
9524
9525struct gcc_target targetm = TARGET_INITIALIZER;
9526
9527#include "gt-aarch64.h"