gcc/config/aarch64/aarch64.c
 1/* Machine description for AArch64 architecture.
 2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "insn-codes.h"
26#include "rtl.h"
27#include "insn-attr.h"
28#include "tree.h"
29#include "stringpool.h"
30#include "stor-layout.h"
31#include "calls.h"
32#include "varasm.h"
33#include "regs.h"
34#include "df.h"
35#include "hard-reg-set.h"
36#include "output.h"
37#include "expr.h"
38#include "reload.h"
39#include "toplev.h"
40#include "target.h"
41#include "target-def.h"
42#include "targhooks.h"
43#include "ggc.h"
44#include "function.h"
45#include "tm_p.h"
46#include "recog.h"
47#include "langhooks.h"
48#include "diagnostic-core.h"
49#include "pointer-set.h"
50#include "hash-table.h"
51#include "vec.h"
52#include "basic-block.h"
53#include "tree-ssa-alias.h"
54#include "internal-fn.h"
55#include "gimple-fold.h"
56#include "tree-eh.h"
57#include "gimple-expr.h"
58#include "is-a.h"
 59#include "gimple.h"
 60#include "gimplify.h"
61#include "optabs.h"
62#include "dwarf2.h"
63#include "cfgloop.h"
64#include "tree-vectorizer.h"
 65#include "config/arm/aarch-cost-tables.h"
 66#include "dumpfile.h"
 67
68/* Defined for convenience. */
69#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
70
71/* Classifies an address.
72
73 ADDRESS_REG_IMM
74 A simple base register plus immediate offset.
75
76 ADDRESS_REG_WB
77 A base register indexed by immediate offset with writeback.
78
79 ADDRESS_REG_REG
80 A base register indexed by (optionally scaled) register.
81
82 ADDRESS_REG_UXTW
83 A base register indexed by (optionally scaled) zero-extended register.
84
85 ADDRESS_REG_SXTW
86 A base register indexed by (optionally scaled) sign-extended register.
87
88 ADDRESS_LO_SUM
89 A LO_SUM rtx with a base register and "LO12" symbol relocation.
90
91 ADDRESS_SYMBOLIC:
92 A constant symbolic address, in pc-relative literal pool. */
93
94enum aarch64_address_type {
95 ADDRESS_REG_IMM,
96 ADDRESS_REG_WB,
97 ADDRESS_REG_REG,
98 ADDRESS_REG_UXTW,
99 ADDRESS_REG_SXTW,
100 ADDRESS_LO_SUM,
101 ADDRESS_SYMBOLIC
102};
103
104struct aarch64_address_info {
105 enum aarch64_address_type type;
106 rtx base;
107 rtx offset;
108 int shift;
109 enum aarch64_symbol_type symbol_type;
110};
111
112struct simd_immediate_info
113{
114 rtx value;
115 int shift;
116 int element_width;
 117 bool mvn;
 118 bool msl;
119};
120
121/* The current code model. */
122enum aarch64_code_model aarch64_cmodel;
123
124#ifdef HAVE_AS_TLS
125#undef TARGET_HAVE_TLS
126#define TARGET_HAVE_TLS 1
127#endif
128
 129static bool aarch64_lra_p (void);
130static bool aarch64_composite_type_p (const_tree, enum machine_mode);
131static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
132 const_tree,
133 enum machine_mode *, int *,
134 bool *);
135static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
136static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
 137static void aarch64_override_options_after_change (void);
138static bool aarch64_vector_mode_supported_p (enum machine_mode);
139static unsigned bit_count (unsigned HOST_WIDE_INT);
140static bool aarch64_const_vec_all_same_int_p (rtx,
141 HOST_WIDE_INT, HOST_WIDE_INT);
142
143static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
144 const unsigned char *sel);
 145static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
 146
 147/* The processor for which instructions should be scheduled. */
 148enum aarch64_processor aarch64_tune = cortexa53;
149
150/* The current tuning set. */
151const struct tune_params *aarch64_tune_params;
152
153/* Mask to specify which instructions we are allowed to generate. */
154unsigned long aarch64_isa_flags = 0;
155
156/* Mask to specify which instruction scheduling options should be used. */
157unsigned long aarch64_tune_flags = 0;
158
159/* Tuning parameters. */
160
161#if HAVE_DESIGNATED_INITIALIZERS
162#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
163#else
164#define NAMED_PARAM(NAME, VAL) (VAL)
165#endif
166
167#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
168__extension__
169#endif
170
171#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
172__extension__
173#endif
174static const struct cpu_addrcost_table generic_addrcost_table =
175{
176#if HAVE_DESIGNATED_INITIALIZERS
177 .addr_scale_costs =
178#endif
179 {
180 NAMED_PARAM (qi, 0),
181 NAMED_PARAM (hi, 0),
182 NAMED_PARAM (si, 0),
183 NAMED_PARAM (ti, 0),
184 },
185 NAMED_PARAM (pre_modify, 0),
186 NAMED_PARAM (post_modify, 0),
187 NAMED_PARAM (register_offset, 0),
188 NAMED_PARAM (register_extend, 0),
189 NAMED_PARAM (imm_offset, 0)
190};
191
192#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
193__extension__
194#endif
195static const struct cpu_addrcost_table cortexa57_addrcost_table =
196{
197#if HAVE_DESIGNATED_INITIALIZERS
198 .addr_scale_costs =
199#endif
200 {
201 NAMED_PARAM (qi, 0),
202 NAMED_PARAM (hi, 1),
203 NAMED_PARAM (si, 0),
204 NAMED_PARAM (ti, 1),
205 },
206 NAMED_PARAM (pre_modify, 0),
207 NAMED_PARAM (post_modify, 0),
208 NAMED_PARAM (register_offset, 0),
209 NAMED_PARAM (register_extend, 0),
210 NAMED_PARAM (imm_offset, 0),
211};
212
213#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
214__extension__
215#endif
216static const struct cpu_regmove_cost generic_regmove_cost =
217{
218 NAMED_PARAM (GP2GP, 1),
219 NAMED_PARAM (GP2FP, 2),
220 NAMED_PARAM (FP2GP, 2),
221 /* We currently do not provide direct support for TFmode Q->Q move.
222 Therefore we need to raise the cost above 2 in order to have
223 reload handle the situation. */
224 NAMED_PARAM (FP2FP, 4)
225};
226
227/* Generic costs for vector insn classes. */
228#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
229__extension__
230#endif
231static const struct cpu_vector_cost generic_vector_cost =
232{
233 NAMED_PARAM (scalar_stmt_cost, 1),
234 NAMED_PARAM (scalar_load_cost, 1),
235 NAMED_PARAM (scalar_store_cost, 1),
236 NAMED_PARAM (vec_stmt_cost, 1),
237 NAMED_PARAM (vec_to_scalar_cost, 1),
238 NAMED_PARAM (scalar_to_vec_cost, 1),
239 NAMED_PARAM (vec_align_load_cost, 1),
240 NAMED_PARAM (vec_unalign_load_cost, 1),
241 NAMED_PARAM (vec_unalign_store_cost, 1),
242 NAMED_PARAM (vec_store_cost, 1),
243 NAMED_PARAM (cond_taken_branch_cost, 3),
244 NAMED_PARAM (cond_not_taken_branch_cost, 1)
245};
246
 247/* Costs for vector insn classes for Cortex-A57. */
248#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
249__extension__
250#endif
251static const struct cpu_vector_cost cortexa57_vector_cost =
252{
253 NAMED_PARAM (scalar_stmt_cost, 1),
254 NAMED_PARAM (scalar_load_cost, 4),
255 NAMED_PARAM (scalar_store_cost, 1),
256 NAMED_PARAM (vec_stmt_cost, 3),
257 NAMED_PARAM (vec_to_scalar_cost, 8),
258 NAMED_PARAM (scalar_to_vec_cost, 8),
259 NAMED_PARAM (vec_align_load_cost, 5),
260 NAMED_PARAM (vec_unalign_load_cost, 5),
261 NAMED_PARAM (vec_unalign_store_cost, 1),
262 NAMED_PARAM (vec_store_cost, 1),
263 NAMED_PARAM (cond_taken_branch_cost, 1),
264 NAMED_PARAM (cond_not_taken_branch_cost, 1)
265};
266
267#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
268__extension__
269#endif
270static const struct tune_params generic_tunings =
271{
 272 &cortexa57_extra_costs,
 273 &generic_addrcost_table,
 274 &generic_regmove_cost,
 275 &generic_vector_cost,
276 NAMED_PARAM (memmov_cost, 4),
277 NAMED_PARAM (issue_rate, 2)
278};
279
280static const struct tune_params cortexa53_tunings =
281{
282 &cortexa53_extra_costs,
283 &generic_addrcost_table,
284 &generic_regmove_cost,
285 &generic_vector_cost,
286 NAMED_PARAM (memmov_cost, 4),
287 NAMED_PARAM (issue_rate, 2)
288};
289
290static const struct tune_params cortexa57_tunings =
291{
292 &cortexa57_extra_costs,
 293 &cortexa57_addrcost_table,
 294 &generic_regmove_cost,
 295 &cortexa57_vector_cost,
296 NAMED_PARAM (memmov_cost, 4),
297 NAMED_PARAM (issue_rate, 3)
298};
299
300/* A processor implementing AArch64. */
301struct processor
302{
303 const char *const name;
304 enum aarch64_processor core;
305 const char *arch;
306 const unsigned long flags;
307 const struct tune_params *const tune;
308};
309
310/* Processor cores implementing AArch64. */
311static const struct processor all_cores[] =
312{
 313#define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
314 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
315#include "aarch64-cores.def"
316#undef AARCH64_CORE
 317 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
318 {NULL, aarch64_none, NULL, 0, NULL}
319};
320
321/* Architectures implementing AArch64. */
322static const struct processor all_architectures[] =
323{
324#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
325 {NAME, CORE, #ARCH, FLAGS, NULL},
326#include "aarch64-arches.def"
327#undef AARCH64_ARCH
328 {NULL, aarch64_none, NULL, 0, NULL}
329};
330
331/* Target specification. These are populated as commandline arguments
332 are processed, or NULL if not specified. */
333static const struct processor *selected_arch;
334static const struct processor *selected_cpu;
335static const struct processor *selected_tune;
336
337#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
338
339/* An ISA extension in the co-processor and main instruction set space. */
340struct aarch64_option_extension
341{
342 const char *const name;
343 const unsigned long flags_on;
344 const unsigned long flags_off;
345};
346
347/* ISA extensions in AArch64. */
348static const struct aarch64_option_extension all_extensions[] =
349{
350#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
351 {NAME, FLAGS_ON, FLAGS_OFF},
352#include "aarch64-option-extensions.def"
353#undef AARCH64_OPT_EXTENSION
354 {NULL, 0, 0}
355};
356
357/* Used to track the size of an address when generating a pre/post
358 increment address. */
359static enum machine_mode aarch64_memory_reference_mode;
360
361/* Used to force GTY into this file. */
362static GTY(()) int gty_dummy;
363
364/* A table of valid AArch64 "bitmask immediate" values for
365 logical instructions. */
366
367#define AARCH64_NUM_BITMASKS 5334
368static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
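/* Illustrative note, added as an annotation rather than taken from the
   original source: AArch64 logical instructions encode immediates as a
   replicated, rotated run of set bits.  For example,
   0x00ff00ff00ff00ff (the 16-bit element 0x00ff replicated across the
   register) is encodable, while an arbitrary value such as
   0x1234567890abcdef is not.  The table above is assumed to cache every
   encodable 64-bit pattern so constant-building code can search it.  */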
369
370typedef enum aarch64_cond_code
371{
372 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
373 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
374 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
375}
376aarch64_cc;
377
378#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
379
380/* The condition codes of the processor, and the inverse function. */
381static const char * const aarch64_condition_codes[] =
382{
383 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
384 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
385};
386
387/* Provide a mapping from gcc register numbers to dwarf register numbers. */
388unsigned
389aarch64_dbx_register_number (unsigned regno)
390{
391 if (GP_REGNUM_P (regno))
392 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
393 else if (regno == SP_REGNUM)
394 return AARCH64_DWARF_SP;
395 else if (FP_REGNUM_P (regno))
396 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
397
398 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
399 equivalent DWARF register. */
400 return DWARF_FRAME_REGISTERS;
401}
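/* Worked example (annotation, not part of the original source), assuming
   the usual definitions AARCH64_DWARF_R0 == 0 and AARCH64_DWARF_V0 == 64:
   general register x3 maps to DWARF register 3, vector register v5 maps
   to DWARF register 69, and a register with no DWARF equivalent (such as
   the condition flags) falls through to DWARF_FRAME_REGISTERS.  */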
402
403/* Return TRUE if MODE is any of the large INT modes. */
404static bool
405aarch64_vect_struct_mode_p (enum machine_mode mode)
406{
407 return mode == OImode || mode == CImode || mode == XImode;
408}
409
410/* Return TRUE if MODE is any of the vector modes. */
411static bool
412aarch64_vector_mode_p (enum machine_mode mode)
413{
414 return aarch64_vector_mode_supported_p (mode)
415 || aarch64_vect_struct_mode_p (mode);
416}
417
418/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
419static bool
420aarch64_array_mode_supported_p (enum machine_mode mode,
421 unsigned HOST_WIDE_INT nelems)
422{
423 if (TARGET_SIMD
424 && AARCH64_VALID_SIMD_QREG_MODE (mode)
425 && (nelems >= 2 && nelems <= 4))
426 return true;
427
428 return false;
429}
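/* Illustrative example (my annotation, not from the original source):
   with TARGET_SIMD enabled, an array of three V4SImode vectors -- the
   shape used by ld3/st3-style accesses -- is accepted here, while nelems
   outside 2..4 or element modes that are not 128-bit Q-register modes
   fall back to the generic handling.  */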
430
431/* Implement HARD_REGNO_NREGS. */
432
433int
434aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
435{
436 switch (aarch64_regno_regclass (regno))
437 {
438 case FP_REGS:
439 case FP_LO_REGS:
440 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
441 default:
442 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
443 }
444 gcc_unreachable ();
445}
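/* Worked example (annotation, not part of the original source): assuming
   UNITS_PER_VREG == 16 and UNITS_PER_WORD == 8, a 32-byte OImode value
   occupies 2 consecutive FP/SIMD registers but would need 4 general
   registers, while a DImode value needs a single register of either
   kind.  */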
446
447/* Implement HARD_REGNO_MODE_OK. */
448
449int
450aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
451{
452 if (GET_MODE_CLASS (mode) == MODE_CC)
453 return regno == CC_REGNUM;
454
455 if (regno == SP_REGNUM)
456 /* The purpose of comparing with ptr_mode is to support the
457 global register variable associated with the stack pointer
458 register via the syntax of asm ("wsp") in ILP32. */
459 return mode == Pmode || mode == ptr_mode;
460
461 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
462 return mode == Pmode;
463
464 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
465 return 1;
466
467 if (FP_REGNUM_P (regno))
468 {
469 if (aarch64_vect_struct_mode_p (mode))
470 return
471 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
472 else
473 return 1;
474 }
475
476 return 0;
477}
478
479/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
480enum machine_mode
481aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
482 enum machine_mode mode)
483{
484 /* Handle modes that fit within single registers. */
485 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
486 {
487 if (GET_MODE_SIZE (mode) >= 4)
488 return mode;
489 else
490 return SImode;
491 }
492 /* Fall back to generic for multi-reg and very large modes. */
493 else
494 return choose_hard_reg_mode (regno, nregs, false);
495}
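/* Annotation (not in the original source): the effect of the code above
   is that a sub-word value such as an HImode quantity living in one
   register is saved and restored as SImode, anything of 4..16 bytes that
   fits in a single register keeps its own mode, and multi-register
   values defer to choose_hard_reg_mode.  */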
496
497/* Return true if calls to DECL should be treated as
498 long-calls (ie called via a register). */
499static bool
500aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
501{
502 return false;
503}
504
505/* Return true if calls to symbol-ref SYM should be treated as
506 long-calls (ie called via a register). */
507bool
508aarch64_is_long_call_p (rtx sym)
509{
510 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
511}
512
513/* Return true if the offsets to a zero/sign-extract operation
514 represent an expression that matches an extend operation. The
 515 operands represent the parameters from
516
 517 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
518bool
519aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
520 rtx extract_imm)
521{
522 HOST_WIDE_INT mult_val, extract_val;
523
524 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
525 return false;
526
527 mult_val = INTVAL (mult_imm);
528 extract_val = INTVAL (extract_imm);
529
530 if (extract_val > 8
531 && extract_val < GET_MODE_BITSIZE (mode)
532 && exact_log2 (extract_val & ~7) > 0
533 && (extract_val & 7) <= 4
534 && mult_val == (1 << (extract_val & 7)))
535 return true;
536
537 return false;
538}
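/* Concrete case (added as an annotation, not from the original source):
   for DImode, mult_imm == 4 and extract_imm == 34 satisfy the checks
   above (34 & ~7 == 32 is a power of two, 34 & 7 == 2 <= 4, and
   4 == 1 << 2); the extract then describes a 32-bit value shifted left
   by 2, i.e. the "extended register with LSL #2" operand form.  */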
539
540/* Emit an insn that's a simple single-set. Both the operands must be
541 known to be valid. */
542inline static rtx
543emit_set_insn (rtx x, rtx y)
544{
545 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
546}
547
548/* X and Y are two things to compare using CODE. Emit the compare insn and
549 return the rtx for register 0 in the proper mode. */
550rtx
551aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
552{
553 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
554 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
555
556 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
557 return cc_reg;
558}
559
560/* Build the SYMBOL_REF for __tls_get_addr. */
561
562static GTY(()) rtx tls_get_addr_libfunc;
563
564rtx
565aarch64_tls_get_addr (void)
566{
567 if (!tls_get_addr_libfunc)
568 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
569 return tls_get_addr_libfunc;
570}
571
572/* Return the TLS model to use for ADDR. */
573
574static enum tls_model
575tls_symbolic_operand_type (rtx addr)
576{
577 enum tls_model tls_kind = TLS_MODEL_NONE;
578 rtx sym, addend;
579
580 if (GET_CODE (addr) == CONST)
581 {
582 split_const (addr, &sym, &addend);
583 if (GET_CODE (sym) == SYMBOL_REF)
584 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
585 }
586 else if (GET_CODE (addr) == SYMBOL_REF)
587 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
588
589 return tls_kind;
590}
591
592/* We'll allow lo_sum's in addresses in our legitimate addresses
593 so that combine would take care of combining addresses where
594 necessary, but for generation purposes, we'll generate the address
595 as :
596 RTL Absolute
597 tmp = hi (symbol_ref); adrp x1, foo
598 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
599 nop
600
601 PIC TLS
602 adrp x1, :got:foo adrp tmp, :tlsgd:foo
603 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
604 bl __tls_get_addr
605 nop
606
607 Load TLS symbol, depending on TLS mechanism and TLS access model.
608
609 Global Dynamic - Traditional TLS:
610 adrp tmp, :tlsgd:imm
611 add dest, tmp, #:tlsgd_lo12:imm
612 bl __tls_get_addr
613
614 Global Dynamic - TLS Descriptors:
615 adrp dest, :tlsdesc:imm
616 ldr tmp, [dest, #:tlsdesc_lo12:imm]
617 add dest, dest, #:tlsdesc_lo12:imm
618 blr tmp
619 mrs tp, tpidr_el0
620 add dest, dest, tp
621
622 Initial Exec:
623 mrs tp, tpidr_el0
624 adrp tmp, :gottprel:imm
625 ldr dest, [tmp, #:gottprel_lo12:imm]
626 add dest, dest, tp
627
628 Local Exec:
629 mrs tp, tpidr_el0
630 add t0, tp, #:tprel_hi12:imm
631 add t0, #:tprel_lo12_nc:imm
632*/
633
634static void
635aarch64_load_symref_appropriately (rtx dest, rtx imm,
636 enum aarch64_symbol_type type)
637{
638 switch (type)
639 {
640 case SYMBOL_SMALL_ABSOLUTE:
641 {
 642 /* In ILP32, the mode of dest can be either SImode or DImode. */
 643 rtx tmp_reg = dest;
644 enum machine_mode mode = GET_MODE (dest);
645
646 gcc_assert (mode == Pmode || mode == ptr_mode);
647
 648 if (can_create_pseudo_p ())
 649 tmp_reg = gen_reg_rtx (mode);
 650
 651 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
652 emit_insn (gen_add_losym (dest, tmp_reg, imm));
653 return;
654 }
655
656 case SYMBOL_TINY_ABSOLUTE:
657 emit_insn (gen_rtx_SET (Pmode, dest, imm));
658 return;
659
660 case SYMBOL_SMALL_GOT:
661 {
662 /* In ILP32, the mode of dest can be either SImode or DImode,
663 while the got entry is always of SImode size. The mode of
664 dest depends on how dest is used: if dest is assigned to a
665 pointer (e.g. in the memory), it has SImode; it may have
 666 DImode if dest is dereferenced to access the memory.
667 This is why we have to handle three different ldr_got_small
668 patterns here (two patterns for ILP32). */
43e9d192 669 rtx tmp_reg = dest;
670 enum machine_mode mode = GET_MODE (dest);
671
 672 if (can_create_pseudo_p ())
673 tmp_reg = gen_reg_rtx (mode);
674
675 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
676 if (mode == ptr_mode)
677 {
678 if (mode == DImode)
679 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
680 else
681 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
682 }
683 else
684 {
685 gcc_assert (mode == Pmode);
686 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
687 }
688
689 return;
690 }
691
692 case SYMBOL_SMALL_TLSGD:
693 {
694 rtx insns;
695 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
696
697 start_sequence ();
698 emit_call_insn (gen_tlsgd_small (result, imm));
699 insns = get_insns ();
700 end_sequence ();
701
702 RTL_CONST_CALL_P (insns) = 1;
703 emit_libcall_block (insns, dest, result, imm);
704 return;
705 }
706
707 case SYMBOL_SMALL_TLSDESC:
708 {
709 enum machine_mode mode = GET_MODE (dest);
710 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
711 rtx tp;
712
713 gcc_assert (mode == Pmode || mode == ptr_mode);
714
715 /* In ILP32, the got entry is always of SImode size. Unlike
716 small GOT, the dest is fixed at reg 0. */
717 if (TARGET_ILP32)
718 emit_insn (gen_tlsdesc_small_si (imm));
719 else
720 emit_insn (gen_tlsdesc_small_di (imm));
 721 tp = aarch64_load_tp (NULL);
722
723 if (mode != Pmode)
724 tp = gen_lowpart (mode, tp);
725
726 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
727 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
728 return;
729 }
730
731 case SYMBOL_SMALL_GOTTPREL:
732 {
733 /* In ILP32, the mode of dest can be either SImode or DImode,
734 while the got entry is always of SImode size. The mode of
735 dest depends on how dest is used: if dest is assigned to a
736 pointer (e.g. in the memory), it has SImode; it may have
 737 DImode if dest is dereferenced to access the memory.
738 This is why we have to handle three different tlsie_small
739 patterns here (two patterns for ILP32). */
740 enum machine_mode mode = GET_MODE (dest);
741 rtx tmp_reg = gen_reg_rtx (mode);
 742 rtx tp = aarch64_load_tp (NULL);
743
744 if (mode == ptr_mode)
745 {
746 if (mode == DImode)
747 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
748 else
749 {
750 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
751 tp = gen_lowpart (mode, tp);
752 }
753 }
754 else
755 {
756 gcc_assert (mode == Pmode);
757 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
758 }
759
760 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
761 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
762 return;
763 }
764
765 case SYMBOL_SMALL_TPREL:
766 {
767 rtx tp = aarch64_load_tp (NULL);
768 emit_insn (gen_tlsle_small (dest, tp, imm));
769 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
770 return;
771 }
772
773 case SYMBOL_TINY_GOT:
774 emit_insn (gen_ldr_got_tiny (dest, imm));
775 return;
776
777 default:
778 gcc_unreachable ();
779 }
780}
781
782/* Emit a move from SRC to DEST. Assume that the move expanders can
783 handle all moves if !can_create_pseudo_p (). The distinction is
784 important because, unlike emit_move_insn, the move expanders know
785 how to force Pmode objects into the constant pool even when the
786 constant pool address is not itself legitimate. */
787static rtx
788aarch64_emit_move (rtx dest, rtx src)
789{
790 return (can_create_pseudo_p ()
791 ? emit_move_insn (dest, src)
792 : emit_move_insn_1 (dest, src));
793}
794
795/* Split a 128-bit move operation into two 64-bit move operations,
796 taking care to handle partial overlap of register to register
797 copies. Special cases are needed when moving between GP regs and
798 FP regs. SRC can be a register, constant or memory; DST a register
799 or memory. If either operand is memory it must not have any side
800 effects. */
801void
802aarch64_split_128bit_move (rtx dst, rtx src)
803{
 804 rtx dst_lo, dst_hi;
 805 rtx src_lo, src_hi;
 806
 807 enum machine_mode mode = GET_MODE (dst);
 808
809 gcc_assert (mode == TImode || mode == TFmode);
810 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
811 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
812
813 if (REG_P (dst) && REG_P (src))
814 {
815 int src_regno = REGNO (src);
816 int dst_regno = REGNO (dst);
 817
 818 /* Handle FP <-> GP regs. */
819 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
820 {
821 src_lo = gen_lowpart (word_mode, src);
822 src_hi = gen_highpart (word_mode, src);
823
824 if (mode == TImode)
825 {
826 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
827 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
828 }
829 else
830 {
831 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
832 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
833 }
834 return;
835 }
836 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
837 {
838 dst_lo = gen_lowpart (word_mode, dst);
839 dst_hi = gen_highpart (word_mode, dst);
840
841 if (mode == TImode)
842 {
843 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
844 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
845 }
846 else
847 {
848 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
849 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
850 }
851 return;
 852 }
853 }
854
855 dst_lo = gen_lowpart (word_mode, dst);
856 dst_hi = gen_highpart (word_mode, dst);
857 src_lo = gen_lowpart (word_mode, src);
858 src_hi = gen_highpart_mode (word_mode, mode, src);
859
860 /* At most one pairing may overlap. */
861 if (reg_overlap_mentioned_p (dst_lo, src_hi))
862 {
863 aarch64_emit_move (dst_hi, src_hi);
864 aarch64_emit_move (dst_lo, src_lo);
865 }
866 else
867 {
868 aarch64_emit_move (dst_lo, src_lo);
869 aarch64_emit_move (dst_hi, src_hi);
870 }
871}
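/* Example of the overlap handling above (annotation, not in the original
   source): for a TImode copy from the register pair x0/x1 into x1/x2,
   dst_lo (x1) overlaps src_hi (x1), so the high halves are moved first
   (x2 <- x1) and the low halves second (x1 <- x0); in the non-overlapping
   case the low halves go first.  */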
872
873bool
874aarch64_split_128bit_move_p (rtx dst, rtx src)
875{
876 return (! REG_P (src)
877 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
878}
879
880/* Split a complex SIMD combine. */
881
882void
883aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
884{
885 enum machine_mode src_mode = GET_MODE (src1);
886 enum machine_mode dst_mode = GET_MODE (dst);
887
888 gcc_assert (VECTOR_MODE_P (dst_mode));
889
890 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
891 {
892 rtx (*gen) (rtx, rtx, rtx);
893
894 switch (src_mode)
895 {
896 case V8QImode:
897 gen = gen_aarch64_simd_combinev8qi;
898 break;
899 case V4HImode:
900 gen = gen_aarch64_simd_combinev4hi;
901 break;
902 case V2SImode:
903 gen = gen_aarch64_simd_combinev2si;
904 break;
905 case V2SFmode:
906 gen = gen_aarch64_simd_combinev2sf;
907 break;
908 case DImode:
909 gen = gen_aarch64_simd_combinedi;
910 break;
911 case DFmode:
912 gen = gen_aarch64_simd_combinedf;
913 break;
914 default:
915 gcc_unreachable ();
916 }
917
918 emit_insn (gen (dst, src1, src2));
919 return;
920 }
921}
922
923/* Split a complex SIMD move. */
924
925void
926aarch64_split_simd_move (rtx dst, rtx src)
927{
928 enum machine_mode src_mode = GET_MODE (src);
929 enum machine_mode dst_mode = GET_MODE (dst);
930
931 gcc_assert (VECTOR_MODE_P (dst_mode));
932
933 if (REG_P (dst) && REG_P (src))
934 {
935 rtx (*gen) (rtx, rtx);
936
937 gcc_assert (VECTOR_MODE_P (src_mode));
938
939 switch (src_mode)
940 {
941 case V16QImode:
 942 gen = gen_aarch64_split_simd_movv16qi;
943 break;
944 case V8HImode:
 945 gen = gen_aarch64_split_simd_movv8hi;
946 break;
947 case V4SImode:
 948 gen = gen_aarch64_split_simd_movv4si;
949 break;
950 case V2DImode:
 951 gen = gen_aarch64_split_simd_movv2di;
952 break;
953 case V4SFmode:
 954 gen = gen_aarch64_split_simd_movv4sf;
955 break;
956 case V2DFmode:
 957 gen = gen_aarch64_split_simd_movv2df;
958 break;
959 default:
960 gcc_unreachable ();
961 }
962
963 emit_insn (gen (dst, src));
964 return;
965 }
966}
967
 968static rtx
 969aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
970{
971 if (can_create_pseudo_p ())
 972 return force_reg (mode, value);
973 else
974 {
975 x = aarch64_emit_move (x, value);
976 return x;
977 }
978}
979
980
981static rtx
982aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
983{
 984 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
985 {
986 rtx high;
987 /* Load the full offset into a register. This
988 might be improvable in the future. */
989 high = GEN_INT (offset);
990 offset = 0;
991 high = aarch64_force_temporary (mode, temp, high);
992 reg = aarch64_force_temporary (mode, temp,
993 gen_rtx_PLUS (mode, high, reg));
994 }
995 return plus_constant (mode, reg, offset);
996}
997
998void
999aarch64_expand_mov_immediate (rtx dest, rtx imm)
1000{
1001 enum machine_mode mode = GET_MODE (dest);
1002 unsigned HOST_WIDE_INT mask;
1003 int i;
1004 bool first;
1005 unsigned HOST_WIDE_INT val;
1006 bool subtargets;
1007 rtx subtarget;
1008 int one_match, zero_match;
1009
1010 gcc_assert (mode == SImode || mode == DImode);
1011
1012 /* Check on what type of symbol it is. */
1013 if (GET_CODE (imm) == SYMBOL_REF
1014 || GET_CODE (imm) == LABEL_REF
1015 || GET_CODE (imm) == CONST)
1016 {
1017 rtx mem, base, offset;
1018 enum aarch64_symbol_type sty;
1019
1020 /* If we have (const (plus symbol offset)), separate out the offset
1021 before we start classifying the symbol. */
1022 split_const (imm, &base, &offset);
1023
1024 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1025 switch (sty)
1026 {
1027 case SYMBOL_FORCE_TO_MEM:
1028 if (offset != const0_rtx
1029 && targetm.cannot_force_const_mem (mode, imm))
1030 {
 1031 gcc_assert (can_create_pseudo_p ());
 1032 base = aarch64_force_temporary (mode, dest, base);
1033 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1034 aarch64_emit_move (dest, base);
1035 return;
1036 }
 1037 mem = force_const_mem (ptr_mode, imm);
 1038 gcc_assert (mem);
1039 if (mode != ptr_mode)
1040 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1041 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1042 return;
1043
1044 case SYMBOL_SMALL_TLSGD:
1045 case SYMBOL_SMALL_TLSDESC:
1046 case SYMBOL_SMALL_GOTTPREL:
1047 case SYMBOL_SMALL_GOT:
 1048 case SYMBOL_TINY_GOT:
1049 if (offset != const0_rtx)
1050 {
1051 gcc_assert(can_create_pseudo_p ());
 1052 base = aarch64_force_temporary (mode, dest, base);
1053 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1054 aarch64_emit_move (dest, base);
1055 return;
1056 }
1057 /* FALLTHRU */
1058
1059 case SYMBOL_SMALL_TPREL:
1060 case SYMBOL_SMALL_ABSOLUTE:
 1061 case SYMBOL_TINY_ABSOLUTE:
1062 aarch64_load_symref_appropriately (dest, imm, sty);
1063 return;
1064
1065 default:
1066 gcc_unreachable ();
1067 }
1068 }
1069
1070 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1071 {
1072 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1073 return;
1074 }
1075
1076 if (!CONST_INT_P (imm))
1077 {
1078 if (GET_CODE (imm) == HIGH)
1079 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1080 else
1081 {
1082 rtx mem = force_const_mem (mode, imm);
1083 gcc_assert (mem);
1084 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1085 }
1086
1087 return;
1088 }
1089
1090 if (mode == SImode)
1091 {
1092 /* We know we can't do this in 1 insn, and we must be able to do it
1093 in two; so don't mess around looking for sequences that don't buy
1094 us anything. */
1095 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1096 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1097 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1098 return;
1099 }
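 /* Worked example (annotation, not from the original source): for the
    SImode constant 0x12345678 the two insns above move 0x5678 into the
    destination and then insert 0x1234 at bit 16, i.e. a MOV/MOVK pair.  */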
1100
1101 /* Remaining cases are all for DImode. */
1102
1103 val = INTVAL (imm);
1104 subtargets = optimize && can_create_pseudo_p ();
1105
1106 one_match = 0;
1107 zero_match = 0;
1108 mask = 0xffff;
1109
1110 for (i = 0; i < 64; i += 16, mask <<= 16)
1111 {
1112 if ((val & mask) == 0)
1113 zero_match++;
1114 else if ((val & mask) == mask)
1115 one_match++;
1116 }
1117
1118 if (one_match == 2)
1119 {
1120 mask = 0xffff;
1121 for (i = 0; i < 64; i += 16, mask <<= 16)
1122 {
1123 if ((val & mask) != mask)
1124 {
1125 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1126 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1127 GEN_INT ((val >> i) & 0xffff)));
1128 return;
1129 }
1130 }
1131 gcc_unreachable ();
1132 }
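 /* Worked example (annotation, not in the original source): for
    val == 0x1234ffff5678ffff two 16-bit chunks are already 0xffff; the
    first chunk that is not (0x5678 at bit 16) is forced to all-ones so
    that val | mask == 0x1234ffffffffffff is a single move-immediate
    (MOVN class), and one MOVK then inserts 0x5678 -- two instructions
    in total.  */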
1133
1134 if (zero_match == 2)
1135 goto simple_sequence;
1136
1137 mask = 0x0ffff0000UL;
1138 for (i = 16; i < 64; i += 16, mask <<= 16)
1139 {
1140 HOST_WIDE_INT comp = mask & ~(mask - 1);
1141
1142 if (aarch64_uimm12_shift (val - (val & mask)))
1143 {
1144 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1145
1146 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1147 emit_insn (gen_adddi3 (dest, subtarget,
1148 GEN_INT (val - (val & mask))));
1149 return;
1150 }
1151 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1152 {
1153 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1154
1155 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1156 GEN_INT ((val + comp) & mask)));
1157 emit_insn (gen_adddi3 (dest, subtarget,
1158 GEN_INT (val - ((val + comp) & mask))));
1159 return;
1160 }
1161 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1162 {
1163 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1164
1165 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1166 GEN_INT ((val - comp) | ~mask)));
1167 emit_insn (gen_adddi3 (dest, subtarget,
1168 GEN_INT (val - ((val - comp) | ~mask))));
1169 return;
1170 }
1171 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1172 {
1173 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1174
1175 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1176 GEN_INT (val | ~mask)));
1177 emit_insn (gen_adddi3 (dest, subtarget,
1178 GEN_INT (val - (val | ~mask))));
1179 return;
1180 }
1181 }
1182
1183 /* See if we can do it by arithmetically combining two
1184 immediates. */
1185 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1186 {
1187 int j;
1188 mask = 0xffff;
1189
1190 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1191 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1192 {
1193 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1194 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1195 GEN_INT (aarch64_bitmasks[i])));
1196 emit_insn (gen_adddi3 (dest, subtarget,
1197 GEN_INT (val - aarch64_bitmasks[i])));
1198 return;
1199 }
1200
1201 for (j = 0; j < 64; j += 16, mask <<= 16)
1202 {
1203 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1204 {
1205 emit_insn (gen_rtx_SET (VOIDmode, dest,
1206 GEN_INT (aarch64_bitmasks[i])));
1207 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1208 GEN_INT ((val >> j) & 0xffff)));
1209 return;
1210 }
1211 }
1212 }
1213
1214 /* See if we can do it by logically combining two immediates. */
1215 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1216 {
1217 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1218 {
1219 int j;
1220
1221 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1222 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1223 {
1224 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1225 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1226 GEN_INT (aarch64_bitmasks[i])));
1227 emit_insn (gen_iordi3 (dest, subtarget,
1228 GEN_INT (aarch64_bitmasks[j])));
1229 return;
1230 }
1231 }
1232 else if ((val & aarch64_bitmasks[i]) == val)
1233 {
1234 int j;
1235
1236 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1237 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1238 {
1239
1240 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1241 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1242 GEN_INT (aarch64_bitmasks[j])));
1243 emit_insn (gen_anddi3 (dest, subtarget,
1244 GEN_INT (aarch64_bitmasks[i])));
1245 return;
1246 }
1247 }
1248 }
1249
1250 simple_sequence:
1251 first = true;
1252 mask = 0xffff;
1253 for (i = 0; i < 64; i += 16, mask <<= 16)
1254 {
1255 if ((val & mask) != 0)
1256 {
1257 if (first)
1258 {
1259 emit_insn (gen_rtx_SET (VOIDmode, dest,
1260 GEN_INT (val & mask)));
1261 first = false;
1262 }
1263 else
1264 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1265 GEN_INT ((val >> i) & 0xffff)));
1266 }
1267 }
1268}
1269
1270static bool
1271aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1272 tree exp ATTRIBUTE_UNUSED)
 1273{
 1274 /* Currently, always true. */
1275 return true;
1276}
1277
1278/* Implement TARGET_PASS_BY_REFERENCE. */
1279
1280static bool
1281aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1282 enum machine_mode mode,
1283 const_tree type,
1284 bool named ATTRIBUTE_UNUSED)
1285{
1286 HOST_WIDE_INT size;
1287 enum machine_mode dummymode;
1288 int nregs;
1289
1290 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1291 size = (mode == BLKmode && type)
1292 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1293
1294 /* Aggregates are passed by reference based on their size. */
1295 if (type && AGGREGATE_TYPE_P (type))
 1296 {
 1297 size = int_size_in_bytes (type);
1298 }
1299
1300 /* Variable sized arguments are always returned by reference. */
1301 if (size < 0)
1302 return true;
1303
1304 /* Can this be a candidate to be passed in fp/simd register(s)? */
1305 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1306 &dummymode, &nregs,
1307 NULL))
1308 return false;
1309
1310 /* Arguments which are variable sized or larger than 2 registers are
 1311 passed by reference unless they are a homogeneous floating point
1312 aggregate. */
1313 return size > 2 * UNITS_PER_WORD;
1314}
1315
1316/* Return TRUE if VALTYPE is padded to its least significant bits. */
1317static bool
1318aarch64_return_in_msb (const_tree valtype)
1319{
1320 enum machine_mode dummy_mode;
1321 int dummy_int;
1322
1323 /* Never happens in little-endian mode. */
1324 if (!BYTES_BIG_ENDIAN)
1325 return false;
1326
1327 /* Only composite types smaller than or equal to 16 bytes can
1328 be potentially returned in registers. */
1329 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1330 || int_size_in_bytes (valtype) <= 0
1331 || int_size_in_bytes (valtype) > 16)
1332 return false;
1333
1334 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1335 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1336 is always passed/returned in the least significant bits of fp/simd
1337 register(s). */
1338 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1339 &dummy_mode, &dummy_int, NULL))
1340 return false;
1341
1342 return true;
1343}
1344
1345/* Implement TARGET_FUNCTION_VALUE.
1346 Define how to find the value returned by a function. */
1347
1348static rtx
1349aarch64_function_value (const_tree type, const_tree func,
1350 bool outgoing ATTRIBUTE_UNUSED)
1351{
1352 enum machine_mode mode;
1353 int unsignedp;
1354 int count;
1355 enum machine_mode ag_mode;
1356
1357 mode = TYPE_MODE (type);
1358 if (INTEGRAL_TYPE_P (type))
1359 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1360
1361 if (aarch64_return_in_msb (type))
1362 {
1363 HOST_WIDE_INT size = int_size_in_bytes (type);
1364
1365 if (size % UNITS_PER_WORD != 0)
1366 {
1367 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1368 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1369 }
1370 }
1371
1372 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1373 &ag_mode, &count, NULL))
1374 {
1375 if (!aarch64_composite_type_p (type, mode))
1376 {
1377 gcc_assert (count == 1 && mode == ag_mode);
1378 return gen_rtx_REG (mode, V0_REGNUM);
1379 }
1380 else
1381 {
1382 int i;
1383 rtx par;
1384
1385 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1386 for (i = 0; i < count; i++)
1387 {
1388 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1389 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1390 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1391 XVECEXP (par, 0, i) = tmp;
1392 }
1393 return par;
1394 }
1395 }
1396 else
1397 return gen_rtx_REG (mode, R0_REGNUM);
1398}
1399
1400/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1401 Return true if REGNO is the number of a hard register in which the values
1402 of called function may come back. */
1403
1404static bool
1405aarch64_function_value_regno_p (const unsigned int regno)
1406{
1407 /* Maximum of 16 bytes can be returned in the general registers. Examples
1408 of 16-byte return values are: 128-bit integers and 16-byte small
1409 structures (excluding homogeneous floating-point aggregates). */
1410 if (regno == R0_REGNUM || regno == R1_REGNUM)
1411 return true;
1412
1413 /* Up to four fp/simd registers can return a function value, e.g. a
1414 homogeneous floating-point aggregate having four members. */
1415 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1416 return !TARGET_GENERAL_REGS_ONLY;
1417
1418 return false;
1419}
1420
1421/* Implement TARGET_RETURN_IN_MEMORY.
1422
1423 If the type T of the result of a function is such that
1424 void func (T arg)
1425 would require that arg be passed as a value in a register (or set of
1426 registers) according to the parameter passing rules, then the result
1427 is returned in the same registers as would be used for such an
1428 argument. */
1429
1430static bool
1431aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1432{
1433 HOST_WIDE_INT size;
1434 enum machine_mode ag_mode;
1435 int count;
1436
1437 if (!AGGREGATE_TYPE_P (type)
1438 && TREE_CODE (type) != COMPLEX_TYPE
1439 && TREE_CODE (type) != VECTOR_TYPE)
1440 /* Simple scalar types always returned in registers. */
1441 return false;
1442
1443 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1444 type,
1445 &ag_mode,
1446 &count,
1447 NULL))
1448 return false;
1449
1450 /* Types larger than 2 registers returned in memory. */
1451 size = int_size_in_bytes (type);
1452 return (size < 0 || size > 2 * UNITS_PER_WORD);
1453}
1454
1455static bool
1456aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1457 const_tree type, int *nregs)
1458{
1459 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1460 return aarch64_vfp_is_call_or_return_candidate (mode,
1461 type,
1462 &pcum->aapcs_vfp_rmode,
1463 nregs,
1464 NULL);
1465}
1466
1467/* Given MODE and TYPE of a function argument, return the alignment in
1468 bits. The idea is to suppress any stronger alignment requested by
1469 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1470 This is a helper function for local use only. */
1471
1472static unsigned int
1473aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1474{
1475 unsigned int alignment;
1476
1477 if (type)
1478 {
1479 if (!integer_zerop (TYPE_SIZE (type)))
1480 {
1481 if (TYPE_MODE (type) == mode)
1482 alignment = TYPE_ALIGN (type);
1483 else
1484 alignment = GET_MODE_ALIGNMENT (mode);
1485 }
1486 else
1487 alignment = 0;
1488 }
1489 else
1490 alignment = GET_MODE_ALIGNMENT (mode);
1491
1492 return alignment;
1493}
1494
1495/* Layout a function argument according to the AAPCS64 rules. The rule
1496 numbers refer to the rule numbers in the AAPCS64. */
1497
1498static void
1499aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1500 const_tree type,
1501 bool named ATTRIBUTE_UNUSED)
1502{
1503 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1504 int ncrn, nvrn, nregs;
1505 bool allocate_ncrn, allocate_nvrn;
1506
1507 /* We need to do this once per argument. */
1508 if (pcum->aapcs_arg_processed)
1509 return;
1510
1511 pcum->aapcs_arg_processed = true;
1512
1513 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1514 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1515 mode,
1516 type,
1517 &nregs);
1518
1519 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1520 The following code thus handles passing by SIMD/FP registers first. */
1521
1522 nvrn = pcum->aapcs_nvrn;
1523
 1524 /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
 1525 and homogeneous short-vector aggregates (HVA). */
1526 if (allocate_nvrn)
1527 {
1528 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1529 {
1530 pcum->aapcs_nextnvrn = nvrn + nregs;
1531 if (!aarch64_composite_type_p (type, mode))
1532 {
1533 gcc_assert (nregs == 1);
1534 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1535 }
1536 else
1537 {
1538 rtx par;
1539 int i;
1540 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1541 for (i = 0; i < nregs; i++)
1542 {
1543 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1544 V0_REGNUM + nvrn + i);
1545 tmp = gen_rtx_EXPR_LIST
1546 (VOIDmode, tmp,
1547 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1548 XVECEXP (par, 0, i) = tmp;
1549 }
1550 pcum->aapcs_reg = par;
1551 }
1552 return;
1553 }
1554 else
1555 {
1556 /* C.3 NSRN is set to 8. */
1557 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1558 goto on_stack;
1559 }
1560 }
1561
1562 ncrn = pcum->aapcs_ncrn;
1563 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1564 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1565
1566
 1567 /* C6 - C9, though the sign and zero extension semantics are
 1568 handled elsewhere. This is the case where the argument fits
 1569 entirely in general registers. */
1570 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1571 {
1572 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1573
1574 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1575
1576 /* C.8 if the argument has an alignment of 16 then the NGRN is
1577 rounded up to the next even number. */
1578 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1579 {
1580 ++ncrn;
1581 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1582 }
1583 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1584 A reg is still generated for it, but the caller should be smart
1585 enough not to use it. */
1586 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1587 {
1588 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1589 }
1590 else
1591 {
1592 rtx par;
1593 int i;
1594
1595 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1596 for (i = 0; i < nregs; i++)
1597 {
1598 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1599 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1600 GEN_INT (i * UNITS_PER_WORD));
1601 XVECEXP (par, 0, i) = tmp;
1602 }
1603 pcum->aapcs_reg = par;
1604 }
1605
1606 pcum->aapcs_nextncrn = ncrn + nregs;
1607 return;
1608 }
1609
1610 /* C.11 */
1611 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1612
1613 /* The argument is passed on stack; record the needed number of words for
1614 this argument (we can re-use NREGS) and align the total size if
1615 necessary. */
1616on_stack:
1617 pcum->aapcs_stack_words = nregs;
1618 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1619 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1620 16 / UNITS_PER_WORD) + 1;
1621 return;
1622}
1623
1624/* Implement TARGET_FUNCTION_ARG. */
1625
1626static rtx
1627aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1628 const_tree type, bool named)
1629{
1630 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1631 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1632
1633 if (mode == VOIDmode)
1634 return NULL_RTX;
1635
1636 aarch64_layout_arg (pcum_v, mode, type, named);
1637 return pcum->aapcs_reg;
1638}
1639
1640void
1641aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1642 const_tree fntype ATTRIBUTE_UNUSED,
1643 rtx libname ATTRIBUTE_UNUSED,
1644 const_tree fndecl ATTRIBUTE_UNUSED,
1645 unsigned n_named ATTRIBUTE_UNUSED)
1646{
1647 pcum->aapcs_ncrn = 0;
1648 pcum->aapcs_nvrn = 0;
1649 pcum->aapcs_nextncrn = 0;
1650 pcum->aapcs_nextnvrn = 0;
1651 pcum->pcs_variant = ARM_PCS_AAPCS64;
1652 pcum->aapcs_reg = NULL_RTX;
1653 pcum->aapcs_arg_processed = false;
1654 pcum->aapcs_stack_words = 0;
1655 pcum->aapcs_stack_size = 0;
1656
1657 return;
1658}
1659
1660static void
1661aarch64_function_arg_advance (cumulative_args_t pcum_v,
1662 enum machine_mode mode,
1663 const_tree type,
1664 bool named)
1665{
1666 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1667 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1668 {
1669 aarch64_layout_arg (pcum_v, mode, type, named);
1670 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1671 != (pcum->aapcs_stack_words != 0));
1672 pcum->aapcs_arg_processed = false;
1673 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1674 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1675 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1676 pcum->aapcs_stack_words = 0;
1677 pcum->aapcs_reg = NULL_RTX;
1678 }
1679}
1680
1681bool
1682aarch64_function_arg_regno_p (unsigned regno)
1683{
1684 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1685 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1686}
1687
1688/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1689 PARM_BOUNDARY bits of alignment, but will be given anything up
1690 to STACK_BOUNDARY bits if the type requires it. This makes sure
1691 that both before and after the layout of each argument, the Next
1692 Stacked Argument Address (NSAA) will have a minimum alignment of
1693 8 bytes. */
1694
1695static unsigned int
1696aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1697{
1698 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1699
1700 if (alignment < PARM_BOUNDARY)
1701 alignment = PARM_BOUNDARY;
1702 if (alignment > STACK_BOUNDARY)
1703 alignment = STACK_BOUNDARY;
1704 return alignment;
1705}
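/* Example (annotation, not in the original source), assuming the usual
   aarch64 values PARM_BOUNDARY == 64 and STACK_BOUNDARY == 128: a char
   argument with natural 8-bit alignment is raised to 64 bits, while an
   aggregate whose alignment works out to 256 bits is clamped down to
   128 bits.  */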
1706
1707/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1708
1709 Return true if an argument passed on the stack should be padded upwards,
1710 i.e. if the least-significant byte of the stack slot has useful data.
1711
1712 Small aggregate types are placed in the lowest memory address.
1713
1714 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1715
1716bool
1717aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1718{
1719 /* On little-endian targets, the least significant byte of every stack
1720 argument is passed at the lowest byte address of the stack slot. */
1721 if (!BYTES_BIG_ENDIAN)
1722 return true;
1723
 1724 /* Otherwise, integral, floating-point and pointer types are padded downward:
1725 the least significant byte of a stack argument is passed at the highest
1726 byte address of the stack slot. */
1727 if (type
1728 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1729 || POINTER_TYPE_P (type))
1730 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1731 return false;
1732
1733 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1734 return true;
1735}
1736
1737/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1738
1739 It specifies padding for the last (may also be the only)
1740 element of a block move between registers and memory. If
1741 assuming the block is in the memory, padding upward means that
1742 the last element is padded after its highest significant byte,
 1743 while in downward padding, the last element is padded at
 1744 its least significant byte side.
1745
1746 Small aggregates and small complex types are always padded
1747 upwards.
1748
1749 We don't need to worry about homogeneous floating-point or
1750 short-vector aggregates; their move is not affected by the
1751 padding direction determined here. Regardless of endianness,
1752 each element of such an aggregate is put in the least
1753 significant bits of a fp/simd register.
1754
1755 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1756 register has useful data, and return the opposite if the most
1757 significant byte does. */
1758
1759bool
1760aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1761 bool first ATTRIBUTE_UNUSED)
1762{
1763
1764 /* Small composite types are always padded upward. */
1765 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1766 {
1767 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1768 : GET_MODE_SIZE (mode));
1769 if (size < 2 * UNITS_PER_WORD)
1770 return true;
1771 }
1772
1773 /* Otherwise, use the default padding. */
1774 return !BYTES_BIG_ENDIAN;
1775}
1776
1777static enum machine_mode
1778aarch64_libgcc_cmp_return_mode (void)
1779{
1780 return SImode;
1781}
1782
1783static bool
1784aarch64_frame_pointer_required (void)
1785{
1786 /* If the function contains dynamic stack allocations, we need to
1787 use the frame pointer to access the static parts of the frame. */
1788 if (cfun->calls_alloca)
1789 return true;
1790
1791 /* In aarch64_override_options_after_change
1792 flag_omit_leaf_frame_pointer turns off the frame pointer by
1793 default. Turn it back on now if we've not got a leaf
1794 function. */
1795 if (flag_omit_leaf_frame_pointer
1796 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1797 return true;
 1798
 1799 return false;
1800}
1801
1802/* Mark the registers that need to be saved by the callee and calculate
1803 the size of the callee-saved registers area and frame record (both FP
1804 and LR may be omitted). */
1805static void
1806aarch64_layout_frame (void)
1807{
1808 HOST_WIDE_INT offset = 0;
1809 int regno;
1810
1811 if (reload_completed && cfun->machine->frame.laid_out)
1812 return;
1813
1814 /* First mark all the registers that really need to be saved... */
1815 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1816 cfun->machine->frame.reg_offset[regno] = -1;
1817
1818 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1819 cfun->machine->frame.reg_offset[regno] = -1;
1820
1821 /* ... that includes the eh data registers (if needed)... */
1822 if (crtl->calls_eh_return)
1823 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1824 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1825
1826 /* ... and any callee saved register that dataflow says is live. */
1827 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1828 if (df_regs_ever_live_p (regno)
1829 && !call_used_regs[regno])
1830 cfun->machine->frame.reg_offset[regno] = 0;
1831
1832 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1833 if (df_regs_ever_live_p (regno)
1834 && !call_used_regs[regno])
1835 cfun->machine->frame.reg_offset[regno] = 0;
1836
1837 if (frame_pointer_needed)
1838 {
1839 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1840 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1841 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1842 }
1843
1844 /* Now assign stack slots for them. */
1845 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1846 if (cfun->machine->frame.reg_offset[regno] != -1)
1847 {
1848 cfun->machine->frame.reg_offset[regno] = offset;
1849 offset += UNITS_PER_WORD;
1850 }
1851
1852 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1853 if (cfun->machine->frame.reg_offset[regno] != -1)
1854 {
1855 cfun->machine->frame.reg_offset[regno] = offset;
1856 offset += UNITS_PER_WORD;
1857 }
1858
1859 if (frame_pointer_needed)
1860 {
1861 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1862 offset += UNITS_PER_WORD;
1863 }
1864
1865 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1866 {
1867 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1868 offset += UNITS_PER_WORD;
1869 }
1870
1871 cfun->machine->frame.padding0 =
1872 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1873 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1874
1875 cfun->machine->frame.saved_regs_size = offset;
1876 cfun->machine->frame.laid_out = true;
1877}
1878
1879/* Make the last instruction frame-related and note that it performs
1880 the operation described by FRAME_PATTERN. */
1881
1882static void
1883aarch64_set_frame_expr (rtx frame_pattern)
1884{
1885 rtx insn;
1886
1887 insn = get_last_insn ();
1888 RTX_FRAME_RELATED_P (insn) = 1;
1889 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1890 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1891 frame_pattern,
1892 REG_NOTES (insn));
1893}
1894
1895static bool
1896aarch64_register_saved_on_entry (int regno)
1897{
1898 return cfun->machine->frame.reg_offset[regno] != -1;
1899}
1900
1901
1902static void
1903aarch64_save_or_restore_fprs (int start_offset, int increment,
1904 bool restore, rtx base_rtx)
1905
1906{
1907 unsigned regno;
1908 unsigned regno2;
1909 rtx insn;
e0f396bc
MS
1910 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1911 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
43e9d192
IB
1912
1913
1914 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1915 {
1916 if (aarch64_register_saved_on_entry (regno))
1917 {
1918 rtx mem;
1919 mem = gen_mem_ref (DFmode,
1920 plus_constant (Pmode,
1921 base_rtx,
1922 start_offset));
1923
1924 for (regno2 = regno + 1;
1925 regno2 <= V31_REGNUM
1926 && !aarch64_register_saved_on_entry (regno2);
1927 regno2++)
1928 {
1929 /* Empty loop. */
1930 }
1931 if (regno2 <= V31_REGNUM &&
1932 aarch64_register_saved_on_entry (regno2))
1933 {
1934 rtx mem2;
1935 /* Next highest register to be saved. */
1936 mem2 = gen_mem_ref (DFmode,
1937 plus_constant
1938 (Pmode,
1939 base_rtx,
1940 start_offset + increment));
1941 if (restore == false)
1942 {
1943 insn = emit_insn
1944 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1945 mem2, gen_rtx_REG (DFmode, regno2)));
1946
1947 }
1948 else
1949 {
1950 insn = emit_insn
1951 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1952 gen_rtx_REG (DFmode, regno2), mem2));
1953
e0f396bc
MS
1954 add_reg_note (insn, REG_CFA_RESTORE,
1955 gen_rtx_REG (DFmode, regno));
1956 add_reg_note (insn, REG_CFA_RESTORE,
1957 gen_rtx_REG (DFmode, regno2));
43e9d192
IB
1958 }
1959
1960 /* The first part of a frame-related parallel insn
1961 is always assumed to be relevant to the frame
 1962	 calculations; subsequent parts are only
1963 frame-related if explicitly marked. */
e0f396bc 1964 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
43e9d192
IB
1965 regno = regno2;
1966 start_offset += increment * 2;
1967 }
1968 else
1969 {
1970 if (restore == false)
1971 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1972 else
1973 {
1974 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
e0f396bc
MS
1975 add_reg_note (insn, REG_CFA_RESTORE,
1976 gen_rtx_REG (DImode, regno));
43e9d192
IB
1977 }
1978 start_offset += increment;
1979 }
1980 RTX_FRAME_RELATED_P (insn) = 1;
1981 }
1982 }
1983
1984}
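/* Illustrative sketch only: the scan above looks for the next saved
   register after REGNO and, when it finds one, saves or restores both
   with a single load/store-pair and advances the offset by two slots;
   otherwise a single load/store is used.  The hypothetical helper below
   just counts how many instructions that pairing scheme needs for a
   given IS_SAVED[] array.  */
#if 0
static int
example_count_pair_insns (const int *is_saved, int nregs)
{
  int insns = 0;
  int r;

  for (r = 0; r < nregs; r++)
    if (is_saved[r])
      {
        int r2 = r + 1;

        while (r2 < nregs && !is_saved[r2])
          r2++;
        if (r2 < nregs)
          r = r2;               /* Two registers covered by one stp/ldp.  */
        insns++;
      }
  return insns;
}
#endif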
1985
1986
 1987/* OFFSET is the offset from the stack pointer at which the saves and
 1988   restores have to happen.  */
1989static void
1990aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1991 bool restore)
1992{
1993 rtx insn;
1994 rtx base_rtx = stack_pointer_rtx;
1995 HOST_WIDE_INT start_offset = offset;
1996 HOST_WIDE_INT increment = UNITS_PER_WORD;
1997 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1998 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1999 unsigned regno;
2000 unsigned regno2;
2001
2002 for (regno = R0_REGNUM; regno <= limit; regno++)
2003 {
2004 if (aarch64_register_saved_on_entry (regno))
2005 {
2006 rtx mem;
2007 mem = gen_mem_ref (Pmode,
2008 plus_constant (Pmode,
2009 base_rtx,
2010 start_offset));
2011
2012 for (regno2 = regno + 1;
2013 regno2 <= limit
2014 && !aarch64_register_saved_on_entry (regno2);
2015 regno2++)
2016 {
2017 /* Empty loop. */
2018 }
2019 if (regno2 <= limit &&
2020 aarch64_register_saved_on_entry (regno2))
2021 {
2022 rtx mem2;
2023 /* Next highest register to be saved. */
2024 mem2 = gen_mem_ref (Pmode,
2025 plus_constant
2026 (Pmode,
2027 base_rtx,
2028 start_offset + increment));
2029 if (restore == false)
2030 {
2031 insn = emit_insn
2032 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
2033 mem2, gen_rtx_REG (DImode, regno2)));
2034
2035 }
2036 else
2037 {
2038 insn = emit_insn
2039 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
2040 gen_rtx_REG (DImode, regno2), mem2));
2041
2042 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2043 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
2044 }
2045
2046 /* The first part of a frame-related parallel insn
2047 is always assumed to be relevant to the frame
 2048	 calculations; subsequent parts are only
2049 frame-related if explicitly marked. */
2050 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
2051 1)) = 1;
2052 regno = regno2;
2053 start_offset += increment * 2;
2054 }
2055 else
2056 {
2057 if (restore == false)
2058 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
2059 else
2060 {
2061 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
2062 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2063 }
2064 start_offset += increment;
2065 }
2066 RTX_FRAME_RELATED_P (insn) = 1;
2067 }
2068 }
2069
2070 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
2071
2072}
2073
2074/* AArch64 stack frames generated by this compiler look like:
2075
2076 +-------------------------------+
2077 | |
2078 | incoming stack arguments |
2079 | |
2080 +-------------------------------+ <-- arg_pointer_rtx
2081 | |
2082 | callee-allocated save area |
2083 | for register varargs |
2084 | |
43e9d192
IB
2085 +-------------------------------+ <-- frame_pointer_rtx
2086 | |
454fdba9 2087 | local variables |
43e9d192
IB
2088 | |
2089 +-------------------------------+
454fdba9
RL
2090 | padding0 | \
2091 +-------------------------------+ |
2092 | | |
2093 | | |
2094 | callee-saved registers | | frame.saved_regs_size
2095 | | |
2096 +-------------------------------+ |
2097 | LR' | |
2098 +-------------------------------+ |
2099 | FP' | /
43e9d192
IB
2100 P +-------------------------------+ <-- hard_frame_pointer_rtx
2101 | dynamic allocation |
2102 +-------------------------------+
2103 | |
2104 | outgoing stack arguments |
2105 | |
2106 +-------------------------------+ <-- stack_pointer_rtx
2107
2108 Dynamic stack allocations such as alloca insert data at point P.
2109 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2110 hard_frame_pointer_rtx unchanged. */
2111
2112/* Generate the prologue instructions for entry into a function.
2113 Establish the stack frame by decreasing the stack pointer with a
2114 properly calculated size and, if necessary, create a frame record
2115 filled with the values of LR and previous frame pointer. The
6991c977 2116 current FP is also set up if it is in use. */
43e9d192
IB
2117
2118void
2119aarch64_expand_prologue (void)
2120{
2121 /* sub sp, sp, #<frame_size>
2122 stp {fp, lr}, [sp, #<frame_size> - 16]
2123 add fp, sp, #<frame_size> - hardfp_offset
2124 stp {cs_reg}, [fp, #-16] etc.
2125
2126 sub sp, sp, <final_adjustment_if_any>
2127 */
2128 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2129 HOST_WIDE_INT frame_size, offset;
2130 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2131 rtx insn;
2132
2133 aarch64_layout_frame ();
2134 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2135 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2136 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2137 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2138 + crtl->outgoing_args_size);
2139 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2140 STACK_BOUNDARY / BITS_PER_UNIT);
2141
2142 if (flag_stack_usage_info)
2143 current_function_static_stack_size = frame_size;
2144
2145 fp_offset = (offset
2146 - original_frame_size
2147 - cfun->machine->frame.saved_regs_size);
2148
 44c0e7b9 2149  /* Store pairs and load pairs have a range of only -512 to 504.  */
43e9d192
IB
2150 if (offset >= 512)
2151 {
2152 /* When the frame has a large size, an initial decrease is done on
2153 the stack pointer to jump over the callee-allocated save area for
2154 register varargs, the local variable area and/or the callee-saved
2155 register area. This will allow the pre-index write-back
2156 store pair instructions to be used for setting up the stack frame
2157 efficiently. */
2158 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2159 if (offset >= 512)
2160 offset = cfun->machine->frame.saved_regs_size;
2161
2162 frame_size -= (offset + crtl->outgoing_args_size);
2163 fp_offset = 0;
2164
2165 if (frame_size >= 0x1000000)
2166 {
2167 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2168 emit_move_insn (op0, GEN_INT (-frame_size));
2169 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2170 aarch64_set_frame_expr (gen_rtx_SET
2171 (Pmode, stack_pointer_rtx,
f6fe771a
RL
2172 plus_constant (Pmode,
2173 stack_pointer_rtx,
2174 -frame_size)));
43e9d192
IB
2175 }
2176 else if (frame_size > 0)
2177 {
2178 if ((frame_size & 0xfff) != frame_size)
2179 {
2180 insn = emit_insn (gen_add2_insn
2181 (stack_pointer_rtx,
2182 GEN_INT (-(frame_size
2183 & ~(HOST_WIDE_INT)0xfff))));
2184 RTX_FRAME_RELATED_P (insn) = 1;
2185 }
2186 if ((frame_size & 0xfff) != 0)
2187 {
2188 insn = emit_insn (gen_add2_insn
2189 (stack_pointer_rtx,
2190 GEN_INT (-(frame_size
2191 & (HOST_WIDE_INT)0xfff))));
2192 RTX_FRAME_RELATED_P (insn) = 1;
2193 }
2194 }
2195 }
2196 else
2197 frame_size = -1;
2198
2199 if (offset > 0)
2200 {
 2201      /* If the frame pointer is needed, save the frame pointer and lr
 2202	  first.  Make the frame pointer point to the location of the
 2203	  old frame pointer on the stack.  */
2204 if (frame_pointer_needed)
2205 {
2206 rtx mem_fp, mem_lr;
2207
2208 if (fp_offset)
2209 {
2210 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2211 GEN_INT (-offset)));
2212 RTX_FRAME_RELATED_P (insn) = 1;
2213 aarch64_set_frame_expr (gen_rtx_SET
2214 (Pmode, stack_pointer_rtx,
2215 gen_rtx_MINUS (Pmode,
2216 stack_pointer_rtx,
2217 GEN_INT (offset))));
2218 mem_fp = gen_frame_mem (DImode,
2219 plus_constant (Pmode,
2220 stack_pointer_rtx,
2221 fp_offset));
2222 mem_lr = gen_frame_mem (DImode,
2223 plus_constant (Pmode,
2224 stack_pointer_rtx,
2225 fp_offset
2226 + UNITS_PER_WORD));
2227 insn = emit_insn (gen_store_pairdi (mem_fp,
2228 hard_frame_pointer_rtx,
2229 mem_lr,
2230 gen_rtx_REG (DImode,
2231 LR_REGNUM)));
2232 }
2233 else
2234 {
2235 insn = emit_insn (gen_storewb_pairdi_di
2236 (stack_pointer_rtx, stack_pointer_rtx,
2237 hard_frame_pointer_rtx,
2238 gen_rtx_REG (DImode, LR_REGNUM),
2239 GEN_INT (-offset),
2240 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2241 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2242 }
2243
2244 /* The first part of a frame-related parallel insn is always
2245 assumed to be relevant to the frame calculations;
 2246	     subsequent parts are only frame-related if explicitly
2247 marked. */
2248 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2249 RTX_FRAME_RELATED_P (insn) = 1;
2250
2251 /* Set up frame pointer to point to the location of the
2252 previous frame pointer on the stack. */
2253 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2254 stack_pointer_rtx,
2255 GEN_INT (fp_offset)));
2256 aarch64_set_frame_expr (gen_rtx_SET
2257 (Pmode, hard_frame_pointer_rtx,
f6fe771a
RL
2258 plus_constant (Pmode,
2259 stack_pointer_rtx,
2260 fp_offset)));
43e9d192
IB
2261 RTX_FRAME_RELATED_P (insn) = 1;
2262 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2263 hard_frame_pointer_rtx));
2264 }
2265 else
2266 {
2267 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2268 GEN_INT (-offset)));
2269 RTX_FRAME_RELATED_P (insn) = 1;
2270 }
2271
2272 aarch64_save_or_restore_callee_save_registers
2273 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2274 }
2275
 2276  /* When offset >= 512,
 2277     sub sp, sp, #<outgoing_args_size>.  */
2278 if (frame_size > -1)
2279 {
2280 if (crtl->outgoing_args_size > 0)
2281 {
2282 insn = emit_insn (gen_add2_insn
2283 (stack_pointer_rtx,
2284 GEN_INT (- crtl->outgoing_args_size)));
2285 RTX_FRAME_RELATED_P (insn) = 1;
2286 }
2287 }
2288}
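/* For illustration only: for a small function with 16 bytes of local
   variables, no outgoing arguments and no callee-saved registers beyond
   the frame record, the code above would typically emit something like

	stp	x29, x30, [sp, -32]!
	add	x29, sp, 0

   i.e. a single pre-indexed store-pair both allocates the 32-byte frame
   (the locals plus the FP/LR record, rounded to the 16-byte
   STACK_BOUNDARY) and saves FP and LR, after which the frame pointer is
   set up.  The exact offsets depend on aarch64_layout_frame.  */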
2289
2290/* Generate the epilogue instructions for returning from a function. */
2291void
2292aarch64_expand_epilogue (bool for_sibcall)
2293{
2294 HOST_WIDE_INT original_frame_size, frame_size, offset;
2295 HOST_WIDE_INT fp_offset;
2296 rtx insn;
44c0e7b9 2297 rtx cfa_reg;
43e9d192
IB
2298
2299 aarch64_layout_frame ();
2300 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2301 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2302 + crtl->outgoing_args_size);
2303 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2304 STACK_BOUNDARY / BITS_PER_UNIT);
2305
2306 fp_offset = (offset
2307 - original_frame_size
2308 - cfun->machine->frame.saved_regs_size);
2309
44c0e7b9
YZ
2310 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2311
 2312  /* Store pairs and load pairs have a range of only -512 to 504.  */
43e9d192
IB
2313 if (offset >= 512)
2314 {
2315 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2316 if (offset >= 512)
2317 offset = cfun->machine->frame.saved_regs_size;
2318
2319 frame_size -= (offset + crtl->outgoing_args_size);
2320 fp_offset = 0;
2321 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2322 {
2323 insn = emit_insn (gen_add2_insn
2324 (stack_pointer_rtx,
2325 GEN_INT (crtl->outgoing_args_size)));
2326 RTX_FRAME_RELATED_P (insn) = 1;
2327 }
2328 }
2329 else
2330 frame_size = -1;
2331
2332 /* If there were outgoing arguments or we've done dynamic stack
2333 allocation, then restore the stack pointer from the frame
2334 pointer. This is at most one insn and more efficient than using
2335 GCC's internal mechanism. */
2336 if (frame_pointer_needed
2337 && (crtl->outgoing_args_size || cfun->calls_alloca))
2338 {
2339 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2340 hard_frame_pointer_rtx,
2341 GEN_INT (- fp_offset)));
2342 RTX_FRAME_RELATED_P (insn) = 1;
44c0e7b9
YZ
2343 /* As SP is set to (FP - fp_offset), according to the rules in
2344 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2345 from the value of SP from now on. */
2346 cfa_reg = stack_pointer_rtx;
43e9d192
IB
2347 }
2348
2349 aarch64_save_or_restore_callee_save_registers
2350 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2351
2352 /* Restore the frame pointer and lr if the frame pointer is needed. */
2353 if (offset > 0)
2354 {
2355 if (frame_pointer_needed)
2356 {
2357 rtx mem_fp, mem_lr;
2358
2359 if (fp_offset)
2360 {
2361 mem_fp = gen_frame_mem (DImode,
2362 plus_constant (Pmode,
2363 stack_pointer_rtx,
2364 fp_offset));
2365 mem_lr = gen_frame_mem (DImode,
2366 plus_constant (Pmode,
2367 stack_pointer_rtx,
2368 fp_offset
2369 + UNITS_PER_WORD));
2370 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2371 mem_fp,
2372 gen_rtx_REG (DImode,
2373 LR_REGNUM),
2374 mem_lr));
2375 }
2376 else
2377 {
2378 insn = emit_insn (gen_loadwb_pairdi_di
2379 (stack_pointer_rtx,
2380 stack_pointer_rtx,
2381 hard_frame_pointer_rtx,
2382 gen_rtx_REG (DImode, LR_REGNUM),
2383 GEN_INT (offset),
2384 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2385 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
44c0e7b9
YZ
2386 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2387 (gen_rtx_SET (Pmode, stack_pointer_rtx,
dc2d3c67
YZ
2388 plus_constant (Pmode, cfa_reg,
2389 offset))));
43e9d192
IB
2390 }
2391
2392 /* The first part of a frame-related parallel insn
2393 is always assumed to be relevant to the frame
 2394	     calculations; subsequent parts are only
2395 frame-related if explicitly marked. */
2396 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2397 RTX_FRAME_RELATED_P (insn) = 1;
2398 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2399 add_reg_note (insn, REG_CFA_RESTORE,
2400 gen_rtx_REG (DImode, LR_REGNUM));
2401
2402 if (fp_offset)
2403 {
2404 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2405 GEN_INT (offset)));
2406 RTX_FRAME_RELATED_P (insn) = 1;
2407 }
2408 }
43e9d192
IB
2409 else
2410 {
2411 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2412 GEN_INT (offset)));
2413 RTX_FRAME_RELATED_P (insn) = 1;
2414 }
2415 }
2416
2417 /* Stack adjustment for exception handler. */
2418 if (crtl->calls_eh_return)
2419 {
2420 /* We need to unwind the stack by the offset computed by
2421 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2422 based on SP. Ideally we would update the SP and define the
2423 CFA along the lines of:
2424
2425 SP = SP + EH_RETURN_STACKADJ_RTX
2426 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2427
2428 However the dwarf emitter only understands a constant
2429 register offset.
2430
631b20a7 2431 The solution chosen here is to use the otherwise unused IP0
43e9d192
IB
2432 as a temporary register to hold the current SP value. The
2433 CFA is described using IP0 then SP is modified. */
2434
2435 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2436
2437 insn = emit_move_insn (ip0, stack_pointer_rtx);
2438 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2439 RTX_FRAME_RELATED_P (insn) = 1;
2440
2441 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2442
2443 /* Ensure the assignment to IP0 does not get optimized away. */
2444 emit_use (ip0);
2445 }
2446
2447 if (frame_size > -1)
2448 {
2449 if (frame_size >= 0x1000000)
2450 {
2451 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2452 emit_move_insn (op0, GEN_INT (frame_size));
2453 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2454 aarch64_set_frame_expr (gen_rtx_SET
2455 (Pmode, stack_pointer_rtx,
f6fe771a
RL
2456 plus_constant (Pmode,
2457 stack_pointer_rtx,
2458 frame_size)));
43e9d192
IB
2459 }
2460 else if (frame_size > 0)
2461 {
2462 if ((frame_size & 0xfff) != 0)
2463 {
2464 insn = emit_insn (gen_add2_insn
2465 (stack_pointer_rtx,
2466 GEN_INT ((frame_size
2467 & (HOST_WIDE_INT) 0xfff))));
2468 RTX_FRAME_RELATED_P (insn) = 1;
2469 }
2470 if ((frame_size & 0xfff) != frame_size)
2471 {
2472 insn = emit_insn (gen_add2_insn
2473 (stack_pointer_rtx,
2474 GEN_INT ((frame_size
2475 & ~ (HOST_WIDE_INT) 0xfff))));
2476 RTX_FRAME_RELATED_P (insn) = 1;
2477 }
2478 }
2479
f6fe771a
RL
2480 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2481 plus_constant (Pmode,
2482 stack_pointer_rtx,
2483 offset)));
43e9d192
IB
2484 }
2485
2486 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2487 if (!for_sibcall)
2488 emit_jump_insn (ret_rtx);
2489}
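/* For illustration only: the matching epilogue for the small frame
   shown after aarch64_expand_prologue would typically be

	ldp	x29, x30, [sp], 32
	ret

   i.e. a post-indexed load-pair restores FP and LR and releases the
   whole frame in one instruction.  Larger or alloca-using frames go
   through the FP-relative and split-adjustment paths above.  */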
2490
2491/* Return the place to copy the exception unwinding return address to.
 2492   This will probably be a stack slot, but could (in theory) be the
 2493   return register.  */
2494rtx
2495aarch64_final_eh_return_addr (void)
2496{
2497 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2498 aarch64_layout_frame ();
2499 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2500 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2501 + crtl->outgoing_args_size);
2502 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2503 STACK_BOUNDARY / BITS_PER_UNIT);
2504 fp_offset = offset
2505 - original_frame_size
2506 - cfun->machine->frame.saved_regs_size;
2507
2508 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2509 return gen_rtx_REG (DImode, LR_REGNUM);
2510
2511 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2512 result in a store to save LR introduced by builtin_eh_return () being
2513 incorrectly deleted because the alias is not detected.
2514 So in the calculation of the address to copy the exception unwinding
 2515   return address to, we distinguish two cases.
2516 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2517 we return a SP-relative location since all the addresses are SP-relative
2518 in this case. This prevents the store from being optimized away.
2519 If the fp_offset is not 0, then the addresses will be FP-relative and
2520 therefore we return a FP-relative location. */
2521
2522 if (frame_pointer_needed)
2523 {
2524 if (fp_offset)
2525 return gen_frame_mem (DImode,
2526 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2527 else
2528 return gen_frame_mem (DImode,
2529 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2530 }
2531
2532 /* If FP is not needed, we calculate the location of LR, which would be
2533 at the top of the saved registers block. */
2534
2535 return gen_frame_mem (DImode,
2536 plus_constant (Pmode,
2537 stack_pointer_rtx,
2538 fp_offset
2539 + cfun->machine->frame.saved_regs_size
2540 - 2 * UNITS_PER_WORD));
2541}
2542
9dfc162c
JG
2543/* Possibly output code to build up a constant in a register. For
2544 the benefit of the costs infrastructure, returns the number of
2545 instructions which would be emitted. GENERATE inhibits or
2546 enables code generation. */
2547
2548static int
2549aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
43e9d192 2550{
9dfc162c
JG
2551 int insns = 0;
2552
43e9d192 2553 if (aarch64_bitmask_imm (val, DImode))
9dfc162c
JG
2554 {
2555 if (generate)
2556 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2557 insns = 1;
2558 }
43e9d192
IB
2559 else
2560 {
2561 int i;
2562 int ncount = 0;
2563 int zcount = 0;
2564 HOST_WIDE_INT valp = val >> 16;
2565 HOST_WIDE_INT valm;
2566 HOST_WIDE_INT tval;
2567
2568 for (i = 16; i < 64; i += 16)
2569 {
2570 valm = (valp & 0xffff);
2571
2572 if (valm != 0)
2573 ++ zcount;
2574
2575 if (valm != 0xffff)
2576 ++ ncount;
2577
2578 valp >>= 16;
2579 }
2580
2581 /* zcount contains the number of additional MOVK instructions
2582 required if the constant is built up with an initial MOVZ instruction,
2583 while ncount is the number of MOVK instructions required if starting
 2584	 with a MOVN instruction.  Choose the sequence that requires fewer
 2585	 instructions, preferring MOVZ when the two counts are
 2586	 equal.  */
2587 if (ncount < zcount)
2588 {
9dfc162c
JG
2589 if (generate)
2590 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2591 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
43e9d192 2592 tval = 0xffff;
9dfc162c 2593 insns++;
43e9d192
IB
2594 }
2595 else
2596 {
9dfc162c
JG
2597 if (generate)
2598 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2599 GEN_INT (val & 0xffff));
43e9d192 2600 tval = 0;
9dfc162c 2601 insns++;
43e9d192
IB
2602 }
2603
2604 val >>= 16;
2605
2606 for (i = 16; i < 64; i += 16)
2607 {
2608 if ((val & 0xffff) != tval)
9dfc162c
JG
2609 {
2610 if (generate)
2611 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2612 GEN_INT (i),
2613 GEN_INT (val & 0xffff)));
2614 insns++;
2615 }
43e9d192
IB
2616 val >>= 16;
2617 }
2618 }
9dfc162c 2619 return insns;
43e9d192
IB
2620}
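/* Worked example (illustrative): for val = 0xffffffff12345678 the
   16-bit chunks above bit 15 are 0x1234, 0xffff and 0xffff, so zcount
   is 3 and ncount is 1; starting from MOVN therefore wins and the whole
   constant is built in two instructions, roughly

	movn	x<regnum>, #0xa987
	movk	x<regnum>, #0x1234, lsl #16

   The hypothetical helper below mirrors the counting logic only.  */
#if 0
static int
example_count_mov_insns (unsigned long long val)
{
  int zcount = 0, ncount = 0, i;

  for (i = 16; i < 64; i += 16)
    {
      unsigned int chunk = (val >> i) & 0xffff;

      if (chunk != 0)
        zcount++;               /* Needs a MOVK after an initial MOVZ.  */
      if (chunk != 0xffff)
        ncount++;               /* Needs a MOVK after an initial MOVN.  */
    }
  /* One MOVZ or MOVN for the low 16 bits, plus the cheaper set of MOVKs.  */
  return 1 + (ncount < zcount ? ncount : zcount);
}
#endif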
2621
2622static void
d9600ae5 2623aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
43e9d192
IB
2624{
2625 HOST_WIDE_INT mdelta = delta;
d9600ae5
SN
2626 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2627 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
43e9d192
IB
2628
2629 if (mdelta < 0)
2630 mdelta = -mdelta;
2631
2632 if (mdelta >= 4096 * 4096)
2633 {
9dfc162c 2634 (void) aarch64_build_constant (scratchreg, delta, true);
d9600ae5 2635 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
43e9d192
IB
2636 }
2637 else if (mdelta > 0)
2638 {
43e9d192 2639 if (mdelta >= 4096)
d9600ae5
SN
2640 {
2641 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2642 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2643 if (delta < 0)
2644 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2645 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2646 else
2647 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2648 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2649 }
43e9d192 2650 if (mdelta % 4096 != 0)
d9600ae5
SN
2651 {
2652 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2653 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2654 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2655 }
43e9d192
IB
2656 }
2657}
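/* Illustrative sketch only: for 0 < |DELTA| < 4096 * 4096 the code
   above splits the adjustment into (|DELTA| / 4096) << 12, added via
   the scratch register and a 12-bit shift, plus |DELTA| % 4096 added as
   a plain immediate.  The hypothetical helper below just shows the
   arithmetic of that split.  */
#if 0
static long long
example_split_add_immediate (long long delta, long long *hi, long long *lo)
{
  long long mdelta = delta < 0 ? -delta : delta;

  *hi = (mdelta / 4096) << 12;  /* Handled with the scratch register.  */
  *lo = mdelta % 4096;          /* Handled as an immediate add/sub.  */
  return (delta < 0 ? -1 : 1) * (*hi + *lo);    /* Equals DELTA again.  */
}
#endif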
2658
2659/* Output code to add DELTA to the first argument, and then jump
2660 to FUNCTION. Used for C++ multiple inheritance. */
2661static void
2662aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2663 HOST_WIDE_INT delta,
2664 HOST_WIDE_INT vcall_offset,
2665 tree function)
2666{
2667 /* The this pointer is always in x0. Note that this differs from
 2668     Arm, where the this pointer may be bumped to r1 if r0 is required
2669 to return a pointer to an aggregate. On AArch64 a result value
2670 pointer will be in x8. */
2671 int this_regno = R0_REGNUM;
75f1d6fc 2672 rtx this_rtx, temp0, temp1, addr, insn, funexp;
43e9d192 2673
75f1d6fc
SN
2674 reload_completed = 1;
2675 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
2676
2677 if (vcall_offset == 0)
d9600ae5 2678 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2679 else
2680 {
28514dda 2681 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 2682
75f1d6fc
SN
2683 this_rtx = gen_rtx_REG (Pmode, this_regno);
2684 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2685 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 2686
75f1d6fc
SN
2687 addr = this_rtx;
2688 if (delta != 0)
2689 {
2690 if (delta >= -256 && delta < 256)
2691 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2692 plus_constant (Pmode, this_rtx, delta));
2693 else
d9600ae5 2694 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2695 }
2696
28514dda
YZ
2697 if (Pmode == ptr_mode)
2698 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2699 else
2700 aarch64_emit_move (temp0,
2701 gen_rtx_ZERO_EXTEND (Pmode,
2702 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 2703
28514dda 2704 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 2705 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
2706 else
2707 {
9dfc162c 2708 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
75f1d6fc 2709 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
2710 }
2711
28514dda
YZ
2712 if (Pmode == ptr_mode)
2713 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2714 else
2715 aarch64_emit_move (temp1,
2716 gen_rtx_SIGN_EXTEND (Pmode,
2717 gen_rtx_MEM (ptr_mode, addr)));
2718
75f1d6fc 2719 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
2720 }
2721
75f1d6fc
SN
2722 /* Generate a tail call to the target function. */
2723 if (!TREE_USED (function))
2724 {
2725 assemble_external (function);
2726 TREE_USED (function) = 1;
2727 }
2728 funexp = XEXP (DECL_RTL (function), 0);
2729 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2730 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2731 SIBLING_CALL_P (insn) = 1;
2732
2733 insn = get_insns ();
2734 shorten_branches (insn);
2735 final_start_function (insn, file, 1);
2736 final (insn, file, 1);
43e9d192 2737 final_end_function ();
75f1d6fc
SN
2738
2739 /* Stop pretending to be a post-reload pass. */
2740 reload_completed = 0;
43e9d192
IB
2741}
2742
43e9d192
IB
2743static int
2744aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2745{
2746 if (GET_CODE (*x) == SYMBOL_REF)
2747 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2748
2749 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2750 TLS offsets, not real symbol references. */
2751 if (GET_CODE (*x) == UNSPEC
2752 && XINT (*x, 1) == UNSPEC_TLS)
2753 return -1;
2754
2755 return 0;
2756}
2757
2758static bool
2759aarch64_tls_referenced_p (rtx x)
2760{
2761 if (!TARGET_HAVE_TLS)
2762 return false;
2763
2764 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2765}
2766
2767
2768static int
2769aarch64_bitmasks_cmp (const void *i1, const void *i2)
2770{
2771 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2772 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2773
2774 if (*imm1 < *imm2)
2775 return -1;
2776 if (*imm1 > *imm2)
2777 return +1;
2778 return 0;
2779}
2780
2781
2782static void
2783aarch64_build_bitmask_table (void)
2784{
2785 unsigned HOST_WIDE_INT mask, imm;
2786 unsigned int log_e, e, s, r;
2787 unsigned int nimms = 0;
2788
2789 for (log_e = 1; log_e <= 6; log_e++)
2790 {
2791 e = 1 << log_e;
2792 if (e == 64)
2793 mask = ~(HOST_WIDE_INT) 0;
2794 else
2795 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2796 for (s = 1; s < e; s++)
2797 {
2798 for (r = 0; r < e; r++)
2799 {
2800 /* set s consecutive bits to 1 (s < 64) */
2801 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2802 /* rotate right by r */
2803 if (r != 0)
2804 imm = ((imm >> r) | (imm << (e - r))) & mask;
2805 /* replicate the constant depending on SIMD size */
2806 switch (log_e) {
2807 case 1: imm |= (imm << 2);
2808 case 2: imm |= (imm << 4);
2809 case 3: imm |= (imm << 8);
2810 case 4: imm |= (imm << 16);
2811 case 5: imm |= (imm << 32);
2812 case 6:
2813 break;
2814 default:
2815 gcc_unreachable ();
2816 }
2817 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2818 aarch64_bitmasks[nimms++] = imm;
2819 }
2820 }
2821 }
2822
2823 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2824 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2825 aarch64_bitmasks_cmp);
2826}
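/* Illustrative sketch only: one entry of the table above is obtained
   from a (element size E, ones count S, rotation R) triple by forming S
   consecutive ones, rotating them right by R within an E-bit element
   and replicating the element across 64 bits.  The hypothetical helper
   below repeats that computation for a single triple; e.g. E = 8,
   S = 3, R = 1 gives 0x8383838383838383.  */
#if 0
static unsigned long long
example_bitmask_imm (unsigned int e, unsigned int s, unsigned int r)
{
  unsigned long long mask = (e == 64 ? ~0ULL : (1ULL << e) - 1);
  unsigned long long imm = (1ULL << s) - 1;     /* S consecutive ones.  */

  if (r != 0)
    imm = ((imm >> r) | (imm << (e - r))) & mask;   /* Rotate right by R.  */

  while (e < 64)
    {
      imm |= imm << e;          /* Replicate, as the switch above does.  */
      e *= 2;
    }
  return imm;
}
#endif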
2827
2828
2829/* Return true if val can be encoded as a 12-bit unsigned immediate with
2830 a left shift of 0 or 12 bits. */
2831bool
2832aarch64_uimm12_shift (HOST_WIDE_INT val)
2833{
2834 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2835 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2836 );
2837}
2838
2839
2840/* Return true if val is an immediate that can be loaded into a
2841 register by a MOVZ instruction. */
2842static bool
2843aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2844{
2845 if (GET_MODE_SIZE (mode) > 4)
2846 {
2847 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2848 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2849 return 1;
2850 }
2851 else
2852 {
2853 /* Ignore sign extension. */
2854 val &= (HOST_WIDE_INT) 0xffffffff;
2855 }
2856 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2857 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2858}
2859
2860
2861/* Return true if val is a valid bitmask immediate. */
2862bool
2863aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2864{
2865 if (GET_MODE_SIZE (mode) < 8)
2866 {
2867 /* Replicate bit pattern. */
2868 val &= (HOST_WIDE_INT) 0xffffffff;
2869 val |= val << 32;
2870 }
2871 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2872 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2873}
2874
2875
2876/* Return true if val is an immediate that can be loaded into a
2877 register in a single instruction. */
2878bool
2879aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2880{
2881 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2882 return 1;
2883 return aarch64_bitmask_imm (val, mode);
2884}
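/* A few concrete cases of the immediate predicates above, for
   illustration (the values are chosen here, not taken from the
   sources):
     - aarch64_uimm12_shift accepts 0x2a000 (0x2a shifted left by 12);
     - aarch64_movw_imm accepts 0xffff0000 in SImode (MOVZ ..., lsl 16);
     - aarch64_bitmask_imm accepts 0x00ff00ff00ff00ff in DImode
       (8 consecutive ones replicated in 16-bit elements);
     - aarch64_move_imm accepts 0xffffffffffff0123 in DImode via the
       MOVN check, since the complement 0xfedc is a MOVZ value.  */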
2885
2886static bool
2887aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2888{
2889 rtx base, offset;
7eda14e1 2890
43e9d192
IB
2891 if (GET_CODE (x) == HIGH)
2892 return true;
2893
2894 split_const (x, &base, &offset);
2895 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda
YZ
2896 {
2897 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2898 != SYMBOL_FORCE_TO_MEM)
2899 return true;
2900 else
2901 /* Avoid generating a 64-bit relocation in ILP32; leave
2902 to aarch64_expand_mov_immediate to handle it properly. */
2903 return mode != ptr_mode;
2904 }
43e9d192
IB
2905
2906 return aarch64_tls_referenced_p (x);
2907}
2908
2909/* Return true if register REGNO is a valid index register.
2910 STRICT_P is true if REG_OK_STRICT is in effect. */
2911
2912bool
2913aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2914{
2915 if (!HARD_REGISTER_NUM_P (regno))
2916 {
2917 if (!strict_p)
2918 return true;
2919
2920 if (!reg_renumber)
2921 return false;
2922
2923 regno = reg_renumber[regno];
2924 }
2925 return GP_REGNUM_P (regno);
2926}
2927
2928/* Return true if register REGNO is a valid base register for mode MODE.
2929 STRICT_P is true if REG_OK_STRICT is in effect. */
2930
2931bool
2932aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2933{
2934 if (!HARD_REGISTER_NUM_P (regno))
2935 {
2936 if (!strict_p)
2937 return true;
2938
2939 if (!reg_renumber)
2940 return false;
2941
2942 regno = reg_renumber[regno];
2943 }
2944
2945 /* The fake registers will be eliminated to either the stack or
2946 hard frame pointer, both of which are usually valid base registers.
2947 Reload deals with the cases where the eliminated form isn't valid. */
2948 return (GP_REGNUM_P (regno)
2949 || regno == SP_REGNUM
2950 || regno == FRAME_POINTER_REGNUM
2951 || regno == ARG_POINTER_REGNUM);
2952}
2953
2954/* Return true if X is a valid base register for mode MODE.
2955 STRICT_P is true if REG_OK_STRICT is in effect. */
2956
2957static bool
2958aarch64_base_register_rtx_p (rtx x, bool strict_p)
2959{
2960 if (!strict_p && GET_CODE (x) == SUBREG)
2961 x = SUBREG_REG (x);
2962
2963 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2964}
2965
2966/* Return true if address offset is a valid index. If it is, fill in INFO
2967 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2968
2969static bool
2970aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2971 enum machine_mode mode, bool strict_p)
2972{
2973 enum aarch64_address_type type;
2974 rtx index;
2975 int shift;
2976
2977 /* (reg:P) */
2978 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2979 && GET_MODE (x) == Pmode)
2980 {
2981 type = ADDRESS_REG_REG;
2982 index = x;
2983 shift = 0;
2984 }
2985 /* (sign_extend:DI (reg:SI)) */
2986 else if ((GET_CODE (x) == SIGN_EXTEND
2987 || GET_CODE (x) == ZERO_EXTEND)
2988 && GET_MODE (x) == DImode
2989 && GET_MODE (XEXP (x, 0)) == SImode)
2990 {
2991 type = (GET_CODE (x) == SIGN_EXTEND)
2992 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2993 index = XEXP (x, 0);
2994 shift = 0;
2995 }
2996 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2997 else if (GET_CODE (x) == MULT
2998 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2999 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3000 && GET_MODE (XEXP (x, 0)) == DImode
3001 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3002 && CONST_INT_P (XEXP (x, 1)))
3003 {
3004 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3005 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3006 index = XEXP (XEXP (x, 0), 0);
3007 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3008 }
3009 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3010 else if (GET_CODE (x) == ASHIFT
3011 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3012 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3013 && GET_MODE (XEXP (x, 0)) == DImode
3014 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3015 && CONST_INT_P (XEXP (x, 1)))
3016 {
3017 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3018 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3019 index = XEXP (XEXP (x, 0), 0);
3020 shift = INTVAL (XEXP (x, 1));
3021 }
3022 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3023 else if ((GET_CODE (x) == SIGN_EXTRACT
3024 || GET_CODE (x) == ZERO_EXTRACT)
3025 && GET_MODE (x) == DImode
3026 && GET_CODE (XEXP (x, 0)) == MULT
3027 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3028 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3029 {
3030 type = (GET_CODE (x) == SIGN_EXTRACT)
3031 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3032 index = XEXP (XEXP (x, 0), 0);
3033 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3034 if (INTVAL (XEXP (x, 1)) != 32 + shift
3035 || INTVAL (XEXP (x, 2)) != 0)
3036 shift = -1;
3037 }
3038 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3039 (const_int 0xffffffff<<shift)) */
3040 else if (GET_CODE (x) == AND
3041 && GET_MODE (x) == DImode
3042 && GET_CODE (XEXP (x, 0)) == MULT
3043 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3044 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3045 && CONST_INT_P (XEXP (x, 1)))
3046 {
3047 type = ADDRESS_REG_UXTW;
3048 index = XEXP (XEXP (x, 0), 0);
3049 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3050 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3051 shift = -1;
3052 }
3053 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3054 else if ((GET_CODE (x) == SIGN_EXTRACT
3055 || GET_CODE (x) == ZERO_EXTRACT)
3056 && GET_MODE (x) == DImode
3057 && GET_CODE (XEXP (x, 0)) == ASHIFT
3058 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3059 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3060 {
3061 type = (GET_CODE (x) == SIGN_EXTRACT)
3062 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3063 index = XEXP (XEXP (x, 0), 0);
3064 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3065 if (INTVAL (XEXP (x, 1)) != 32 + shift
3066 || INTVAL (XEXP (x, 2)) != 0)
3067 shift = -1;
3068 }
3069 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3070 (const_int 0xffffffff<<shift)) */
3071 else if (GET_CODE (x) == AND
3072 && GET_MODE (x) == DImode
3073 && GET_CODE (XEXP (x, 0)) == ASHIFT
3074 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3075 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3076 && CONST_INT_P (XEXP (x, 1)))
3077 {
3078 type = ADDRESS_REG_UXTW;
3079 index = XEXP (XEXP (x, 0), 0);
3080 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3081 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3082 shift = -1;
3083 }
3084 /* (mult:P (reg:P) (const_int scale)) */
3085 else if (GET_CODE (x) == MULT
3086 && GET_MODE (x) == Pmode
3087 && GET_MODE (XEXP (x, 0)) == Pmode
3088 && CONST_INT_P (XEXP (x, 1)))
3089 {
3090 type = ADDRESS_REG_REG;
3091 index = XEXP (x, 0);
3092 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3093 }
3094 /* (ashift:P (reg:P) (const_int shift)) */
3095 else if (GET_CODE (x) == ASHIFT
3096 && GET_MODE (x) == Pmode
3097 && GET_MODE (XEXP (x, 0)) == Pmode
3098 && CONST_INT_P (XEXP (x, 1)))
3099 {
3100 type = ADDRESS_REG_REG;
3101 index = XEXP (x, 0);
3102 shift = INTVAL (XEXP (x, 1));
3103 }
3104 else
3105 return false;
3106
3107 if (GET_CODE (index) == SUBREG)
3108 index = SUBREG_REG (index);
3109
3110 if ((shift == 0 ||
3111 (shift > 0 && shift <= 3
3112 && (1 << shift) == GET_MODE_SIZE (mode)))
3113 && REG_P (index)
3114 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3115 {
3116 info->type = type;
3117 info->offset = index;
3118 info->shift = shift;
3119 return true;
3120 }
3121
3122 return false;
3123}
3124
3125static inline bool
3126offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3127{
3128 return (offset >= -64 * GET_MODE_SIZE (mode)
3129 && offset < 64 * GET_MODE_SIZE (mode)
3130 && offset % GET_MODE_SIZE (mode) == 0);
3131}
3132
3133static inline bool
3134offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3135 HOST_WIDE_INT offset)
3136{
3137 return offset >= -256 && offset < 256;
3138}
3139
3140static inline bool
3141offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3142{
3143 return (offset >= 0
3144 && offset < 4096 * GET_MODE_SIZE (mode)
3145 && offset % GET_MODE_SIZE (mode) == 0);
3146}
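/* For illustration, with DImode (an 8-byte access) the three helpers
   above accept:
     - offset_7bit_signed_scaled_p: multiples of 8 from -512 to 504
       (the load/store-pair range);
     - offset_9bit_signed_unscaled_p: any offset from -256 to 255;
     - offset_12bit_unsigned_scaled_p: multiples of 8 from 0 to 32760.  */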
3147
3148/* Return true if X is a valid address for machine mode MODE. If it is,
3149 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3150 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3151
3152static bool
3153aarch64_classify_address (struct aarch64_address_info *info,
3154 rtx x, enum machine_mode mode,
3155 RTX_CODE outer_code, bool strict_p)
3156{
3157 enum rtx_code code = GET_CODE (x);
3158 rtx op0, op1;
3159 bool allow_reg_index_p =
3160 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3161
3162 /* Don't support anything other than POST_INC or REG addressing for
3163 AdvSIMD. */
3164 if (aarch64_vector_mode_p (mode)
3165 && (code != POST_INC && code != REG))
3166 return false;
3167
3168 switch (code)
3169 {
3170 case REG:
3171 case SUBREG:
3172 info->type = ADDRESS_REG_IMM;
3173 info->base = x;
3174 info->offset = const0_rtx;
3175 return aarch64_base_register_rtx_p (x, strict_p);
3176
3177 case PLUS:
3178 op0 = XEXP (x, 0);
3179 op1 = XEXP (x, 1);
3180 if (GET_MODE_SIZE (mode) != 0
3181 && CONST_INT_P (op1)
3182 && aarch64_base_register_rtx_p (op0, strict_p))
3183 {
3184 HOST_WIDE_INT offset = INTVAL (op1);
3185
3186 info->type = ADDRESS_REG_IMM;
3187 info->base = op0;
3188 info->offset = op1;
3189
3190 /* TImode and TFmode values are allowed in both pairs of X
3191 registers and individual Q registers. The available
3192 address modes are:
3193 X,X: 7-bit signed scaled offset
3194 Q: 9-bit signed offset
3195 We conservatively require an offset representable in either mode.
3196 */
3197 if (mode == TImode || mode == TFmode)
3198 return (offset_7bit_signed_scaled_p (mode, offset)
3199 && offset_9bit_signed_unscaled_p (mode, offset));
3200
3201 if (outer_code == PARALLEL)
3202 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3203 && offset_7bit_signed_scaled_p (mode, offset));
3204 else
3205 return (offset_9bit_signed_unscaled_p (mode, offset)
3206 || offset_12bit_unsigned_scaled_p (mode, offset));
3207 }
3208
3209 if (allow_reg_index_p)
3210 {
3211 /* Look for base + (scaled/extended) index register. */
3212 if (aarch64_base_register_rtx_p (op0, strict_p)
3213 && aarch64_classify_index (info, op1, mode, strict_p))
3214 {
3215 info->base = op0;
3216 return true;
3217 }
3218 if (aarch64_base_register_rtx_p (op1, strict_p)
3219 && aarch64_classify_index (info, op0, mode, strict_p))
3220 {
3221 info->base = op1;
3222 return true;
3223 }
3224 }
3225
3226 return false;
3227
3228 case POST_INC:
3229 case POST_DEC:
3230 case PRE_INC:
3231 case PRE_DEC:
3232 info->type = ADDRESS_REG_WB;
3233 info->base = XEXP (x, 0);
3234 info->offset = NULL_RTX;
3235 return aarch64_base_register_rtx_p (info->base, strict_p);
3236
3237 case POST_MODIFY:
3238 case PRE_MODIFY:
3239 info->type = ADDRESS_REG_WB;
3240 info->base = XEXP (x, 0);
3241 if (GET_CODE (XEXP (x, 1)) == PLUS
3242 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3243 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3244 && aarch64_base_register_rtx_p (info->base, strict_p))
3245 {
3246 HOST_WIDE_INT offset;
3247 info->offset = XEXP (XEXP (x, 1), 1);
3248 offset = INTVAL (info->offset);
3249
3250 /* TImode and TFmode values are allowed in both pairs of X
3251 registers and individual Q registers. The available
3252 address modes are:
3253 X,X: 7-bit signed scaled offset
3254 Q: 9-bit signed offset
3255 We conservatively require an offset representable in either mode.
3256 */
3257 if (mode == TImode || mode == TFmode)
3258 return (offset_7bit_signed_scaled_p (mode, offset)
3259 && offset_9bit_signed_unscaled_p (mode, offset));
3260
3261 if (outer_code == PARALLEL)
3262 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3263 && offset_7bit_signed_scaled_p (mode, offset));
3264 else
3265 return offset_9bit_signed_unscaled_p (mode, offset);
3266 }
3267 return false;
3268
3269 case CONST:
3270 case SYMBOL_REF:
3271 case LABEL_REF:
79517551
SN
3272 /* load literal: pc-relative constant pool entry. Only supported
3273 for SI mode or larger. */
43e9d192 3274 info->type = ADDRESS_SYMBOLIC;
79517551 3275 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
3276 {
3277 rtx sym, addend;
3278
3279 split_const (x, &sym, &addend);
3280 return (GET_CODE (sym) == LABEL_REF
3281 || (GET_CODE (sym) == SYMBOL_REF
3282 && CONSTANT_POOL_ADDRESS_P (sym)));
3283 }
3284 return false;
3285
3286 case LO_SUM:
3287 info->type = ADDRESS_LO_SUM;
3288 info->base = XEXP (x, 0);
3289 info->offset = XEXP (x, 1);
3290 if (allow_reg_index_p
3291 && aarch64_base_register_rtx_p (info->base, strict_p))
3292 {
3293 rtx sym, offs;
3294 split_const (info->offset, &sym, &offs);
3295 if (GET_CODE (sym) == SYMBOL_REF
3296 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3297 == SYMBOL_SMALL_ABSOLUTE))
3298 {
3299 /* The symbol and offset must be aligned to the access size. */
3300 unsigned int align;
3301 unsigned int ref_size;
3302
3303 if (CONSTANT_POOL_ADDRESS_P (sym))
3304 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3305 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3306 {
3307 tree exp = SYMBOL_REF_DECL (sym);
3308 align = TYPE_ALIGN (TREE_TYPE (exp));
3309 align = CONSTANT_ALIGNMENT (exp, align);
3310 }
3311 else if (SYMBOL_REF_DECL (sym))
3312 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
3313 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3314 && SYMBOL_REF_BLOCK (sym) != NULL)
3315 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
3316 else
3317 align = BITS_PER_UNIT;
3318
3319 ref_size = GET_MODE_SIZE (mode);
3320 if (ref_size == 0)
3321 ref_size = GET_MODE_SIZE (DImode);
3322
3323 return ((INTVAL (offs) & (ref_size - 1)) == 0
3324 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3325 }
3326 }
3327 return false;
3328
3329 default:
3330 return false;
3331 }
3332}
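/* Some examples of addresses the classifier above accepts for a DImode
   access (shown as the assembly they correspond to; the register
   numbers are arbitrary):
     [x0, 16]            base plus 12-bit unsigned scaled immediate
     [x0, -17]           base plus 9-bit signed unscaled immediate
     [x0, x1, lsl 3]     base plus register scaled by the access size
     [x0, w1, sxtw 3]    base plus sign-extended, scaled 32-bit index
     [x0, 16]!           pre-modify writeback (PRE_MODIFY)
     [x0], 16            post-modify writeback (POST_MODIFY)
   Load/store pairs (OUTER_CODE == PARALLEL) are restricted to the
   7-bit signed scaled range instead.  */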
3333
3334bool
3335aarch64_symbolic_address_p (rtx x)
3336{
3337 rtx offset;
3338
3339 split_const (x, &x, &offset);
3340 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3341}
3342
3343/* Classify the base of symbolic expression X, given that X appears in
3344 context CONTEXT. */
da4f13a4
MS
3345
3346enum aarch64_symbol_type
3347aarch64_classify_symbolic_expression (rtx x,
3348 enum aarch64_symbol_context context)
43e9d192
IB
3349{
3350 rtx offset;
da4f13a4 3351
43e9d192
IB
3352 split_const (x, &x, &offset);
3353 return aarch64_classify_symbol (x, context);
3354}
3355
3356
3357/* Return TRUE if X is a legitimate address for accessing memory in
3358 mode MODE. */
3359static bool
3360aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3361{
3362 struct aarch64_address_info addr;
3363
3364 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3365}
3366
3367/* Return TRUE if X is a legitimate address for accessing memory in
3368 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3369 pair operation. */
3370bool
3371aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
aef66c94 3372 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
3373{
3374 struct aarch64_address_info addr;
3375
3376 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3377}
3378
3379/* Return TRUE if rtx X is immediate constant 0.0 */
3380bool
3520f7cc 3381aarch64_float_const_zero_rtx_p (rtx x)
43e9d192
IB
3382{
3383 REAL_VALUE_TYPE r;
3384
3385 if (GET_MODE (x) == VOIDmode)
3386 return false;
3387
3388 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3389 if (REAL_VALUE_MINUS_ZERO (r))
3390 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3391 return REAL_VALUES_EQUAL (r, dconst0);
3392}
3393
70f09188
AP
3394/* Return the fixed registers used for condition codes. */
3395
3396static bool
3397aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3398{
3399 *p1 = CC_REGNUM;
3400 *p2 = INVALID_REGNUM;
3401 return true;
3402}
3403
43e9d192
IB
3404enum machine_mode
3405aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3406{
3407 /* All floating point compares return CCFP if it is an equality
3408 comparison, and CCFPE otherwise. */
3409 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3410 {
3411 switch (code)
3412 {
3413 case EQ:
3414 case NE:
3415 case UNORDERED:
3416 case ORDERED:
3417 case UNLT:
3418 case UNLE:
3419 case UNGT:
3420 case UNGE:
3421 case UNEQ:
3422 case LTGT:
3423 return CCFPmode;
3424
3425 case LT:
3426 case LE:
3427 case GT:
3428 case GE:
3429 return CCFPEmode;
3430
3431 default:
3432 gcc_unreachable ();
3433 }
3434 }
3435
3436 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3437 && y == const0_rtx
3438 && (code == EQ || code == NE || code == LT || code == GE)
b056c910
N
3439 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3440 || GET_CODE (x) == NEG))
43e9d192
IB
3441 return CC_NZmode;
3442
1c992d1e 3443 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
3444 the comparison will have to be swapped when we emit the assembly
3445 code. */
3446 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3447 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3448 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3449 || GET_CODE (x) == LSHIFTRT
1c992d1e 3450 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
3451 return CC_SWPmode;
3452
1c992d1e
RE
3453 /* Similarly for a negated operand, but we can only do this for
3454 equalities. */
3455 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3456 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3457 && (code == EQ || code == NE)
3458 && GET_CODE (x) == NEG)
3459 return CC_Zmode;
3460
43e9d192
IB
3461 /* A compare of a mode narrower than SI mode against zero can be done
3462 by extending the value in the comparison. */
3463 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3464 && y == const0_rtx)
3465 /* Only use sign-extension if we really need it. */
3466 return ((code == GT || code == GE || code == LE || code == LT)
3467 ? CC_SESWPmode : CC_ZESWPmode);
3468
3469 /* For everything else, return CCmode. */
3470 return CCmode;
3471}
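/* A few examples of the mode selection above (illustrative):
     - (compare (plus x y) 0) used with EQ/NE/LT/GE gives CC_NZmode,
       so the comparison can reuse the flags set by ADDS/SUBS/ANDS;
     - (compare (ashift x 2) y), with y a register, gives CC_SWPmode,
       since the operands must be swapped when the assembly is output;
     - (compare (neg x) y) with EQ/NE gives CC_Zmode;
     - a QImode or HImode value compared against zero with GT/GE/LE/LT
       gives CC_SESWPmode;
     - everything else falls back to CCmode.  */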
3472
3473static unsigned
3474aarch64_get_condition_code (rtx x)
3475{
3476 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3477 enum rtx_code comp_code = GET_CODE (x);
3478
3479 if (GET_MODE_CLASS (mode) != MODE_CC)
3480 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3481
3482 switch (mode)
3483 {
3484 case CCFPmode:
3485 case CCFPEmode:
3486 switch (comp_code)
3487 {
3488 case GE: return AARCH64_GE;
3489 case GT: return AARCH64_GT;
3490 case LE: return AARCH64_LS;
3491 case LT: return AARCH64_MI;
3492 case NE: return AARCH64_NE;
3493 case EQ: return AARCH64_EQ;
3494 case ORDERED: return AARCH64_VC;
3495 case UNORDERED: return AARCH64_VS;
3496 case UNLT: return AARCH64_LT;
3497 case UNLE: return AARCH64_LE;
3498 case UNGT: return AARCH64_HI;
3499 case UNGE: return AARCH64_PL;
3500 default: gcc_unreachable ();
3501 }
3502 break;
3503
3504 case CCmode:
3505 switch (comp_code)
3506 {
3507 case NE: return AARCH64_NE;
3508 case EQ: return AARCH64_EQ;
3509 case GE: return AARCH64_GE;
3510 case GT: return AARCH64_GT;
3511 case LE: return AARCH64_LE;
3512 case LT: return AARCH64_LT;
3513 case GEU: return AARCH64_CS;
3514 case GTU: return AARCH64_HI;
3515 case LEU: return AARCH64_LS;
3516 case LTU: return AARCH64_CC;
3517 default: gcc_unreachable ();
3518 }
3519 break;
3520
3521 case CC_SWPmode:
3522 case CC_ZESWPmode:
3523 case CC_SESWPmode:
3524 switch (comp_code)
3525 {
3526 case NE: return AARCH64_NE;
3527 case EQ: return AARCH64_EQ;
3528 case GE: return AARCH64_LE;
3529 case GT: return AARCH64_LT;
3530 case LE: return AARCH64_GE;
3531 case LT: return AARCH64_GT;
3532 case GEU: return AARCH64_LS;
3533 case GTU: return AARCH64_CC;
3534 case LEU: return AARCH64_CS;
3535 case LTU: return AARCH64_HI;
3536 default: gcc_unreachable ();
3537 }
3538 break;
3539
3540 case CC_NZmode:
3541 switch (comp_code)
3542 {
3543 case NE: return AARCH64_NE;
3544 case EQ: return AARCH64_EQ;
3545 case GE: return AARCH64_PL;
3546 case LT: return AARCH64_MI;
3547 default: gcc_unreachable ();
3548 }
3549 break;
3550
1c992d1e
RE
3551 case CC_Zmode:
3552 switch (comp_code)
3553 {
3554 case NE: return AARCH64_NE;
3555 case EQ: return AARCH64_EQ;
3556 default: gcc_unreachable ();
3557 }
3558 break;
3559
43e9d192
IB
3560 default:
3561 gcc_unreachable ();
3562 break;
3563 }
3564}
3565
3566static unsigned
3567bit_count (unsigned HOST_WIDE_INT value)
3568{
3569 unsigned count = 0;
3570
3571 while (value)
3572 {
3573 count++;
3574 value &= value - 1;
3575 }
3576
3577 return count;
3578}
3579
3580void
3581aarch64_print_operand (FILE *f, rtx x, char code)
3582{
3583 switch (code)
3584 {
f541a481
KT
3585 /* An integer or symbol address without a preceding # sign. */
3586 case 'c':
3587 switch (GET_CODE (x))
3588 {
3589 case CONST_INT:
3590 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3591 break;
3592
3593 case SYMBOL_REF:
3594 output_addr_const (f, x);
3595 break;
3596
3597 case CONST:
3598 if (GET_CODE (XEXP (x, 0)) == PLUS
3599 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3600 {
3601 output_addr_const (f, x);
3602 break;
3603 }
3604 /* Fall through. */
3605
3606 default:
3607 output_operand_lossage ("Unsupported operand for code '%c'", code);
3608 }
3609 break;
3610
43e9d192
IB
3611 case 'e':
3612 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3613 {
3614 int n;
3615
3616 if (GET_CODE (x) != CONST_INT
3617 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3618 {
3619 output_operand_lossage ("invalid operand for '%%%c'", code);
3620 return;
3621 }
3622
3623 switch (n)
3624 {
3625 case 3:
3626 fputc ('b', f);
3627 break;
3628 case 4:
3629 fputc ('h', f);
3630 break;
3631 case 5:
3632 fputc ('w', f);
3633 break;
3634 default:
3635 output_operand_lossage ("invalid operand for '%%%c'", code);
3636 return;
3637 }
3638 }
3639 break;
3640
3641 case 'p':
3642 {
3643 int n;
3644
3645 /* Print N such that 2^N == X. */
3646 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3647 {
3648 output_operand_lossage ("invalid operand for '%%%c'", code);
3649 return;
3650 }
3651
3652 asm_fprintf (f, "%d", n);
3653 }
3654 break;
3655
3656 case 'P':
3657 /* Print the number of non-zero bits in X (a const_int). */
3658 if (GET_CODE (x) != CONST_INT)
3659 {
3660 output_operand_lossage ("invalid operand for '%%%c'", code);
3661 return;
3662 }
3663
3664 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3665 break;
3666
3667 case 'H':
3668 /* Print the higher numbered register of a pair (TImode) of regs. */
3669 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3670 {
3671 output_operand_lossage ("invalid operand for '%%%c'", code);
3672 return;
3673 }
3674
01a3a324 3675 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
3676 break;
3677
43e9d192
IB
3678 case 'm':
3679 /* Print a condition (eq, ne, etc). */
3680
3681 /* CONST_TRUE_RTX means always -- that's the default. */
3682 if (x == const_true_rtx)
3683 return;
3684
3685 if (!COMPARISON_P (x))
3686 {
3687 output_operand_lossage ("invalid operand for '%%%c'", code);
3688 return;
3689 }
3690
3691 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3692 break;
3693
3694 case 'M':
3695 /* Print the inverse of a condition (eq <-> ne, etc). */
3696
3697 /* CONST_TRUE_RTX means never -- that's the default. */
3698 if (x == const_true_rtx)
3699 {
3700 fputs ("nv", f);
3701 return;
3702 }
3703
3704 if (!COMPARISON_P (x))
3705 {
3706 output_operand_lossage ("invalid operand for '%%%c'", code);
3707 return;
3708 }
3709
3710 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3711 (aarch64_get_condition_code (x))], f);
3712 break;
3713
3714 case 'b':
3715 case 'h':
3716 case 's':
3717 case 'd':
3718 case 'q':
3719 /* Print a scalar FP/SIMD register name. */
3720 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3721 {
3722 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3723 return;
3724 }
50ce6f88 3725 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
3726 break;
3727
3728 case 'S':
3729 case 'T':
3730 case 'U':
3731 case 'V':
3732 /* Print the first FP/SIMD register name in a list. */
3733 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3734 {
3735 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3736 return;
3737 }
50ce6f88 3738 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
3739 break;
3740
a05c0ddf 3741 case 'X':
50d38551 3742 /* Print bottom 16 bits of integer constant in hex. */
a05c0ddf
IB
3743 if (GET_CODE (x) != CONST_INT)
3744 {
3745 output_operand_lossage ("invalid operand for '%%%c'", code);
3746 return;
3747 }
50d38551 3748 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
3749 break;
3750
43e9d192
IB
3751 case 'w':
3752 case 'x':
3753 /* Print a general register name or the zero register (32-bit or
3754 64-bit). */
3520f7cc
JG
3755 if (x == const0_rtx
3756 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 3757 {
50ce6f88 3758 asm_fprintf (f, "%czr", code);
43e9d192
IB
3759 break;
3760 }
3761
3762 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3763 {
50ce6f88 3764 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
3765 break;
3766 }
3767
3768 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3769 {
50ce6f88 3770 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
3771 break;
3772 }
3773
3774 /* Fall through */
3775
3776 case 0:
 3777    /* Print a normal operand.  If it is a general register, we
 3778       assume DImode.  */
3779 if (x == NULL)
3780 {
3781 output_operand_lossage ("missing operand");
3782 return;
3783 }
3784
3785 switch (GET_CODE (x))
3786 {
3787 case REG:
01a3a324 3788 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
3789 break;
3790
3791 case MEM:
3792 aarch64_memory_reference_mode = GET_MODE (x);
3793 output_address (XEXP (x, 0));
3794 break;
3795
3796 case LABEL_REF:
3797 case SYMBOL_REF:
3798 output_addr_const (asm_out_file, x);
3799 break;
3800
3801 case CONST_INT:
3802 asm_fprintf (f, "%wd", INTVAL (x));
3803 break;
3804
3805 case CONST_VECTOR:
3520f7cc
JG
3806 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3807 {
3808 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3809 HOST_WIDE_INT_MIN,
3810 HOST_WIDE_INT_MAX));
3811 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3812 }
3813 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3814 {
3815 fputc ('0', f);
3816 }
3817 else
3818 gcc_unreachable ();
43e9d192
IB
3819 break;
3820
3520f7cc
JG
3821 case CONST_DOUBLE:
3822 /* CONST_DOUBLE can represent a double-width integer.
3823 In this case, the mode of x is VOIDmode. */
3824 if (GET_MODE (x) == VOIDmode)
3825 ; /* Do Nothing. */
3826 else if (aarch64_float_const_zero_rtx_p (x))
3827 {
3828 fputc ('0', f);
3829 break;
3830 }
3831 else if (aarch64_float_const_representable_p (x))
3832 {
3833#define buf_size 20
3834 char float_buf[buf_size] = {'\0'};
3835 REAL_VALUE_TYPE r;
3836 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3837 real_to_decimal_for_mode (float_buf, &r,
3838 buf_size, buf_size,
3839 1, GET_MODE (x));
3840 asm_fprintf (asm_out_file, "%s", float_buf);
3841 break;
3842#undef buf_size
3843 }
3844 output_operand_lossage ("invalid constant");
3845 return;
43e9d192
IB
3846 default:
3847 output_operand_lossage ("invalid operand");
3848 return;
3849 }
3850 break;
3851
3852 case 'A':
3853 if (GET_CODE (x) == HIGH)
3854 x = XEXP (x, 0);
3855
3856 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3857 {
3858 case SYMBOL_SMALL_GOT:
3859 asm_fprintf (asm_out_file, ":got:");
3860 break;
3861
3862 case SYMBOL_SMALL_TLSGD:
3863 asm_fprintf (asm_out_file, ":tlsgd:");
3864 break;
3865
3866 case SYMBOL_SMALL_TLSDESC:
3867 asm_fprintf (asm_out_file, ":tlsdesc:");
3868 break;
3869
3870 case SYMBOL_SMALL_GOTTPREL:
3871 asm_fprintf (asm_out_file, ":gottprel:");
3872 break;
3873
3874 case SYMBOL_SMALL_TPREL:
3875 asm_fprintf (asm_out_file, ":tprel:");
3876 break;
3877
87dd8ab0
MS
3878 case SYMBOL_TINY_GOT:
3879 gcc_unreachable ();
3880 break;
3881
43e9d192
IB
3882 default:
3883 break;
3884 }
3885 output_addr_const (asm_out_file, x);
3886 break;
3887
3888 case 'L':
3889 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3890 {
3891 case SYMBOL_SMALL_GOT:
3892 asm_fprintf (asm_out_file, ":lo12:");
3893 break;
3894
3895 case SYMBOL_SMALL_TLSGD:
3896 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3897 break;
3898
3899 case SYMBOL_SMALL_TLSDESC:
3900 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3901 break;
3902
3903 case SYMBOL_SMALL_GOTTPREL:
3904 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3905 break;
3906
3907 case SYMBOL_SMALL_TPREL:
3908 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3909 break;
3910
87dd8ab0
MS
3911 case SYMBOL_TINY_GOT:
3912 asm_fprintf (asm_out_file, ":got:");
3913 break;
3914
43e9d192
IB
3915 default:
3916 break;
3917 }
3918 output_addr_const (asm_out_file, x);
3919 break;
3920
3921 case 'G':
3922
3923 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3924 {
3925 case SYMBOL_SMALL_TPREL:
3926 asm_fprintf (asm_out_file, ":tprel_hi12:");
3927 break;
3928 default:
3929 break;
3930 }
3931 output_addr_const (asm_out_file, x);
3932 break;
3933
3934 default:
3935 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3936 return;
3937 }
3938}
3939
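/* Print an operand address X to F.  A minimal sketch of the output for
   typical operands (illustrative registers and symbols):
     (plus (reg x0) (const_int 16))         ->  [x0, 16]
     (post_inc (reg x1)), DImode access     ->  [x1], 8
     (lo_sum (reg x2) (symbol_ref "foo"))   ->  [x2, #:lo12:foo]  */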
3940void
3941aarch64_print_operand_address (FILE *f, rtx x)
3942{
3943 struct aarch64_address_info addr;
3944
3945 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3946 MEM, true))
3947 switch (addr.type)
3948 {
3949 case ADDRESS_REG_IMM:
3950 if (addr.offset == const0_rtx)
01a3a324 3951 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 3952 else
16a3246f 3953 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
3954 INTVAL (addr.offset));
3955 return;
3956
3957 case ADDRESS_REG_REG:
3958 if (addr.shift == 0)
16a3246f 3959 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 3960 reg_names [REGNO (addr.offset)]);
43e9d192 3961 else
16a3246f 3962 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 3963 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
3964 return;
3965
3966 case ADDRESS_REG_UXTW:
3967 if (addr.shift == 0)
16a3246f 3968 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
3969 REGNO (addr.offset) - R0_REGNUM);
3970 else
16a3246f 3971 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
3972 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3973 return;
3974
3975 case ADDRESS_REG_SXTW:
3976 if (addr.shift == 0)
16a3246f 3977 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
3978 REGNO (addr.offset) - R0_REGNUM);
3979 else
16a3246f 3980 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
3981 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3982 return;
3983
3984 case ADDRESS_REG_WB:
3985 switch (GET_CODE (x))
3986 {
3987 case PRE_INC:
16a3246f 3988 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
3989 GET_MODE_SIZE (aarch64_memory_reference_mode));
3990 return;
3991 case POST_INC:
16a3246f 3992 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
43e9d192
IB
3993 GET_MODE_SIZE (aarch64_memory_reference_mode));
3994 return;
3995 case PRE_DEC:
16a3246f 3996 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
3997 GET_MODE_SIZE (aarch64_memory_reference_mode));
3998 return;
3999 case POST_DEC:
16a3246f 4000 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
43e9d192
IB
4001 GET_MODE_SIZE (aarch64_memory_reference_mode));
4002 return;
4003 case PRE_MODIFY:
16a3246f 4004 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4005 INTVAL (addr.offset));
4006 return;
4007 case POST_MODIFY:
16a3246f 4008 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
4009 INTVAL (addr.offset));
4010 return;
4011 default:
4012 break;
4013 }
4014 break;
4015
4016 case ADDRESS_LO_SUM:
16a3246f 4017 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
4018 output_addr_const (f, addr.offset);
4019 asm_fprintf (f, "]");
4020 return;
4021
4022 case ADDRESS_SYMBOLIC:
4023 break;
4024 }
4025
4026 output_addr_const (f, x);
4027}
4028
43e9d192
IB
4029bool
4030aarch64_label_mentioned_p (rtx x)
4031{
4032 const char *fmt;
4033 int i;
4034
4035 if (GET_CODE (x) == LABEL_REF)
4036 return true;
4037
4038 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4039 referencing instruction, but they are constant offsets, not
4040 symbols. */
4041 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4042 return false;
4043
4044 fmt = GET_RTX_FORMAT (GET_CODE (x));
4045 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4046 {
4047 if (fmt[i] == 'E')
4048 {
4049 int j;
4050
4051 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4052 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4053 return 1;
4054 }
4055 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4056 return 1;
4057 }
4058
4059 return 0;
4060}
4061
4062/* Implement REGNO_REG_CLASS. */
4063
4064enum reg_class
4065aarch64_regno_regclass (unsigned regno)
4066{
4067 if (GP_REGNUM_P (regno))
a4a182c6 4068 return GENERAL_REGS;
43e9d192
IB
4069
4070 if (regno == SP_REGNUM)
4071 return STACK_REG;
4072
4073 if (regno == FRAME_POINTER_REGNUM
4074 || regno == ARG_POINTER_REGNUM)
f24bb080 4075 return POINTER_REGS;
43e9d192
IB
4076
4077 if (FP_REGNUM_P (regno))
4078 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4079
4080 return NO_REGS;
4081}
4082
4083/* Try a machine-dependent way of reloading an illegitimate address
4084 operand. If we find one, push the reload and return the new rtx. */
4085
4086rtx
4087aarch64_legitimize_reload_address (rtx *x_p,
4088 enum machine_mode mode,
4089 int opnum, int type,
4090 int ind_levels ATTRIBUTE_UNUSED)
4091{
4092 rtx x = *x_p;
4093
4094 /* Do not allow mem (plus (reg, const)) if vector mode. */
4095 if (aarch64_vector_mode_p (mode)
4096 && GET_CODE (x) == PLUS
4097 && REG_P (XEXP (x, 0))
4098 && CONST_INT_P (XEXP (x, 1)))
4099 {
4100 rtx orig_rtx = x;
4101 x = copy_rtx (x);
4102 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4103 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4104 opnum, (enum reload_type) type);
4105 return x;
4106 }
4107
4108 /* We must recognize output that we have already generated ourselves. */
4109 if (GET_CODE (x) == PLUS
4110 && GET_CODE (XEXP (x, 0)) == PLUS
4111 && REG_P (XEXP (XEXP (x, 0), 0))
4112 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4113 && CONST_INT_P (XEXP (x, 1)))
4114 {
4115 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4116 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4117 opnum, (enum reload_type) type);
4118 return x;
4119 }
4120
4121 /* We wish to handle large displacements off a base register by splitting
4122 the addend across an add and the mem insn. This can cut the number of
4123 extra insns needed from 3 to 1. It is only useful for load/store of a
4124 single register with a 12-bit offset field. */
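  /* For instance (illustrative values): a DImode access at offset 0x3458
     is split into high = 0x3000, which is reloaded into the base register
     with an ADD, and low = 0x458, which stays in the LDR/STR offset field.  */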
4125 if (GET_CODE (x) == PLUS
4126 && REG_P (XEXP (x, 0))
4127 && CONST_INT_P (XEXP (x, 1))
4128 && HARD_REGISTER_P (XEXP (x, 0))
4129 && mode != TImode
4130 && mode != TFmode
4131 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4132 {
4133 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4134 HOST_WIDE_INT low = val & 0xfff;
4135 HOST_WIDE_INT high = val - low;
4136 HOST_WIDE_INT offs;
4137 rtx cst;
28514dda
YZ
4138 enum machine_mode xmode = GET_MODE (x);
4139
4140 /* In ILP32, xmode can be either DImode or SImode. */
4141 gcc_assert (xmode == DImode || xmode == SImode);
43e9d192
IB
4142
4143 /* Punt on BLKmode (zero-size) offsets: we cannot ascertain BLKmode
4144 alignment, so leave such addresses to the generic reload code. */
4145 if (GET_MODE_SIZE (mode) == 0)
4146 return NULL_RTX;
4147
4148 offs = low % GET_MODE_SIZE (mode);
4149
4150 /* Align misaligned offset by adjusting high part to compensate. */
4151 if (offs != 0)
4152 {
4153 if (aarch64_uimm12_shift (high + offs))
4154 {
4155 /* Align down. */
4156 low = low - offs;
4157 high = high + offs;
4158 }
4159 else
4160 {
4161 /* Align up. */
4162 offs = GET_MODE_SIZE (mode) - offs;
4163 low = low + offs;
4164 high = high + (low & 0x1000) - offs;
4165 low &= 0xfff;
4166 }
4167 }
4168
4169 /* Check for overflow. */
4170 if (high + low != val)
4171 return NULL_RTX;
4172
4173 cst = GEN_INT (high);
4174 if (!aarch64_uimm12_shift (high))
28514dda 4175 cst = force_const_mem (xmode, cst);
43e9d192
IB
4176
4177 /* Reload high part into base reg, leaving the low part
acf67b8e
KT
4178 in the mem instruction.
4179 Note that replacing this gen_rtx_PLUS with plus_constant is
4180 wrong in this case because we rely on the
4181 (plus (plus reg c1) c2) structure being preserved so that
4182 XEXP (*p, 0) in push_reload below uses the correct term. */
4d0e5b54
JG
4183 x = gen_rtx_PLUS (xmode,
4184 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4185 GEN_INT (low));
43e9d192
IB
4186
4187 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
28514dda 4188 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
43e9d192
IB
4189 opnum, (enum reload_type) type);
4190 return x;
4191 }
4192
4193 return NULL_RTX;
4194}
4195
4196
4197static reg_class_t
4198aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4199 reg_class_t rclass,
4200 enum machine_mode mode,
4201 secondary_reload_info *sri)
4202{
43e9d192
IB
4203 /* Without the TARGET_SIMD instructions we cannot move a Q register
4204 to a Q register directly. We need a scratch. */
4205 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4206 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4207 && reg_class_subset_p (rclass, FP_REGS))
4208 {
4209 if (mode == TFmode)
4210 sri->icode = CODE_FOR_aarch64_reload_movtf;
4211 else if (mode == TImode)
4212 sri->icode = CODE_FOR_aarch64_reload_movti;
4213 return NO_REGS;
4214 }
4215
4216 /* A TFmode or TImode memory access should be handled via FP_REGS
4217 because AArch64 has richer addressing modes for LDR/STR instructions
4218 than LDP/STP instructions. */
a4a182c6 4219 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
43e9d192
IB
4220 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4221 return FP_REGS;
4222
4223 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 4224 return GENERAL_REGS;
43e9d192
IB
4225
4226 return NO_REGS;
4227}
4228
4229static bool
4230aarch64_can_eliminate (const int from, const int to)
4231{
4232 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4233 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4234
4235 if (frame_pointer_needed)
4236 {
4237 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4238 return true;
4239 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4240 return false;
4241 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4242 && !cfun->calls_alloca)
4243 return true;
4244 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4245 return true;
0b7f8166
MS
4246
4247 return false;
43e9d192 4248 }
777e6976 4249
43e9d192
IB
4250 return true;
4251}
4252
4253HOST_WIDE_INT
4254aarch64_initial_elimination_offset (unsigned from, unsigned to)
4255{
4256 HOST_WIDE_INT frame_size;
4257 HOST_WIDE_INT offset;
4258
4259 aarch64_layout_frame ();
4260 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4261 + crtl->outgoing_args_size
4262 + cfun->machine->saved_varargs_size);
4263
78c29983
MS
4264 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4265 offset = frame_size;
4266
4267 if (to == HARD_FRAME_POINTER_REGNUM)
4268 {
4269 if (from == ARG_POINTER_REGNUM)
4270 return offset - crtl->outgoing_args_size;
4271
4272 if (from == FRAME_POINTER_REGNUM)
4273 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4274 }
4275
4276 if (to == STACK_POINTER_REGNUM)
4277 {
4278 if (from == FRAME_POINTER_REGNUM)
4279 {
4280 HOST_WIDE_INT elim = crtl->outgoing_args_size
4281 + cfun->machine->frame.saved_regs_size
53e5ace2 4282 + get_frame_size ();
78c29983
MS
4283 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4284 return elim;
4285 }
4286 }
4287
4288 return offset;
43e9d192
IB
4289}
4290
4291
4292/* Implement RETURN_ADDR_RTX. We do not support moving back to a
4293 previous frame. */
4294
4295rtx
4296aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4297{
4298 if (count != 0)
4299 return const0_rtx;
4300 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4301}
4302
4303
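/* Emit the trampoline code template.  For LP64 this is roughly
   (x17 being IP1 and x18 the static chain register):
	ldr	x17, .+16	<- target function address
	ldr	x18, .+20	<- static chain value
	br	x17
   followed by a 4-byte pad and two pointer-sized slots that
   aarch64_trampoline_init fills in at run time.  */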
4304static void
4305aarch64_asm_trampoline_template (FILE *f)
4306{
28514dda
YZ
4307 if (TARGET_ILP32)
4308 {
4309 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4310 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4311 }
4312 else
4313 {
4314 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4315 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4316 }
01a3a324 4317 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 4318 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
4319 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4320 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
4321}
4322
4323static void
4324aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4325{
4326 rtx fnaddr, mem, a_tramp;
28514dda 4327 const int tramp_code_sz = 16;
43e9d192
IB
4328
4329 /* Don't need to copy the trailing D-words; we fill those in below. */
4330 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
4331 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4332 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 4333 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
4334 if (GET_MODE (fnaddr) != ptr_mode)
4335 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
4336 emit_move_insn (mem, fnaddr);
4337
28514dda 4338 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
4339 emit_move_insn (mem, chain_value);
4340
4341 /* XXX We should really define a "clear_cache" pattern and use
4342 gen_clear_cache(). */
4343 a_tramp = XEXP (m_tramp, 0);
4344 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
4345 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4346 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4347 ptr_mode);
43e9d192
IB
4348}
4349
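/* A worked example of the arithmetic below: TImode in GENERAL_REGS needs
   (16 + 7) / 8 = 2 registers, while a 128-bit vector mode in FP_REGS
   needs (16 + 15) / 16 = 1.  */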
4350static unsigned char
4351aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4352{
4353 switch (regclass)
4354 {
fee9ba42 4355 case CALLER_SAVE_REGS:
43e9d192
IB
4356 case POINTER_REGS:
4357 case GENERAL_REGS:
4358 case ALL_REGS:
4359 case FP_REGS:
4360 case FP_LO_REGS:
4361 return
4362 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
aef66c94 4363 (GET_MODE_SIZE (mode) + 7) / 8;
43e9d192
IB
4364 case STACK_REG:
4365 return 1;
4366
4367 case NO_REGS:
4368 return 0;
4369
4370 default:
4371 break;
4372 }
4373 gcc_unreachable ();
4374}
4375
4376static reg_class_t
78d8b9f0 4377aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 4378{
51bb310d 4379 if (regclass == POINTER_REGS)
78d8b9f0
IB
4380 return GENERAL_REGS;
4381
51bb310d
MS
4382 if (regclass == STACK_REG)
4383 {
4384 if (REG_P(x)
4385 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4386 return regclass;
4387
4388 return NO_REGS;
4389 }
4390
78d8b9f0
IB
4391 /* If it's an integer immediate that MOVI can't handle, then
4392 FP_REGS is not an option, so we return NO_REGS instead. */
4393 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4394 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4395 return NO_REGS;
4396
27bd251b
IB
4397 /* Register elimination can result in a request for
4398 SP+constant->FP_REGS. We cannot support such operations, which
4399 use SP as source and an FP_REG as destination, so reject such
4400 requests outright here. */
4401 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4402 {
4403 rtx lhs = XEXP (x, 0);
4404
4405 /* Look through a possible SUBREG introduced by ILP32. */
4406 if (GET_CODE (lhs) == SUBREG)
4407 lhs = SUBREG_REG (lhs);
4408
4409 gcc_assert (REG_P (lhs));
4410 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4411 POINTER_REGS));
4412 return NO_REGS;
4413 }
4414
78d8b9f0 4415 return regclass;
43e9d192
IB
4416}
4417
4418void
4419aarch64_asm_output_labelref (FILE* f, const char *name)
4420{
4421 asm_fprintf (f, "%U%s", name);
4422}
4423
4424static void
4425aarch64_elf_asm_constructor (rtx symbol, int priority)
4426{
4427 if (priority == DEFAULT_INIT_PRIORITY)
4428 default_ctor_section_asm_out_constructor (symbol, priority);
4429 else
4430 {
4431 section *s;
4432 char buf[18];
4433 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4434 s = get_section (buf, SECTION_WRITE, NULL);
4435 switch_to_section (s);
4436 assemble_align (POINTER_SIZE);
28514dda 4437 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4438 }
4439}
4440
4441static void
4442aarch64_elf_asm_destructor (rtx symbol, int priority)
4443{
4444 if (priority == DEFAULT_INIT_PRIORITY)
4445 default_dtor_section_asm_out_destructor (symbol, priority);
4446 else
4447 {
4448 section *s;
4449 char buf[18];
4450 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4451 s = get_section (buf, SECTION_WRITE, NULL);
4452 switch_to_section (s);
4453 assemble_align (POINTER_SIZE);
28514dda 4454 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4455 }
4456}
4457
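/* A rough sketch of the sequence emitted below for a HImode dispatch
   table, with illustrative operand registers:
	ldrh	w3, [x0, w1, uxtw #1]
	adr	x4, .Lrtx<N>
	add	x3, x4, w3, sxth #2
	br	x3
   .Lrtx<N>:	<- anchor label for the table's relative offsets  */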
4458const char*
4459aarch64_output_casesi (rtx *operands)
4460{
4461 char buf[100];
4462 char label[100];
592a16fc 4463 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
43e9d192
IB
4464 int index;
4465 static const char *const patterns[4][2] =
4466 {
4467 {
4468 "ldrb\t%w3, [%0,%w1,uxtw]",
4469 "add\t%3, %4, %w3, sxtb #2"
4470 },
4471 {
4472 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4473 "add\t%3, %4, %w3, sxth #2"
4474 },
4475 {
4476 "ldr\t%w3, [%0,%w1,uxtw #2]",
4477 "add\t%3, %4, %w3, sxtw #2"
4478 },
4479 /* We assume that DImode is only generated when not optimizing and
4480 that we don't really need 64-bit address offsets. That would
4481 imply an object file with 8GB of code in a single function! */
4482 {
4483 "ldr\t%w3, [%0,%w1,uxtw #2]",
4484 "add\t%3, %4, %w3, sxtw #2"
4485 }
4486 };
4487
4488 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4489
4490 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4491
4492 gcc_assert (index >= 0 && index <= 3);
4493
4494 /* Need to implement table size reduction, by changing the code below. */
4495 output_asm_insn (patterns[index][0], operands);
4496 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4497 snprintf (buf, sizeof (buf),
4498 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4499 output_asm_insn (buf, operands);
4500 output_asm_insn (patterns[index][1], operands);
4501 output_asm_insn ("br\t%3", operands);
4502 assemble_label (asm_out_file, label);
4503 return "";
4504}
4505
4506
4507/* Return size in bits of an arithmetic operand which is shifted/scaled and
4508 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4509 operator. */
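/* For instance, aarch64_uxt_size (1, 0x1fe) returns 8: the mask is 0xff
   shifted left by one, i.e. a byte value scaled by 2.  Masks that do not
   line up with an 8-, 16- or 32-bit field yield 0.  */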
4510
4511int
4512aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4513{
4514 if (shift >= 0 && shift <= 3)
4515 {
4516 int size;
4517 for (size = 8; size <= 32; size *= 2)
4518 {
4519 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4520 if (mask == bits << shift)
4521 return size;
4522 }
4523 }
4524 return 0;
4525}
4526
4527static bool
4528aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4529 const_rtx x ATTRIBUTE_UNUSED)
4530{
4531 /* We can't use blocks for constants when we're using a per-function
4532 constant pool. */
4533 return false;
4534}
4535
4536static section *
4537aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4538 rtx x ATTRIBUTE_UNUSED,
4539 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4540{
4541 /* Force all constant pool entries into the current function section. */
4542 return function_section (current_function_decl);
4543}
4544
4545
4546/* Costs. */
4547
4548/* Helper function for rtx cost calculation. Strip a shift expression
4549 from X. Returns the inner operand if successful, or the original
4550 expression on failure. */
4551static rtx
4552aarch64_strip_shift (rtx x)
4553{
4554 rtx op = x;
4555
57b77d46
RE
4556 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4557 we can convert both to ROR during final output. */
43e9d192
IB
4558 if ((GET_CODE (op) == ASHIFT
4559 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
4560 || GET_CODE (op) == LSHIFTRT
4561 || GET_CODE (op) == ROTATERT
4562 || GET_CODE (op) == ROTATE)
43e9d192
IB
4563 && CONST_INT_P (XEXP (op, 1)))
4564 return XEXP (op, 0);
4565
4566 if (GET_CODE (op) == MULT
4567 && CONST_INT_P (XEXP (op, 1))
4568 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4569 return XEXP (op, 0);
4570
4571 return x;
4572}
4573
4745e701 4574/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
4575 expression from X. Returns the inner operand if successful, or the
4576 original expression on failure. We deal with a number of possible
4577 canonicalization variations here. */
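/* For instance (illustrative), (zero_extend:DI (reg:SI w0)) strips to the
   inner reg, and so does the scaled zero-extend written as
   (and:DI (mult:DI (reg:DI) (const_int 4)) (const_int 1020)),
   since 1020 is 0xff scaled by the multiplier.  */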
4578static rtx
4745e701 4579aarch64_strip_extend (rtx x)
43e9d192
IB
4580{
4581 rtx op = x;
4582
4583 /* Zero and sign extraction of a widened value. */
4584 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4585 && XEXP (op, 2) == const0_rtx
4745e701 4586 && GET_CODE (XEXP (op, 0)) == MULT
43e9d192
IB
4587 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4588 XEXP (op, 1)))
4589 return XEXP (XEXP (op, 0), 0);
4590
4591 /* It can also be represented (for zero-extend) as an AND with an
4592 immediate. */
4593 if (GET_CODE (op) == AND
4594 && GET_CODE (XEXP (op, 0)) == MULT
4595 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4596 && CONST_INT_P (XEXP (op, 1))
4597 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4598 INTVAL (XEXP (op, 1))) != 0)
4599 return XEXP (XEXP (op, 0), 0);
4600
4601 /* Now handle extended register, as this may also have an optional
4602 left shift by 1..4. */
4603 if (GET_CODE (op) == ASHIFT
4604 && CONST_INT_P (XEXP (op, 1))
4605 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4606 op = XEXP (op, 0);
4607
4608 if (GET_CODE (op) == ZERO_EXTEND
4609 || GET_CODE (op) == SIGN_EXTEND)
4610 op = XEXP (op, 0);
4611
4612 if (op != x)
4613 return op;
4614
4745e701
JG
4615 return x;
4616}
4617
4618/* Helper function for rtx cost calculation. Calculate the cost of
4619 a MULT, which may be part of a multiply-accumulate rtx. Return
4620 the calculated cost of the expression, recursing manually into
4621 operands where needed. */
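/* For example (illustrative), (plus (mult (reg) (const_int 8)) (reg)) is
   costed as an ADD (shifted register), i.e. alu.arith_shift plus the cost
   of the multiplied operand, rather than as a MADD.  */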
4622
4623static int
4624aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4625{
4626 rtx op0, op1;
4627 const struct cpu_cost_table *extra_cost
4628 = aarch64_tune_params->insn_extra_cost;
4629 int cost = 0;
4630 bool maybe_fma = (outer == PLUS || outer == MINUS);
4631 enum machine_mode mode = GET_MODE (x);
4632
4633 gcc_checking_assert (code == MULT);
4634
4635 op0 = XEXP (x, 0);
4636 op1 = XEXP (x, 1);
4637
4638 if (VECTOR_MODE_P (mode))
4639 mode = GET_MODE_INNER (mode);
4640
4641 /* Integer multiply/fma. */
4642 if (GET_MODE_CLASS (mode) == MODE_INT)
4643 {
4644 /* The multiply will be canonicalized as a shift; cost it as such. */
4645 if (CONST_INT_P (op1)
4646 && exact_log2 (INTVAL (op1)) > 0)
4647 {
4648 if (speed)
4649 {
4650 if (maybe_fma)
4651 /* ADD (shifted register). */
4652 cost += extra_cost->alu.arith_shift;
4653 else
4654 /* LSL (immediate). */
4655 cost += extra_cost->alu.shift;
4656 }
4657
4658 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4659
4660 return cost;
4661 }
4662
4663 /* Integer multiplies or FMAs have zero/sign extending variants. */
4664 if ((GET_CODE (op0) == ZERO_EXTEND
4665 && GET_CODE (op1) == ZERO_EXTEND)
4666 || (GET_CODE (op0) == SIGN_EXTEND
4667 && GET_CODE (op1) == SIGN_EXTEND))
4668 {
4669 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4670 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4671
4672 if (speed)
4673 {
4674 if (maybe_fma)
4675 /* MADD/SMADDL/UMADDL. */
4676 cost += extra_cost->mult[0].extend_add;
4677 else
4678 /* MUL/SMULL/UMULL. */
4679 cost += extra_cost->mult[0].extend;
4680 }
4681
4682 return cost;
4683 }
4684
4685 /* This is either an integer multiply or an FMA. In both cases
4686 we want to recurse and cost the operands. */
4687 cost += rtx_cost (op0, MULT, 0, speed)
4688 + rtx_cost (op1, MULT, 1, speed);
4689
4690 if (speed)
4691 {
4692 if (maybe_fma)
4693 /* MADD. */
4694 cost += extra_cost->mult[mode == DImode].add;
4695 else
4696 /* MUL. */
4697 cost += extra_cost->mult[mode == DImode].simple;
4698 }
4699
4700 return cost;
4701 }
4702 else
4703 {
4704 if (speed)
4705 {
3d840f7d 4706 /* Floating-point FMA/FMUL can also support negations of the
4745e701
JG
4707 operands. */
4708 if (GET_CODE (op0) == NEG)
3d840f7d 4709 op0 = XEXP (op0, 0);
4745e701 4710 if (GET_CODE (op1) == NEG)
3d840f7d 4711 op1 = XEXP (op1, 0);
4745e701
JG
4712
4713 if (maybe_fma)
4714 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4715 cost += extra_cost->fp[mode == DFmode].fma;
4716 else
3d840f7d 4717 /* FMUL/FNMUL. */
4745e701
JG
4718 cost += extra_cost->fp[mode == DFmode].mult;
4719 }
4720
4721 cost += rtx_cost (op0, MULT, 0, speed)
4722 + rtx_cost (op1, MULT, 1, speed);
4723 return cost;
4724 }
43e9d192
IB
4725}
4726
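/* A sketch of the accounting done below: a plain [base, #imm] address
   costs imm_offset, a register offset costs register_offset, writeback
   forms cost pre_modify or post_modify, and a scaled index additionally
   pays the addr_scale_costs entry for the access size.  */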
67747367
JG
4727static int
4728aarch64_address_cost (rtx x,
4729 enum machine_mode mode,
4730 addr_space_t as ATTRIBUTE_UNUSED,
4731 bool speed)
4732{
4733 enum rtx_code c = GET_CODE (x);
4734 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4735 struct aarch64_address_info info;
4736 int cost = 0;
4737 info.shift = 0;
4738
4739 if (!aarch64_classify_address (&info, x, mode, c, false))
4740 {
4741 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4742 {
4743 /* This is a CONST or SYMBOL ref which will be split
4744 in a different way depending on the code model in use.
4745 Cost it through the generic infrastructure. */
4746 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4747 /* Divide through by the cost of one instruction to
4748 bring it to the same units as the address costs. */
4749 cost_symbol_ref /= COSTS_N_INSNS (1);
4750 /* The cost is then the cost of preparing the address,
4751 followed by an immediate (possibly 0) offset. */
4752 return cost_symbol_ref + addr_cost->imm_offset;
4753 }
4754 else
4755 {
4756 /* This is most likely a jump table from a case
4757 statement. */
4758 return addr_cost->register_offset;
4759 }
4760 }
4761
4762 switch (info.type)
4763 {
4764 case ADDRESS_LO_SUM:
4765 case ADDRESS_SYMBOLIC:
4766 case ADDRESS_REG_IMM:
4767 cost += addr_cost->imm_offset;
4768 break;
4769
4770 case ADDRESS_REG_WB:
4771 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4772 cost += addr_cost->pre_modify;
4773 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4774 cost += addr_cost->post_modify;
4775 else
4776 gcc_unreachable ();
4777
4778 break;
4779
4780 case ADDRESS_REG_REG:
4781 cost += addr_cost->register_offset;
4782 break;
4783
4784 case ADDRESS_REG_UXTW:
4785 case ADDRESS_REG_SXTW:
4786 cost += addr_cost->register_extend;
4787 break;
4788
4789 default:
4790 gcc_unreachable ();
4791 }
4792
4793
4794 if (info.shift > 0)
4795 {
4796 /* For the sake of calculating the cost of the shifted register
4797 component, we can treat same sized modes in the same way. */
4798 switch (GET_MODE_BITSIZE (mode))
4799 {
4800 case 16:
4801 cost += addr_cost->addr_scale_costs.hi;
4802 break;
4803
4804 case 32:
4805 cost += addr_cost->addr_scale_costs.si;
4806 break;
4807
4808 case 64:
4809 cost += addr_cost->addr_scale_costs.di;
4810 break;
4811
4812 /* We can't tell, or this is a 128-bit vector. */
4813 default:
4814 cost += addr_cost->addr_scale_costs.ti;
4815 break;
4816 }
4817 }
4818
4819 return cost;
4820}
4821
7cc2145f
JG
4822/* Return true if the RTX X in mode MODE is a zero or sign extract
4823 usable in an ADD or SUB (extended register) instruction. */
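/* Such extracts are what the add_<optab><mode>_multp2 patterns produce for
   an extend of a register scaled by a small power of two, e.g. the
   "add x0, x1, w2, sxtw #2" form (illustrative registers).  */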
4824static bool
4825aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
4826{
4827 /* Catch add with a sign extract.
4828 This is add_<optab><mode>_multp2. */
4829 if (GET_CODE (x) == SIGN_EXTRACT
4830 || GET_CODE (x) == ZERO_EXTRACT)
4831 {
4832 rtx op0 = XEXP (x, 0);
4833 rtx op1 = XEXP (x, 1);
4834 rtx op2 = XEXP (x, 2);
4835
4836 if (GET_CODE (op0) == MULT
4837 && CONST_INT_P (op1)
4838 && op2 == const0_rtx
4839 && CONST_INT_P (XEXP (op0, 1))
4840 && aarch64_is_extend_from_extract (mode,
4841 XEXP (op0, 1),
4842 op1))
4843 {
4844 return true;
4845 }
4846 }
4847
4848 return false;
4849}
4850
43e9d192
IB
4851/* Calculate the cost of calculating X, storing it in *COST. Result
4852 is true if the total cost of the operation has now been calculated. */
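/* A note on the protocol used below: returning false asks the generic
   rtx_cost machinery to recurse into the remaining operands, while
   returning true means *COST already accounts for them.  */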
4853static bool
4854aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4855 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4856{
a8eecd00 4857 rtx op0, op1, op2;
73250c4c 4858 const struct cpu_cost_table *extra_cost
43e9d192 4859 = aarch64_tune_params->insn_extra_cost;
9dfc162c 4860 enum machine_mode mode = GET_MODE (x);
43e9d192 4861
7fc5ef02
JG
4862 /* By default, assume that everything has equivalent cost to the
4863 cheapest instruction. Any additional costs are applied as a delta
4864 above this default. */
4865 *cost = COSTS_N_INSNS (1);
4866
4867 /* TODO: The cost infrastructure currently does not handle
4868 vector operations. Assume that all vector operations
4869 are equally expensive. */
4870 if (VECTOR_MODE_P (mode))
4871 {
4872 if (speed)
4873 *cost += extra_cost->vect.alu;
4874 return true;
4875 }
4876
43e9d192
IB
4877 switch (code)
4878 {
4879 case SET:
ba123b0d
JG
4880 /* The cost depends entirely on the operands to SET. */
4881 *cost = 0;
43e9d192
IB
4882 op0 = SET_DEST (x);
4883 op1 = SET_SRC (x);
4884
4885 switch (GET_CODE (op0))
4886 {
4887 case MEM:
4888 if (speed)
2961177e
JG
4889 {
4890 rtx address = XEXP (op0, 0);
4891 if (GET_MODE_CLASS (mode) == MODE_INT)
4892 *cost += extra_cost->ldst.store;
4893 else if (mode == SFmode)
4894 *cost += extra_cost->ldst.storef;
4895 else if (mode == DFmode)
4896 *cost += extra_cost->ldst.stored;
4897
4898 *cost +=
4899 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4900 0, speed));
4901 }
43e9d192 4902
ba123b0d 4903 *cost += rtx_cost (op1, SET, 1, speed);
43e9d192
IB
4904 return true;
4905
4906 case SUBREG:
4907 if (! REG_P (SUBREG_REG (op0)))
4908 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
ba123b0d 4909
43e9d192
IB
4910 /* Fall through. */
4911 case REG:
ba123b0d
JG
4912 /* const0_rtx is in general free, but we will use an
4913 instruction to set a register to 0. */
4914 if (REG_P (op1) || op1 == const0_rtx)
4915 {
4916 /* The cost is 1 per register copied. */
4917 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
4918 / UNITS_PER_WORD;
4919 *cost = COSTS_N_INSNS (n_minus_1 + 1);
4920 }
4921 else
4922 /* Cost is just the cost of the RHS of the set. */
4923 *cost += rtx_cost (op1, SET, 1, speed);
43e9d192
IB
4924 return true;
4925
ba123b0d 4926 case ZERO_EXTRACT:
43e9d192 4927 case SIGN_EXTRACT:
ba123b0d
JG
4928 /* Bit-field insertion. Strip any redundant widening of
4929 the RHS to meet the width of the target. */
43e9d192
IB
4930 if (GET_CODE (op1) == SUBREG)
4931 op1 = SUBREG_REG (op1);
4932 if ((GET_CODE (op1) == ZERO_EXTEND
4933 || GET_CODE (op1) == SIGN_EXTEND)
4934 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4935 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4936 >= INTVAL (XEXP (op0, 1))))
4937 op1 = XEXP (op1, 0);
ba123b0d
JG
4938
4939 if (CONST_INT_P (op1))
4940 {
4941 /* MOV immediate is assumed to always be cheap. */
4942 *cost = COSTS_N_INSNS (1);
4943 }
4944 else
4945 {
4946 /* BFM. */
4947 if (speed)
4948 *cost += extra_cost->alu.bfi;
4949 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
4950 }
4951
43e9d192
IB
4952 return true;
4953
4954 default:
ba123b0d
JG
4955 /* We can't make sense of this, assume default cost. */
4956 *cost = COSTS_N_INSNS (1);
43e9d192
IB
4957 break;
4958 }
4959 return false;
4960
9dfc162c
JG
4961 case CONST_INT:
4962 /* If an instruction can incorporate a constant within the
4963 instruction, the instruction's expression avoids calling
4964 rtx_cost() on the constant. If rtx_cost() is called on a
4965 constant, then it is usually because the constant must be
4966 moved into a register by one or more instructions.
4967
4968 The exception is constant 0, which can be expressed
4969 as XZR/WZR and is therefore free; the one case that is not
4970 free is (set (reg) (const0_rtx)), where we must cost
4971 the move. However, we can catch that when we cost the SET, so
4972 we don't need to consider that here. */
4973 if (x == const0_rtx)
4974 *cost = 0;
4975 else
4976 {
4977 /* To an approximation, building any other constant is
4978 proportionally expensive to the number of instructions
4979 required to build that constant. This is true whether we
4980 are compiling for SPEED or otherwise. */
4981 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
4982 INTVAL (x),
4983 false));
4984 }
4985 return true;
4986
4987 case CONST_DOUBLE:
4988 if (speed)
4989 {
4990 /* mov[df,sf]_aarch64. */
4991 if (aarch64_float_const_representable_p (x))
4992 /* FMOV (scalar immediate). */
4993 *cost += extra_cost->fp[mode == DFmode].fpconst;
4994 else if (!aarch64_float_const_zero_rtx_p (x))
4995 {
4996 /* This will be a load from memory. */
4997 if (mode == DFmode)
4998 *cost += extra_cost->ldst.loadd;
4999 else
5000 *cost += extra_cost->ldst.loadf;
5001 }
5002 else
5003 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5004 or MOV v0.s[0], wzr - neither of which is modeled by the
5005 cost tables. Just use the default cost. */
5006 {
5007 }
5008 }
5009
5010 return true;
5011
43e9d192
IB
5012 case MEM:
5013 if (speed)
2961177e
JG
5014 {
5015 /* For loads we want the base cost of a load, plus an
5016 approximation for the additional cost of the addressing
5017 mode. */
5018 rtx address = XEXP (x, 0);
5019 if (GET_MODE_CLASS (mode) == MODE_INT)
5020 *cost += extra_cost->ldst.load;
5021 else if (mode == SFmode)
5022 *cost += extra_cost->ldst.loadf;
5023 else if (mode == DFmode)
5024 *cost += extra_cost->ldst.loadd;
5025
5026 *cost +=
5027 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5028 0, speed));
5029 }
43e9d192
IB
5030
5031 return true;
5032
5033 case NEG:
4745e701
JG
5034 op0 = XEXP (x, 0);
5035
5036 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5037 {
5038 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5039 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5040 {
5041 /* CSETM. */
5042 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5043 return true;
5044 }
5045
5046 /* Cost this as SUB wzr, X. */
5047 op0 = CONST0_RTX (GET_MODE (x));
5048 op1 = XEXP (x, 0);
5049 goto cost_minus;
5050 }
5051
5052 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5053 {
5054 /* Support (neg(fma...)) as a single instruction only if
5055 sign of zeros is unimportant. This matches the decision
5056 making in aarch64.md. */
5057 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5058 {
5059 /* FNMADD. */
5060 *cost = rtx_cost (op0, NEG, 0, speed);
5061 return true;
5062 }
5063 if (speed)
5064 /* FNEG. */
5065 *cost += extra_cost->fp[mode == DFmode].neg;
5066 return false;
5067 }
5068
5069 return false;
43e9d192
IB
5070
5071 case COMPARE:
5072 op0 = XEXP (x, 0);
5073 op1 = XEXP (x, 1);
5074
5075 if (op1 == const0_rtx
5076 && GET_CODE (op0) == AND)
5077 {
5078 x = op0;
5079 goto cost_logic;
5080 }
5081
a8eecd00
JG
5082 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5083 {
5084 /* TODO: A write to the CC flags possibly costs extra, this
5085 needs encoding in the cost tables. */
5086
5087 /* CC_ZESWPmode supports zero extend for free. */
5088 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5089 op0 = XEXP (op0, 0);
5090
5091 /* ANDS. */
5092 if (GET_CODE (op0) == AND)
5093 {
5094 x = op0;
5095 goto cost_logic;
5096 }
5097
5098 if (GET_CODE (op0) == PLUS)
5099 {
5100 /* ADDS (and CMN alias). */
5101 x = op0;
5102 goto cost_plus;
5103 }
5104
5105 if (GET_CODE (op0) == MINUS)
5106 {
5107 /* SUBS. */
5108 x = op0;
5109 goto cost_minus;
5110 }
5111
5112 if (GET_CODE (op1) == NEG)
5113 {
5114 /* CMN. */
5115 if (speed)
5116 *cost += extra_cost->alu.arith;
5117
5118 *cost += rtx_cost (op0, COMPARE, 0, speed);
5119 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5120 return true;
5121 }
5122
5123 /* CMP.
5124
5125 Compare can freely swap the order of operands, and
5126 canonicalization puts the more complex operation first.
5127 But the integer MINUS logic expects the shift/extend
5128 operation in op1. */
5129 if (! (REG_P (op0)
5130 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5131 {
5132 op0 = XEXP (x, 1);
5133 op1 = XEXP (x, 0);
5134 }
5135 goto cost_minus;
5136 }
5137
5138 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5139 {
5140 /* FCMP. */
5141 if (speed)
5142 *cost += extra_cost->fp[mode == DFmode].compare;
5143
5144 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5145 {
5146 /* FCMP supports constant 0.0 for no extra cost. */
5147 return true;
5148 }
5149 return false;
5150 }
5151
5152 return false;
43e9d192
IB
5153
5154 case MINUS:
4745e701
JG
5155 {
5156 op0 = XEXP (x, 0);
5157 op1 = XEXP (x, 1);
5158
5159cost_minus:
5160 /* Detect valid immediates. */
5161 if ((GET_MODE_CLASS (mode) == MODE_INT
5162 || (GET_MODE_CLASS (mode) == MODE_CC
5163 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5164 && CONST_INT_P (op1)
5165 && aarch64_uimm12_shift (INTVAL (op1)))
5166 {
5167 *cost += rtx_cost (op0, MINUS, 0, speed);
43e9d192 5168
4745e701
JG
5169 if (speed)
5170 /* SUB(S) (immediate). */
5171 *cost += extra_cost->alu.arith;
5172 return true;
5173
5174 }
5175
7cc2145f
JG
5176 /* Look for SUB (extended register). */
5177 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5178 {
5179 if (speed)
5180 *cost += extra_cost->alu.arith_shift;
5181
5182 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5183 (enum rtx_code) GET_CODE (op1),
5184 0, speed);
5185 return true;
5186 }
5187
4745e701
JG
5188 rtx new_op1 = aarch64_strip_extend (op1);
5189
5190 /* Cost this as an FMA-alike operation. */
5191 if ((GET_CODE (new_op1) == MULT
5192 || GET_CODE (new_op1) == ASHIFT)
5193 && code != COMPARE)
5194 {
5195 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5196 (enum rtx_code) code,
5197 speed);
43e9d192 5198 *cost += rtx_cost (op0, MINUS, 0, speed);
4745e701
JG
5199 return true;
5200 }
43e9d192 5201
4745e701 5202 *cost += rtx_cost (new_op1, MINUS, 1, speed);
43e9d192 5203
4745e701
JG
5204 if (speed)
5205 {
5206 if (GET_MODE_CLASS (mode) == MODE_INT)
5207 /* SUB(S). */
5208 *cost += extra_cost->alu.arith;
5209 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5210 /* FSUB. */
5211 *cost += extra_cost->fp[mode == DFmode].addsub;
5212 }
5213 return true;
5214 }
43e9d192
IB
5215
5216 case PLUS:
4745e701
JG
5217 {
5218 rtx new_op0;
43e9d192 5219
4745e701
JG
5220 op0 = XEXP (x, 0);
5221 op1 = XEXP (x, 1);
43e9d192 5222
a8eecd00 5223cost_plus:
4745e701
JG
5224 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5225 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5226 {
5227 /* CSINC. */
5228 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5229 *cost += rtx_cost (op1, PLUS, 1, speed);
5230 return true;
5231 }
43e9d192 5232
4745e701
JG
5233 if (GET_MODE_CLASS (mode) == MODE_INT
5234 && CONST_INT_P (op1)
5235 && aarch64_uimm12_shift (INTVAL (op1)))
5236 {
5237 *cost += rtx_cost (op0, PLUS, 0, speed);
43e9d192 5238
4745e701
JG
5239 if (speed)
5240 /* ADD (immediate). */
5241 *cost += extra_cost->alu.arith;
5242 return true;
5243 }
5244
7cc2145f
JG
5245 /* Look for ADD (extended register). */
5246 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5247 {
5248 if (speed)
5249 *cost += extra_cost->alu.arith_shift;
5250
5251 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5252 (enum rtx_code) GET_CODE (op0),
5253 0, speed);
5254 return true;
5255 }
5256
4745e701
JG
5257 /* Strip any extend, leave shifts behind as we will
5258 cost them through mult_cost. */
5259 new_op0 = aarch64_strip_extend (op0);
5260
5261 if (GET_CODE (new_op0) == MULT
5262 || GET_CODE (new_op0) == ASHIFT)
5263 {
5264 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5265 speed);
5266 *cost += rtx_cost (op1, PLUS, 1, speed);
5267 return true;
5268 }
5269
5270 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5271 + rtx_cost (op1, PLUS, 1, speed));
5272
5273 if (speed)
5274 {
5275 if (GET_MODE_CLASS (mode) == MODE_INT)
5276 /* ADD. */
5277 *cost += extra_cost->alu.arith;
5278 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5279 /* FADD. */
5280 *cost += extra_cost->fp[mode == DFmode].addsub;
5281 }
5282 return true;
5283 }
43e9d192 5284
18b42b2a
KT
5285 case BSWAP:
5286 *cost = COSTS_N_INSNS (1);
5287
5288 if (speed)
5289 *cost += extra_cost->alu.rev;
5290
5291 return false;
5292
43e9d192 5293 case IOR:
f7d5cf8d
KT
5294 if (aarch_rev16_p (x))
5295 {
5296 *cost = COSTS_N_INSNS (1);
5297
5298 if (speed)
5299 *cost += extra_cost->alu.rev;
5300
5301 return true;
5302 }
5303 /* Fall through. */
43e9d192
IB
5304 case XOR:
5305 case AND:
5306 cost_logic:
5307 op0 = XEXP (x, 0);
5308 op1 = XEXP (x, 1);
5309
268c3b47
JG
5310 if (code == AND
5311 && GET_CODE (op0) == MULT
5312 && CONST_INT_P (XEXP (op0, 1))
5313 && CONST_INT_P (op1)
5314 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5315 INTVAL (op1)) != 0)
5316 {
5317 /* This is a UBFM/SBFM. */
5318 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5319 if (speed)
5320 *cost += extra_cost->alu.bfx;
5321 return true;
5322 }
5323
43e9d192
IB
5324 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5325 {
268c3b47
JG
5326 /* We possibly get the immediate for free, this is not
5327 modelled. */
43e9d192
IB
5328 if (CONST_INT_P (op1)
5329 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5330 {
268c3b47
JG
5331 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5332
5333 if (speed)
5334 *cost += extra_cost->alu.logical;
5335
5336 return true;
43e9d192
IB
5337 }
5338 else
5339 {
268c3b47
JG
5340 rtx new_op0 = op0;
5341
5342 /* Handle ORN, EON, or BIC. */
43e9d192
IB
5343 if (GET_CODE (op0) == NOT)
5344 op0 = XEXP (op0, 0);
268c3b47
JG
5345
5346 new_op0 = aarch64_strip_shift (op0);
5347
5348 /* If we had a shift on op0 then this is a logical-shift-
5349 by-register/immediate operation. Otherwise, this is just
5350 a logical operation. */
5351 if (speed)
5352 {
5353 if (new_op0 != op0)
5354 {
5355 /* Shift by immediate. */
5356 if (CONST_INT_P (XEXP (op0, 1)))
5357 *cost += extra_cost->alu.log_shift;
5358 else
5359 *cost += extra_cost->alu.log_shift_reg;
5360 }
5361 else
5362 *cost += extra_cost->alu.logical;
5363 }
5364
5365 /* In both cases we want to cost both operands. */
5366 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5367 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5368
5369 return true;
43e9d192 5370 }
43e9d192
IB
5371 }
5372 return false;
5373
268c3b47
JG
5374 case NOT:
5375 /* MVN. */
5376 if (speed)
5377 *cost += extra_cost->alu.logical;
5378
5379 /* The logical instruction could have the shifted register form,
5380 but the cost is the same if the shift is processed as a separate
5381 instruction, so we don't bother with it here. */
5382 return false;
5383
43e9d192 5384 case ZERO_EXTEND:
b1685e62
JG
5385
5386 op0 = XEXP (x, 0);
5387 /* If a value is written in SI mode, then zero extended to DI
5388 mode, the operation will in general be free, as a write to
5389 a 'w' register implicitly zeroes the upper bits of an 'x'
5390 register. However, if this is
5391
5392 (set (reg) (zero_extend (reg)))
5393
5394 we must cost the explicit register move. */
5395 if (mode == DImode
5396 && GET_MODE (op0) == SImode
5397 && outer == SET)
5398 {
5399 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5400
5401 if (!op_cost && speed)
5402 /* MOV. */
5403 *cost += extra_cost->alu.extend;
5404 else
5405 /* Free, the cost is that of the SI mode operation. */
5406 *cost = op_cost;
5407
5408 return true;
5409 }
5410 else if (MEM_P (XEXP (x, 0)))
43e9d192 5411 {
b1685e62
JG
5412 /* All loads can zero extend to any size for free. */
5413 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
43e9d192
IB
5414 return true;
5415 }
b1685e62
JG
5416
5417 /* UXTB/UXTH. */
5418 if (speed)
5419 *cost += extra_cost->alu.extend;
5420
43e9d192
IB
5421 return false;
5422
5423 case SIGN_EXTEND:
b1685e62 5424 if (MEM_P (XEXP (x, 0)))
43e9d192 5425 {
b1685e62
JG
5426 /* LDRSH. */
5427 if (speed)
5428 {
5429 rtx address = XEXP (XEXP (x, 0), 0);
5430 *cost += extra_cost->ldst.load_sign_extend;
5431
5432 *cost +=
5433 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5434 0, speed));
5435 }
43e9d192
IB
5436 return true;
5437 }
b1685e62
JG
5438
5439 if (speed)
5440 *cost += extra_cost->alu.extend;
43e9d192
IB
5441 return false;
5442
ba0cfa17
JG
5443 case ASHIFT:
5444 op0 = XEXP (x, 0);
5445 op1 = XEXP (x, 1);
5446
5447 if (CONST_INT_P (op1))
5448 {
5449 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
5450 aliases. */
5451 if (speed)
5452 *cost += extra_cost->alu.shift;
5453
5454 /* We can incorporate zero/sign extend for free. */
5455 if (GET_CODE (op0) == ZERO_EXTEND
5456 || GET_CODE (op0) == SIGN_EXTEND)
5457 op0 = XEXP (op0, 0);
5458
5459 *cost += rtx_cost (op0, ASHIFT, 0, speed);
5460 return true;
5461 }
5462 else
5463 {
5464 /* LSLV. */
5465 if (speed)
5466 *cost += extra_cost->alu.shift_reg;
5467
5468 return false; /* All arguments need to be in registers. */
5469 }
5470
43e9d192 5471 case ROTATE:
43e9d192
IB
5472 case ROTATERT:
5473 case LSHIFTRT:
43e9d192 5474 case ASHIFTRT:
ba0cfa17
JG
5475 op0 = XEXP (x, 0);
5476 op1 = XEXP (x, 1);
43e9d192 5477
ba0cfa17
JG
5478 if (CONST_INT_P (op1))
5479 {
5480 /* ASR (immediate) and friends. */
5481 if (speed)
5482 *cost += extra_cost->alu.shift;
43e9d192 5483
ba0cfa17
JG
5484 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5485 return true;
5486 }
5487 else
5488 {
5489
5490 /* ASR (register) and friends. */
5491 if (speed)
5492 *cost += extra_cost->alu.shift_reg;
5493
5494 return false; /* All arguments need to be in registers. */
5495 }
43e9d192 5496
909734be
JG
5497 case SYMBOL_REF:
5498
5499 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5500 {
5501 /* LDR. */
5502 if (speed)
5503 *cost += extra_cost->ldst.load;
5504 }
5505 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
5506 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
5507 {
5508 /* ADRP, followed by ADD. */
5509 *cost += COSTS_N_INSNS (1);
5510 if (speed)
5511 *cost += 2 * extra_cost->alu.arith;
5512 }
5513 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
5514 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
5515 {
5516 /* ADR. */
5517 if (speed)
5518 *cost += extra_cost->alu.arith;
5519 }
5520
5521 if (flag_pic)
5522 {
5523 /* One extra load instruction, after accessing the GOT. */
5524 *cost += COSTS_N_INSNS (1);
5525 if (speed)
5526 *cost += extra_cost->ldst.load;
5527 }
43e9d192
IB
5528 return true;
5529
909734be 5530 case HIGH:
43e9d192 5531 case LO_SUM:
909734be
JG
5532 /* ADRP/ADD (immediate). */
5533 if (speed)
5534 *cost += extra_cost->alu.arith;
43e9d192
IB
5535 return true;
5536
5537 case ZERO_EXTRACT:
5538 case SIGN_EXTRACT:
7cc2145f
JG
5539 /* UBFX/SBFX. */
5540 if (speed)
5541 *cost += extra_cost->alu.bfx;
5542
5543 /* We can trust that the immediates used will be correct (there
5544 are no by-register forms), so we need only cost op0. */
5545 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
43e9d192
IB
5546 return true;
5547
5548 case MULT:
4745e701
JG
5549 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5550 /* aarch64_rtx_mult_cost always handles recursion to its
5551 operands. */
5552 return true;
43e9d192
IB
5553
5554 case MOD:
5555 case UMOD:
43e9d192
IB
5556 if (speed)
5557 {
5558 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
73250c4c
KT
5559 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5560 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
43e9d192 5561 else if (GET_MODE (x) == DFmode)
73250c4c
KT
5562 *cost += (extra_cost->fp[1].mult
5563 + extra_cost->fp[1].div);
43e9d192 5564 else if (GET_MODE (x) == SFmode)
73250c4c
KT
5565 *cost += (extra_cost->fp[0].mult
5566 + extra_cost->fp[0].div);
43e9d192
IB
5567 }
5568 return false; /* All arguments need to be in registers. */
5569
5570 case DIV:
5571 case UDIV:
4105fe38 5572 case SQRT:
43e9d192
IB
5573 if (speed)
5574 {
4105fe38
JG
5575 if (GET_MODE_CLASS (mode) == MODE_INT)
5576 /* There is no integer SQRT, so only DIV and UDIV can get
5577 here. */
5578 *cost += extra_cost->mult[mode == DImode].idiv;
5579 else
5580 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
5581 }
5582 return false; /* All arguments need to be in registers. */
5583
a8eecd00
JG
5584 case IF_THEN_ELSE:
5585 op2 = XEXP (x, 2);
5586 op0 = XEXP (x, 0);
5587 op1 = XEXP (x, 1);
5588
5589 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5590 {
5591 /* Conditional branch. */
5592 if (GET_MODE_CLASS (GET_MODE (XEXP (op0, 0))) == MODE_CC)
5593 return true;
5594 else
5595 {
5596 if (GET_CODE (op0) == NE
5597 || GET_CODE (op0) == EQ)
5598 {
5599 rtx inner = XEXP (op0, 0);
5600 rtx comparator = XEXP (op0, 1);
5601
5602 if (comparator == const0_rtx)
5603 {
5604 /* TBZ/TBNZ/CBZ/CBNZ. */
5605 if (GET_CODE (inner) == ZERO_EXTRACT)
5606 /* TBZ/TBNZ. */
5607 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
5608 0, speed);
5609 else
5610 /* CBZ/CBNZ. */
5611 *cost += rtx_cost (inner, GET_CODE (op0), 0, speed);
5612
5613 return true;
5614 }
5615 }
5616 else if (GET_CODE (op0) == LT
5617 || GET_CODE (op0) == GE)
5618 {
5619 rtx comparator = XEXP (op0, 1);
5620
5621 /* TBZ/TBNZ. */
5622 if (comparator == const0_rtx)
5623 return true;
5624 }
5625 }
5626 }
5627 else if (GET_MODE_CLASS (GET_MODE (XEXP (op0, 0))) == MODE_CC)
5628 {
5629 /* It's a conditional operation based on the status flags,
5630 so it must be some flavor of CSEL. */
5631
5632 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5633 if (GET_CODE (op1) == NEG
5634 || GET_CODE (op1) == NOT
5635 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5636 op1 = XEXP (op1, 0);
5637
5638 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
5639 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
5640 return true;
5641 }
5642
5643 /* We don't know what this is, cost all operands. */
5644 return false;
5645
5646 case EQ:
5647 case NE:
5648 case GT:
5649 case GTU:
5650 case LT:
5651 case LTU:
5652 case GE:
5653 case GEU:
5654 case LE:
5655 case LEU:
5656
5657 return false; /* All arguments must be in registers. */
5658
b292109f
JG
5659 case FMA:
5660 op0 = XEXP (x, 0);
5661 op1 = XEXP (x, 1);
5662 op2 = XEXP (x, 2);
5663
5664 if (speed)
5665 *cost += extra_cost->fp[mode == DFmode].fma;
5666
5667 /* FMSUB, FNMADD, and FNMSUB are free. */
5668 if (GET_CODE (op0) == NEG)
5669 op0 = XEXP (op0, 0);
5670
5671 if (GET_CODE (op2) == NEG)
5672 op2 = XEXP (op2, 0);
5673
5674 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5675 and the by-element operand as operand 0. */
5676 if (GET_CODE (op1) == NEG)
5677 op1 = XEXP (op1, 0);
5678
5679 /* Catch vector-by-element operations. The by-element operand can
5680 either be (vec_duplicate (vec_select (x))) or just
5681 (vec_select (x)), depending on whether we are multiplying by
5682 a vector or a scalar.
5683
5684 Canonicalization is not very good in these cases, FMA4 will put the
5685 by-element operand as operand 0, FNMA4 will have it as operand 1. */
5686 if (GET_CODE (op0) == VEC_DUPLICATE)
5687 op0 = XEXP (op0, 0);
5688 else if (GET_CODE (op1) == VEC_DUPLICATE)
5689 op1 = XEXP (op1, 0);
5690
5691 if (GET_CODE (op0) == VEC_SELECT)
5692 op0 = XEXP (op0, 0);
5693 else if (GET_CODE (op1) == VEC_SELECT)
5694 op1 = XEXP (op1, 0);
5695
5696 /* If the remaining parameters are not registers,
5697 get the cost to put them into registers. */
5698 *cost += rtx_cost (op0, FMA, 0, speed);
5699 *cost += rtx_cost (op1, FMA, 1, speed);
5700 *cost += rtx_cost (op2, FMA, 2, speed);
5701 return true;
5702
5703 case FLOAT_EXTEND:
5704 if (speed)
5705 *cost += extra_cost->fp[mode == DFmode].widen;
5706 return false;
5707
5708 case FLOAT_TRUNCATE:
5709 if (speed)
5710 *cost += extra_cost->fp[mode == DFmode].narrow;
5711 return false;
5712
5713 case ABS:
5714 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5715 {
5716 /* FABS and FNEG are analogous. */
5717 if (speed)
5718 *cost += extra_cost->fp[mode == DFmode].neg;
5719 }
5720 else
5721 {
5722 /* Integer ABS will either be split to
5723 two arithmetic instructions, or will be an ABS
5724 (scalar), which we don't model. */
5725 *cost = COSTS_N_INSNS (2);
5726 if (speed)
5727 *cost += 2 * extra_cost->alu.arith;
5728 }
5729 return false;
5730
5731 case SMAX:
5732 case SMIN:
5733 if (speed)
5734 {
5735 /* FMAXNM/FMINNM/FMAX/FMIN.
5736 TODO: This may not be accurate for all implementations, but
5737 we do not model this in the cost tables. */
5738 *cost += extra_cost->fp[mode == DFmode].addsub;
5739 }
5740 return false;
5741
fb620c4a
JG
5742 case TRUNCATE:
5743
5744 /* Decompose <su>muldi3_highpart. */
5745 if (/* (truncate:DI */
5746 mode == DImode
5747 /* (lshiftrt:TI */
5748 && GET_MODE (XEXP (x, 0)) == TImode
5749 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5750 /* (mult:TI */
5751 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5752 /* (ANY_EXTEND:TI (reg:DI))
5753 (ANY_EXTEND:TI (reg:DI))) */
5754 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5755 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
5756 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
5757 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
5758 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
5759 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
5760 /* (const_int 64) */
5761 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5762 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
5763 {
5764 /* UMULH/SMULH. */
5765 if (speed)
5766 *cost += extra_cost->mult[mode == DImode].extend;
5767 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
5768 MULT, 0, speed);
5769 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
5770 MULT, 1, speed);
5771 return true;
5772 }
5773
5774 /* Fall through. */
43e9d192 5775 default:
88d4fbcf
JG
5776 if (dump_file && (dump_flags & TDF_DETAILS))
5777 fprintf (dump_file,
5778 "\nFailed to cost RTX. Assuming default cost.\n");
5779
5780 return true;
43e9d192
IB
5781 }
5782 return false;
5783}
5784
0ee859b5
JG
5785/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5786 calculated for X. This cost is stored in *COST. Returns true
5787 if the total cost of X was calculated. */
5788static bool
5789aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5790 int param, int *cost, bool speed)
5791{
5792 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5793
5794 if (dump_file && (dump_flags & TDF_DETAILS))
5795 {
5796 print_rtl_single (dump_file, x);
5797 fprintf (dump_file, "\n%s cost: %d (%s)\n",
5798 speed ? "Hot" : "Cold",
5799 *cost, result ? "final" : "partial");
5800 }
5801
5802 return result;
5803}
5804
43e9d192 5805static int
8a3a7e67
RH
5806aarch64_register_move_cost (enum machine_mode mode,
5807 reg_class_t from_i, reg_class_t to_i)
43e9d192 5808{
8a3a7e67
RH
5809 enum reg_class from = (enum reg_class) from_i;
5810 enum reg_class to = (enum reg_class) to_i;
43e9d192
IB
5811 const struct cpu_regmove_cost *regmove_cost
5812 = aarch64_tune_params->regmove_cost;
5813
6ee70f81
AP
5814 /* Moving between GPR and stack cost is the same as GP2GP. */
5815 if ((from == GENERAL_REGS && to == STACK_REG)
5816 || (to == GENERAL_REGS && from == STACK_REG))
5817 return regmove_cost->GP2GP;
5818
5819 /* To/From the stack register, we move via the gprs. */
5820 if (to == STACK_REG || from == STACK_REG)
5821 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5822 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5823
43e9d192
IB
5824 if (from == GENERAL_REGS && to == GENERAL_REGS)
5825 return regmove_cost->GP2GP;
5826 else if (from == GENERAL_REGS)
5827 return regmove_cost->GP2FP;
5828 else if (to == GENERAL_REGS)
5829 return regmove_cost->FP2GP;
5830
5831 /* When AdvSIMD instructions are disabled it is not possible to move
5832 a 128-bit value directly between Q registers. This is handled in
5833 secondary reload. A general register is used as a scratch to move
5834 the upper DI value and the lower DI value is moved directly,
5835 hence the cost is the sum of three moves. */
8a3a7e67 5836 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
43e9d192
IB
5837 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5838
5839 return regmove_cost->FP2FP;
5840}
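/* As a rough worked example of the costs above: a 128-bit move between two
   FP_REGS with AdvSIMD disabled is costed as GP2FP + FP2GP + FP2FP, since
   the upper doubleword has to bounce through a general-purpose scratch
   register, while a move between an FP register and STACK_REG is costed as
   two legs routed through GENERAL_REGS.  */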
5841
5842static int
5843aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5844 reg_class_t rclass ATTRIBUTE_UNUSED,
5845 bool in ATTRIBUTE_UNUSED)
5846{
5847 return aarch64_tune_params->memmov_cost;
5848}
5849
d126a4ae
AP
5850/* Return the number of instructions that can be issued per cycle. */
5851static int
5852aarch64_sched_issue_rate (void)
5853{
5854 return aarch64_tune_params->issue_rate;
5855}
5856
8990e73a
TB
5857/* Vectorizer cost model target hooks. */
5858
5859/* Implement targetm.vectorize.builtin_vectorization_cost. */
5860static int
5861aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5862 tree vectype,
5863 int misalign ATTRIBUTE_UNUSED)
5864{
5865 unsigned elements;
5866
5867 switch (type_of_cost)
5868 {
5869 case scalar_stmt:
5870 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5871
5872 case scalar_load:
5873 return aarch64_tune_params->vec_costs->scalar_load_cost;
5874
5875 case scalar_store:
5876 return aarch64_tune_params->vec_costs->scalar_store_cost;
5877
5878 case vector_stmt:
5879 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5880
5881 case vector_load:
5882 return aarch64_tune_params->vec_costs->vec_align_load_cost;
5883
5884 case vector_store:
5885 return aarch64_tune_params->vec_costs->vec_store_cost;
5886
5887 case vec_to_scalar:
5888 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
5889
5890 case scalar_to_vec:
5891 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
5892
5893 case unaligned_load:
5894 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
5895
5896 case unaligned_store:
5897 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
5898
5899 case cond_branch_taken:
5900 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
5901
5902 case cond_branch_not_taken:
5903 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
5904
5905 case vec_perm:
5906 case vec_promote_demote:
5907 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5908
5909 case vec_construct:
5910 elements = TYPE_VECTOR_SUBPARTS (vectype);
5911 return elements / 2 + 1;
5912
5913 default:
5914 gcc_unreachable ();
5915 }
5916}
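/* For example, under the vec_construct formula above, building a V4SF vector
   element by element (TYPE_VECTOR_SUBPARTS == 4) is costed as
   4 / 2 + 1 = 3 units.  */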
5917
5918/* Implement targetm.vectorize.add_stmt_cost. */
5919static unsigned
5920aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5921 struct _stmt_vec_info *stmt_info, int misalign,
5922 enum vect_cost_model_location where)
5923{
5924 unsigned *cost = (unsigned *) data;
5925 unsigned retval = 0;
5926
5927 if (flag_vect_cost_model)
5928 {
5929 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5930 int stmt_cost =
5931 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
5932
5933 /* Statements in an inner loop relative to the loop being
5934 vectorized are weighted more heavily. The value here is
5935 a function (linear for now) of the loop nest level. */
5936 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5937 {
5938 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5939 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
5940 unsigned nest_level = loop_depth (loop);
5941
5942 count *= nest_level;
5943 }
5944
5945 retval = (unsigned) (count * stmt_cost);
5946 cost[where] += retval;
5947 }
5948
5949 return retval;
5950}
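/* To illustrate the weighting above: a statement of cost 1 that lives in a
   loop nested inside the loop being vectorized is scaled by that inner
   loop's depth (e.g. doubled at loop_depth 2), whereas statements in the
   vectorized loop itself are counted unscaled.  */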
5951
43e9d192
IB
5952static void initialize_aarch64_code_model (void);
5953
5954/* Parse the architecture extension string. */
5955
5956static void
5957aarch64_parse_extension (char *str)
5958{
5959 /* The extension string is parsed left to right. */
5960 const struct aarch64_option_extension *opt = NULL;
5961
5962 /* Flag to say whether we are adding or removing an extension. */
5963 int adding_ext = -1;
5964
5965 while (str != NULL && *str != 0)
5966 {
5967 char *ext;
5968 size_t len;
5969
5970 str++;
5971 ext = strchr (str, '+');
5972
5973 if (ext != NULL)
5974 len = ext - str;
5975 else
5976 len = strlen (str);
5977
5978 if (len >= 2 && strncmp (str, "no", 2) == 0)
5979 {
5980 adding_ext = 0;
5981 len -= 2;
5982 str += 2;
5983 }
5984 else if (len > 0)
5985 adding_ext = 1;
5986
5987 if (len == 0)
5988 {
5989 error ("missing feature modifier after %qs", "+no");
5990 return;
5991 }
5992
5993 /* Scan over the extensions table trying to find an exact match. */
5994 for (opt = all_extensions; opt->name != NULL; opt++)
5995 {
5996 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5997 {
5998 /* Add or remove the extension. */
5999 if (adding_ext)
6000 aarch64_isa_flags |= opt->flags_on;
6001 else
6002 aarch64_isa_flags &= ~(opt->flags_off);
6003 break;
6004 }
6005 }
6006
6007 if (opt->name == NULL)
6008 {
6009 /* Extension not found in list. */
6010 error ("unknown feature modifier %qs", str);
6011 return;
6012 }
6013
6014 str = ext;
6015 };
6016
6017 return;
6018}
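/* As a worked example of the parsing above, consider the string
   "+crypto+nofp" (an illustrative combination): the first iteration picks
   out "crypto" and ORs that entry's flags_on into aarch64_isa_flags; the
   second sees the "no" prefix, so it looks up "fp" and instead clears that
   entry's flags_off bits.  */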
6019
6020/* Parse the ARCH string. */
6021
6022static void
6023aarch64_parse_arch (void)
6024{
6025 char *ext;
6026 const struct processor *arch;
6027 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6028 size_t len;
6029
6030 strcpy (str, aarch64_arch_string);
6031
6032 ext = strchr (str, '+');
6033
6034 if (ext != NULL)
6035 len = ext - str;
6036 else
6037 len = strlen (str);
6038
6039 if (len == 0)
6040 {
6041 error ("missing arch name in -march=%qs", str);
6042 return;
6043 }
6044
6045 /* Loop through the list of supported ARCHs to find a match. */
6046 for (arch = all_architectures; arch->name != NULL; arch++)
6047 {
6048 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6049 {
6050 selected_arch = arch;
6051 aarch64_isa_flags = selected_arch->flags;
ffee7aa9
JG
6052
6053 if (!selected_cpu)
6054 selected_cpu = &all_cores[selected_arch->core];
43e9d192
IB
6055
6056 if (ext != NULL)
6057 {
6058 /* ARCH string contains at least one extension. */
6059 aarch64_parse_extension (ext);
6060 }
6061
ffee7aa9
JG
6062 if (strcmp (selected_arch->arch, selected_cpu->arch))
6063 {
6064 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6065 selected_cpu->name, selected_arch->name);
6066 }
6067
43e9d192
IB
6068 return;
6069 }
6070 }
6071
6072 /* ARCH name not found in list. */
6073 error ("unknown value %qs for -march", str);
6074 return;
6075}
6076
6077/* Parse the CPU string. */
6078
6079static void
6080aarch64_parse_cpu (void)
6081{
6082 char *ext;
6083 const struct processor *cpu;
6084 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6085 size_t len;
6086
6087 strcpy (str, aarch64_cpu_string);
6088
6089 ext = strchr (str, '+');
6090
6091 if (ext != NULL)
6092 len = ext - str;
6093 else
6094 len = strlen (str);
6095
6096 if (len == 0)
6097 {
6098 error ("missing cpu name in -mcpu=%qs", str);
6099 return;
6100 }
6101
6102 /* Loop through the list of supported CPUs to find a match. */
6103 for (cpu = all_cores; cpu->name != NULL; cpu++)
6104 {
6105 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6106 {
6107 selected_cpu = cpu;
192ed1dd 6108 selected_tune = cpu;
43e9d192
IB
6109 aarch64_isa_flags = selected_cpu->flags;
6110
6111 if (ext != NULL)
6112 {
6113 /* CPU string contains at least one extension. */
6114 aarch64_parse_extension (ext);
6115 }
6116
6117 return;
6118 }
6119 }
6120
6121 /* CPU name not found in list. */
6122 error ("unknown value %qs for -mcpu", str);
6123 return;
6124}
6125
6126/* Parse the TUNE string. */
6127
6128static void
6129aarch64_parse_tune (void)
6130{
6131 const struct processor *cpu;
6132 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6133 strcpy (str, aarch64_tune_string);
6134
6135 /* Loop through the list of supported CPUs to find a match. */
6136 for (cpu = all_cores; cpu->name != NULL; cpu++)
6137 {
6138 if (strcmp (cpu->name, str) == 0)
6139 {
6140 selected_tune = cpu;
6141 return;
6142 }
6143 }
6144
6145 /* CPU name not found in list. */
6146 error ("unknown value %qs for -mtune", str);
6147 return;
6148}
6149
6150
6151/* Implement TARGET_OPTION_OVERRIDE. */
6152
6153static void
6154aarch64_override_options (void)
6155{
ffee7aa9
JG
6156 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6157 If either of -march or -mtune is given, they override their
6158 respective component of -mcpu.
43e9d192 6159
ffee7aa9
JG
 6160 So, first parse AARCH64_CPU_STRING, then the others. Be careful
 6161 with -march: if -mcpu is not present on the command line, -march
 6162 must set a sensible default CPU. */
6163 if (aarch64_cpu_string)
43e9d192 6164 {
ffee7aa9 6165 aarch64_parse_cpu ();
43e9d192
IB
6166 }
6167
ffee7aa9 6168 if (aarch64_arch_string)
43e9d192 6169 {
ffee7aa9 6170 aarch64_parse_arch ();
43e9d192
IB
6171 }
6172
6173 if (aarch64_tune_string)
6174 {
6175 aarch64_parse_tune ();
6176 }
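  /* For instance (an illustrative combination), "-mcpu=cortex-a57
     -mtune=cortex-a53" first selects the CPU, its architecture and its ISA
     flags from cortex-a57, and the subsequent -mtune parse overrides only
     the tuning target, leaving the ISA selection untouched.  */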
6177
63892fa2
KV
6178#ifndef HAVE_AS_MABI_OPTION
6179 /* The compiler may have been configured with 2.23.* binutils, which does
6180 not have support for ILP32. */
6181 if (TARGET_ILP32)
6182 error ("Assembler does not support -mabi=ilp32");
6183#endif
6184
43e9d192
IB
6185 initialize_aarch64_code_model ();
6186
6187 aarch64_build_bitmask_table ();
6188
6189 /* This target defaults to strict volatile bitfields. */
6190 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6191 flag_strict_volatile_bitfields = 1;
6192
6193 /* If the user did not specify a processor, choose the default
6194 one for them. This will be the CPU set during configuration using
a3cd0246 6195 --with-cpu, otherwise it is "generic". */
43e9d192
IB
6196 if (!selected_cpu)
6197 {
6198 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6199 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6200 }
6201
6202 gcc_assert (selected_cpu);
6203
6204 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
6205 if (!selected_tune)
6206 selected_tune = &all_cores[selected_cpu->core];
6207
6208 aarch64_tune_flags = selected_tune->flags;
6209 aarch64_tune = selected_tune->core;
6210 aarch64_tune_params = selected_tune->tune;
6211
6212 aarch64_override_options_after_change ();
6213}
6214
6215/* Implement targetm.override_options_after_change. */
6216
6217static void
6218aarch64_override_options_after_change (void)
6219{
0b7f8166
MS
6220 if (flag_omit_frame_pointer)
6221 flag_omit_leaf_frame_pointer = false;
6222 else if (flag_omit_leaf_frame_pointer)
6223 flag_omit_frame_pointer = true;
43e9d192
IB
6224}
6225
6226static struct machine_function *
6227aarch64_init_machine_status (void)
6228{
6229 struct machine_function *machine;
766090c2 6230 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
6231 return machine;
6232}
6233
6234void
6235aarch64_init_expanders (void)
6236{
6237 init_machine_status = aarch64_init_machine_status;
6238}
6239
6240/* A checking mechanism for the implementation of the various code models. */
6241static void
6242initialize_aarch64_code_model (void)
6243{
6244 if (flag_pic)
6245 {
6246 switch (aarch64_cmodel_var)
6247 {
6248 case AARCH64_CMODEL_TINY:
6249 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6250 break;
6251 case AARCH64_CMODEL_SMALL:
6252 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6253 break;
6254 case AARCH64_CMODEL_LARGE:
6255 sorry ("code model %qs with -f%s", "large",
6256 flag_pic > 1 ? "PIC" : "pic");
6257 default:
6258 gcc_unreachable ();
6259 }
6260 }
6261 else
6262 aarch64_cmodel = aarch64_cmodel_var;
6263}
6264
6265/* Return true if SYMBOL_REF X binds locally. */
6266
6267static bool
6268aarch64_symbol_binds_local_p (const_rtx x)
6269{
6270 return (SYMBOL_REF_DECL (x)
6271 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6272 : SYMBOL_REF_LOCAL_P (x));
6273}
6274
6275/* Return true if SYMBOL_REF X is thread local */
6276static bool
6277aarch64_tls_symbol_p (rtx x)
6278{
6279 if (! TARGET_HAVE_TLS)
6280 return false;
6281
6282 if (GET_CODE (x) != SYMBOL_REF)
6283 return false;
6284
6285 return SYMBOL_REF_TLS_MODEL (x) != 0;
6286}
6287
6288/* Classify a TLS symbol into one of the TLS kinds. */
6289enum aarch64_symbol_type
6290aarch64_classify_tls_symbol (rtx x)
6291{
6292 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6293
6294 switch (tls_kind)
6295 {
6296 case TLS_MODEL_GLOBAL_DYNAMIC:
6297 case TLS_MODEL_LOCAL_DYNAMIC:
6298 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6299
6300 case TLS_MODEL_INITIAL_EXEC:
6301 return SYMBOL_SMALL_GOTTPREL;
6302
6303 case TLS_MODEL_LOCAL_EXEC:
6304 return SYMBOL_SMALL_TPREL;
6305
6306 case TLS_MODEL_EMULATED:
6307 case TLS_MODEL_NONE:
6308 return SYMBOL_FORCE_TO_MEM;
6309
6310 default:
6311 gcc_unreachable ();
6312 }
6313}
6314
6315/* Return the method that should be used to access SYMBOL_REF or
6316 LABEL_REF X in context CONTEXT. */
17f4d4bf 6317
43e9d192
IB
6318enum aarch64_symbol_type
6319aarch64_classify_symbol (rtx x,
6320 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6321{
6322 if (GET_CODE (x) == LABEL_REF)
6323 {
6324 switch (aarch64_cmodel)
6325 {
6326 case AARCH64_CMODEL_LARGE:
6327 return SYMBOL_FORCE_TO_MEM;
6328
6329 case AARCH64_CMODEL_TINY_PIC:
6330 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
6331 return SYMBOL_TINY_ABSOLUTE;
6332
43e9d192
IB
6333 case AARCH64_CMODEL_SMALL_PIC:
6334 case AARCH64_CMODEL_SMALL:
6335 return SYMBOL_SMALL_ABSOLUTE;
6336
6337 default:
6338 gcc_unreachable ();
6339 }
6340 }
6341
17f4d4bf 6342 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 6343 {
4a985a37
MS
6344 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6345 return SYMBOL_FORCE_TO_MEM;
43e9d192
IB
6346
6347 if (aarch64_tls_symbol_p (x))
6348 return aarch64_classify_tls_symbol (x);
6349
17f4d4bf
CSS
6350 switch (aarch64_cmodel)
6351 {
6352 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
6353 if (SYMBOL_REF_WEAK (x))
6354 return SYMBOL_FORCE_TO_MEM;
6355 return SYMBOL_TINY_ABSOLUTE;
6356
17f4d4bf
CSS
6357 case AARCH64_CMODEL_SMALL:
6358 if (SYMBOL_REF_WEAK (x))
6359 return SYMBOL_FORCE_TO_MEM;
6360 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 6361
17f4d4bf 6362 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 6363 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 6364 return SYMBOL_TINY_GOT;
38e6c9a6
MS
6365 return SYMBOL_TINY_ABSOLUTE;
6366
17f4d4bf
CSS
6367 case AARCH64_CMODEL_SMALL_PIC:
6368 if (!aarch64_symbol_binds_local_p (x))
6369 return SYMBOL_SMALL_GOT;
6370 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 6371
17f4d4bf
CSS
6372 default:
6373 gcc_unreachable ();
6374 }
43e9d192 6375 }
17f4d4bf 6376
43e9d192
IB
6377 /* By default push everything into the constant pool. */
6378 return SYMBOL_FORCE_TO_MEM;
6379}
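/* For example, under the small code model with -fpic, a SYMBOL_REF that does
   not bind locally is classified as SYMBOL_SMALL_GOT and is reached through
   the GOT, while a locally binding symbol gets SYMBOL_SMALL_ABSOLUTE and can
   typically be materialized directly with an ADRP/ADD pair.  */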
6380
43e9d192
IB
6381bool
6382aarch64_constant_address_p (rtx x)
6383{
6384 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6385}
6386
6387bool
6388aarch64_legitimate_pic_operand_p (rtx x)
6389{
6390 if (GET_CODE (x) == SYMBOL_REF
6391 || (GET_CODE (x) == CONST
6392 && GET_CODE (XEXP (x, 0)) == PLUS
6393 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6394 return false;
6395
6396 return true;
6397}
6398
3520f7cc
JG
 6399/* Return true if X holds either a quarter-precision floating-point
 6400 constant or the floating-point constant +0.0. */
6401static bool
6402aarch64_valid_floating_const (enum machine_mode mode, rtx x)
6403{
6404 if (!CONST_DOUBLE_P (x))
6405 return false;
6406
6407 /* TODO: We could handle moving 0.0 to a TFmode register,
6408 but first we would like to refactor the movtf_aarch64
 6409 to be more amenable to splitting moves properly and to
 6410 gate correctly on TARGET_SIMD. For now, reject all
 6411 constants which are not destined for SFmode or DFmode registers. */
6412 if (!(mode == SFmode || mode == DFmode))
6413 return false;
6414
6415 if (aarch64_float_const_zero_rtx_p (x))
6416 return true;
6417 return aarch64_float_const_representable_p (x);
6418}
6419
43e9d192
IB
6420static bool
6421aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6422{
6423 /* Do not allow vector struct mode constants. We could support
6424 0 and -1 easily, but they need support in aarch64-simd.md. */
6425 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6426 return false;
6427
6428 /* This could probably go away because
6429 we now decompose CONST_INTs according to expand_mov_immediate. */
6430 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 6431 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
6432 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6433 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
6434
6435 if (GET_CODE (x) == HIGH
6436 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6437 return true;
6438
6439 return aarch64_constant_address_p (x);
6440}
6441
a5bc806c 6442rtx
43e9d192
IB
6443aarch64_load_tp (rtx target)
6444{
6445 if (!target
6446 || GET_MODE (target) != Pmode
6447 || !register_operand (target, Pmode))
6448 target = gen_reg_rtx (Pmode);
6449
6450 /* Can return in any reg. */
6451 emit_insn (gen_aarch64_load_tp_hard (target));
6452 return target;
6453}
6454
43e9d192
IB
6455/* On AAPCS systems, this is the "struct __va_list". */
6456static GTY(()) tree va_list_type;
6457
6458/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6459 Return the type to use as __builtin_va_list.
6460
6461 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6462
6463 struct __va_list
6464 {
6465 void *__stack;
6466 void *__gr_top;
6467 void *__vr_top;
6468 int __gr_offs;
6469 int __vr_offs;
6470 }; */
6471
6472static tree
6473aarch64_build_builtin_va_list (void)
6474{
6475 tree va_list_name;
6476 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6477
6478 /* Create the type. */
6479 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6480 /* Give it the required name. */
6481 va_list_name = build_decl (BUILTINS_LOCATION,
6482 TYPE_DECL,
6483 get_identifier ("__va_list"),
6484 va_list_type);
6485 DECL_ARTIFICIAL (va_list_name) = 1;
6486 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 6487 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
6488
6489 /* Create the fields. */
6490 f_stack = build_decl (BUILTINS_LOCATION,
6491 FIELD_DECL, get_identifier ("__stack"),
6492 ptr_type_node);
6493 f_grtop = build_decl (BUILTINS_LOCATION,
6494 FIELD_DECL, get_identifier ("__gr_top"),
6495 ptr_type_node);
6496 f_vrtop = build_decl (BUILTINS_LOCATION,
6497 FIELD_DECL, get_identifier ("__vr_top"),
6498 ptr_type_node);
6499 f_groff = build_decl (BUILTINS_LOCATION,
6500 FIELD_DECL, get_identifier ("__gr_offs"),
6501 integer_type_node);
6502 f_vroff = build_decl (BUILTINS_LOCATION,
6503 FIELD_DECL, get_identifier ("__vr_offs"),
6504 integer_type_node);
6505
6506 DECL_ARTIFICIAL (f_stack) = 1;
6507 DECL_ARTIFICIAL (f_grtop) = 1;
6508 DECL_ARTIFICIAL (f_vrtop) = 1;
6509 DECL_ARTIFICIAL (f_groff) = 1;
6510 DECL_ARTIFICIAL (f_vroff) = 1;
6511
6512 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6513 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6514 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6515 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6516 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6517
6518 TYPE_FIELDS (va_list_type) = f_stack;
6519 DECL_CHAIN (f_stack) = f_grtop;
6520 DECL_CHAIN (f_grtop) = f_vrtop;
6521 DECL_CHAIN (f_vrtop) = f_groff;
6522 DECL_CHAIN (f_groff) = f_vroff;
6523
6524 /* Compute its layout. */
6525 layout_type (va_list_type);
6526
6527 return va_list_type;
6528}
6529
6530/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6531static void
6532aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6533{
6534 const CUMULATIVE_ARGS *cum;
6535 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6536 tree stack, grtop, vrtop, groff, vroff;
6537 tree t;
6538 int gr_save_area_size;
6539 int vr_save_area_size;
6540 int vr_offset;
6541
6542 cum = &crtl->args.info;
6543 gr_save_area_size
6544 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6545 vr_save_area_size
6546 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6547
6548 if (TARGET_GENERAL_REGS_ONLY)
6549 {
6550 if (cum->aapcs_nvrn > 0)
6551 sorry ("%qs and floating point or vector arguments",
6552 "-mgeneral-regs-only");
6553 vr_save_area_size = 0;
6554 }
6555
6556 f_stack = TYPE_FIELDS (va_list_type_node);
6557 f_grtop = DECL_CHAIN (f_stack);
6558 f_vrtop = DECL_CHAIN (f_grtop);
6559 f_groff = DECL_CHAIN (f_vrtop);
6560 f_vroff = DECL_CHAIN (f_groff);
6561
6562 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6563 NULL_TREE);
6564 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6565 NULL_TREE);
6566 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6567 NULL_TREE);
6568 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6569 NULL_TREE);
6570 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6571 NULL_TREE);
6572
6573 /* Emit code to initialize STACK, which points to the next varargs stack
6574 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6575 by named arguments. STACK is 8-byte aligned. */
6576 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6577 if (cum->aapcs_stack_size > 0)
6578 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6579 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6580 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6581
6582 /* Emit code to initialize GRTOP, the top of the GR save area.
6583 virtual_incoming_args_rtx should have been 16 byte aligned. */
6584 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6585 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6586 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6587
6588 /* Emit code to initialize VRTOP, the top of the VR save area.
6589 This address is gr_save_area_bytes below GRTOP, rounded
6590 down to the next 16-byte boundary. */
6591 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6592 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6593 STACK_BOUNDARY / BITS_PER_UNIT);
6594
6595 if (vr_offset)
6596 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6597 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6598 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6599
6600 /* Emit code to initialize GROFF, the offset from GRTOP of the
6601 next GPR argument. */
6602 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6603 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6604 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6605
 6606 /* Likewise emit code to initialize VROFF, the offset from VRTOP
6607 of the next VR argument. */
6608 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6609 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6610 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6611}
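/* A rough sketch of the va_list layout established above (higher addresses
   towards the top; exact sizes depend on how many named arguments already
   consumed registers):

     __stack  -> the next anonymous argument passed on the stack
     __gr_top -> one past the general-register save area
                 (__gr_offs starts at -gr_save_area_size)
     __vr_top -> one past the vector-register save area, which lies below
                 the GR area at a 16-byte aligned boundary
                 (__vr_offs starts at -vr_save_area_size)

   va_arg later steps the negative __gr_offs/__vr_offs values towards zero
   and falls back to __stack once they become non-negative.  */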
6612
6613/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6614
6615static tree
6616aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6617 gimple_seq *post_p ATTRIBUTE_UNUSED)
6618{
6619 tree addr;
6620 bool indirect_p;
6621 bool is_ha; /* is HFA or HVA. */
6622 bool dw_align; /* double-word align. */
6623 enum machine_mode ag_mode = VOIDmode;
6624 int nregs;
6625 enum machine_mode mode;
6626
6627 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6628 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6629 HOST_WIDE_INT size, rsize, adjust, align;
6630 tree t, u, cond1, cond2;
6631
6632 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6633 if (indirect_p)
6634 type = build_pointer_type (type);
6635
6636 mode = TYPE_MODE (type);
6637
6638 f_stack = TYPE_FIELDS (va_list_type_node);
6639 f_grtop = DECL_CHAIN (f_stack);
6640 f_vrtop = DECL_CHAIN (f_grtop);
6641 f_groff = DECL_CHAIN (f_vrtop);
6642 f_vroff = DECL_CHAIN (f_groff);
6643
6644 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6645 f_stack, NULL_TREE);
6646 size = int_size_in_bytes (type);
6647 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6648
6649 dw_align = false;
6650 adjust = 0;
6651 if (aarch64_vfp_is_call_or_return_candidate (mode,
6652 type,
6653 &ag_mode,
6654 &nregs,
6655 &is_ha))
6656 {
6657 /* TYPE passed in fp/simd registers. */
6658 if (TARGET_GENERAL_REGS_ONLY)
6659 sorry ("%qs and floating point or vector arguments",
6660 "-mgeneral-regs-only");
6661
6662 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6663 unshare_expr (valist), f_vrtop, NULL_TREE);
6664 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6665 unshare_expr (valist), f_vroff, NULL_TREE);
6666
6667 rsize = nregs * UNITS_PER_VREG;
6668
6669 if (is_ha)
6670 {
6671 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6672 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6673 }
6674 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6675 && size < UNITS_PER_VREG)
6676 {
6677 adjust = UNITS_PER_VREG - size;
6678 }
6679 }
6680 else
6681 {
6682 /* TYPE passed in general registers. */
6683 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6684 unshare_expr (valist), f_grtop, NULL_TREE);
6685 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6686 unshare_expr (valist), f_groff, NULL_TREE);
6687 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6688 nregs = rsize / UNITS_PER_WORD;
6689
6690 if (align > 8)
6691 dw_align = true;
6692
6693 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6694 && size < UNITS_PER_WORD)
6695 {
6696 adjust = UNITS_PER_WORD - size;
6697 }
6698 }
6699
6700 /* Get a local temporary for the field value. */
6701 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6702
6703 /* Emit code to branch if off >= 0. */
6704 t = build2 (GE_EXPR, boolean_type_node, off,
6705 build_int_cst (TREE_TYPE (off), 0));
6706 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6707
6708 if (dw_align)
6709 {
6710 /* Emit: offs = (offs + 15) & -16. */
6711 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6712 build_int_cst (TREE_TYPE (off), 15));
6713 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6714 build_int_cst (TREE_TYPE (off), -16));
6715 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6716 }
6717 else
6718 roundup = NULL;
6719
6720 /* Update ap.__[g|v]r_offs */
6721 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6722 build_int_cst (TREE_TYPE (off), rsize));
6723 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6724
6725 /* String up. */
6726 if (roundup)
6727 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6728
6729 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6730 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6731 build_int_cst (TREE_TYPE (f_off), 0));
6732 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6733
6734 /* String up: make sure the assignment happens before the use. */
6735 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6736 COND_EXPR_ELSE (cond1) = t;
6737
6738 /* Prepare the trees handling the argument that is passed on the stack;
 6739 the top-level node will be stored in ON_STACK. */
6740 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6741 if (align > 8)
6742 {
6743 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6744 t = fold_convert (intDI_type_node, arg);
6745 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6746 build_int_cst (TREE_TYPE (t), 15));
6747 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6748 build_int_cst (TREE_TYPE (t), -16));
6749 t = fold_convert (TREE_TYPE (arg), t);
6750 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6751 }
6752 else
6753 roundup = NULL;
6754 /* Advance ap.__stack */
6755 t = fold_convert (intDI_type_node, arg);
6756 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6757 build_int_cst (TREE_TYPE (t), size + 7));
6758 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6759 build_int_cst (TREE_TYPE (t), -8));
6760 t = fold_convert (TREE_TYPE (arg), t);
6761 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6762 /* String up roundup and advance. */
6763 if (roundup)
6764 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6765 /* String up with arg */
6766 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6767 /* Big-endianness related address adjustment. */
6768 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6769 && size < UNITS_PER_WORD)
6770 {
6771 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6772 size_int (UNITS_PER_WORD - size));
6773 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6774 }
6775
6776 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6777 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6778
6779 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6780 t = off;
6781 if (adjust)
6782 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6783 build_int_cst (TREE_TYPE (off), adjust));
6784
6785 t = fold_convert (sizetype, t);
6786 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6787
6788 if (is_ha)
6789 {
6790 /* type ha; // treat as "struct {ftype field[n];}"
6791 ... [computing offs]
 6792 for (i = 0; i < nregs; ++i, offs += 16)
6793 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6794 return ha; */
6795 int i;
6796 tree tmp_ha, field_t, field_ptr_t;
6797
6798 /* Declare a local variable. */
6799 tmp_ha = create_tmp_var_raw (type, "ha");
6800 gimple_add_tmp_var (tmp_ha);
6801
6802 /* Establish the base type. */
6803 switch (ag_mode)
6804 {
6805 case SFmode:
6806 field_t = float_type_node;
6807 field_ptr_t = float_ptr_type_node;
6808 break;
6809 case DFmode:
6810 field_t = double_type_node;
6811 field_ptr_t = double_ptr_type_node;
6812 break;
6813 case TFmode:
6814 field_t = long_double_type_node;
6815 field_ptr_t = long_double_ptr_type_node;
6816 break;
6817/* The half precision and quad precision are not fully supported yet. Enable
6818 the following code after the support is complete. Need to find the correct
6819 type node for __fp16 *. */
6820#if 0
6821 case HFmode:
6822 field_t = float_type_node;
6823 field_ptr_t = float_ptr_type_node;
6824 break;
6825#endif
6826 case V2SImode:
6827 case V4SImode:
6828 {
6829 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6830 field_t = build_vector_type_for_mode (innertype, ag_mode);
6831 field_ptr_t = build_pointer_type (field_t);
6832 }
6833 break;
6834 default:
6835 gcc_assert (0);
6836 }
6837
 6838 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
6839 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6840 addr = t;
6841 t = fold_convert (field_ptr_t, addr);
6842 t = build2 (MODIFY_EXPR, field_t,
6843 build1 (INDIRECT_REF, field_t, tmp_ha),
6844 build1 (INDIRECT_REF, field_t, t));
6845
6846 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6847 for (i = 1; i < nregs; ++i)
6848 {
6849 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6850 u = fold_convert (field_ptr_t, addr);
6851 u = build2 (MODIFY_EXPR, field_t,
6852 build2 (MEM_REF, field_t, tmp_ha,
6853 build_int_cst (field_ptr_t,
6854 (i *
6855 int_size_in_bytes (field_t)))),
6856 build1 (INDIRECT_REF, field_t, u));
6857 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6858 }
6859
6860 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6861 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6862 }
6863
6864 COND_EXPR_ELSE (cond2) = t;
6865 addr = fold_convert (build_pointer_type (type), cond1);
6866 addr = build_va_arg_indirect_ref (addr);
6867
6868 if (indirect_p)
6869 addr = build_va_arg_indirect_ref (addr);
6870
6871 return addr;
6872}
6873
6874/* Implement TARGET_SETUP_INCOMING_VARARGS. */
6875
6876static void
6877aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6878 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6879 int no_rtl)
6880{
6881 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6882 CUMULATIVE_ARGS local_cum;
6883 int gr_saved, vr_saved;
6884
6885 /* The caller has advanced CUM up to, but not beyond, the last named
6886 argument. Advance a local copy of CUM past the last "real" named
6887 argument, to find out how many registers are left over. */
6888 local_cum = *cum;
6889 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
6890
 6891 /* Find out how many registers we need to save. */
6892 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
6893 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
6894
6895 if (TARGET_GENERAL_REGS_ONLY)
6896 {
6897 if (local_cum.aapcs_nvrn > 0)
6898 sorry ("%qs and floating point or vector arguments",
6899 "-mgeneral-regs-only");
6900 vr_saved = 0;
6901 }
6902
6903 if (!no_rtl)
6904 {
6905 if (gr_saved > 0)
6906 {
6907 rtx ptr, mem;
6908
6909 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
6910 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6911 - gr_saved * UNITS_PER_WORD);
6912 mem = gen_frame_mem (BLKmode, ptr);
6913 set_mem_alias_set (mem, get_varargs_alias_set ());
6914
6915 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
6916 mem, gr_saved);
6917 }
6918 if (vr_saved > 0)
6919 {
6920 /* We can't use move_block_from_reg, because it will use
6921 the wrong mode, storing D regs only. */
6922 enum machine_mode mode = TImode;
6923 int off, i;
6924
6925 /* Set OFF to the offset from virtual_incoming_args_rtx of
6926 the first vector register. The VR save area lies below
6927 the GR one, and is aligned to 16 bytes. */
6928 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6929 STACK_BOUNDARY / BITS_PER_UNIT);
6930 off -= vr_saved * UNITS_PER_VREG;
6931
6932 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
6933 {
6934 rtx ptr, mem;
6935
6936 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
6937 mem = gen_frame_mem (mode, ptr);
6938 set_mem_alias_set (mem, get_varargs_alias_set ());
6939 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6940 off += UNITS_PER_VREG;
6941 }
6942 }
6943 }
6944
6945 /* We don't save the size into *PRETEND_SIZE because we want to avoid
6946 any complication of having crtl->args.pretend_args_size changed. */
6947 cfun->machine->saved_varargs_size
6948 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6949 STACK_BOUNDARY / BITS_PER_UNIT)
6950 + vr_saved * UNITS_PER_VREG);
6951}
6952
6953static void
6954aarch64_conditional_register_usage (void)
6955{
6956 int i;
6957 if (!TARGET_FLOAT)
6958 {
6959 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6960 {
6961 fixed_regs[i] = 1;
6962 call_used_regs[i] = 1;
6963 }
6964 }
6965}
6966
6967/* Walk down the type tree of TYPE counting consecutive base elements.
6968 If *MODEP is VOIDmode, then set it to the first valid floating point
6969 type. If a non-floating point type is found, or if a floating point
6970 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6971 otherwise return the count in the sub-tree. */
6972static int
6973aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6974{
6975 enum machine_mode mode;
6976 HOST_WIDE_INT size;
6977
6978 switch (TREE_CODE (type))
6979 {
6980 case REAL_TYPE:
6981 mode = TYPE_MODE (type);
6982 if (mode != DFmode && mode != SFmode && mode != TFmode)
6983 return -1;
6984
6985 if (*modep == VOIDmode)
6986 *modep = mode;
6987
6988 if (*modep == mode)
6989 return 1;
6990
6991 break;
6992
6993 case COMPLEX_TYPE:
6994 mode = TYPE_MODE (TREE_TYPE (type));
6995 if (mode != DFmode && mode != SFmode && mode != TFmode)
6996 return -1;
6997
6998 if (*modep == VOIDmode)
6999 *modep = mode;
7000
7001 if (*modep == mode)
7002 return 2;
7003
7004 break;
7005
7006 case VECTOR_TYPE:
7007 /* Use V2SImode and V4SImode as representatives of all 64-bit
7008 and 128-bit vector types. */
7009 size = int_size_in_bytes (type);
7010 switch (size)
7011 {
7012 case 8:
7013 mode = V2SImode;
7014 break;
7015 case 16:
7016 mode = V4SImode;
7017 break;
7018 default:
7019 return -1;
7020 }
7021
7022 if (*modep == VOIDmode)
7023 *modep = mode;
7024
7025 /* Vector modes are considered to be opaque: two vectors are
7026 equivalent for the purposes of being homogeneous aggregates
7027 if they are the same size. */
7028 if (*modep == mode)
7029 return 1;
7030
7031 break;
7032
7033 case ARRAY_TYPE:
7034 {
7035 int count;
7036 tree index = TYPE_DOMAIN (type);
7037
807e902e
KZ
7038 /* Can't handle incomplete types nor sizes that are not
7039 fixed. */
7040 if (!COMPLETE_TYPE_P (type)
7041 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7042 return -1;
7043
7044 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7045 if (count == -1
7046 || !index
7047 || !TYPE_MAX_VALUE (index)
cc269bb6 7048 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 7049 || !TYPE_MIN_VALUE (index)
cc269bb6 7050 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
7051 || count < 0)
7052 return -1;
7053
ae7e9ddd
RS
7054 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7055 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
7056
7057 /* There must be no padding. */
807e902e 7058 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7059 return -1;
7060
7061 return count;
7062 }
7063
7064 case RECORD_TYPE:
7065 {
7066 int count = 0;
7067 int sub_count;
7068 tree field;
7069
807e902e
KZ
7070 /* Can't handle incomplete types nor sizes that are not
7071 fixed. */
7072 if (!COMPLETE_TYPE_P (type)
7073 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7074 return -1;
7075
7076 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7077 {
7078 if (TREE_CODE (field) != FIELD_DECL)
7079 continue;
7080
7081 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7082 if (sub_count < 0)
7083 return -1;
7084 count += sub_count;
7085 }
7086
7087 /* There must be no padding. */
807e902e 7088 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7089 return -1;
7090
7091 return count;
7092 }
7093
7094 case UNION_TYPE:
7095 case QUAL_UNION_TYPE:
7096 {
7097 /* These aren't very interesting except in a degenerate case. */
7098 int count = 0;
7099 int sub_count;
7100 tree field;
7101
807e902e
KZ
7102 /* Can't handle incomplete types nor sizes that are not
7103 fixed. */
7104 if (!COMPLETE_TYPE_P (type)
7105 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7106 return -1;
7107
7108 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7109 {
7110 if (TREE_CODE (field) != FIELD_DECL)
7111 continue;
7112
7113 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7114 if (sub_count < 0)
7115 return -1;
7116 count = count > sub_count ? count : sub_count;
7117 }
7118
7119 /* There must be no padding. */
807e902e 7120 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7121 return -1;
7122
7123 return count;
7124 }
7125
7126 default:
7127 break;
7128 }
7129
7130 return -1;
7131}
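/* A worked example of the walk above, using a hypothetical type:

     struct rgb { float r, g, b; };

   each REAL_TYPE field agrees on SFmode, so the RECORD_TYPE case returns 3
   with *MODEP set to SFmode; adding a double member would make the second
   REAL_TYPE disagree with *MODEP and the walk would return -1.  */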
7132
38e8f663
YR
7133/* Return true if we use LRA instead of reload pass. */
7134static bool
7135aarch64_lra_p (void)
7136{
7137 return aarch64_lra_flag;
7138}
7139
43e9d192
IB
7140/* Return TRUE if the type, as described by TYPE and MODE, is a composite
7141 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7142 array types. The C99 floating-point complex types are also considered
7143 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7144 types, which are GCC extensions and out of the scope of AAPCS64, are
7145 treated as composite types here as well.
7146
7147 Note that MODE itself is not sufficient in determining whether a type
7148 is such a composite type or not. This is because
7149 stor-layout.c:compute_record_mode may have already changed the MODE
7150 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7151 structure with only one field may have its MODE set to the mode of the
7152 field. Also an integer mode whose size matches the size of the
7153 RECORD_TYPE type may be used to substitute the original mode
7154 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7155 solely relied on. */
7156
7157static bool
7158aarch64_composite_type_p (const_tree type,
7159 enum machine_mode mode)
7160{
7161 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7162 return true;
7163
7164 if (mode == BLKmode
7165 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7166 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7167 return true;
7168
7169 return false;
7170}
7171
7172/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7173 type as described in AAPCS64 \S 4.1.2.
7174
7175 See the comment above aarch64_composite_type_p for the notes on MODE. */
7176
7177static bool
7178aarch64_short_vector_p (const_tree type,
7179 enum machine_mode mode)
7180{
7181 HOST_WIDE_INT size = -1;
7182
7183 if (type && TREE_CODE (type) == VECTOR_TYPE)
7184 size = int_size_in_bytes (type);
7185 else if (!aarch64_composite_type_p (type, mode)
7186 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7187 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7188 size = GET_MODE_SIZE (mode);
7189
7190 return (size == 8 || size == 16) ? true : false;
7191}
7192
7193/* Return TRUE if an argument, whose type is described by TYPE and MODE,
7194 shall be passed or returned in simd/fp register(s) (providing these
7195 parameter passing registers are available).
7196
7197 Upon successful return, *COUNT returns the number of needed registers,
 7198 *BASE_MODE returns the mode of the individual register and, when IS_HA
7199 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7200 floating-point aggregate or a homogeneous short-vector aggregate. */
7201
7202static bool
7203aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
7204 const_tree type,
7205 enum machine_mode *base_mode,
7206 int *count,
7207 bool *is_ha)
7208{
7209 enum machine_mode new_mode = VOIDmode;
7210 bool composite_p = aarch64_composite_type_p (type, mode);
7211
7212 if (is_ha != NULL) *is_ha = false;
7213
7214 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7215 || aarch64_short_vector_p (type, mode))
7216 {
7217 *count = 1;
7218 new_mode = mode;
7219 }
7220 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7221 {
7222 if (is_ha != NULL) *is_ha = true;
7223 *count = 2;
7224 new_mode = GET_MODE_INNER (mode);
7225 }
7226 else if (type && composite_p)
7227 {
7228 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7229
7230 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7231 {
7232 if (is_ha != NULL) *is_ha = true;
7233 *count = ag_count;
7234 }
7235 else
7236 return false;
7237 }
7238 else
7239 return false;
7240
7241 *base_mode = new_mode;
7242 return true;
7243}
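/* For instance, a _Complex double argument takes the MODE_COMPLEX_FLOAT path
   above: *COUNT becomes 2, *BASE_MODE becomes DFmode and *IS_HA is set, so
   the value is passed in two consecutive D registers when such registers are
   available.  */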
7244
7245/* Implement TARGET_STRUCT_VALUE_RTX. */
7246
7247static rtx
7248aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7249 int incoming ATTRIBUTE_UNUSED)
7250{
7251 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7252}
7253
7254/* Implements target hook vector_mode_supported_p. */
7255static bool
7256aarch64_vector_mode_supported_p (enum machine_mode mode)
7257{
7258 if (TARGET_SIMD
7259 && (mode == V4SImode || mode == V8HImode
7260 || mode == V16QImode || mode == V2DImode
7261 || mode == V2SImode || mode == V4HImode
7262 || mode == V8QImode || mode == V2SFmode
7263 || mode == V4SFmode || mode == V2DFmode))
7264 return true;
7265
7266 return false;
7267}
7268
b7342d25
IB
7269/* Return appropriate SIMD container
7270 for MODE within a vector of WIDTH bits. */
43e9d192 7271static enum machine_mode
b7342d25 7272aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
43e9d192 7273{
b7342d25 7274 gcc_assert (width == 64 || width == 128);
43e9d192 7275 if (TARGET_SIMD)
b7342d25
IB
7276 {
7277 if (width == 128)
7278 switch (mode)
7279 {
7280 case DFmode:
7281 return V2DFmode;
7282 case SFmode:
7283 return V4SFmode;
7284 case SImode:
7285 return V4SImode;
7286 case HImode:
7287 return V8HImode;
7288 case QImode:
7289 return V16QImode;
7290 case DImode:
7291 return V2DImode;
7292 default:
7293 break;
7294 }
7295 else
7296 switch (mode)
7297 {
7298 case SFmode:
7299 return V2SFmode;
7300 case SImode:
7301 return V2SImode;
7302 case HImode:
7303 return V4HImode;
7304 case QImode:
7305 return V8QImode;
7306 default:
7307 break;
7308 }
7309 }
43e9d192
IB
7310 return word_mode;
7311}
7312
b7342d25
IB
7313/* Return 128-bit container as the preferred SIMD mode for MODE. */
7314static enum machine_mode
7315aarch64_preferred_simd_mode (enum machine_mode mode)
7316{
7317 return aarch64_simd_container_mode (mode, 128);
7318}
7319
3b357264
JG
7320/* Return the bitmask of possible vector sizes for the vectorizer
7321 to iterate over. */
7322static unsigned int
7323aarch64_autovectorize_vector_sizes (void)
7324{
7325 return (16 | 8);
7326}
7327
c6fc9e43
YZ
7328/* A table to help perform AArch64-specific name mangling for AdvSIMD
7329 vector types in order to conform to the AAPCS64 (see "Procedure
7330 Call Standard for the ARM 64-bit Architecture", Appendix A). To
7331 qualify for emission with the mangled names defined in that document,
7332 a vector type must not only be of the correct mode but also be
7333 composed of AdvSIMD vector element types (e.g.
 7334 __builtin_aarch64_simd_qi); these types are registered by
7335 aarch64_init_simd_builtins (). In other words, vector types defined
7336 in other ways e.g. via vector_size attribute will get default
7337 mangled names. */
7338typedef struct
7339{
7340 enum machine_mode mode;
7341 const char *element_type_name;
7342 const char *mangled_name;
7343} aarch64_simd_mangle_map_entry;
7344
7345static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
7346 /* 64-bit containerized types. */
7347 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
7348 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
7349 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
7350 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
7351 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
7352 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
7353 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
7354 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
7355 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7356 /* 128-bit containerized types. */
7357 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
7358 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
7359 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
7360 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
7361 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
7362 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
7363 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
7364 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
7365 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
7366 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
7367 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
7368 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7baa225d 7369 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
c6fc9e43
YZ
7370 { VOIDmode, NULL, NULL }
7371};
7372
ac2b960f
YZ
7373/* Implement TARGET_MANGLE_TYPE. */
7374
6f549691 7375static const char *
ac2b960f
YZ
7376aarch64_mangle_type (const_tree type)
7377{
7378 /* The AArch64 ABI documents say that "__va_list" has to be
 7379 mangled as if it is in the "std" namespace. */
7380 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7381 return "St9__va_list";
7382
c6fc9e43
YZ
7383 /* Check the mode of the vector type, and the name of the vector
7384 element type, against the table. */
7385 if (TREE_CODE (type) == VECTOR_TYPE)
7386 {
7387 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7388
7389 while (pos->mode != VOIDmode)
7390 {
7391 tree elt_type = TREE_TYPE (type);
7392
7393 if (pos->mode == TYPE_MODE (type)
7394 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7395 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7396 pos->element_type_name))
7397 return pos->mangled_name;
7398
7399 pos++;
7400 }
7401 }
7402
ac2b960f
YZ
7403 /* Use the default mangling. */
7404 return NULL;
7405}
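/* For example, the AdvSIMD type float32x4_t (V4SFmode with element type
   __builtin_aarch64_simd_sf) maps to "13__Float32x4_t" in the table above,
   so a C++ function  void f (float32x4_t)  would typically mangle to
   _Z1f13__Float32x4_t.  */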
7406
43e9d192 7407/* Return the equivalent letter for size. */
81c2dfb9 7408static char
43e9d192
IB
7409sizetochar (int size)
7410{
7411 switch (size)
7412 {
7413 case 64: return 'd';
7414 case 32: return 's';
7415 case 16: return 'h';
7416 case 8 : return 'b';
7417 default: gcc_unreachable ();
7418 }
7419}
7420
3520f7cc
JG
7421/* Return true iff x is a uniform vector of floating-point
7422 constants, and the constant can be represented in
 7423 quarter-precision form. Note that, as aarch64_float_const_representable_p
 7424 rejects both +0.0 and -0.0, we will also reject them here. */
7425static bool
7426aarch64_vect_float_const_representable_p (rtx x)
7427{
7428 int i = 0;
7429 REAL_VALUE_TYPE r0, ri;
7430 rtx x0, xi;
7431
7432 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7433 return false;
7434
7435 x0 = CONST_VECTOR_ELT (x, 0);
7436 if (!CONST_DOUBLE_P (x0))
7437 return false;
7438
7439 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7440
7441 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7442 {
7443 xi = CONST_VECTOR_ELT (x, i);
7444 if (!CONST_DOUBLE_P (xi))
7445 return false;
7446
7447 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7448 if (!REAL_VALUES_EQUAL (r0, ri))
7449 return false;
7450 }
7451
7452 return aarch64_float_const_representable_p (x0);
7453}
7454
d8edd899 7455/* Return true for valid and false for invalid. */
3ea63f60 7456bool
48063b9d
IB
7457aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7458 struct simd_immediate_info *info)
43e9d192
IB
7459{
7460#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7461 matches = 1; \
7462 for (i = 0; i < idx; i += (STRIDE)) \
7463 if (!(TEST)) \
7464 matches = 0; \
7465 if (matches) \
7466 { \
7467 immtype = (CLASS); \
7468 elsize = (ELSIZE); \
43e9d192
IB
7469 eshift = (SHIFT); \
7470 emvn = (NEG); \
7471 break; \
7472 }
7473
7474 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7475 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7476 unsigned char bytes[16];
43e9d192
IB
7477 int immtype = -1, matches;
7478 unsigned int invmask = inverse ? 0xff : 0;
7479 int eshift, emvn;
7480
43e9d192 7481 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 7482 {
81c2dfb9
IB
7483 if (! (aarch64_simd_imm_zero_p (op, mode)
7484 || aarch64_vect_float_const_representable_p (op)))
d8edd899 7485 return false;
3520f7cc 7486
48063b9d
IB
7487 if (info)
7488 {
7489 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 7490 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
7491 info->mvn = false;
7492 info->shift = 0;
7493 }
3520f7cc 7494
d8edd899 7495 return true;
3520f7cc 7496 }
43e9d192
IB
7497
7498 /* Splat vector constant out into a byte vector. */
7499 for (i = 0; i < n_elts; i++)
7500 {
4b1e108c
AL
7501 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7502 it must be laid out in the vector register in reverse order. */
7503 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192
IB
7504 unsigned HOST_WIDE_INT elpart;
7505 unsigned int part, parts;
7506
7507 if (GET_CODE (el) == CONST_INT)
7508 {
7509 elpart = INTVAL (el);
7510 parts = 1;
7511 }
7512 else if (GET_CODE (el) == CONST_DOUBLE)
7513 {
7514 elpart = CONST_DOUBLE_LOW (el);
7515 parts = 2;
7516 }
7517 else
7518 gcc_unreachable ();
7519
7520 for (part = 0; part < parts; part++)
7521 {
7522 unsigned int byte;
7523 for (byte = 0; byte < innersize; byte++)
7524 {
7525 bytes[idx++] = (elpart & 0xff) ^ invmask;
7526 elpart >>= BITS_PER_UNIT;
7527 }
7528 if (GET_CODE (el) == CONST_DOUBLE)
7529 elpart = CONST_DOUBLE_HIGH (el);
7530 }
7531 }
7532
7533 /* Sanity check. */
7534 gcc_assert (idx == GET_MODE_SIZE (mode));
7535
7536 do
7537 {
7538 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7539 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7540
7541 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7542 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7543
7544 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7545 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7546
7547 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7548 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7549
7550 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7551
7552 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7553
7554 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7555 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7556
7557 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7558 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7559
7560 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7561 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7562
7563 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7564 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7565
7566 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7567
7568 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7569
7570 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 7571 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
7572
7573 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 7574 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
7575
7576 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 7577 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
7578
7579 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 7580 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
7581
7582 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7583
7584 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7585 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7586 }
7587 while (0);
7588
e4f0f84d 7589 if (immtype == -1)
d8edd899 7590 return false;
43e9d192 7591
48063b9d 7592 if (info)
43e9d192 7593 {
48063b9d 7594 info->element_width = elsize;
48063b9d
IB
7595 info->mvn = emvn != 0;
7596 info->shift = eshift;
7597
43e9d192
IB
7598 unsigned HOST_WIDE_INT imm = 0;
7599
e4f0f84d
TB
7600 if (immtype >= 12 && immtype <= 15)
7601 info->msl = true;
7602
43e9d192
IB
7603 /* Un-invert bytes of recognized vector, if necessary. */
7604 if (invmask != 0)
7605 for (i = 0; i < idx; i++)
7606 bytes[i] ^= invmask;
7607
7608 if (immtype == 17)
7609 {
7610 /* FIXME: Broken on 32-bit H_W_I hosts. */
7611 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7612
7613 for (i = 0; i < 8; i++)
7614 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7615 << (i * BITS_PER_UNIT);
7616
43e9d192 7617
48063b9d
IB
7618 info->value = GEN_INT (imm);
7619 }
7620 else
7621 {
7622 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7623 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
7624
7625 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
7626 generic constants. */
7627 if (info->mvn)
43e9d192 7628 imm = ~imm;
7629 imm = (imm >> info->shift) & 0xff;
7630 info->value = GEN_INT (imm);
7631 }
7632 }
7633
48063b9d 7634 return true;
7635#undef CHECK
7636}
7637
7638static bool
7639aarch64_const_vec_all_same_int_p (rtx x,
7640 HOST_WIDE_INT minval,
7641 HOST_WIDE_INT maxval)
7642{
7643 HOST_WIDE_INT firstval;
7644 int count, i;
7645
7646 if (GET_CODE (x) != CONST_VECTOR
7647 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7648 return false;
7649
7650 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7651 if (firstval < minval || firstval > maxval)
7652 return false;
7653
7654 count = CONST_VECTOR_NUNITS (x);
7655 for (i = 1; i < count; i++)
7656 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7657 return false;
7658
7659 return true;
7660}
7661
7662/* Check if immediate shift constants are within range. */
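/* Left shifts are checked against the range 0..(element width - 1) and
   right shifts against 1..element width, matching the AdvSIMD
   shift-by-immediate encodings.  */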
7663bool
7664aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7665{
7666 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7667 if (left)
7668 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7669 else
7670 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7671}
7672
7673/* Return true if X is a uniform vector where all elements
7674 are either the floating-point constant 0.0 or the
7675 integer constant 0. */
7676bool
7677aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7678{
3520f7cc 7679 return x == CONST0_RTX (mode);
7680}
7681
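/* Return true if the 64-bit immediate X consists only of 0x00 and 0xff
   bytes, i.e. the byte-mask form accepted by the 64-bit AdvSIMD MOVI.  */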
7682bool
7683aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7684{
7685 HOST_WIDE_INT imm = INTVAL (x);
7686 int i;
7687
7688 for (i = 0; i < 8; i++)
7689 {
7690 unsigned int byte = imm & 0xff;
7691 if (byte != 0xff && byte != 0)
7692 return false;
7693 imm >>= 8;
7694 }
7695
7696 return true;
7697}
7698
7699bool
7700aarch64_mov_operand_p (rtx x,
a5350ddc 7701 enum aarch64_symbol_context context,
7702 enum machine_mode mode)
7703{
7704 if (GET_CODE (x) == HIGH
7705 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7706 return true;
7707
7708 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7709 return true;
7710
7711 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7712 return true;
7713
7714 return aarch64_classify_symbolic_expression (x, context)
7715 == SYMBOL_TINY_ABSOLUTE;
7716}
7717
7718/* Return a const_int vector of VAL. */
7719rtx
7720aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7721{
7722 int nunits = GET_MODE_NUNITS (mode);
7723 rtvec v = rtvec_alloc (nunits);
7724 int i;
7725
7726 for (i=0; i < nunits; i++)
7727 RTVEC_ELT (v, i) = GEN_INT (val);
7728
7729 return gen_rtx_CONST_VECTOR (mode, v);
7730}
7731
7732/* Check OP is a legal scalar immediate for the MOVI instruction. */
7733
7734bool
7735aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7736{
7737 enum machine_mode vmode;
7738
7739 gcc_assert (!VECTOR_MODE_P (mode));
7740 vmode = aarch64_preferred_simd_mode (mode);
7741 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 7742 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7743}
7744
7745/* Construct and return a PARALLEL RTX vector. */
7746rtx
7747aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7748{
7749 int nunits = GET_MODE_NUNITS (mode);
7750 rtvec v = rtvec_alloc (nunits / 2);
7751 int base = high ? nunits / 2 : 0;
7752 rtx t1;
7753 int i;
7754
7755 for (i=0; i < nunits / 2; i++)
7756 RTVEC_ELT (v, i) = GEN_INT (base + i);
7757
7758 t1 = gen_rtx_PARALLEL (mode, v);
7759 return t1;
7760}
7761
7762/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7763 HIGH (exclusive). */
7764void
7765aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7766{
7767 HOST_WIDE_INT lane;
7768 gcc_assert (GET_CODE (operand) == CONST_INT);
7769 lane = INTVAL (operand);
7770
7771 if (lane < low || lane >= high)
7772 error ("lane out of range");
7773}
7774
7775void
7776aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7777{
7778 gcc_assert (GET_CODE (operand) == CONST_INT);
7779 HOST_WIDE_INT lane = INTVAL (operand);
7780
7781 if (lane < low || lane >= high)
7782 error ("constant out of range");
7783}
7784
7785/* Emit code to reinterpret one AdvSIMD type as another,
7786 without altering bits. */
7787void
7788aarch64_simd_reinterpret (rtx dest, rtx src)
7789{
7790 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7791}
7792
7793/* Emit code to place an AdvSIMD pair result in memory locations (with equal
7794 registers). */
7795void
7796aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7797 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7798 rtx op1)
7799{
7800 rtx mem = gen_rtx_MEM (mode, destaddr);
7801 rtx tmp1 = gen_reg_rtx (mode);
7802 rtx tmp2 = gen_reg_rtx (mode);
7803
7804 emit_insn (intfn (tmp1, op1, tmp2));
7805
7806 emit_move_insn (mem, tmp1);
7807 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7808 emit_move_insn (mem, tmp2);
7809}
7810
7811/* Return TRUE if OP is a valid vector addressing mode. */
7812bool
7813aarch64_simd_mem_operand_p (rtx op)
7814{
7815 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7816 || GET_CODE (XEXP (op, 0)) == REG);
7817}
7818
7819/* Set up OPERANDS for a register copy from SRC to DEST, taking care
7820 not to early-clobber SRC registers in the process.
7821
7822 We assume that the operands described by SRC and DEST represent a
7823 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
7824 number of components into which the copy has been decomposed. */
7825void
7826aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7827 rtx *src, unsigned int count)
7828{
7829 unsigned int i;
7830
7831 if (!reg_overlap_mentioned_p (operands[0], operands[1])
7832 || REGNO (operands[0]) < REGNO (operands[1]))
7833 {
7834 for (i = 0; i < count; i++)
7835 {
7836 operands[2 * i] = dest[i];
7837 operands[2 * i + 1] = src[i];
7838 }
7839 }
7840 else
7841 {
7842 for (i = 0; i < count; i++)
7843 {
7844 operands[2 * i] = dest[count - i - 1];
7845 operands[2 * i + 1] = src[count - i - 1];
7846 }
7847 }
7848}
7849
7850/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7851 one of VSTRUCT modes: OI, CI or XI. */
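/* A register-to-register structure move expands to one 4-byte instruction
   per 128-bit register, giving 8, 12 and 16 bytes for OImode, CImode and
   XImode respectively; all other alternatives are a single 4-byte insn.  */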
7852int
7853aarch64_simd_attr_length_move (rtx insn)
7854{
7855 enum machine_mode mode;
7856
7857 extract_insn_cached (insn);
7858
7859 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7860 {
7861 mode = GET_MODE (recog_data.operand[0]);
7862 switch (mode)
7863 {
7864 case OImode:
7865 return 8;
7866 case CImode:
7867 return 12;
7868 case XImode:
7869 return 16;
7870 default:
7871 gcc_unreachable ();
7872 }
7873 }
7874 return 4;
7875}
7876
7877/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
7878 alignment of a vector to 128 bits. */
7879static HOST_WIDE_INT
7880aarch64_simd_vector_alignment (const_tree type)
7881{
9439e9a1 7882 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
7883 return MIN (align, 128);
7884}
7885
7886/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
7887static bool
7888aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
7889{
7890 if (is_packed)
7891 return false;
7892
7893 /* We guarantee alignment for vectors up to 128-bits. */
7894 if (tree_int_cst_compare (TYPE_SIZE (type),
7895 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
7896 return false;
7897
7898 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
7899 return true;
7900}
7901
7902/* If VALS is a vector constant that can be loaded into a register
7903 using DUP, generate instructions to do so and return an RTX to
7904 assign to the register. Otherwise return NULL_RTX. */
7905static rtx
7906aarch64_simd_dup_constant (rtx vals)
7907{
7908 enum machine_mode mode = GET_MODE (vals);
7909 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7910 int n_elts = GET_MODE_NUNITS (mode);
7911 bool all_same = true;
7912 rtx x;
7913 int i;
7914
7915 if (GET_CODE (vals) != CONST_VECTOR)
7916 return NULL_RTX;
7917
7918 for (i = 1; i < n_elts; ++i)
7919 {
7920 x = CONST_VECTOR_ELT (vals, i);
7921 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
7922 all_same = false;
7923 }
7924
7925 if (!all_same)
7926 return NULL_RTX;
7927
7928 /* We can load this constant by using DUP and a constant in a
7929 single ARM register. This will be cheaper than a vector
7930 load. */
7931 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
7932 return gen_rtx_VEC_DUPLICATE (mode, x);
7933}
7934
7935
7936/* Generate code to load VALS, which is a PARALLEL containing only
7937 constants (for vec_init) or CONST_VECTOR, efficiently into a
7938 register. Returns an RTX to copy into the register, or NULL_RTX
7939 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
1df3f464 7940static rtx
7941aarch64_simd_make_constant (rtx vals)
7942{
7943 enum machine_mode mode = GET_MODE (vals);
7944 rtx const_dup;
7945 rtx const_vec = NULL_RTX;
7946 int n_elts = GET_MODE_NUNITS (mode);
7947 int n_const = 0;
7948 int i;
7949
7950 if (GET_CODE (vals) == CONST_VECTOR)
7951 const_vec = vals;
7952 else if (GET_CODE (vals) == PARALLEL)
7953 {
7954 /* A CONST_VECTOR must contain only CONST_INTs and
7955 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7956 Only store valid constants in a CONST_VECTOR. */
7957 for (i = 0; i < n_elts; ++i)
7958 {
7959 rtx x = XVECEXP (vals, 0, i);
7960 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7961 n_const++;
7962 }
7963 if (n_const == n_elts)
7964 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7965 }
7966 else
7967 gcc_unreachable ();
7968
7969 if (const_vec != NULL_RTX
48063b9d 7970 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7971 /* Load using MOVI/MVNI. */
7972 return const_vec;
7973 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7974 /* Loaded using DUP. */
7975 return const_dup;
7976 else if (const_vec != NULL_RTX)
7977 /* Load from constant pool. We cannot take advantage of single-cycle
7978 LD1 because we need a PC-relative addressing mode. */
7979 return const_vec;
7980 else
7981 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7982 We cannot construct an initializer. */
7983 return NULL_RTX;
7984}
7985
7986void
7987aarch64_expand_vector_init (rtx target, rtx vals)
7988{
7989 enum machine_mode mode = GET_MODE (target);
7990 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7991 int n_elts = GET_MODE_NUNITS (mode);
7992 int n_var = 0, one_var = -1;
7993 bool all_same = true;
7994 rtx x, mem;
7995 int i;
7996
7997 x = XVECEXP (vals, 0, 0);
7998 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7999 n_var = 1, one_var = 0;
8000
8001 for (i = 1; i < n_elts; ++i)
8002 {
8003 x = XVECEXP (vals, 0, i);
8004 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8005 ++n_var, one_var = i;
8006
8007 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8008 all_same = false;
8009 }
8010
8011 if (n_var == 0)
8012 {
8013 rtx constant = aarch64_simd_make_constant (vals);
8014 if (constant != NULL_RTX)
8015 {
8016 emit_move_insn (target, constant);
8017 return;
8018 }
8019 }
8020
8021 /* Splat a single non-constant element if we can. */
8022 if (all_same)
8023 {
8024 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8025 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8026 return;
8027 }
8028
8029 /* One field is non-constant. Load constant then overwrite varying
8030 field. This is more efficient than using the stack. */
8031 if (n_var == 1)
8032 {
8033 rtx copy = copy_rtx (vals);
8034 rtx index = GEN_INT (one_var);
8035 enum insn_code icode;
8036
8037 /* Load constant part of vector, substitute neighboring value for
8038 varying element. */
8039 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8040 aarch64_expand_vector_init (target, copy);
8041
8042 /* Insert variable. */
8043 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8044 icode = optab_handler (vec_set_optab, mode);
8045 gcc_assert (icode != CODE_FOR_nothing);
8046 emit_insn (GEN_FCN (icode) (target, x, index));
8047 return;
8048 }
8049
8050 /* Construct the vector in memory one field at a time
8051 and load the whole vector. */
8052 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8053 for (i = 0; i < n_elts; i++)
8054 emit_move_insn (adjust_address_nv (mem, inner_mode,
8055 i * GET_MODE_SIZE (inner_mode)),
8056 XVECEXP (vals, 0, i));
8057 emit_move_insn (target, mem);
8058
8059}
8060
8061static unsigned HOST_WIDE_INT
8062aarch64_shift_truncation_mask (enum machine_mode mode)
8063{
8064 return
8065 (aarch64_vector_mode_supported_p (mode)
8066 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8067}
8068
8069#ifndef TLS_SECTION_ASM_FLAG
8070#define TLS_SECTION_ASM_FLAG 'T'
8071#endif
8072
8073void
8074aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8075 tree decl ATTRIBUTE_UNUSED)
8076{
8077 char flagchars[10], *f = flagchars;
8078
8079 /* If we have already declared this section, we can use an
8080 abbreviated form to switch back to it -- unless this section is
8081 part of a COMDAT group, in which case GAS requires the full
8082 declaration every time. */
8083 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8084 && (flags & SECTION_DECLARED))
8085 {
8086 fprintf (asm_out_file, "\t.section\t%s\n", name);
8087 return;
8088 }
8089
8090 if (!(flags & SECTION_DEBUG))
8091 *f++ = 'a';
8092 if (flags & SECTION_WRITE)
8093 *f++ = 'w';
8094 if (flags & SECTION_CODE)
8095 *f++ = 'x';
8096 if (flags & SECTION_SMALL)
8097 *f++ = 's';
8098 if (flags & SECTION_MERGE)
8099 *f++ = 'M';
8100 if (flags & SECTION_STRINGS)
8101 *f++ = 'S';
8102 if (flags & SECTION_TLS)
8103 *f++ = TLS_SECTION_ASM_FLAG;
8104 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8105 *f++ = 'G';
8106 *f = '\0';
8107
8108 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8109
8110 if (!(flags & SECTION_NOTYPE))
8111 {
8112 const char *type;
8113 const char *format;
8114
8115 if (flags & SECTION_BSS)
8116 type = "nobits";
8117 else
8118 type = "progbits";
8119
8120#ifdef TYPE_OPERAND_FMT
8121 format = "," TYPE_OPERAND_FMT;
8122#else
8123 format = ",@%s";
8124#endif
8125
8126 fprintf (asm_out_file, format, type);
8127
8128 if (flags & SECTION_ENTSIZE)
8129 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8130 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8131 {
8132 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8133 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8134 else
8135 fprintf (asm_out_file, ",%s,comdat",
8136 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8137 }
8138 }
8139
8140 putc ('\n', asm_out_file);
8141}
8142
8143/* Select a format to encode pointers in exception handling data. */
8144int
8145aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8146{
8147 int type;
8148 switch (aarch64_cmodel)
8149 {
8150 case AARCH64_CMODEL_TINY:
8151 case AARCH64_CMODEL_TINY_PIC:
8152 case AARCH64_CMODEL_SMALL:
8153 case AARCH64_CMODEL_SMALL_PIC:
8154 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8155 for everything. */
8156 type = DW_EH_PE_sdata4;
8157 break;
8158 default:
8159 /* No assumptions here. 8-byte relocs required. */
8160 type = DW_EH_PE_sdata8;
8161 break;
8162 }
8163 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8164}
8165
8166/* Emit load exclusive. */
8167
8168static void
8169aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
8170 rtx mem, rtx model_rtx)
8171{
8172 rtx (*gen) (rtx, rtx, rtx);
8173
8174 switch (mode)
8175 {
8176 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8177 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8178 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8179 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8180 default:
8181 gcc_unreachable ();
8182 }
8183
8184 emit_insn (gen (rval, mem, model_rtx));
8185}
8186
8187/* Emit store exclusive. */
8188
8189static void
8190aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
8191 rtx rval, rtx mem, rtx model_rtx)
8192{
8193 rtx (*gen) (rtx, rtx, rtx, rtx);
8194
8195 switch (mode)
8196 {
8197 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8198 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8199 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8200 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8201 default:
8202 gcc_unreachable ();
8203 }
8204
8205 emit_insn (gen (bval, rval, mem, model_rtx));
8206}
8207
8208/* Mark the previous jump instruction as unlikely. */
8209
8210static void
8211aarch64_emit_unlikely_jump (rtx insn)
8212{
e5af9ddd 8213 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
8214
8215 insn = emit_jump_insn (insn);
e5af9ddd 8216 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
8217}
8218
8219/* Expand a compare and swap pattern. */
8220
8221void
8222aarch64_expand_compare_and_swap (rtx operands[])
8223{
8224 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8225 enum machine_mode mode, cmp_mode;
8226 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8227
8228 bval = operands[0];
8229 rval = operands[1];
8230 mem = operands[2];
8231 oldval = operands[3];
8232 newval = operands[4];
8233 is_weak = operands[5];
8234 mod_s = operands[6];
8235 mod_f = operands[7];
8236 mode = GET_MODE (mem);
8237 cmp_mode = mode;
8238
8239 /* Normally the succ memory model must be stronger than fail, but in the
8240 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8241 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8242
8243 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8244 && INTVAL (mod_s) == MEMMODEL_RELEASE)
8245 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8246
8247 switch (mode)
8248 {
8249 case QImode:
8250 case HImode:
8251 /* For short modes, we're going to perform the comparison in SImode,
8252 so do the zero-extension now. */
8253 cmp_mode = SImode;
8254 rval = gen_reg_rtx (SImode);
8255 oldval = convert_modes (SImode, mode, oldval, true);
8256 /* Fall through. */
8257
8258 case SImode:
8259 case DImode:
8260 /* Force the value into a register if needed. */
8261 if (!aarch64_plus_operand (oldval, mode))
8262 oldval = force_reg (cmp_mode, oldval);
8263 break;
8264
8265 default:
8266 gcc_unreachable ();
8267 }
8268
8269 switch (mode)
8270 {
8271 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8272 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8273 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8274 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8275 default:
8276 gcc_unreachable ();
8277 }
8278
8279 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8280
8281 if (mode == QImode || mode == HImode)
8282 emit_move_insn (operands[1], gen_lowpart (mode, rval));
8283
8284 x = gen_rtx_REG (CCmode, CC_REGNUM);
8285 x = gen_rtx_EQ (SImode, x, const0_rtx);
8286 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8287}
8288
8289/* Split a compare and swap pattern. */
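/* The generated sequence is, roughly: an optional retry label, a
   load-exclusive of MEM, a compare of the loaded value against OLDVAL
   with a branch to the exit label on mismatch, a store-exclusive of
   NEWVAL and, for a strong compare-and-swap, a branch back to the retry
   label if the store-exclusive failed.  */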
8290
8291void
8292aarch64_split_compare_and_swap (rtx operands[])
8293{
8294 rtx rval, mem, oldval, newval, scratch;
8295 enum machine_mode mode;
8296 bool is_weak;
8297 rtx label1, label2, x, cond;
8298
8299 rval = operands[0];
8300 mem = operands[1];
8301 oldval = operands[2];
8302 newval = operands[3];
8303 is_weak = (operands[4] != const0_rtx);
8304 scratch = operands[7];
8305 mode = GET_MODE (mem);
8306
8307 label1 = NULL_RTX;
8308 if (!is_weak)
8309 {
8310 label1 = gen_label_rtx ();
8311 emit_label (label1);
8312 }
8313 label2 = gen_label_rtx ();
8314
8315 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8316
8317 cond = aarch64_gen_compare_reg (NE, rval, oldval);
8318 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8319 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8320 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8321 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8322
8323 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8324
8325 if (!is_weak)
8326 {
8327 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8328 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8329 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8330 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8331 }
8332 else
8333 {
8334 cond = gen_rtx_REG (CCmode, CC_REGNUM);
8335 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8336 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8337 }
8338
8339 emit_label (label2);
8340}
8341
8342/* Split an atomic operation. */
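/* The generated sequence is a load-exclusive/store-exclusive retry loop:
   load the old value, compute the new value according to CODE, attempt
   the store-exclusive and branch back to the top of the loop if the
   store failed.  */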
8343
8344void
8345aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8346 rtx value, rtx model_rtx, rtx cond)
8347{
8348 enum machine_mode mode = GET_MODE (mem);
8349 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8350 rtx label, x;
8351
8352 label = gen_label_rtx ();
8353 emit_label (label);
8354
8355 if (new_out)
8356 new_out = gen_lowpart (wmode, new_out);
8357 if (old_out)
8358 old_out = gen_lowpart (wmode, old_out);
8359 else
8360 old_out = new_out;
8361 value = simplify_gen_subreg (wmode, value, mode, 0);
8362
8363 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8364
8365 switch (code)
8366 {
8367 case SET:
8368 new_out = value;
8369 break;
8370
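      /* The NOT case computes ~(old_out & value), i.e. the NAND of the old
	 value and VALUE; this is the form used by the atomic NAND patterns.  */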
8371 case NOT:
8372 x = gen_rtx_AND (wmode, old_out, value);
8373 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8374 x = gen_rtx_NOT (wmode, new_out);
8375 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8376 break;
8377
8378 case MINUS:
8379 if (CONST_INT_P (value))
8380 {
8381 value = GEN_INT (-INTVAL (value));
8382 code = PLUS;
8383 }
8384 /* Fall through. */
8385
8386 default:
8387 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8388 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8389 break;
8390 }
8391
8392 aarch64_emit_store_exclusive (mode, cond, mem,
8393 gen_lowpart (mode, new_out), model_rtx);
8394
8395 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8396 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8397 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8398 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8399}
8400
8401static void
8402aarch64_print_extension (void)
8403{
8404 const struct aarch64_option_extension *opt = NULL;
8405
8406 for (opt = all_extensions; opt->name != NULL; opt++)
8407 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8408 asm_fprintf (asm_out_file, "+%s", opt->name);
8409
8410 asm_fprintf (asm_out_file, "\n");
8411}
8412
8413static void
8414aarch64_start_file (void)
8415{
8416 if (selected_arch)
8417 {
8418 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8419 aarch64_print_extension ();
8420 }
43e9d192 8421 else if (selected_cpu)
95ca411e 8422 {
8423 const char *truncated_name
8424 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8425 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8426 aarch64_print_extension ();
8427 }
8428 default_file_start();
8429}
8430
8431/* Target hook for c_mode_for_suffix. */
8432static enum machine_mode
8433aarch64_c_mode_for_suffix (char suffix)
8434{
8435 if (suffix == 'q')
8436 return TFmode;
8437
8438 return VOIDmode;
8439}
8440
8441/* We can only represent floating point constants which will fit in
8442 "quarter-precision" values. These values are characterised by
8443 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
8444 by:
8445
8446 (-1)^s * (n/16) * 2^r
8447
8448 Where:
8449 's' is the sign bit.
8450 'n' is an integer in the range 16 <= n <= 31.
8451 'r' is an integer in the range -3 <= r <= 4. */
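/* For example, 0.5 is (-1)^0 * (16/16) * 2^-1 and 31.0 is
   (-1)^0 * (31/16) * 2^4, so both are representable; the largest
   representable magnitude is 31.0 and the smallest non-zero one is
   16/16 * 2^-3 = 0.125.  */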
8452
8453/* Return true iff X can be represented as a quarter-precision
8454 floating point immediate operand. Note, we cannot represent 0.0. */
8455bool
8456aarch64_float_const_representable_p (rtx x)
8457{
8458 /* This represents our current view of how many bits
8459 make up the mantissa. */
8460 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 8461 int exponent;
3520f7cc 8462 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 8463 REAL_VALUE_TYPE r, m;
807e902e 8464 bool fail;
8465
8466 if (!CONST_DOUBLE_P (x))
8467 return false;
8468
8469 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8470
8471 /* We cannot represent infinities, NaNs or +/-zero. We won't
8472 know if we have +zero until we analyse the mantissa, but we
8473 can reject the other invalid values. */
8474 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8475 || REAL_VALUE_MINUS_ZERO (r))
8476 return false;
8477
ba96cdfb 8478 /* Extract exponent. */
8479 r = real_value_abs (&r);
8480 exponent = REAL_EXP (&r);
8481
8482 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8483 highest (sign) bit, with a fixed binary point at bit point_pos.
8484 m1 holds the low part of the mantissa, m2 the high part.
8485 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8486 bits for the mantissa, this can fail (low bits will be lost). */
8487 real_ldexp (&m, &r, point_pos - exponent);
807e902e 8488 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
8489
8490 /* If the low part of the mantissa has bits set we cannot represent
8491 the value. */
807e902e 8492 if (w.elt (0) != 0)
8493 return false;
8494 /* We have rejected the lower HOST_WIDE_INT, so update our
8495 understanding of how many bits lie in the mantissa and
8496 look only at the high HOST_WIDE_INT. */
807e902e 8497 mantissa = w.elt (1);
8498 point_pos -= HOST_BITS_PER_WIDE_INT;
8499
8500 /* We can only represent values with a mantissa of the form 1.xxxx. */
8501 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8502 if ((mantissa & mask) != 0)
8503 return false;
8504
8505 /* Having filtered unrepresentable values, we may now remove all
8506 but the highest 5 bits. */
8507 mantissa >>= point_pos - 5;
8508
8509 /* We cannot represent the value 0.0, so reject it. This is handled
8510 elsewhere. */
8511 if (mantissa == 0)
8512 return false;
8513
8514 /* Then, as bit 4 is always set, we can mask it off, leaving
8515 the mantissa in the range [0, 15]. */
8516 mantissa &= ~(1 << 4);
8517 gcc_assert (mantissa <= 15);
8518
8519 /* GCC internally does not use IEEE754-like encoding (where normalized
8520 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8521 Our mantissa values are shifted 4 places to the left relative to
8522 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8523 by 5 places to correct for GCC's representation. */
8524 exponent = 5 - exponent;
8525
8526 return (exponent >= 0 && exponent <= 7);
8527}
8528
8529char*
81c2dfb9 8530aarch64_output_simd_mov_immediate (rtx const_vector,
8531 enum machine_mode mode,
8532 unsigned width)
8533{
3ea63f60 8534 bool is_valid;
3520f7cc 8535 static char templ[40];
3520f7cc 8536 const char *mnemonic;
e4f0f84d 8537 const char *shift_op;
3520f7cc 8538 unsigned int lane_count = 0;
81c2dfb9 8539 char element_char;
3520f7cc 8540
e4f0f84d 8541 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8542
8543 /* This will return true to show const_vector is legal for use as either
8544 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
8545 also update INFO to show how the immediate should be generated. */
81c2dfb9 8546 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8547 gcc_assert (is_valid);
8548
81c2dfb9 8549 element_char = sizetochar (info.element_width);
8550 lane_count = width / info.element_width;
8551
8552 mode = GET_MODE_INNER (mode);
8553 if (mode == SFmode || mode == DFmode)
8554 {
8555 gcc_assert (info.shift == 0 && ! info.mvn);
8556 if (aarch64_float_const_zero_rtx_p (info.value))
8557 info.value = GEN_INT (0);
8558 else
8559 {
8560#define buf_size 20
8561 REAL_VALUE_TYPE r;
8562 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8563 char float_buf[buf_size] = {'\0'};
8564 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8565#undef buf_size
8566
8567 if (lane_count == 1)
8568 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8569 else
8570 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 8571 lane_count, element_char, float_buf);
8572 return templ;
8573 }
3520f7cc 8574 }
3520f7cc 8575
48063b9d 8576 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 8577 shift_op = info.msl ? "msl" : "lsl";
8578
8579 if (lane_count == 1)
8580 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8581 mnemonic, UINTVAL (info.value));
8582 else if (info.shift)
8583 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8584 ", %s %d", mnemonic, lane_count, element_char,
8585 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 8586 else
48063b9d 8587 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 8588 mnemonic, lane_count, element_char, UINTVAL (info.value));
8589 return templ;
8590}
8591
8592char*
8593aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8594 enum machine_mode mode)
8595{
8596 enum machine_mode vmode;
8597
8598 gcc_assert (!VECTOR_MODE_P (mode));
8599 vmode = aarch64_simd_container_mode (mode, 64);
8600 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8601 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8602}
8603
8604/* Split operands into moves from op[1] + op[2] into op[0]. */
8605
8606void
8607aarch64_split_combinev16qi (rtx operands[3])
8608{
8609 unsigned int dest = REGNO (operands[0]);
8610 unsigned int src1 = REGNO (operands[1]);
8611 unsigned int src2 = REGNO (operands[2]);
8612 enum machine_mode halfmode = GET_MODE (operands[1]);
8613 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8614 rtx destlo, desthi;
8615
8616 gcc_assert (halfmode == V16QImode);
8617
8618 if (src1 == dest && src2 == dest + halfregs)
8619 {
8620 /* No-op move. Can't split to nothing; emit something. */
8621 emit_note (NOTE_INSN_DELETED);
8622 return;
8623 }
8624
8625 /* Preserve register attributes for variable tracking. */
8626 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8627 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8628 GET_MODE_SIZE (halfmode));
8629
8630 /* Special case of reversed high/low parts. */
8631 if (reg_overlap_mentioned_p (operands[2], destlo)
8632 && reg_overlap_mentioned_p (operands[1], desthi))
8633 {
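      /* Swap the two source registers in place with the classic
	 three-XOR sequence, so no scratch register is needed.  */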
8634 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8635 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8636 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8637 }
8638 else if (!reg_overlap_mentioned_p (operands[2], destlo))
8639 {
8640 /* Try to avoid unnecessary moves if part of the result
8641 is in the right place already. */
8642 if (src1 != dest)
8643 emit_move_insn (destlo, operands[1]);
8644 if (src2 != dest + halfregs)
8645 emit_move_insn (desthi, operands[2]);
8646 }
8647 else
8648 {
8649 if (src2 != dest + halfregs)
8650 emit_move_insn (desthi, operands[2]);
8651 if (src1 != dest)
8652 emit_move_insn (destlo, operands[1]);
8653 }
8654}
8655
8656/* vec_perm support. */
8657
8658#define MAX_VECT_LEN 16
8659
8660struct expand_vec_perm_d
8661{
8662 rtx target, op0, op1;
8663 unsigned char perm[MAX_VECT_LEN];
8664 enum machine_mode vmode;
8665 unsigned char nelt;
8666 bool one_vector_p;
8667 bool testing_p;
8668};
8669
8670/* Generate a variable permutation. */
8671
8672static void
8673aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8674{
8675 enum machine_mode vmode = GET_MODE (target);
8676 bool one_vector_p = rtx_equal_p (op0, op1);
8677
8678 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8679 gcc_checking_assert (GET_MODE (op0) == vmode);
8680 gcc_checking_assert (GET_MODE (op1) == vmode);
8681 gcc_checking_assert (GET_MODE (sel) == vmode);
8682 gcc_checking_assert (TARGET_SIMD);
8683
8684 if (one_vector_p)
8685 {
8686 if (vmode == V8QImode)
8687 {
8688 /* Expand the argument to a V16QI mode by duplicating it. */
8689 rtx pair = gen_reg_rtx (V16QImode);
8690 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8691 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8692 }
8693 else
8694 {
8695 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8696 }
8697 }
8698 else
8699 {
8700 rtx pair;
8701
8702 if (vmode == V8QImode)
8703 {
8704 pair = gen_reg_rtx (V16QImode);
8705 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8706 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8707 }
8708 else
8709 {
8710 pair = gen_reg_rtx (OImode);
8711 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8712 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8713 }
8714 }
8715}
8716
8717void
8718aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8719{
8720 enum machine_mode vmode = GET_MODE (target);
8721 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
8722 bool one_vector_p = rtx_equal_p (op0, op1);
8723 rtx rmask[MAX_VECT_LEN], mask;
8724
8725 gcc_checking_assert (!BYTES_BIG_ENDIAN);
8726
8727 /* The TBL instruction does not use a modulo index, so we must take care
8728 of that ourselves. */
8729 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
8730 for (i = 0; i < nelt; ++i)
8731 rmask[i] = mask;
8732 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
8733 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8734
8735 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
8736}
8737
8738/* Recognize patterns suitable for the TRN instructions. */
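/* For example, with V4SI inputs {a0, a1, a2, a3} and {b0, b1, b2, b3},
   TRN1 produces {a0, b0, a2, b2} (selector 0, 4, 2, 6) and TRN2 produces
   {a1, b1, a3, b3} (selector 1, 5, 3, 7).  */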
8739static bool
8740aarch64_evpc_trn (struct expand_vec_perm_d *d)
8741{
8742 unsigned int i, odd, mask, nelt = d->nelt;
8743 rtx out, in0, in1, x;
8744 rtx (*gen) (rtx, rtx, rtx);
8745 enum machine_mode vmode = d->vmode;
8746
8747 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8748 return false;
8749
8750 /* Note that these are little-endian tests.
8751 We correct for big-endian later. */
8752 if (d->perm[0] == 0)
8753 odd = 0;
8754 else if (d->perm[0] == 1)
8755 odd = 1;
8756 else
8757 return false;
8758 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8759
8760 for (i = 0; i < nelt; i += 2)
8761 {
8762 if (d->perm[i] != i + odd)
8763 return false;
8764 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8765 return false;
8766 }
8767
8768 /* Success! */
8769 if (d->testing_p)
8770 return true;
8771
8772 in0 = d->op0;
8773 in1 = d->op1;
8774 if (BYTES_BIG_ENDIAN)
8775 {
8776 x = in0, in0 = in1, in1 = x;
8777 odd = !odd;
8778 }
8779 out = d->target;
8780
8781 if (odd)
8782 {
8783 switch (vmode)
8784 {
8785 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8786 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8787 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8788 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8789 case V4SImode: gen = gen_aarch64_trn2v4si; break;
8790 case V2SImode: gen = gen_aarch64_trn2v2si; break;
8791 case V2DImode: gen = gen_aarch64_trn2v2di; break;
8792 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8793 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8794 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8795 default:
8796 return false;
8797 }
8798 }
8799 else
8800 {
8801 switch (vmode)
8802 {
8803 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8804 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8805 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8806 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8807 case V4SImode: gen = gen_aarch64_trn1v4si; break;
8808 case V2SImode: gen = gen_aarch64_trn1v2si; break;
8809 case V2DImode: gen = gen_aarch64_trn1v2di; break;
8810 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8811 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8812 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8813 default:
8814 return false;
8815 }
8816 }
8817
8818 emit_insn (gen (out, in0, in1));
8819 return true;
8820}
8821
8822/* Recognize patterns suitable for the UZP instructions. */
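/* For example, with V4SI inputs {a0, a1, a2, a3} and {b0, b1, b2, b3},
   UZP1 produces {a0, a2, b0, b2} (selector 0, 2, 4, 6) and UZP2 produces
   {a1, a3, b1, b3} (selector 1, 3, 5, 7).  */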
8823static bool
8824aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8825{
8826 unsigned int i, odd, mask, nelt = d->nelt;
8827 rtx out, in0, in1, x;
8828 rtx (*gen) (rtx, rtx, rtx);
8829 enum machine_mode vmode = d->vmode;
8830
8831 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8832 return false;
8833
8834 /* Note that these are little-endian tests.
8835 We correct for big-endian later. */
8836 if (d->perm[0] == 0)
8837 odd = 0;
8838 else if (d->perm[0] == 1)
8839 odd = 1;
8840 else
8841 return false;
8842 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8843
8844 for (i = 0; i < nelt; i++)
8845 {
8846 unsigned elt = (i * 2 + odd) & mask;
8847 if (d->perm[i] != elt)
8848 return false;
8849 }
8850
8851 /* Success! */
8852 if (d->testing_p)
8853 return true;
8854
8855 in0 = d->op0;
8856 in1 = d->op1;
8857 if (BYTES_BIG_ENDIAN)
8858 {
8859 x = in0, in0 = in1, in1 = x;
8860 odd = !odd;
8861 }
8862 out = d->target;
8863
8864 if (odd)
8865 {
8866 switch (vmode)
8867 {
8868 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
8869 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
8870 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
8871 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
8872 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
8873 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
8874 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
8875 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
8876 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
8877 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
8878 default:
8879 return false;
8880 }
8881 }
8882 else
8883 {
8884 switch (vmode)
8885 {
8886 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
8887 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
8888 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
8889 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
8890 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
8891 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
8892 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
8893 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
8894 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
8895 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
8896 default:
8897 return false;
8898 }
8899 }
8900
8901 emit_insn (gen (out, in0, in1));
8902 return true;
8903}
8904
8905/* Recognize patterns suitable for the ZIP instructions. */
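/* For example, with V4SI inputs {a0, a1, a2, a3} and {b0, b1, b2, b3},
   ZIP1 produces {a0, b0, a1, b1} (selector 0, 4, 1, 5) and ZIP2 produces
   {a2, b2, a3, b3} (selector 2, 6, 3, 7).  */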
8906static bool
8907aarch64_evpc_zip (struct expand_vec_perm_d *d)
8908{
8909 unsigned int i, high, mask, nelt = d->nelt;
8910 rtx out, in0, in1, x;
8911 rtx (*gen) (rtx, rtx, rtx);
8912 enum machine_mode vmode = d->vmode;
8913
8914 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8915 return false;
8916
8917 /* Note that these are little-endian tests.
8918 We correct for big-endian later. */
8919 high = nelt / 2;
8920 if (d->perm[0] == high)
8921 /* Do Nothing. */
8922 ;
8923 else if (d->perm[0] == 0)
8924 high = 0;
8925 else
8926 return false;
8927 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8928
8929 for (i = 0; i < nelt / 2; i++)
8930 {
8931 unsigned elt = (i + high) & mask;
8932 if (d->perm[i * 2] != elt)
8933 return false;
8934 elt = (elt + nelt) & mask;
8935 if (d->perm[i * 2 + 1] != elt)
8936 return false;
8937 }
8938
8939 /* Success! */
8940 if (d->testing_p)
8941 return true;
8942
8943 in0 = d->op0;
8944 in1 = d->op1;
8945 if (BYTES_BIG_ENDIAN)
8946 {
8947 x = in0, in0 = in1, in1 = x;
8948 high = !high;
8949 }
8950 out = d->target;
8951
8952 if (high)
8953 {
8954 switch (vmode)
8955 {
8956 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8957 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8958 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8959 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8960 case V4SImode: gen = gen_aarch64_zip2v4si; break;
8961 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8962 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8963 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8964 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8965 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8966 default:
8967 return false;
8968 }
8969 }
8970 else
8971 {
8972 switch (vmode)
8973 {
8974 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8975 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8976 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8977 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8978 case V4SImode: gen = gen_aarch64_zip1v4si; break;
8979 case V2SImode: gen = gen_aarch64_zip1v2si; break;
8980 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8981 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8982 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8983 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8984 default:
8985 return false;
8986 }
8987 }
8988
8989 emit_insn (gen (out, in0, in1));
8990 return true;
8991}
8992
8993static bool
8994aarch64_evpc_dup (struct expand_vec_perm_d *d)
8995{
8996 rtx (*gen) (rtx, rtx, rtx);
8997 rtx out = d->target;
8998 rtx in0;
8999 enum machine_mode vmode = d->vmode;
9000 unsigned int i, elt, nelt = d->nelt;
9001 rtx lane;
9002
9003 /* TODO: This may not be big-endian safe. */
9004 if (BYTES_BIG_ENDIAN)
9005 return false;
9006
9007 elt = d->perm[0];
9008 for (i = 1; i < nelt; i++)
9009 {
9010 if (elt != d->perm[i])
9011 return false;
9012 }
9013
9014 /* The generic preparation in aarch64_expand_vec_perm_const_1
9015 swaps the operand order and the permute indices if it finds
9016 d->perm[0] to be in the second operand. Thus, we can always
9017 use d->op0 and need not do any extra arithmetic to get the
9018 correct lane number. */
9019 in0 = d->op0;
9020 lane = GEN_INT (elt);
9021
9022 switch (vmode)
9023 {
9024 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9025 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9026 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9027 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9028 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9029 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9030 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9031 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9032 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9033 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9034 default:
9035 return false;
9036 }
9037
9038 emit_insn (gen (out, in0, lane));
9039 return true;
9040}
9041
9042static bool
9043aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9044{
9045 rtx rperm[MAX_VECT_LEN], sel;
9046 enum machine_mode vmode = d->vmode;
9047 unsigned int i, nelt = d->nelt;
9048
9049 if (d->testing_p)
9050 return true;
9051
9052 /* Generic code will try constant permutation twice. Once with the
9053 original mode and again with the elements lowered to QImode.
9054 So wait and don't do the selector expansion ourselves. */
9055 if (vmode != V8QImode && vmode != V16QImode)
9056 return false;
9057
9058 for (i = 0; i < nelt; ++i)
9059 {
9060 int nunits = GET_MODE_NUNITS (vmode);
9061
9062 /* If big-endian and two vectors we end up with a weird mixed-endian
9063 mode on NEON. Reverse the index within each word but not the word
9064 itself. */
9065 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9066 : d->perm[i]);
9067 }
9068 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9069 sel = force_reg (vmode, sel);
9070
9071 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9072 return true;
9073}
9074
9075static bool
9076aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9077{
9078 /* The pattern matching functions above are written to look for a small
9079 number to begin the sequence (0, 1, N/2). If we begin with an index
9080 from the second operand, we can swap the operands. */
9081 if (d->perm[0] >= d->nelt)
9082 {
9083 unsigned i, nelt = d->nelt;
9084 rtx x;
9085
0696116a 9086 gcc_assert (nelt == (nelt & -nelt));
88b08073 9087 for (i = 0; i < nelt; ++i)
0696116a 9088 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
9089
9090 x = d->op0;
9091 d->op0 = d->op1;
9092 d->op1 = x;
9093 }
9094
9095 if (TARGET_SIMD)
9096 {
9097 if (aarch64_evpc_zip (d))
9098 return true;
9099 else if (aarch64_evpc_uzp (d))
9100 return true;
9101 else if (aarch64_evpc_trn (d))
9102 return true;
9103 else if (aarch64_evpc_dup (d))
9104 return true;
9105 return aarch64_evpc_tbl (d);
9106 }
9107 return false;
9108}
9109
9110/* Expand a vec_perm_const pattern. */
9111
9112bool
9113aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9114{
9115 struct expand_vec_perm_d d;
9116 int i, nelt, which;
9117
9118 d.target = target;
9119 d.op0 = op0;
9120 d.op1 = op1;
9121
9122 d.vmode = GET_MODE (target);
9123 gcc_assert (VECTOR_MODE_P (d.vmode));
9124 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9125 d.testing_p = false;
9126
9127 for (i = which = 0; i < nelt; ++i)
9128 {
9129 rtx e = XVECEXP (sel, 0, i);
9130 int ei = INTVAL (e) & (2 * nelt - 1);
9131 which |= (ei < nelt ? 1 : 2);
9132 d.perm[i] = ei;
9133 }
9134
9135 switch (which)
9136 {
9137 default:
9138 gcc_unreachable ();
9139
9140 case 3:
9141 d.one_vector_p = false;
9142 if (!rtx_equal_p (op0, op1))
9143 break;
9144
9145 /* The elements of PERM do not suggest that only the first operand
9146 is used, but both operands are identical. Allow easier matching
9147 of the permutation by folding the permutation into the single
9148 input vector. */
9149 /* Fall Through. */
9150 case 2:
9151 for (i = 0; i < nelt; ++i)
9152 d.perm[i] &= nelt - 1;
9153 d.op0 = op1;
9154 d.one_vector_p = true;
9155 break;
9156
9157 case 1:
9158 d.op1 = op0;
9159 d.one_vector_p = true;
9160 break;
9161 }
9162
9163 return aarch64_expand_vec_perm_const_1 (&d);
9164}
9165
9166static bool
9167aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
9168 const unsigned char *sel)
9169{
9170 struct expand_vec_perm_d d;
9171 unsigned int i, nelt, which;
9172 bool ret;
9173
9174 d.vmode = vmode;
9175 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9176 d.testing_p = true;
9177 memcpy (d.perm, sel, nelt);
9178
9179 /* Calculate whether all elements are in one vector. */
9180 for (i = which = 0; i < nelt; ++i)
9181 {
9182 unsigned char e = d.perm[i];
9183 gcc_assert (e < 2 * nelt);
9184 which |= (e < nelt ? 1 : 2);
9185 }
9186
9187 /* If all elements are from the second vector, reindex as if from the
9188 first vector. */
9189 if (which == 2)
9190 for (i = 0; i < nelt; ++i)
9191 d.perm[i] -= nelt;
9192
9193 /* Check whether the mask can be applied to a single vector. */
9194 d.one_vector_p = (which != 3);
9195
9196 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
9197 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
9198 if (!d.one_vector_p)
9199 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
9200
9201 start_sequence ();
9202 ret = aarch64_expand_vec_perm_const_1 (&d);
9203 end_sequence ();
9204
9205 return ret;
9206}
9207
9208/* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
9209bool
9210aarch64_cannot_change_mode_class (enum machine_mode from,
9211 enum machine_mode to,
9212 enum reg_class rclass)
9213{
9214 /* Full-reg subregs are allowed on general regs or any class if they are
9215 the same size. */
9216 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9217 || !reg_classes_intersect_p (FP_REGS, rclass))
9218 return false;
9219
9220 /* Limited combinations of subregs are safe on FPREGs. Particularly,
9221 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9222 2. Scalar to Scalar for integer modes or same size float modes.
9223 3. Vector to Vector modes.
9224 4. On little-endian only, Vector-Structure to Vector modes. */
9225 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
9226 {
9227 if (aarch64_vector_mode_supported_p (from)
9228 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
9229 return false;
9230
9231 if (GET_MODE_NUNITS (from) == 1
9232 && GET_MODE_NUNITS (to) == 1
9233 && (GET_MODE_CLASS (from) == MODE_INT
9234 || from == to))
9235 return false;
9236
9237 if (aarch64_vector_mode_supported_p (from)
9238 && aarch64_vector_mode_supported_p (to))
9239 return false;
9240
9241 /* Within a vector structure straddling multiple vector registers
9242 we are in a mixed-endian representation. As such, we can't
9243 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
9244 switch between vectors and vector structures cheaply. */
9245 if (!BYTES_BIG_ENDIAN)
9246 if ((aarch64_vector_mode_supported_p (from)
9247 && aarch64_vect_struct_mode_p (to))
9248 || (aarch64_vector_mode_supported_p (to)
9249 && aarch64_vect_struct_mode_p (from)))
9250 return false;
9251 }
9252
9253 return true;
9254}
9255
9256/* Implement MODES_TIEABLE_P. */
9257
9258bool
9259aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9260{
9261 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
9262 return true;
9263
9264 /* We specifically want to allow elements of "structure" modes to
9265 be tieable to the structure. This more general condition allows
9266 other rarer situations too. */
9267 if (TARGET_SIMD
9268 && aarch64_vector_mode_p (mode1)
9269 && aarch64_vector_mode_p (mode2))
9270 return true;
9271
9272 return false;
9273}
9274
9275#undef TARGET_ADDRESS_COST
9276#define TARGET_ADDRESS_COST aarch64_address_cost
9277
9278/* This hook determines whether unnamed bitfields affect the alignment
9279 of the containing structure. The hook returns true if the structure
9280 should inherit the alignment requirements of an unnamed bitfield's
9281 type. */
9282#undef TARGET_ALIGN_ANON_BITFIELD
9283#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
9284
9285#undef TARGET_ASM_ALIGNED_DI_OP
9286#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
9287
9288#undef TARGET_ASM_ALIGNED_HI_OP
9289#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
9290
9291#undef TARGET_ASM_ALIGNED_SI_OP
9292#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
9293
9294#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9295#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
9296 hook_bool_const_tree_hwi_hwi_const_tree_true
9297
9298#undef TARGET_ASM_FILE_START
9299#define TARGET_ASM_FILE_START aarch64_start_file
9300
9301#undef TARGET_ASM_OUTPUT_MI_THUNK
9302#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
9303
9304#undef TARGET_ASM_SELECT_RTX_SECTION
9305#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
9306
9307#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
9308#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
9309
9310#undef TARGET_BUILD_BUILTIN_VA_LIST
9311#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
9312
9313#undef TARGET_CALLEE_COPIES
9314#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
9315
9316#undef TARGET_CAN_ELIMINATE
9317#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
9318
9319#undef TARGET_CANNOT_FORCE_CONST_MEM
9320#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
9321
9322#undef TARGET_CONDITIONAL_REGISTER_USAGE
9323#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
9324
9325/* Only the least significant bit is used for initialization guard
9326 variables. */
9327#undef TARGET_CXX_GUARD_MASK_BIT
9328#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
9329
9330#undef TARGET_C_MODE_FOR_SUFFIX
9331#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
9332
9333#ifdef TARGET_BIG_ENDIAN_DEFAULT
9334#undef TARGET_DEFAULT_TARGET_FLAGS
9335#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
9336#endif
9337
9338#undef TARGET_CLASS_MAX_NREGS
9339#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9340
9341#undef TARGET_BUILTIN_DECL
9342#define TARGET_BUILTIN_DECL aarch64_builtin_decl
9343
9344#undef TARGET_EXPAND_BUILTIN
9345#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9346
9347#undef TARGET_EXPAND_BUILTIN_VA_START
9348#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9349
9350#undef TARGET_FOLD_BUILTIN
9351#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9352
9353#undef TARGET_FUNCTION_ARG
9354#define TARGET_FUNCTION_ARG aarch64_function_arg
9355
9356#undef TARGET_FUNCTION_ARG_ADVANCE
9357#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9358
9359#undef TARGET_FUNCTION_ARG_BOUNDARY
9360#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9361
9362#undef TARGET_FUNCTION_OK_FOR_SIBCALL
9363#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9364
9365#undef TARGET_FUNCTION_VALUE
9366#define TARGET_FUNCTION_VALUE aarch64_function_value
9367
9368#undef TARGET_FUNCTION_VALUE_REGNO_P
9369#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9370
9371#undef TARGET_FRAME_POINTER_REQUIRED
9372#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9373
9374#undef TARGET_GIMPLE_FOLD_BUILTIN
9375#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9376
9377#undef TARGET_GIMPLIFY_VA_ARG_EXPR
9378#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9379
9380#undef TARGET_INIT_BUILTINS
9381#define TARGET_INIT_BUILTINS aarch64_init_builtins
9382
9383#undef TARGET_LEGITIMATE_ADDRESS_P
9384#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9385
9386#undef TARGET_LEGITIMATE_CONSTANT_P
9387#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9388
9389#undef TARGET_LIBGCC_CMP_RETURN_MODE
9390#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9391
9392#undef TARGET_LRA_P
9393#define TARGET_LRA_P aarch64_lra_p
9394
9395#undef TARGET_MANGLE_TYPE
9396#define TARGET_MANGLE_TYPE aarch64_mangle_type
9397
9398#undef TARGET_MEMORY_MOVE_COST
9399#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9400
9401#undef TARGET_MUST_PASS_IN_STACK
9402#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9403
9404/* This target hook should return true if accesses to volatile bitfields
9405 should use the narrowest mode possible. It should return false if these
9406 accesses should use the bitfield container type. */
9407#undef TARGET_NARROW_VOLATILE_BITFIELD
9408#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9409
9410#undef TARGET_OPTION_OVERRIDE
9411#define TARGET_OPTION_OVERRIDE aarch64_override_options
9412
9413#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9414#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9415 aarch64_override_options_after_change
9416
9417#undef TARGET_PASS_BY_REFERENCE
9418#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9419
9420#undef TARGET_PREFERRED_RELOAD_CLASS
9421#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9422
9423#undef TARGET_SECONDARY_RELOAD
9424#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9425
9426#undef TARGET_SHIFT_TRUNCATION_MASK
9427#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9428
9429#undef TARGET_SETUP_INCOMING_VARARGS
9430#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9431
9432#undef TARGET_STRUCT_VALUE_RTX
9433#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
9434
9435#undef TARGET_REGISTER_MOVE_COST
9436#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9437
9438#undef TARGET_RETURN_IN_MEMORY
9439#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9440
9441#undef TARGET_RETURN_IN_MSB
9442#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9443
9444#undef TARGET_RTX_COSTS
7cc2145f 9445#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 9446
9447#undef TARGET_SCHED_ISSUE_RATE
9448#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9449
9450#undef TARGET_TRAMPOLINE_INIT
9451#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9452
9453#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9454#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9455
9456#undef TARGET_VECTOR_MODE_SUPPORTED_P
9457#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9458
9459#undef TARGET_ARRAY_MODE_SUPPORTED_P
9460#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9461
9462#undef TARGET_VECTORIZE_ADD_STMT_COST
9463#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9464
9465#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9466#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9467 aarch64_builtin_vectorization_cost
9468
9469#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9470#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9471
9472#undef TARGET_VECTORIZE_BUILTINS
9473#define TARGET_VECTORIZE_BUILTINS
9474
9475#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9476#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9477 aarch64_builtin_vectorized_function
9478
9479#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9480#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9481 aarch64_autovectorize_vector_sizes
9482
9483#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
9484#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
9485 aarch64_atomic_assign_expand_fenv
9486
9487/* Section anchor support. */
9488
9489#undef TARGET_MIN_ANCHOR_OFFSET
9490#define TARGET_MIN_ANCHOR_OFFSET -256
9491
9492/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9493 byte offset; we can do much more for larger data types, but have no way
9494 to determine the size of the access. We assume accesses are aligned. */
9495#undef TARGET_MAX_ANCHOR_OFFSET
9496#define TARGET_MAX_ANCHOR_OFFSET 4095
9497
9498#undef TARGET_VECTOR_ALIGNMENT
9499#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9500
9501#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9502#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9503 aarch64_simd_vector_alignment_reachable
9504
9505/* vec_perm support. */
9506
9507#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9508#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9509 aarch64_vectorize_vec_perm_const_ok
9510
70f09188 9511
706b2314 9512#undef TARGET_FIXED_CONDITION_CODE_REGS
9513#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9514
9515#undef TARGET_FLAGS_REGNUM
9516#define TARGET_FLAGS_REGNUM CC_REGNUM
9517
9518struct gcc_target targetm = TARGET_INITIALIZER;
9519
9520#include "gt-aarch64.h"