1/* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "insn-codes.h"
26#include "rtl.h"
27#include "insn-attr.h"
28#include "tree.h"
29#include "stringpool.h"
30#include "stor-layout.h"
31#include "calls.h"
32#include "varasm.h"
33#include "regs.h"
34#include "dominance.h"
35#include "cfg.h"
36#include "cfgrtl.h"
37#include "cfganal.h"
38#include "lcm.h"
39#include "cfgbuild.h"
40#include "cfgcleanup.h"
41#include "predict.h"
42#include "basic-block.h"
43#include "df.h"
44#include "hard-reg-set.h"
45#include "output.h"
46#include "expr.h"
47#include "reload.h"
48#include "toplev.h"
49#include "target.h"
50#include "target-def.h"
51#include "targhooks.h"
52#include "ggc.h"
53#include "hashtab.h"
54#include "hash-set.h"
55#include "vec.h"
56#include "machmode.h"
57#include "input.h"
58#include "function.h"
59#include "tm_p.h"
60#include "recog.h"
61#include "langhooks.h"
62#include "diagnostic-core.h"
63#include "hash-table.h"
64#include "tree-ssa-alias.h"
65#include "internal-fn.h"
66#include "gimple-fold.h"
67#include "tree-eh.h"
68#include "gimple-expr.h"
69#include "is-a.h"
70#include "gimple.h"
71#include "gimplify.h"
72#include "optabs.h"
73#include "dwarf2.h"
74#include "cfgloop.h"
75#include "tree-vectorizer.h"
76#include "aarch64-cost-tables.h"
77#include "dumpfile.h"
78#include "builtins.h"
79#include "rtl-iter.h"
80#include "tm-constrs.h"
81
82/* Defined for convenience. */
83#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
84
85/* Classifies an address.
86
87 ADDRESS_REG_IMM
88 A simple base register plus immediate offset.
89
90 ADDRESS_REG_WB
91 A base register indexed by immediate offset with writeback.
92
93 ADDRESS_REG_REG
94 A base register indexed by (optionally scaled) register.
95
96 ADDRESS_REG_UXTW
97 A base register indexed by (optionally scaled) zero-extended register.
98
99 ADDRESS_REG_SXTW
100 A base register indexed by (optionally scaled) sign-extended register.
101
102 ADDRESS_LO_SUM
103 A LO_SUM rtx with a base register and "LO12" symbol relocation.
104
105 ADDRESS_SYMBOLIC:
106 A constant symbolic address, in pc-relative literal pool. */
107
108enum aarch64_address_type {
109 ADDRESS_REG_IMM,
110 ADDRESS_REG_WB,
111 ADDRESS_REG_REG,
112 ADDRESS_REG_UXTW,
113 ADDRESS_REG_SXTW,
114 ADDRESS_LO_SUM,
115 ADDRESS_SYMBOLIC
116};
117
118struct aarch64_address_info {
119 enum aarch64_address_type type;
120 rtx base;
121 rtx offset;
122 int shift;
123 enum aarch64_symbol_type symbol_type;
124};
125
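/* Information describing a candidate Advanced SIMD immediate: the immediate
   VALUE itself, the left SHIFT applied to it, the ELEMENT_WIDTH in bits, and
   whether the inverted (MVN) form or an MSL (shift-ones) shift is used.  */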
126struct simd_immediate_info
127{
128 rtx value;
129 int shift;
130 int element_width;
131 bool mvn;
132 bool msl;
133};
134
135/* The current code model. */
136enum aarch64_code_model aarch64_cmodel;
137
138#ifdef HAVE_AS_TLS
139#undef TARGET_HAVE_TLS
140#define TARGET_HAVE_TLS 1
141#endif
142
143static bool aarch64_lra_p (void);
144static bool aarch64_composite_type_p (const_tree, machine_mode);
145static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
146 const_tree,
147 machine_mode *, int *,
148 bool *);
149static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
150static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
151static void aarch64_override_options_after_change (void);
152static bool aarch64_vector_mode_supported_p (machine_mode);
153static unsigned bit_count (unsigned HOST_WIDE_INT);
154static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
155 const unsigned char *sel);
156static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
157
158/* Major revision number of the ARM Architecture implemented by the target. */
159unsigned aarch64_architecture_version;
160
161/* The processor for which instructions should be scheduled. */
162enum aarch64_processor aarch64_tune = cortexa53;
163
164/* The current tuning set. */
165const struct tune_params *aarch64_tune_params;
166
167/* Mask to specify which instructions we are allowed to generate. */
168unsigned long aarch64_isa_flags = 0;
169
170/* Mask to specify which instruction scheduling options should be used. */
171unsigned long aarch64_tune_flags = 0;
172
173/* Tuning parameters. */
174
175#if HAVE_DESIGNATED_INITIALIZERS
176#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
177#else
178#define NAMED_PARAM(NAME, VAL) (VAL)
179#endif
180
181#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182__extension__
183#endif
184
185#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
186__extension__
187#endif
188static const struct cpu_addrcost_table generic_addrcost_table =
189{
190#if HAVE_DESIGNATED_INITIALIZERS
191 .addr_scale_costs =
192#endif
193 {
194 NAMED_PARAM (hi, 0),
195 NAMED_PARAM (si, 0),
196 NAMED_PARAM (di, 0),
197 NAMED_PARAM (ti, 0),
198 },
199 NAMED_PARAM (pre_modify, 0),
200 NAMED_PARAM (post_modify, 0),
201 NAMED_PARAM (register_offset, 0),
202 NAMED_PARAM (register_extend, 0),
203 NAMED_PARAM (imm_offset, 0)
204};
205
206#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
207__extension__
208#endif
209static const struct cpu_addrcost_table cortexa57_addrcost_table =
210{
211#if HAVE_DESIGNATED_INITIALIZERS
212 .addr_scale_costs =
213#endif
214 {
215 NAMED_PARAM (hi, 1),
216 NAMED_PARAM (si, 0),
217 NAMED_PARAM (di, 0),
218 NAMED_PARAM (ti, 1),
219 },
220 NAMED_PARAM (pre_modify, 0),
221 NAMED_PARAM (post_modify, 0),
222 NAMED_PARAM (register_offset, 0),
223 NAMED_PARAM (register_extend, 0),
224 NAMED_PARAM (imm_offset, 0),
225};
226
227#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
228__extension__
229#endif
230static const struct cpu_regmove_cost generic_regmove_cost =
231{
232 NAMED_PARAM (GP2GP, 1),
233 /* Avoid the use of slow int<->fp moves for spilling by setting
234 their cost higher than memmov_cost. */
235 NAMED_PARAM (GP2FP, 5),
236 NAMED_PARAM (FP2GP, 5),
237 NAMED_PARAM (FP2FP, 2)
238};
239
240static const struct cpu_regmove_cost cortexa57_regmove_cost =
241{
242 NAMED_PARAM (GP2GP, 1),
243 /* Avoid the use of slow int<->fp moves for spilling by setting
244 their cost higher than memmov_cost. */
245 NAMED_PARAM (GP2FP, 5),
246 NAMED_PARAM (FP2GP, 5),
247 NAMED_PARAM (FP2FP, 2)
248};
249
250static const struct cpu_regmove_cost cortexa53_regmove_cost =
251{
252 NAMED_PARAM (GP2GP, 1),
253 /* Avoid the use of slow int<->fp moves for spilling by setting
254 their cost higher than memmov_cost. */
255 NAMED_PARAM (GP2FP, 5),
256 NAMED_PARAM (FP2GP, 5),
257 NAMED_PARAM (FP2FP, 2)
258};
259
260static const struct cpu_regmove_cost thunderx_regmove_cost =
261{
262 NAMED_PARAM (GP2GP, 2),
263 NAMED_PARAM (GP2FP, 2),
264 NAMED_PARAM (FP2GP, 6),
265 NAMED_PARAM (FP2FP, 4)
266};
267
268/* Generic costs for vector insn classes. */
269#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
270__extension__
271#endif
272static const struct cpu_vector_cost generic_vector_cost =
273{
274 NAMED_PARAM (scalar_stmt_cost, 1),
275 NAMED_PARAM (scalar_load_cost, 1),
276 NAMED_PARAM (scalar_store_cost, 1),
277 NAMED_PARAM (vec_stmt_cost, 1),
278 NAMED_PARAM (vec_to_scalar_cost, 1),
279 NAMED_PARAM (scalar_to_vec_cost, 1),
280 NAMED_PARAM (vec_align_load_cost, 1),
281 NAMED_PARAM (vec_unalign_load_cost, 1),
282 NAMED_PARAM (vec_unalign_store_cost, 1),
283 NAMED_PARAM (vec_store_cost, 1),
284 NAMED_PARAM (cond_taken_branch_cost, 3),
285 NAMED_PARAM (cond_not_taken_branch_cost, 1)
286};
287
288/* Costs for vector insn classes for the Cortex-A57. */
289#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
290__extension__
291#endif
292static const struct cpu_vector_cost cortexa57_vector_cost =
293{
294 NAMED_PARAM (scalar_stmt_cost, 1),
295 NAMED_PARAM (scalar_load_cost, 4),
296 NAMED_PARAM (scalar_store_cost, 1),
297 NAMED_PARAM (vec_stmt_cost, 3),
298 NAMED_PARAM (vec_to_scalar_cost, 8),
299 NAMED_PARAM (scalar_to_vec_cost, 8),
300 NAMED_PARAM (vec_align_load_cost, 5),
301 NAMED_PARAM (vec_unalign_load_cost, 5),
302 NAMED_PARAM (vec_unalign_store_cost, 1),
303 NAMED_PARAM (vec_store_cost, 1),
304 NAMED_PARAM (cond_taken_branch_cost, 1),
305 NAMED_PARAM (cond_not_taken_branch_cost, 1)
306};
307
308#define AARCH64_FUSE_NOTHING (0)
309#define AARCH64_FUSE_MOV_MOVK (1 << 0)
310#define AARCH64_FUSE_ADRP_ADD (1 << 1)
311#define AARCH64_FUSE_MOVK_MOVK (1 << 2)
312#define AARCH64_FUSE_ADRP_LDR (1 << 3)
313#define AARCH64_FUSE_CMP_BRANCH (1 << 4)
314
315#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
316__extension__
317#endif
318static const struct tune_params generic_tunings =
319{
320 &cortexa57_extra_costs,
321 &generic_addrcost_table,
322 &generic_regmove_cost,
323 &generic_vector_cost,
324 NAMED_PARAM (memmov_cost, 4),
325 NAMED_PARAM (issue_rate, 2),
326 NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING),
327 2, /* int_reassoc_width. */
328 4, /* fp_reassoc_width. */
329 1 /* vec_reassoc_width. */
330};
331
332static const struct tune_params cortexa53_tunings =
333{
334 &cortexa53_extra_costs,
335 &generic_addrcost_table,
336 &cortexa53_regmove_cost,
337 &generic_vector_cost,
338 NAMED_PARAM (memmov_cost, 4),
339 NAMED_PARAM (issue_rate, 2),
340 NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
341 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR)),
342 2, /* int_reassoc_width. */
343 4, /* fp_reassoc_width. */
344 1 /* vec_reassoc_width. */
345};
346
347static const struct tune_params cortexa57_tunings =
348{
349 &cortexa57_extra_costs,
350 &cortexa57_addrcost_table,
351 &cortexa57_regmove_cost,
352 &cortexa57_vector_cost,
353 NAMED_PARAM (memmov_cost, 4),
354 NAMED_PARAM (issue_rate, 3),
355 NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK)),
356 2, /* int_reassoc_width. */
357 4, /* fp_reassoc_width. */
358 1 /* vec_reassoc_width. */
359};
360
361static const struct tune_params thunderx_tunings =
362{
363 &thunderx_extra_costs,
364 &generic_addrcost_table,
365 &thunderx_regmove_cost,
366 &generic_vector_cost,
367 NAMED_PARAM (memmov_cost, 6),
368 NAMED_PARAM (issue_rate, 2),
369 NAMED_PARAM (fuseable_ops, AARCH64_FUSE_CMP_BRANCH),
370 2, /* int_reassoc_width. */
371 4, /* fp_reassoc_width. */
372 1 /* vec_reassoc_width. */
373};
374
375/* A processor implementing AArch64. */
376struct processor
377{
378 const char *const name;
379 enum aarch64_processor core;
380 const char *arch;
381 unsigned architecture_version;
382 const unsigned long flags;
383 const struct tune_params *const tune;
384};
385
386/* Processor cores implementing AArch64. */
387static const struct processor all_cores[] =
388{
389#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS) \
390 {NAME, SCHED, #ARCH, ARCH,\
391 FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
392#include "aarch64-cores.def"
393#undef AARCH64_CORE
394 {"generic", cortexa53, "8", 8,\
395 AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
396 {NULL, aarch64_none, NULL, 0, 0, NULL}
397};
398
399/* Architectures implementing AArch64. */
400static const struct processor all_architectures[] =
401{
402#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
403 {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
404#include "aarch64-arches.def"
405#undef AARCH64_ARCH
406 {NULL, aarch64_none, NULL, 0, 0, NULL}
407};
408
409/* Target specification. These are populated as commandline arguments
410 are processed, or NULL if not specified. */
411static const struct processor *selected_arch;
412static const struct processor *selected_cpu;
413static const struct processor *selected_tune;
414
415#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
416
417/* An ISA extension in the co-processor and main instruction set space. */
418struct aarch64_option_extension
419{
420 const char *const name;
421 const unsigned long flags_on;
422 const unsigned long flags_off;
423};
424
425/* ISA extensions in AArch64. */
426static const struct aarch64_option_extension all_extensions[] =
427{
428#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
429 {NAME, FLAGS_ON, FLAGS_OFF},
430#include "aarch64-option-extensions.def"
431#undef AARCH64_OPT_EXTENSION
432 {NULL, 0, 0}
433};
434
435/* Used to track the size of an address when generating a pre/post
436 increment address. */
437static machine_mode aarch64_memory_reference_mode;
438
439/* Used to force GTY into this file. */
440static GTY(()) int gty_dummy;
441
442/* A table of valid AArch64 "bitmask immediate" values for
443 logical instructions. */
444
445#define AARCH64_NUM_BITMASKS 5334
446static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
447
448typedef enum aarch64_cond_code
449{
450 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
451 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
452 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
453}
454aarch64_cc;
455
456#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
457
458/* The condition codes of the processor, and the inverse function. */
459static const char * const aarch64_condition_codes[] =
460{
461 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
462 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
463};
464
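/* Return the reassociation width (the number of independent operand chains
   exposed during reassociation) for operations in MODE, taken from the
   current tuning parameters.  */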
465static int
466aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
467 enum machine_mode mode)
468{
469 if (VECTOR_MODE_P (mode))
470 return aarch64_tune_params->vec_reassoc_width;
471 if (INTEGRAL_MODE_P (mode))
472 return aarch64_tune_params->int_reassoc_width;
473 if (FLOAT_MODE_P (mode))
474 return aarch64_tune_params->fp_reassoc_width;
475 return 1;
476}
477
478/* Provide a mapping from gcc register numbers to dwarf register numbers. */
479unsigned
480aarch64_dbx_register_number (unsigned regno)
481{
482 if (GP_REGNUM_P (regno))
483 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
484 else if (regno == SP_REGNUM)
485 return AARCH64_DWARF_SP;
486 else if (FP_REGNUM_P (regno))
487 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
488
489 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
490 equivalent DWARF register. */
491 return DWARF_FRAME_REGISTERS;
492}
493
494/* Return TRUE if MODE is any of the large INT modes. */
495static bool
496aarch64_vect_struct_mode_p (machine_mode mode)
497{
498 return mode == OImode || mode == CImode || mode == XImode;
499}
500
501/* Return TRUE if MODE is any of the vector modes. */
502static bool
503aarch64_vector_mode_p (machine_mode mode)
504{
505 return aarch64_vector_mode_supported_p (mode)
506 || aarch64_vect_struct_mode_p (mode);
507}
508
509/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
510static bool
511aarch64_array_mode_supported_p (machine_mode mode,
512 unsigned HOST_WIDE_INT nelems)
513{
514 if (TARGET_SIMD
515 && AARCH64_VALID_SIMD_QREG_MODE (mode)
516 && (nelems >= 2 && nelems <= 4))
517 return true;
518
519 return false;
520}
521
522/* Implement HARD_REGNO_NREGS. */
523
524int
525aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
526{
527 switch (aarch64_regno_regclass (regno))
528 {
529 case FP_REGS:
530 case FP_LO_REGS:
531 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
532 default:
533 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
534 }
535 gcc_unreachable ();
536}
537
538/* Implement HARD_REGNO_MODE_OK. */
539
540int
541aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
542{
543 if (GET_MODE_CLASS (mode) == MODE_CC)
544 return regno == CC_REGNUM;
545
546 if (regno == SP_REGNUM)
547 /* The purpose of comparing with ptr_mode is to support the
548 global register variable associated with the stack pointer
549 register via the syntax of asm ("wsp") in ILP32. */
550 return mode == Pmode || mode == ptr_mode;
551
552 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
553 return mode == Pmode;
554
555 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
556 return 1;
557
558 if (FP_REGNUM_P (regno))
559 {
560 if (aarch64_vect_struct_mode_p (mode))
561 return
562 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
563 else
564 return 1;
565 }
566
567 return 0;
568}
569
570/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
571machine_mode
572aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
573 machine_mode mode)
574{
575 /* Handle modes that fit within single registers. */
576 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
577 {
578 if (GET_MODE_SIZE (mode) >= 4)
579 return mode;
580 else
581 return SImode;
582 }
583 /* Fall back to generic for multi-reg and very large modes. */
584 else
585 return choose_hard_reg_mode (regno, nregs, false);
586}
587
588/* Return true if calls to DECL should be treated as
589 long-calls (i.e. called via a register). */
590static bool
591aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
592{
593 return false;
594}
595
596/* Return true if calls to symbol-ref SYM should be treated as
597 long-calls (i.e. called via a register). */
598bool
599aarch64_is_long_call_p (rtx sym)
600{
601 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
602}
603
604/* Return true if the offsets to a zero/sign-extract operation
605 represent an expression that matches an extend operation. The
606 operands represent the parameters from
607
608 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
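/* For example (illustrative values only): EXTRACT_IMM == 34 together with
   MULT_IMM == 4 describes a 32-bit value extended and shifted left by 2,
   i.e. an {U,S}XTW #2 style extend.  */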
609bool
610aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
611 rtx extract_imm)
612{
613 HOST_WIDE_INT mult_val, extract_val;
614
615 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
616 return false;
617
618 mult_val = INTVAL (mult_imm);
619 extract_val = INTVAL (extract_imm);
620
621 if (extract_val > 8
622 && extract_val < GET_MODE_BITSIZE (mode)
623 && exact_log2 (extract_val & ~7) > 0
624 && (extract_val & 7) <= 4
625 && mult_val == (1 << (extract_val & 7)))
626 return true;
627
628 return false;
629}
630
631/* Emit an insn that's a simple single-set. Both the operands must be
632 known to be valid. */
633inline static rtx
634emit_set_insn (rtx x, rtx y)
635{
636 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
637}
638
639/* X and Y are two things to compare using CODE. Emit the compare insn and
640 return the rtx for register 0 in the proper mode. */
641rtx
642aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
643{
644 machine_mode mode = SELECT_CC_MODE (code, x, y);
645 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
646
647 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
648 return cc_reg;
649}
650
651/* Build the SYMBOL_REF for __tls_get_addr. */
652
653static GTY(()) rtx tls_get_addr_libfunc;
654
655rtx
656aarch64_tls_get_addr (void)
657{
658 if (!tls_get_addr_libfunc)
659 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
660 return tls_get_addr_libfunc;
661}
662
663/* Return the TLS model to use for ADDR. */
664
665static enum tls_model
666tls_symbolic_operand_type (rtx addr)
667{
668 enum tls_model tls_kind = TLS_MODEL_NONE;
669 rtx sym, addend;
670
671 if (GET_CODE (addr) == CONST)
672 {
673 split_const (addr, &sym, &addend);
674 if (GET_CODE (sym) == SYMBOL_REF)
675 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
676 }
677 else if (GET_CODE (addr) == SYMBOL_REF)
678 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
679
680 return tls_kind;
681}
682
683/* We'll allow lo_sum's in addresses in our legitimate addresses
684 so that combine would take care of combining addresses where
685 necessary, but for generation purposes, we'll generate the address
686 as :
687 RTL Absolute
688 tmp = hi (symbol_ref); adrp x1, foo
689 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
690 nop
691
692 PIC TLS
693 adrp x1, :got:foo adrp tmp, :tlsgd:foo
694 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
695 bl __tls_get_addr
696 nop
697
698 Load TLS symbol, depending on TLS mechanism and TLS access model.
699
700 Global Dynamic - Traditional TLS:
701 adrp tmp, :tlsgd:imm
702 add dest, tmp, #:tlsgd_lo12:imm
703 bl __tls_get_addr
704
705 Global Dynamic - TLS Descriptors:
706 adrp dest, :tlsdesc:imm
707 ldr tmp, [dest, #:tlsdesc_lo12:imm]
708 add dest, dest, #:tlsdesc_lo12:imm
709 blr tmp
710 mrs tp, tpidr_el0
711 add dest, dest, tp
712
713 Initial Exec:
714 mrs tp, tpidr_el0
715 adrp tmp, :gottprel:imm
716 ldr dest, [tmp, #:gottprel_lo12:imm]
717 add dest, dest, tp
718
719 Local Exec:
720 mrs tp, tpidr_el0
721 add t0, tp, #:tprel_hi12:imm
722 add t0, #:tprel_lo12_nc:imm
723*/
724
725static void
726aarch64_load_symref_appropriately (rtx dest, rtx imm,
727 enum aarch64_symbol_type type)
728{
729 switch (type)
730 {
731 case SYMBOL_SMALL_ABSOLUTE:
732 {
733 /* In ILP32, the mode of dest can be either SImode or DImode. */
734 rtx tmp_reg = dest;
735 machine_mode mode = GET_MODE (dest);
736
737 gcc_assert (mode == Pmode || mode == ptr_mode);
738
739 if (can_create_pseudo_p ())
740 tmp_reg = gen_reg_rtx (mode);
741
742 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
743 emit_insn (gen_add_losym (dest, tmp_reg, imm));
744 return;
745 }
746
747 case SYMBOL_TINY_ABSOLUTE:
748 emit_insn (gen_rtx_SET (Pmode, dest, imm));
749 return;
750
751 case SYMBOL_SMALL_GOT:
752 {
753 /* In ILP32, the mode of dest can be either SImode or DImode,
754 while the got entry is always of SImode size. The mode of
755 dest depends on how dest is used: if dest is assigned to a
756 pointer (e.g. in the memory), it has SImode; it may have
757 DImode if dest is dereferenced to access the memory.
758 This is why we have to handle three different ldr_got_small
759 patterns here (two patterns for ILP32). */
760 rtx tmp_reg = dest;
761 machine_mode mode = GET_MODE (dest);
762
763 if (can_create_pseudo_p ())
764 tmp_reg = gen_reg_rtx (mode);
765
766 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
767 if (mode == ptr_mode)
768 {
769 if (mode == DImode)
770 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
771 else
772 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
773 }
774 else
775 {
776 gcc_assert (mode == Pmode);
777 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
778 }
779
780 return;
781 }
782
783 case SYMBOL_SMALL_TLSGD:
784 {
785 rtx_insn *insns;
786 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
787
788 start_sequence ();
789 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
790 insns = get_insns ();
791 end_sequence ();
792
793 RTL_CONST_CALL_P (insns) = 1;
794 emit_libcall_block (insns, dest, result, imm);
795 return;
796 }
797
798 case SYMBOL_SMALL_TLSDESC:
799 {
800 machine_mode mode = GET_MODE (dest);
801 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
802 rtx tp;
803
804 gcc_assert (mode == Pmode || mode == ptr_mode);
805
806 /* In ILP32, the got entry is always of SImode size. Unlike
807 small GOT, the dest is fixed at reg 0. */
808 if (TARGET_ILP32)
809 emit_insn (gen_tlsdesc_small_si (imm));
810 else
811 emit_insn (gen_tlsdesc_small_di (imm));
812 tp = aarch64_load_tp (NULL);
813
814 if (mode != Pmode)
815 tp = gen_lowpart (mode, tp);
816
817 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
818 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
819 return;
820 }
821
822 case SYMBOL_SMALL_GOTTPREL:
823 {
824 /* In ILP32, the mode of dest can be either SImode or DImode,
825 while the got entry is always of SImode size. The mode of
826 dest depends on how dest is used: if dest is assigned to a
827 pointer (e.g. in the memory), it has SImode; it may have
828 DImode if dest is dereferenced to access the memory.
829 This is why we have to handle three different tlsie_small
830 patterns here (two patterns for ILP32). */
831 machine_mode mode = GET_MODE (dest);
832 rtx tmp_reg = gen_reg_rtx (mode);
833 rtx tp = aarch64_load_tp (NULL);
834
835 if (mode == ptr_mode)
836 {
837 if (mode == DImode)
838 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
839 else
840 {
841 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
842 tp = gen_lowpart (mode, tp);
843 }
844 }
845 else
846 {
847 gcc_assert (mode == Pmode);
848 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
849 }
850
851 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
852 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
853 return;
854 }
855
856 case SYMBOL_SMALL_TPREL:
857 {
858 rtx tp = aarch64_load_tp (NULL);
859 emit_insn (gen_tlsle_small (dest, tp, imm));
860 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
861 return;
862 }
863
864 case SYMBOL_TINY_GOT:
865 emit_insn (gen_ldr_got_tiny (dest, imm));
866 return;
867
868 default:
869 gcc_unreachable ();
870 }
871}
872
873/* Emit a move from SRC to DEST. Assume that the move expanders can
874 handle all moves if !can_create_pseudo_p (). The distinction is
875 important because, unlike emit_move_insn, the move expanders know
876 how to force Pmode objects into the constant pool even when the
877 constant pool address is not itself legitimate. */
878static rtx
879aarch64_emit_move (rtx dest, rtx src)
880{
881 return (can_create_pseudo_p ()
882 ? emit_move_insn (dest, src)
883 : emit_move_insn_1 (dest, src));
884}
885
886/* Split a 128-bit move operation into two 64-bit move operations,
887 taking care to handle partial overlap of register to register
888 copies. Special cases are needed when moving between GP regs and
889 FP regs. SRC can be a register, constant or memory; DST a register
890 or memory. If either operand is memory it must not have any side
891 effects. */
892void
893aarch64_split_128bit_move (rtx dst, rtx src)
894{
895 rtx dst_lo, dst_hi;
896 rtx src_lo, src_hi;
897
898 machine_mode mode = GET_MODE (dst);
899
900 gcc_assert (mode == TImode || mode == TFmode);
901 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
902 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
903
904 if (REG_P (dst) && REG_P (src))
905 {
906 int src_regno = REGNO (src);
907 int dst_regno = REGNO (dst);
908
909 /* Handle FP <-> GP regs. */
910 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
911 {
912 src_lo = gen_lowpart (word_mode, src);
913 src_hi = gen_highpart (word_mode, src);
914
915 if (mode == TImode)
916 {
917 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
918 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
919 }
920 else
921 {
922 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
923 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
924 }
925 return;
926 }
927 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
928 {
929 dst_lo = gen_lowpart (word_mode, dst);
930 dst_hi = gen_highpart (word_mode, dst);
931
932 if (mode == TImode)
933 {
934 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
935 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
936 }
937 else
938 {
939 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
940 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
941 }
942 return;
943 }
944 }
945
946 dst_lo = gen_lowpart (word_mode, dst);
947 dst_hi = gen_highpart (word_mode, dst);
948 src_lo = gen_lowpart (word_mode, src);
949 src_hi = gen_highpart_mode (word_mode, mode, src);
950
951 /* At most one pairing may overlap. */
952 if (reg_overlap_mentioned_p (dst_lo, src_hi))
953 {
954 aarch64_emit_move (dst_hi, src_hi);
955 aarch64_emit_move (dst_lo, src_lo);
956 }
957 else
958 {
959 aarch64_emit_move (dst_lo, src_lo);
960 aarch64_emit_move (dst_hi, src_hi);
961 }
962}
963
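/* Return true if a 128-bit move from SRC to DST needs to be split into two
   64-bit moves, i.e. unless both source and destination are FP registers.  */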
964bool
965aarch64_split_128bit_move_p (rtx dst, rtx src)
966{
967 return (! REG_P (src)
968 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
969}
970
971/* Split a complex SIMD combine. */
972
973void
974aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
975{
976 machine_mode src_mode = GET_MODE (src1);
977 machine_mode dst_mode = GET_MODE (dst);
978
979 gcc_assert (VECTOR_MODE_P (dst_mode));
980
981 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
982 {
983 rtx (*gen) (rtx, rtx, rtx);
984
985 switch (src_mode)
986 {
987 case V8QImode:
988 gen = gen_aarch64_simd_combinev8qi;
989 break;
990 case V4HImode:
991 gen = gen_aarch64_simd_combinev4hi;
992 break;
993 case V2SImode:
994 gen = gen_aarch64_simd_combinev2si;
995 break;
996 case V2SFmode:
997 gen = gen_aarch64_simd_combinev2sf;
998 break;
999 case DImode:
1000 gen = gen_aarch64_simd_combinedi;
1001 break;
1002 case DFmode:
1003 gen = gen_aarch64_simd_combinedf;
1004 break;
1005 default:
1006 gcc_unreachable ();
1007 }
1008
1009 emit_insn (gen (dst, src1, src2));
1010 return;
1011 }
1012}
1013
1014/* Split a complex SIMD move. */
1015
1016void
1017aarch64_split_simd_move (rtx dst, rtx src)
1018{
1019 machine_mode src_mode = GET_MODE (src);
1020 machine_mode dst_mode = GET_MODE (dst);
1021
1022 gcc_assert (VECTOR_MODE_P (dst_mode));
1023
1024 if (REG_P (dst) && REG_P (src))
1025 {
1026 rtx (*gen) (rtx, rtx);
1027
1028 gcc_assert (VECTOR_MODE_P (src_mode));
1029
1030 switch (src_mode)
1031 {
1032 case V16QImode:
1033 gen = gen_aarch64_split_simd_movv16qi;
1034 break;
1035 case V8HImode:
1036 gen = gen_aarch64_split_simd_movv8hi;
1037 break;
1038 case V4SImode:
1039 gen = gen_aarch64_split_simd_movv4si;
1040 break;
1041 case V2DImode:
1042 gen = gen_aarch64_split_simd_movv2di;
1043 break;
1044 case V4SFmode:
1045 gen = gen_aarch64_split_simd_movv4sf;
1046 break;
1047 case V2DFmode:
1048 gen = gen_aarch64_split_simd_movv2df;
1049 break;
1050 default:
1051 gcc_unreachable ();
1052 }
1053
1054 emit_insn (gen (dst, src));
1055 return;
1056 }
1057}
1058
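/* Return a register holding VALUE in MODE; if new pseudos cannot be created,
   move VALUE into X and return X instead.  */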
1059static rtx
1060aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
1061{
1062 if (can_create_pseudo_p ())
1063 return force_reg (mode, value);
1064 else
1065 {
1066 x = aarch64_emit_move (x, value);
1067 return x;
1068 }
1069}
1070
1071
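/* Return an rtx for REG plus OFFSET in MODE, using TEMP as a scratch register
   when OFFSET is not a valid add immediate.  */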
1072static rtx
1073aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
1074{
1075 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
1076 {
1077 rtx high;
1078 /* Load the full offset into a register. This
1079 might be improvable in the future. */
1080 high = GEN_INT (offset);
1081 offset = 0;
1082 high = aarch64_force_temporary (mode, temp, high);
1083 reg = aarch64_force_temporary (mode, temp,
1084 gen_rtx_PLUS (mode, high, reg));
1085 }
1086 return plus_constant (mode, reg, offset);
1087}
1088
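/* Return the number of instructions required to move immediate IMM of MODE
   into DEST, and emit those instructions when GENERATE is true.  */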
1089static int
1090aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
1091 machine_mode mode)
1092{
1093 unsigned HOST_WIDE_INT mask;
1094 int i;
1095 bool first;
1096 unsigned HOST_WIDE_INT val;
1097 bool subtargets;
1098 rtx subtarget;
1099 int one_match, zero_match, first_not_ffff_match;
1100 int num_insns = 0;
1101
1102 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1103 {
1104 if (generate)
1105 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1106 num_insns++;
1107 return num_insns;
1108 }
1109
1110 if (mode == SImode)
1111 {
1112 /* We know we can't do this in 1 insn, and we must be able to do it
1113 in two; so don't mess around looking for sequences that don't buy
1114 us anything. */
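  /* For example (illustrative): 0x12345678 is built as a move of 0x5678
     followed by an insertion of 0x1234 into bits 16-31.  */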
1115 if (generate)
1116 {
1117 emit_insn (gen_rtx_SET (VOIDmode, dest,
1118 GEN_INT (INTVAL (imm) & 0xffff)));
1119 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1120 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1121 }
1122 num_insns += 2;
1123 return num_insns;
1124 }
1125
1126 /* Remaining cases are all for DImode. */
1127
1128 val = INTVAL (imm);
1129 subtargets = optimize && can_create_pseudo_p ();
1130
1131 one_match = 0;
1132 zero_match = 0;
1133 mask = 0xffff;
1134 first_not_ffff_match = -1;
1135
1136 for (i = 0; i < 64; i += 16, mask <<= 16)
1137 {
1138 if ((val & mask) == mask)
1139 one_match++;
1140 else
1141 {
1142 if (first_not_ffff_match < 0)
1143 first_not_ffff_match = i;
1144 if ((val & mask) == 0)
1145 zero_match++;
1146 }
1147 }
1148
1149 if (one_match == 2)
1150 {
1151 /* Set one of the quarters and then insert back into result. */
1152 mask = 0xffffll << first_not_ffff_match;
1153 if (generate)
1154 {
1155 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1156 emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
1157 GEN_INT ((val >> first_not_ffff_match)
1158 & 0xffff)));
1159 }
1160 num_insns += 2;
1161 return num_insns;
1162 }
1163
1164 if (zero_match == 2)
1165 goto simple_sequence;
1166
1167 mask = 0x0ffff0000UL;
1168 for (i = 16; i < 64; i += 16, mask <<= 16)
1169 {
1170 HOST_WIDE_INT comp = mask & ~(mask - 1);
1171
1172 if (aarch64_uimm12_shift (val - (val & mask)))
1173 {
1174 if (generate)
1175 {
1176 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1177 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1178 GEN_INT (val & mask)));
1179 emit_insn (gen_adddi3 (dest, subtarget,
1180 GEN_INT (val - (val & mask))));
1181 }
1182 num_insns += 2;
1183 return num_insns;
1184 }
1185 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1186 {
1187 if (generate)
1188 {
1189 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1190 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1191 GEN_INT ((val + comp) & mask)));
1192 emit_insn (gen_adddi3 (dest, subtarget,
1193 GEN_INT (val - ((val + comp) & mask))));
1194 }
1195 num_insns += 2;
1196 return num_insns;
1197 }
1198 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1199 {
1200 if (generate)
1201 {
1202 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1203 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1204 GEN_INT ((val - comp) | ~mask)));
1205 emit_insn (gen_adddi3 (dest, subtarget,
1206 GEN_INT (val - ((val - comp) | ~mask))));
1207 }
1208 num_insns += 2;
1209 return num_insns;
1210 }
1211 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1212 {
1213 if (generate)
1214 {
1215 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1216 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1217 GEN_INT (val | ~mask)));
1218 emit_insn (gen_adddi3 (dest, subtarget,
1219 GEN_INT (val - (val | ~mask))));
1220 }
1221 num_insns += 2;
1222 return num_insns;
1223 }
1224 }
1225
1226 /* See if we can do it by arithmetically combining two
1227 immediates. */
1228 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1229 {
1230 int j;
1231 mask = 0xffff;
1232
1233 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1234 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1235 {
1236 if (generate)
1237 {
1238 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1239 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1240 GEN_INT (aarch64_bitmasks[i])));
1241 emit_insn (gen_adddi3 (dest, subtarget,
1242 GEN_INT (val - aarch64_bitmasks[i])));
1243 }
1244 num_insns += 2;
1245 return num_insns;
1246 }
1247
1248 for (j = 0; j < 64; j += 16, mask <<= 16)
1249 {
1250 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1251 {
1252 if (generate)
1253 {
1254 emit_insn (gen_rtx_SET (VOIDmode, dest,
1255 GEN_INT (aarch64_bitmasks[i])));
1256 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1257 GEN_INT ((val >> j) & 0xffff)));
1258 }
1259 num_insns += 2;
1260 return num_insns;
1261 }
1262 }
1263 }
1264
1265 /* See if we can do it by logically combining two immediates. */
1266 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1267 {
1268 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1269 {
1270 int j;
1271
1272 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1273 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1274 {
1275 if (generate)
1276 {
1277 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1278 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1279 GEN_INT (aarch64_bitmasks[i])));
1280 emit_insn (gen_iordi3 (dest, subtarget,
1281 GEN_INT (aarch64_bitmasks[j])));
1282 }
1283 num_insns += 2;
1284 return num_insns;
1285 }
1286 }
1287 else if ((val & aarch64_bitmasks[i]) == val)
1288 {
1289 int j;
1290
1291 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1292 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1293 {
1294 if (generate)
1295 {
1296 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1297 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1298 GEN_INT (aarch64_bitmasks[j])));
1299 emit_insn (gen_anddi3 (dest, subtarget,
1300 GEN_INT (aarch64_bitmasks[i])));
1301 }
1302 num_insns += 2;
1303 return num_insns;
1304 }
1305 }
1306 }
1307
1308 if (one_match > zero_match)
1309 {
1310 /* Set either first three quarters or all but the third. */
1311 mask = 0xffffll << (16 - first_not_ffff_match);
1312 if (generate)
1313 emit_insn (gen_rtx_SET (VOIDmode, dest,
1314 GEN_INT (val | mask | 0xffffffff00000000ull)));
1315 num_insns ++;
1316
1317 /* Now insert other two quarters. */
1318 for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
1319 i < 64; i += 16, mask <<= 16)
1320 {
1321 if ((val & mask) != mask)
1322 {
1323 if (generate)
1324 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1325 GEN_INT ((val >> i) & 0xffff)));
1326 num_insns ++;
1327 }
1328 }
1329 return num_insns;
1330 }
1331
1332 simple_sequence:
1333 first = true;
1334 mask = 0xffff;
1335 for (i = 0; i < 64; i += 16, mask <<= 16)
1336 {
1337 if ((val & mask) != 0)
1338 {
1339 if (first)
1340 {
1341 if (generate)
1342 emit_insn (gen_rtx_SET (VOIDmode, dest,
1343 GEN_INT (val & mask)));
1344 num_insns ++;
1345 first = false;
1346 }
1347 else
1348 {
1349 if (generate)
1350 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1351 GEN_INT ((val >> i) & 0xffff)));
1352 num_insns ++;
1353 }
1354 }
1355 }
1356
1357 return num_insns;
1358}
1359
1360
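/* Expand a move of immediate IMM (a symbolic constant or an integer)
   into register DEST.  */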
1361void
1362aarch64_expand_mov_immediate (rtx dest, rtx imm)
1363{
1364 machine_mode mode = GET_MODE (dest);
1365
1366 gcc_assert (mode == SImode || mode == DImode);
1367
1368 /* Check on what type of symbol it is. */
1369 if (GET_CODE (imm) == SYMBOL_REF
1370 || GET_CODE (imm) == LABEL_REF
1371 || GET_CODE (imm) == CONST)
1372 {
1373 rtx mem, base, offset;
1374 enum aarch64_symbol_type sty;
1375
1376 /* If we have (const (plus symbol offset)), separate out the offset
1377 before we start classifying the symbol. */
1378 split_const (imm, &base, &offset);
1379
1380 sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
1381 switch (sty)
1382 {
1383 case SYMBOL_FORCE_TO_MEM:
1384 if (offset != const0_rtx
1385 && targetm.cannot_force_const_mem (mode, imm))
1386 {
1387 gcc_assert (can_create_pseudo_p ());
1388 base = aarch64_force_temporary (mode, dest, base);
1389 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1390 aarch64_emit_move (dest, base);
1391 return;
1392 }
1393 mem = force_const_mem (ptr_mode, imm);
1394 gcc_assert (mem);
1395 if (mode != ptr_mode)
1396 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1397 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1398 return;
1399
1400 case SYMBOL_SMALL_TLSGD:
1401 case SYMBOL_SMALL_TLSDESC:
1402 case SYMBOL_SMALL_GOTTPREL:
1403 case SYMBOL_SMALL_GOT:
1404 case SYMBOL_TINY_GOT:
1405 if (offset != const0_rtx)
1406 {
1407 gcc_assert(can_create_pseudo_p ());
1408 base = aarch64_force_temporary (mode, dest, base);
1409 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1410 aarch64_emit_move (dest, base);
1411 return;
1412 }
1413 /* FALLTHRU */
1414
1415 case SYMBOL_SMALL_TPREL:
1416 case SYMBOL_SMALL_ABSOLUTE:
1417 case SYMBOL_TINY_ABSOLUTE:
1418 aarch64_load_symref_appropriately (dest, imm, sty);
1419 return;
1420
1421 default:
1422 gcc_unreachable ();
1423 }
1424 }
1425
1426 if (!CONST_INT_P (imm))
1427 {
1428 if (GET_CODE (imm) == HIGH)
1429 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1430 else
1431 {
1432 rtx mem = force_const_mem (mode, imm);
1433 gcc_assert (mem);
1434 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1435 }
1436
1437 return;
1438 }
1439
1440 aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
1441}
1442
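/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  */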
1443static bool
1444aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1445 tree exp ATTRIBUTE_UNUSED)
1446{
1447 /* Currently, always true. */
1448 return true;
1449}
1450
1451/* Implement TARGET_PASS_BY_REFERENCE. */
1452
1453static bool
1454aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1455 machine_mode mode,
1456 const_tree type,
1457 bool named ATTRIBUTE_UNUSED)
1458{
1459 HOST_WIDE_INT size;
1460 machine_mode dummymode;
1461 int nregs;
1462
1463 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1464 size = (mode == BLKmode && type)
1465 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1466
1467 /* Aggregates are passed by reference based on their size. */
1468 if (type && AGGREGATE_TYPE_P (type))
1469 {
1470 size = int_size_in_bytes (type);
1471 }
1472
1473 /* Variable sized arguments are always passed by reference. */
1474 if (size < 0)
1475 return true;
1476
1477 /* Can this be a candidate to be passed in fp/simd register(s)? */
1478 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1479 &dummymode, &nregs,
1480 NULL))
1481 return false;
1482
1483 /* Arguments which are variable sized or larger than 2 registers are
1484 passed by reference unless they are a homogeneous floating-point
1485 aggregate. */
1486 return size > 2 * UNITS_PER_WORD;
1487}
1488
1489/* Return TRUE if VALTYPE is padded to its least significant bits. */
1490static bool
1491aarch64_return_in_msb (const_tree valtype)
1492{
1493 machine_mode dummy_mode;
1494 int dummy_int;
1495
1496 /* Never happens in little-endian mode. */
1497 if (!BYTES_BIG_ENDIAN)
1498 return false;
1499
1500 /* Only composite types smaller than or equal to 16 bytes can
1501 be potentially returned in registers. */
1502 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1503 || int_size_in_bytes (valtype) <= 0
1504 || int_size_in_bytes (valtype) > 16)
1505 return false;
1506
1507 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1508 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1509 is always passed/returned in the least significant bits of fp/simd
1510 register(s). */
1511 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1512 &dummy_mode, &dummy_int, NULL))
1513 return false;
1514
1515 return true;
1516}
1517
1518/* Implement TARGET_FUNCTION_VALUE.
1519 Define how to find the value returned by a function. */
1520
1521static rtx
1522aarch64_function_value (const_tree type, const_tree func,
1523 bool outgoing ATTRIBUTE_UNUSED)
1524{
1525 machine_mode mode;
1526 int unsignedp;
1527 int count;
1528 machine_mode ag_mode;
1529
1530 mode = TYPE_MODE (type);
1531 if (INTEGRAL_TYPE_P (type))
1532 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1533
1534 if (aarch64_return_in_msb (type))
1535 {
1536 HOST_WIDE_INT size = int_size_in_bytes (type);
1537
1538 if (size % UNITS_PER_WORD != 0)
1539 {
1540 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1541 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1542 }
1543 }
1544
1545 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1546 &ag_mode, &count, NULL))
1547 {
1548 if (!aarch64_composite_type_p (type, mode))
1549 {
1550 gcc_assert (count == 1 && mode == ag_mode);
1551 return gen_rtx_REG (mode, V0_REGNUM);
1552 }
1553 else
1554 {
1555 int i;
1556 rtx par;
1557
1558 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1559 for (i = 0; i < count; i++)
1560 {
1561 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1562 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1563 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1564 XVECEXP (par, 0, i) = tmp;
1565 }
1566 return par;
1567 }
1568 }
1569 else
1570 return gen_rtx_REG (mode, R0_REGNUM);
1571}
1572
1573/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1574 Return true if REGNO is the number of a hard register in which the values
1575 of called function may come back. */
1576
1577static bool
1578aarch64_function_value_regno_p (const unsigned int regno)
1579{
1580 /* Maximum of 16 bytes can be returned in the general registers. Examples
1581 of 16-byte return values are: 128-bit integers and 16-byte small
1582 structures (excluding homogeneous floating-point aggregates). */
1583 if (regno == R0_REGNUM || regno == R1_REGNUM)
1584 return true;
1585
1586 /* Up to four fp/simd registers can return a function value, e.g. a
1587 homogeneous floating-point aggregate having four members. */
1588 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1589 return !TARGET_GENERAL_REGS_ONLY;
1590
1591 return false;
1592}
1593
1594/* Implement TARGET_RETURN_IN_MEMORY.
1595
1596 If the type T of the result of a function is such that
1597 void func (T arg)
1598 would require that arg be passed as a value in a register (or set of
1599 registers) according to the parameter passing rules, then the result
1600 is returned in the same registers as would be used for such an
1601 argument. */
1602
1603static bool
1604aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1605{
1606 HOST_WIDE_INT size;
1607 machine_mode ag_mode;
1608 int count;
1609
1610 if (!AGGREGATE_TYPE_P (type)
1611 && TREE_CODE (type) != COMPLEX_TYPE
1612 && TREE_CODE (type) != VECTOR_TYPE)
1613 /* Simple scalar types always returned in registers. */
1614 return false;
1615
1616 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1617 type,
1618 &ag_mode,
1619 &count,
1620 NULL))
1621 return false;
1622
1623 /* Types larger than 2 registers returned in memory. */
1624 size = int_size_in_bytes (type);
1625 return (size < 0 || size > 2 * UNITS_PER_WORD);
1626}
1627
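/* Return true if an argument of mode MODE and type TYPE can be passed in
   SIMD/FP registers, setting *NREGS to the number of registers required.  */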
1628static bool
1629aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
1630 const_tree type, int *nregs)
1631{
1632 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1633 return aarch64_vfp_is_call_or_return_candidate (mode,
1634 type,
1635 &pcum->aapcs_vfp_rmode,
1636 nregs,
1637 NULL);
1638}
1639
1640/* Given MODE and TYPE of a function argument, return the alignment in
1641 bits. The idea is to suppress any stronger alignment requested by
1642 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1643 This is a helper function for local use only. */
1644
1645static unsigned int
1646aarch64_function_arg_alignment (machine_mode mode, const_tree type)
1647{
1648 unsigned int alignment;
1649
1650 if (type)
1651 {
1652 if (!integer_zerop (TYPE_SIZE (type)))
1653 {
1654 if (TYPE_MODE (type) == mode)
1655 alignment = TYPE_ALIGN (type);
1656 else
1657 alignment = GET_MODE_ALIGNMENT (mode);
1658 }
1659 else
1660 alignment = 0;
1661 }
1662 else
1663 alignment = GET_MODE_ALIGNMENT (mode);
1664
1665 return alignment;
1666}
1667
1668/* Layout a function argument according to the AAPCS64 rules. The rule
1669 numbers refer to the rule numbers in the AAPCS64. */
1670
1671static void
1672aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
1673 const_tree type,
1674 bool named ATTRIBUTE_UNUSED)
1675{
1676 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1677 int ncrn, nvrn, nregs;
1678 bool allocate_ncrn, allocate_nvrn;
1679 HOST_WIDE_INT size;
1680
1681 /* We need to do this once per argument. */
1682 if (pcum->aapcs_arg_processed)
1683 return;
1684
1685 pcum->aapcs_arg_processed = true;
1686
1687 /* Size in bytes, rounded up to the nearest multiple of 8 bytes. */
1688 size
1689 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1690 UNITS_PER_WORD);
1691
1692 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1693 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1694 mode,
1695 type,
1696 &nregs);
1697
1698 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1699 The following code thus handles passing by SIMD/FP registers first. */
1700
1701 nvrn = pcum->aapcs_nvrn;
1702
1703 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1704 and homogeneous short-vector aggregates (HVA). */
1705 if (allocate_nvrn)
1706 {
1707 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1708 {
1709 pcum->aapcs_nextnvrn = nvrn + nregs;
1710 if (!aarch64_composite_type_p (type, mode))
1711 {
1712 gcc_assert (nregs == 1);
1713 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1714 }
1715 else
1716 {
1717 rtx par;
1718 int i;
1719 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1720 for (i = 0; i < nregs; i++)
1721 {
1722 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1723 V0_REGNUM + nvrn + i);
1724 tmp = gen_rtx_EXPR_LIST
1725 (VOIDmode, tmp,
1726 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1727 XVECEXP (par, 0, i) = tmp;
1728 }
1729 pcum->aapcs_reg = par;
1730 }
1731 return;
1732 }
1733 else
1734 {
1735 /* C.3 NSRN is set to 8. */
1736 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1737 goto on_stack;
1738 }
1739 }
1740
1741 ncrn = pcum->aapcs_ncrn;
1742 nregs = size / UNITS_PER_WORD;
1743
1744 /* C6 - C9, though the sign and zero extension semantics are
1745 handled elsewhere. This is the case where the argument fits
1746 entirely in general registers. */
1747 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1748 {
1749 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1750
1751 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1752
1753 /* C.8 if the argument has an alignment of 16 then the NGRN is
1754 rounded up to the next even number. */
1755 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1756 {
1757 ++ncrn;
1758 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1759 }
1760 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1761 A reg is still generated for it, but the caller should be smart
1762 enough not to use it. */
1763 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1764 {
1765 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1766 }
1767 else
1768 {
1769 rtx par;
1770 int i;
1771
1772 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1773 for (i = 0; i < nregs; i++)
1774 {
1775 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1776 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1777 GEN_INT (i * UNITS_PER_WORD));
1778 XVECEXP (par, 0, i) = tmp;
1779 }
1780 pcum->aapcs_reg = par;
1781 }
1782
1783 pcum->aapcs_nextncrn = ncrn + nregs;
1784 return;
1785 }
1786
1787 /* C.11 */
1788 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1789
1790 /* The argument is passed on stack; record the needed number of words for
1791 this argument and align the total size if necessary. */
1792on_stack:
1793 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1794 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1795 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1796 16 / UNITS_PER_WORD);
1797 return;
1798}
1799
1800/* Implement TARGET_FUNCTION_ARG. */
1801
1802static rtx
1803aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
1804 const_tree type, bool named)
1805{
1806 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1807 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1808
1809 if (mode == VOIDmode)
1810 return NULL_RTX;
1811
1812 aarch64_layout_arg (pcum_v, mode, type, named);
1813 return pcum->aapcs_reg;
1814}
1815
1816void
1817aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1818 const_tree fntype ATTRIBUTE_UNUSED,
1819 rtx libname ATTRIBUTE_UNUSED,
1820 const_tree fndecl ATTRIBUTE_UNUSED,
1821 unsigned n_named ATTRIBUTE_UNUSED)
1822{
1823 pcum->aapcs_ncrn = 0;
1824 pcum->aapcs_nvrn = 0;
1825 pcum->aapcs_nextncrn = 0;
1826 pcum->aapcs_nextnvrn = 0;
1827 pcum->pcs_variant = ARM_PCS_AAPCS64;
1828 pcum->aapcs_reg = NULL_RTX;
1829 pcum->aapcs_arg_processed = false;
1830 pcum->aapcs_stack_words = 0;
1831 pcum->aapcs_stack_size = 0;
1832
1833 return;
1834}
1835
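/* Implement TARGET_FUNCTION_ARG_ADVANCE.  */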
1836static void
1837aarch64_function_arg_advance (cumulative_args_t pcum_v,
1838 machine_mode mode,
1839 const_tree type,
1840 bool named)
1841{
1842 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1843 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1844 {
1845 aarch64_layout_arg (pcum_v, mode, type, named);
1846 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1847 != (pcum->aapcs_stack_words != 0));
1848 pcum->aapcs_arg_processed = false;
1849 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1850 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1851 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1852 pcum->aapcs_stack_words = 0;
1853 pcum->aapcs_reg = NULL_RTX;
1854 }
1855}
1856
1857bool
1858aarch64_function_arg_regno_p (unsigned regno)
1859{
1860 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1861 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1862}
1863
1864/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1865 PARM_BOUNDARY bits of alignment, but will be given anything up
1866 to STACK_BOUNDARY bits if the type requires it. This makes sure
1867 that both before and after the layout of each argument, the Next
1868 Stacked Argument Address (NSAA) will have a minimum alignment of
1869 8 bytes. */
1870
1871static unsigned int
1872aarch64_function_arg_boundary (machine_mode mode, const_tree type)
1873{
1874 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1875
1876 if (alignment < PARM_BOUNDARY)
1877 alignment = PARM_BOUNDARY;
1878 if (alignment > STACK_BOUNDARY)
1879 alignment = STACK_BOUNDARY;
1880 return alignment;
1881}
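
The net effect is to clamp each argument's alignment into the [PARM_BOUNDARY, STACK_BOUNDARY] window, which on AArch64 is 64 and 128 bits (an assumption of this sketch; the target headers are authoritative).  A char argument therefore still gets a 64-bit-aligned slot, while over-aligned types are capped at 128 bits:

/* Minimal sketch of the clamping rule, assuming PARM_BOUNDARY == 64
   and STACK_BOUNDARY == 128 bits.  */
static unsigned int
sketch_arg_boundary (unsigned int type_align_bits)
{
  if (type_align_bits < 64)
    type_align_bits = 64;     /* At least one doubleword of alignment.  */
  if (type_align_bits > 128)
    type_align_bits = 128;    /* Never more than the stack alignment.  */
  return type_align_bits;
}
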
1882
1883/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1884
1885 Return true if an argument passed on the stack should be padded upwards,
1886 i.e. if the least-significant byte of the stack slot has useful data.
1887
1888 Small aggregate types are placed in the lowest memory address.
1889
1890 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1891
1892bool
ef4bddc2 1893aarch64_pad_arg_upward (machine_mode mode, const_tree type)
43e9d192
IB
1894{
1895 /* On little-endian targets, the least significant byte of every stack
1896 argument is passed at the lowest byte address of the stack slot. */
1897 if (!BYTES_BIG_ENDIAN)
1898 return true;
1899
00edcfbe 1900 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
1901 the least significant byte of a stack argument is passed at the highest
1902 byte address of the stack slot. */
1903 if (type
00edcfbe
YZ
1904 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1905 || POINTER_TYPE_P (type))
43e9d192
IB
1906 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1907 return false;
1908
1909 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1910 return true;
1911}
1912
1913/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1914
 1915   It specifies padding for the last (possibly the only)
 1916   element of a block move between registers and memory.  If the
 1917   block is assumed to be in memory, padding upward means that
 1918   the last element is padded after its most significant byte,
 1919   while with downward padding the last element is padded at
 1920   its least significant byte side.
1921
1922 Small aggregates and small complex types are always padded
1923 upwards.
1924
1925 We don't need to worry about homogeneous floating-point or
1926 short-vector aggregates; their move is not affected by the
1927 padding direction determined here. Regardless of endianness,
1928 each element of such an aggregate is put in the least
1929 significant bits of a fp/simd register.
1930
1931 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1932 register has useful data, and return the opposite if the most
1933 significant byte does. */
1934
1935bool
ef4bddc2 1936aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
1937 bool first ATTRIBUTE_UNUSED)
1938{
1939
1940 /* Small composite types are always padded upward. */
1941 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1942 {
1943 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1944 : GET_MODE_SIZE (mode));
1945 if (size < 2 * UNITS_PER_WORD)
1946 return true;
1947 }
1948
1949 /* Otherwise, use the default padding. */
1950 return !BYTES_BIG_ENDIAN;
1951}
1952
ef4bddc2 1953static machine_mode
43e9d192
IB
1954aarch64_libgcc_cmp_return_mode (void)
1955{
1956 return SImode;
1957}
1958
1959static bool
1960aarch64_frame_pointer_required (void)
1961{
0b7f8166
MS
1962 /* In aarch64_override_options_after_change
1963 flag_omit_leaf_frame_pointer turns off the frame pointer by
1964 default. Turn it back on now if we've not got a leaf
1965 function. */
1966 if (flag_omit_leaf_frame_pointer
1967 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1968 return true;
43e9d192 1969
0b7f8166 1970 return false;
43e9d192
IB
1971}
1972
1973/* Mark the registers that need to be saved by the callee and calculate
1974 the size of the callee-saved registers area and frame record (both FP
1975 and LR may be omitted). */
1976static void
1977aarch64_layout_frame (void)
1978{
1979 HOST_WIDE_INT offset = 0;
1980 int regno;
1981
1982 if (reload_completed && cfun->machine->frame.laid_out)
1983 return;
1984
97826595
MS
1985#define SLOT_NOT_REQUIRED (-2)
1986#define SLOT_REQUIRED (-1)
1987
363ffa50
JW
1988 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
1989 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
1990
43e9d192
IB
1991 /* First mark all the registers that really need to be saved... */
1992 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 1993 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
1994
1995 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 1996 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
1997
1998 /* ... that includes the eh data registers (if needed)... */
1999 if (crtl->calls_eh_return)
2000 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
2001 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2002 = SLOT_REQUIRED;
43e9d192
IB
2003
2004 /* ... and any callee saved register that dataflow says is live. */
2005 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2006 if (df_regs_ever_live_p (regno)
1c923b60
JW
2007 && (regno == R30_REGNUM
2008 || !call_used_regs[regno]))
97826595 2009 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2010
2011 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2012 if (df_regs_ever_live_p (regno)
2013 && !call_used_regs[regno])
97826595 2014 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2015
2016 if (frame_pointer_needed)
2017 {
2e1cdae5 2018 /* FP and LR are placed in the linkage record. */
43e9d192 2019 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 2020 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 2021 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 2022 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
43e9d192 2023 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
2e1cdae5 2024 offset += 2 * UNITS_PER_WORD;
43e9d192
IB
2025 }
2026
2027 /* Now assign stack slots for them. */
2e1cdae5 2028 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2029 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2030 {
2031 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
2032 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2033 cfun->machine->frame.wb_candidate1 = regno;
2034 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
2035 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2036 offset += UNITS_PER_WORD;
2037 }
2038
2039 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2040 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2041 {
2042 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
2043 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2044 cfun->machine->frame.wb_candidate1 = regno;
2045 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
2046 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2047 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2048 offset += UNITS_PER_WORD;
2049 }
2050
43e9d192
IB
2051 cfun->machine->frame.padding0 =
2052 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
2053 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2054
2055 cfun->machine->frame.saved_regs_size = offset;
1c960e02
MS
2056
2057 cfun->machine->frame.hard_fp_offset
2058 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
2059 + get_frame_size ()
2060 + cfun->machine->frame.saved_regs_size,
2061 STACK_BOUNDARY / BITS_PER_UNIT);
2062
2063 cfun->machine->frame.frame_size
2064 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
2065 + crtl->outgoing_args_size,
2066 STACK_BOUNDARY / BITS_PER_UNIT);
2067
43e9d192
IB
2068 cfun->machine->frame.laid_out = true;
2069}
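
The two AARCH64_ROUND_UP computations above fix the geometry that the prologue and epilogue rely on: hard_fp_offset is the distance from the incoming stack pointer down to the FP/LR record, and frame_size additionally covers the outgoing argument area.  A minimal standalone sketch of the same arithmetic with made-up input sizes (the variable names mirror cfun->machine->frame fields, but nothing here is GCC API):

/* Sketch of the frame-size arithmetic in aarch64_layout_frame.
   All quantities are in bytes; the stack boundary is assumed to be
   16 bytes.  Prints hard_fp_offset = 80, frame_size = 96.  */
#include <stdio.h>

#define ROUND_UP(x, align) (((x) + (align) - 1) & ~((align) - 1))

int
main (void)
{
  long saved_varargs_size = 0;   /* Callee-allocated varargs save area.  */
  long locals_size = 40;         /* get_frame_size (): local variables.  */
  long saved_regs_size = 32;     /* Callee-saved registers, incl. FP/LR.  */
  long outgoing_args_size = 16;  /* crtl->outgoing_args_size.  */

  long hard_fp_offset = ROUND_UP (saved_varargs_size + locals_size
                                  + saved_regs_size, 16);
  long frame_size = ROUND_UP (hard_fp_offset + outgoing_args_size, 16);

  printf ("hard_fp_offset = %ld, frame_size = %ld\n",
          hard_fp_offset, frame_size);
  return 0;
}
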
2070
43e9d192
IB
2071static bool
2072aarch64_register_saved_on_entry (int regno)
2073{
97826595 2074 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
2075}
2076
64dedd72
JW
2077static unsigned
2078aarch64_next_callee_save (unsigned regno, unsigned limit)
2079{
2080 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2081 regno ++;
2082 return regno;
2083}
43e9d192 2084
c5e1f66e 2085static void
ef4bddc2 2086aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
2087 HOST_WIDE_INT adjustment)
2088 {
2089 rtx base_rtx = stack_pointer_rtx;
2090 rtx insn, reg, mem;
2091
2092 reg = gen_rtx_REG (mode, regno);
2093 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2094 plus_constant (Pmode, base_rtx, -adjustment));
2095 mem = gen_rtx_MEM (mode, mem);
2096
2097 insn = emit_move_insn (mem, reg);
2098 RTX_FRAME_RELATED_P (insn) = 1;
2099}
2100
80c11907 2101static rtx
ef4bddc2 2102aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
2103 HOST_WIDE_INT adjustment)
2104{
2105 switch (mode)
2106 {
2107 case DImode:
2108 return gen_storewb_pairdi_di (base, base, reg, reg2,
2109 GEN_INT (-adjustment),
2110 GEN_INT (UNITS_PER_WORD - adjustment));
2111 case DFmode:
2112 return gen_storewb_pairdf_di (base, base, reg, reg2,
2113 GEN_INT (-adjustment),
2114 GEN_INT (UNITS_PER_WORD - adjustment));
2115 default:
2116 gcc_unreachable ();
2117 }
2118}
2119
2120static void
ef4bddc2 2121aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
80c11907
JW
2122 unsigned regno2, HOST_WIDE_INT adjustment)
2123{
5d8a22a5 2124 rtx_insn *insn;
80c11907
JW
2125 rtx reg1 = gen_rtx_REG (mode, regno1);
2126 rtx reg2 = gen_rtx_REG (mode, regno2);
2127
2128 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2129 reg2, adjustment));
2130 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
2131 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2132 RTX_FRAME_RELATED_P (insn) = 1;
2133}
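
The insn generated here is a single pre-modify store pair: the stack pointer is decremented by ADJUSTMENT and both registers are stored at the new top of stack.  Because only the first element of a frame-related PARALLEL is implicitly frame-related, elements 1 and 2 are marked explicitly.  A hedged illustration of the resulting instruction; the exact assembly depends on the operands:

/* Illustrative only: aarch64_pushwb_pair_reg (DImode, R29_REGNUM,
   R30_REGNUM, 32) emits RTL that assembles to roughly

       stp  x29, x30, [sp, #-32]!

   i.e. sp is pre-decremented by 32 and the register pair is stored
   at the new stack pointer.  */
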
2134
159313d9 2135static rtx
ef4bddc2 2136aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
2137 HOST_WIDE_INT adjustment)
2138{
2139 switch (mode)
2140 {
2141 case DImode:
2142 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2143 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2144 case DFmode:
2145 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2146 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2147 default:
2148 gcc_unreachable ();
2149 }
2150}
2151
72df5c1f 2152static rtx
ef4bddc2 2153aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
2154 rtx reg2)
2155{
2156 switch (mode)
2157 {
2158 case DImode:
2159 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2160
2161 case DFmode:
2162 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2163
2164 default:
2165 gcc_unreachable ();
2166 }
2167}
2168
2169static rtx
ef4bddc2 2170aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
2171 rtx mem2)
2172{
2173 switch (mode)
2174 {
2175 case DImode:
2176 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2177
2178 case DFmode:
2179 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2180
2181 default:
2182 gcc_unreachable ();
2183 }
2184}
2185
43e9d192 2186
43e9d192 2187static void
ef4bddc2 2188aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
ae13fce3 2189 unsigned start, unsigned limit, bool skip_wb)
43e9d192 2190{
5d8a22a5 2191 rtx_insn *insn;
ef4bddc2 2192 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
a007a21c 2193 ? gen_frame_mem : gen_rtx_MEM);
43e9d192
IB
2194 unsigned regno;
2195 unsigned regno2;
2196
0ec74a1e 2197 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
2198 regno <= limit;
2199 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 2200 {
ae13fce3
JW
2201 rtx reg, mem;
2202 HOST_WIDE_INT offset;
64dedd72 2203
ae13fce3
JW
2204 if (skip_wb
2205 && (regno == cfun->machine->frame.wb_candidate1
2206 || regno == cfun->machine->frame.wb_candidate2))
2207 continue;
2208
2209 reg = gen_rtx_REG (mode, regno);
2210 offset = start_offset + cfun->machine->frame.reg_offset[regno];
0ec74a1e
JW
2211 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2212 offset));
64dedd72
JW
2213
2214 regno2 = aarch64_next_callee_save (regno + 1, limit);
2215
2216 if (regno2 <= limit
2217 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2218 == cfun->machine->frame.reg_offset[regno2]))
2219
43e9d192 2220 {
0ec74a1e 2221 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
2222 rtx mem2;
2223
2224 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
8ed2fc62
JW
2225 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2226 offset));
2227 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2228 reg2));
0b4a9743 2229
64dedd72
JW
2230 /* The first part of a frame-related parallel insn is
2231 always assumed to be relevant to the frame
 2232	 calculations; subsequent parts are only
2233 frame-related if explicitly marked. */
2234 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2235 regno = regno2;
2236 }
2237 else
8ed2fc62
JW
2238 insn = emit_move_insn (mem, reg);
2239
2240 RTX_FRAME_RELATED_P (insn) = 1;
2241 }
2242}
2243
2244static void
ef4bddc2 2245aarch64_restore_callee_saves (machine_mode mode,
8ed2fc62 2246 HOST_WIDE_INT start_offset, unsigned start,
dd991abb 2247 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 2248{
8ed2fc62 2249 rtx base_rtx = stack_pointer_rtx;
ef4bddc2 2250 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
8ed2fc62
JW
2251 ? gen_frame_mem : gen_rtx_MEM);
2252 unsigned regno;
2253 unsigned regno2;
2254 HOST_WIDE_INT offset;
2255
2256 for (regno = aarch64_next_callee_save (start, limit);
2257 regno <= limit;
2258 regno = aarch64_next_callee_save (regno + 1, limit))
2259 {
ae13fce3 2260 rtx reg, mem;
8ed2fc62 2261
ae13fce3
JW
2262 if (skip_wb
2263 && (regno == cfun->machine->frame.wb_candidate1
2264 || regno == cfun->machine->frame.wb_candidate2))
2265 continue;
2266
2267 reg = gen_rtx_REG (mode, regno);
8ed2fc62
JW
2268 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2269 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2270
2271 regno2 = aarch64_next_callee_save (regno + 1, limit);
2272
2273 if (regno2 <= limit
2274 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2275 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 2276 {
8ed2fc62
JW
2277 rtx reg2 = gen_rtx_REG (mode, regno2);
2278 rtx mem2;
2279
2280 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2281 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 2282 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 2283
dd991abb 2284 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 2285 regno = regno2;
43e9d192 2286 }
8ed2fc62 2287 else
dd991abb
RH
2288 emit_move_insn (reg, mem);
2289 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 2290 }
43e9d192
IB
2291}
2292
2293/* AArch64 stack frames generated by this compiler look like:
2294
2295 +-------------------------------+
2296 | |
2297 | incoming stack arguments |
2298 | |
34834420
MS
2299 +-------------------------------+
2300 | | <-- incoming stack pointer (aligned)
43e9d192
IB
2301 | callee-allocated save area |
2302 | for register varargs |
2303 | |
34834420
MS
2304 +-------------------------------+
2305 | local variables | <-- frame_pointer_rtx
43e9d192
IB
2306 | |
2307 +-------------------------------+
454fdba9
RL
2308 | padding0 | \
2309 +-------------------------------+ |
454fdba9 2310 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
2311 +-------------------------------+ |
2312 | LR' | |
2313 +-------------------------------+ |
34834420
MS
2314 | FP' | / <- hard_frame_pointer_rtx (aligned)
2315 +-------------------------------+
43e9d192
IB
2316 | dynamic allocation |
2317 +-------------------------------+
34834420
MS
2318 | padding |
2319 +-------------------------------+
2320 | outgoing stack arguments | <-- arg_pointer
2321 | |
2322 +-------------------------------+
2323 | | <-- stack_pointer_rtx (aligned)
43e9d192 2324
34834420
MS
2325 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2326 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2327 unchanged. */
43e9d192
IB
2328
2329/* Generate the prologue instructions for entry into a function.
2330 Establish the stack frame by decreasing the stack pointer with a
2331 properly calculated size and, if necessary, create a frame record
 2332   filled with the values of LR and the previous frame pointer.  The
6991c977 2333 current FP is also set up if it is in use. */
43e9d192
IB
2334
2335void
2336aarch64_expand_prologue (void)
2337{
2338 /* sub sp, sp, #<frame_size>
2339 stp {fp, lr}, [sp, #<frame_size> - 16]
2340 add fp, sp, #<frame_size> - hardfp_offset
2341 stp {cs_reg}, [fp, #-16] etc.
2342
2343 sub sp, sp, <final_adjustment_if_any>
2344 */
43e9d192 2345 HOST_WIDE_INT frame_size, offset;
1c960e02 2346 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
dd991abb 2347 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2348 rtx_insn *insn;
43e9d192
IB
2349
2350 aarch64_layout_frame ();
43e9d192 2351
dd991abb
RH
2352 offset = frame_size = cfun->machine->frame.frame_size;
2353 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2354 fp_offset = frame_size - hard_fp_offset;
43e9d192 2355
dd991abb
RH
2356 if (flag_stack_usage_info)
2357 current_function_static_stack_size = frame_size;
43e9d192 2358
44c0e7b9 2359  /* Store pairs and load pairs have an offset range of only -512 to 504.  */
43e9d192
IB
2360 if (offset >= 512)
2361 {
2362 /* When the frame has a large size, an initial decrease is done on
2363 the stack pointer to jump over the callee-allocated save area for
2364 register varargs, the local variable area and/or the callee-saved
2365 register area. This will allow the pre-index write-back
2366 store pair instructions to be used for setting up the stack frame
2367 efficiently. */
dd991abb 2368 offset = hard_fp_offset;
43e9d192
IB
2369 if (offset >= 512)
2370 offset = cfun->machine->frame.saved_regs_size;
2371
2372 frame_size -= (offset + crtl->outgoing_args_size);
2373 fp_offset = 0;
2374
2375 if (frame_size >= 0x1000000)
2376 {
2377 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2378 emit_move_insn (op0, GEN_INT (-frame_size));
dd991abb
RH
2379 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2380
2381 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2382 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2383 plus_constant (Pmode, stack_pointer_rtx,
2384 -frame_size)));
2385 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
2386 }
2387 else if (frame_size > 0)
2388 {
dd991abb
RH
2389 int hi_ofs = frame_size & 0xfff000;
2390 int lo_ofs = frame_size & 0x000fff;
2391
2392 if (hi_ofs)
43e9d192
IB
2393 {
2394 insn = emit_insn (gen_add2_insn
dd991abb 2395 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
43e9d192
IB
2396 RTX_FRAME_RELATED_P (insn) = 1;
2397 }
dd991abb 2398 if (lo_ofs)
43e9d192
IB
2399 {
2400 insn = emit_insn (gen_add2_insn
dd991abb 2401 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
43e9d192
IB
2402 RTX_FRAME_RELATED_P (insn) = 1;
2403 }
2404 }
2405 }
2406 else
2407 frame_size = -1;
2408
2409 if (offset > 0)
2410 {
ae13fce3
JW
2411 bool skip_wb = false;
2412
43e9d192
IB
2413 if (frame_pointer_needed)
2414 {
c5e1f66e
JW
2415 skip_wb = true;
2416
43e9d192
IB
2417 if (fp_offset)
2418 {
2419 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2420 GEN_INT (-offset)));
2421 RTX_FRAME_RELATED_P (insn) = 1;
80c11907
JW
2422
2423 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
c5e1f66e 2424 R30_REGNUM, false);
43e9d192
IB
2425 }
2426 else
80c11907 2427 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
43e9d192
IB
2428
2429 /* Set up frame pointer to point to the location of the
2430 previous frame pointer on the stack. */
2431 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2432 stack_pointer_rtx,
2433 GEN_INT (fp_offset)));
43e9d192 2434 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 2435 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192
IB
2436 }
2437 else
2438 {
c5e1f66e
JW
2439 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2440 unsigned reg2 = cfun->machine->frame.wb_candidate2;
80c11907 2441
c5e1f66e
JW
2442 if (fp_offset
2443 || reg1 == FIRST_PSEUDO_REGISTER
2444 || (reg2 == FIRST_PSEUDO_REGISTER
2445 && offset >= 256))
2446 {
2447 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2448 GEN_INT (-offset)));
2449 RTX_FRAME_RELATED_P (insn) = 1;
2450 }
2451 else
2452 {
ef4bddc2 2453 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
c5e1f66e
JW
2454
2455 skip_wb = true;
2456
2457 if (reg2 == FIRST_PSEUDO_REGISTER)
2458 aarch64_pushwb_single_reg (mode1, reg1, offset);
2459 else
2460 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2461 }
43e9d192
IB
2462 }
2463
c5e1f66e
JW
2464 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2465 skip_wb);
ae13fce3
JW
2466 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2467 skip_wb);
43e9d192
IB
2468 }
2469
2470 /* when offset >= 512,
2471 sub sp, sp, #<outgoing_args_size> */
2472 if (frame_size > -1)
2473 {
2474 if (crtl->outgoing_args_size > 0)
2475 {
2476 insn = emit_insn (gen_add2_insn
2477 (stack_pointer_rtx,
2478 GEN_INT (- crtl->outgoing_args_size)));
2479 RTX_FRAME_RELATED_P (insn) = 1;
2480 }
2481 }
2482}
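
A hedged sketch of what the expansion above typically produces; the exact sequence depends on the frame size, on fp_offset and on which registers are live:

/* Illustrative only.  For a small frame (offset < 512) that needs a
   frame pointer and has fp_offset == 0, the prologue is roughly

       stp  x29, x30, [sp, #-<offset>]!   ; push FP/LR, adjust sp
       add  x29, sp, #0                   ; establish the frame pointer
       ...                                ; remaining callee-saves

   Frames of 512 bytes or more first drop sp by the bulk of the frame
   size (via x16 when the amount does not fit an add/sub immediate)
   and make a separate final adjustment for the outgoing-args area.  */
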
2483
4f942779
RL
2484/* Return TRUE if we can use a simple_return insn.
2485
2486 This function checks whether the callee saved stack is empty, which
 2487   means no restore actions are needed.  The pro_and_epilogue pass will use
2488 this to check whether shrink-wrapping opt is feasible. */
2489
2490bool
2491aarch64_use_return_insn_p (void)
2492{
2493 if (!reload_completed)
2494 return false;
2495
2496 if (crtl->profile)
2497 return false;
2498
2499 aarch64_layout_frame ();
2500
2501 return cfun->machine->frame.frame_size == 0;
2502}
2503
43e9d192
IB
2504/* Generate the epilogue instructions for returning from a function. */
2505void
2506aarch64_expand_epilogue (bool for_sibcall)
2507{
1c960e02 2508 HOST_WIDE_INT frame_size, offset;
43e9d192 2509 HOST_WIDE_INT fp_offset;
dd991abb 2510 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2511 rtx_insn *insn;
7e8c2bd5
JW
 2512  /* We need to add a memory barrier to prevent reads from the deallocated stack.  */
2513 bool need_barrier_p = (get_frame_size () != 0
2514 || cfun->machine->frame.saved_varargs_size);
43e9d192
IB
2515
2516 aarch64_layout_frame ();
43e9d192 2517
1c960e02 2518 offset = frame_size = cfun->machine->frame.frame_size;
dd991abb
RH
2519 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2520 fp_offset = frame_size - hard_fp_offset;
44c0e7b9
YZ
2521
 2522  /* Store pairs and load pairs have an offset range of only -512 to 504.  */
43e9d192
IB
2523 if (offset >= 512)
2524 {
dd991abb 2525 offset = hard_fp_offset;
43e9d192
IB
2526 if (offset >= 512)
2527 offset = cfun->machine->frame.saved_regs_size;
2528
2529 frame_size -= (offset + crtl->outgoing_args_size);
2530 fp_offset = 0;
2531 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2532 {
2533 insn = emit_insn (gen_add2_insn
2534 (stack_pointer_rtx,
2535 GEN_INT (crtl->outgoing_args_size)));
2536 RTX_FRAME_RELATED_P (insn) = 1;
2537 }
2538 }
2539 else
2540 frame_size = -1;
2541
2542 /* If there were outgoing arguments or we've done dynamic stack
2543 allocation, then restore the stack pointer from the frame
2544 pointer. This is at most one insn and more efficient than using
2545 GCC's internal mechanism. */
2546 if (frame_pointer_needed
2547 && (crtl->outgoing_args_size || cfun->calls_alloca))
2548 {
7e8c2bd5
JW
2549 if (cfun->calls_alloca)
2550 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2551
43e9d192
IB
2552 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2553 hard_frame_pointer_rtx,
8f454e9f
JW
2554 GEN_INT (0)));
2555 offset = offset - fp_offset;
43e9d192
IB
2556 }
2557
43e9d192
IB
2558 if (offset > 0)
2559 {
4b92caa1
JW
2560 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2561 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2562 bool skip_wb = true;
dd991abb 2563 rtx cfi_ops = NULL;
4b92caa1 2564
43e9d192 2565 if (frame_pointer_needed)
4b92caa1
JW
2566 fp_offset = 0;
2567 else if (fp_offset
2568 || reg1 == FIRST_PSEUDO_REGISTER
2569 || (reg2 == FIRST_PSEUDO_REGISTER
2570 && offset >= 256))
2571 skip_wb = false;
2572
2573 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
dd991abb 2574 skip_wb, &cfi_ops);
4b92caa1 2575 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
dd991abb 2576 skip_wb, &cfi_ops);
4b92caa1 2577
7e8c2bd5
JW
2578 if (need_barrier_p)
2579 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2580
4b92caa1 2581 if (skip_wb)
43e9d192 2582 {
ef4bddc2 2583 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
dd991abb 2584 rtx rreg1 = gen_rtx_REG (mode1, reg1);
4b92caa1 2585
dd991abb 2586 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
4b92caa1 2587 if (reg2 == FIRST_PSEUDO_REGISTER)
dd991abb
RH
2588 {
2589 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2590 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2591 mem = gen_rtx_MEM (mode1, mem);
2592 insn = emit_move_insn (rreg1, mem);
2593 }
4b92caa1
JW
2594 else
2595 {
dd991abb 2596 rtx rreg2 = gen_rtx_REG (mode1, reg2);
4b92caa1 2597
dd991abb
RH
2598 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2599 insn = emit_insn (aarch64_gen_loadwb_pair
2600 (mode1, stack_pointer_rtx, rreg1,
2601 rreg2, offset));
4b92caa1 2602 }
43e9d192 2603 }
43e9d192
IB
2604 else
2605 {
2606 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2607 GEN_INT (offset)));
43e9d192 2608 }
43e9d192 2609
dd991abb
RH
2610 /* Reset the CFA to be SP + FRAME_SIZE. */
2611 rtx new_cfa = stack_pointer_rtx;
2612 if (frame_size > 0)
2613 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2614 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2615 REG_NOTES (insn) = cfi_ops;
43e9d192 2616 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
2617 }
2618
dd991abb 2619 if (frame_size > 0)
43e9d192 2620 {
7e8c2bd5
JW
2621 if (need_barrier_p)
2622 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2623
43e9d192
IB
2624 if (frame_size >= 0x1000000)
2625 {
2626 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2627 emit_move_insn (op0, GEN_INT (frame_size));
dd991abb 2628 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
43e9d192 2629 }
dd991abb 2630 else
43e9d192 2631 {
dd991abb
RH
2632 int hi_ofs = frame_size & 0xfff000;
2633 int lo_ofs = frame_size & 0x000fff;
2634
2635 if (hi_ofs && lo_ofs)
43e9d192
IB
2636 {
2637 insn = emit_insn (gen_add2_insn
dd991abb 2638 (stack_pointer_rtx, GEN_INT (hi_ofs)));
43e9d192 2639 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 2640 frame_size = lo_ofs;
43e9d192 2641 }
dd991abb
RH
2642 insn = emit_insn (gen_add2_insn
2643 (stack_pointer_rtx, GEN_INT (frame_size)));
43e9d192
IB
2644 }
2645
dd991abb
RH
2646 /* Reset the CFA to be SP + 0. */
2647 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2648 RTX_FRAME_RELATED_P (insn) = 1;
2649 }
2650
2651 /* Stack adjustment for exception handler. */
2652 if (crtl->calls_eh_return)
2653 {
2654 /* We need to unwind the stack by the offset computed by
2655 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2656 to be SP; letting the CFA move during this adjustment
2657 is just as correct as retaining the CFA from the body
2658 of the function. Therefore, do nothing special. */
2659 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
2660 }
2661
2662 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2663 if (!for_sibcall)
2664 emit_jump_insn (ret_rtx);
2665}
2666
2667/* Return the place to copy the exception unwinding return address to.
 2668   This will probably be a stack slot, but could (in theory) be the
 2669   return register.  */
2670rtx
2671aarch64_final_eh_return_addr (void)
2672{
1c960e02
MS
2673 HOST_WIDE_INT fp_offset;
2674
43e9d192 2675 aarch64_layout_frame ();
1c960e02
MS
2676
2677 fp_offset = cfun->machine->frame.frame_size
2678 - cfun->machine->frame.hard_fp_offset;
43e9d192
IB
2679
2680 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2681 return gen_rtx_REG (DImode, LR_REGNUM);
2682
2683 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2684 result in a store to save LR introduced by builtin_eh_return () being
2685 incorrectly deleted because the alias is not detected.
 2686   So in calculating the address to copy the exception unwinding
 2687   return address to, we distinguish two cases.
2688 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2689 we return a SP-relative location since all the addresses are SP-relative
2690 in this case. This prevents the store from being optimized away.
2691 If the fp_offset is not 0, then the addresses will be FP-relative and
2692 therefore we return a FP-relative location. */
2693
2694 if (frame_pointer_needed)
2695 {
2696 if (fp_offset)
2697 return gen_frame_mem (DImode,
2698 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2699 else
2700 return gen_frame_mem (DImode,
2701 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2702 }
2703
2704 /* If FP is not needed, we calculate the location of LR, which would be
2705 at the top of the saved registers block. */
2706
2707 return gen_frame_mem (DImode,
2708 plus_constant (Pmode,
2709 stack_pointer_rtx,
2710 fp_offset
2711 + cfun->machine->frame.saved_regs_size
2712 - 2 * UNITS_PER_WORD));
2713}
2714
9dfc162c
JG
2715/* Possibly output code to build up a constant in a register. For
2716 the benefit of the costs infrastructure, returns the number of
2717 instructions which would be emitted. GENERATE inhibits or
2718 enables code generation. */
2719
2720static int
2721aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
43e9d192 2722{
9dfc162c
JG
2723 int insns = 0;
2724
43e9d192 2725 if (aarch64_bitmask_imm (val, DImode))
9dfc162c
JG
2726 {
2727 if (generate)
2728 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2729 insns = 1;
2730 }
43e9d192
IB
2731 else
2732 {
2733 int i;
2734 int ncount = 0;
2735 int zcount = 0;
2736 HOST_WIDE_INT valp = val >> 16;
2737 HOST_WIDE_INT valm;
2738 HOST_WIDE_INT tval;
2739
2740 for (i = 16; i < 64; i += 16)
2741 {
2742 valm = (valp & 0xffff);
2743
2744 if (valm != 0)
2745 ++ zcount;
2746
2747 if (valm != 0xffff)
2748 ++ ncount;
2749
2750 valp >>= 16;
2751 }
2752
2753 /* zcount contains the number of additional MOVK instructions
2754 required if the constant is built up with an initial MOVZ instruction,
2755 while ncount is the number of MOVK instructions required if starting
2756 with a MOVN instruction. Choose the sequence that yields the fewest
2757 number of instructions, preferring MOVZ instructions when they are both
2758 the same. */
2759 if (ncount < zcount)
2760 {
9dfc162c
JG
2761 if (generate)
2762 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2763 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
43e9d192 2764 tval = 0xffff;
9dfc162c 2765 insns++;
43e9d192
IB
2766 }
2767 else
2768 {
9dfc162c
JG
2769 if (generate)
2770 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2771 GEN_INT (val & 0xffff));
43e9d192 2772 tval = 0;
9dfc162c 2773 insns++;
43e9d192
IB
2774 }
2775
2776 val >>= 16;
2777
2778 for (i = 16; i < 64; i += 16)
2779 {
2780 if ((val & 0xffff) != tval)
9dfc162c
JG
2781 {
2782 if (generate)
2783 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2784 GEN_INT (i),
2785 GEN_INT (val & 0xffff)));
2786 insns++;
2787 }
43e9d192
IB
2788 val >>= 16;
2789 }
2790 }
9dfc162c 2791 return insns;
43e9d192
IB
2792}
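
The zcount/ncount comparison simply picks whichever of MOVZ or MOVN leaves fewer 16-bit chunks to patch with MOVK.  A hedged worked example (the function emits RTL, not assembly; the mnemonics below are illustrative):

/* Worked example for aarch64_build_constant:

     val = 0xffffffff12345678
     16-bit chunks, low to high: 0x5678, 0x1234, 0xffff, 0xffff
     zcount = 3 (non-zero upper chunks)  -> MOVZ start needs 4 insns
     ncount = 1 (upper chunks != 0xffff) -> MOVN start needs 2 insns

   so the MOVN sequence is chosen:

       movn  x0, #0xa987              ; x0 = 0xffffffffffff5678
       movk  x0, #0x1234, lsl #16     ; x0 = 0xffffffff12345678     */
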
2793
2794static void
d9600ae5 2795aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
43e9d192
IB
2796{
2797 HOST_WIDE_INT mdelta = delta;
d9600ae5
SN
2798 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2799 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
43e9d192
IB
2800
2801 if (mdelta < 0)
2802 mdelta = -mdelta;
2803
2804 if (mdelta >= 4096 * 4096)
2805 {
9dfc162c 2806 (void) aarch64_build_constant (scratchreg, delta, true);
d9600ae5 2807 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
43e9d192
IB
2808 }
2809 else if (mdelta > 0)
2810 {
43e9d192 2811 if (mdelta >= 4096)
d9600ae5
SN
2812 {
2813 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2814 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2815 if (delta < 0)
2816 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2817 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2818 else
2819 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2820 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2821 }
43e9d192 2822 if (mdelta % 4096 != 0)
d9600ae5
SN
2823 {
2824 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2825 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2826 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2827 }
43e9d192
IB
2828 }
2829}
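
For 4096 <= |delta| < 4096 * 4096 the code above splits the adjustment into a high part, loaded into the scratch register and added shifted left by 12, and a low part added directly as a 12-bit immediate; only larger deltas fall back to aarch64_build_constant.  A small sketch of the split, using a hypothetical delta:

/* Sketch of the delta decomposition used by aarch64_add_constant.
   For delta = 0x5007 this prints hi = 5, lo = 7, i.e. roughly
   "mov scratch, #5; add this, this, scratch, lsl #12; add this, this, #7".  */
#include <stdio.h>

int
main (void)
{
  long delta = 0x5007;
  long hi = delta / 4096;  /* Added via the scratch register, shifted by 12.  */
  long lo = delta % 4096;  /* Added directly as a 12-bit immediate.  */
  printf ("hi = %ld, lo = %ld\n", hi, lo);
  return 0;
}
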
2830
2831/* Output code to add DELTA to the first argument, and then jump
2832 to FUNCTION. Used for C++ multiple inheritance. */
2833static void
2834aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2835 HOST_WIDE_INT delta,
2836 HOST_WIDE_INT vcall_offset,
2837 tree function)
2838{
2839 /* The this pointer is always in x0. Note that this differs from
 2840   Arm, where the this pointer may be bumped to r1 if r0 is required
2841 to return a pointer to an aggregate. On AArch64 a result value
2842 pointer will be in x8. */
2843 int this_regno = R0_REGNUM;
5d8a22a5
DM
2844 rtx this_rtx, temp0, temp1, addr, funexp;
2845 rtx_insn *insn;
43e9d192 2846
75f1d6fc
SN
2847 reload_completed = 1;
2848 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
2849
2850 if (vcall_offset == 0)
d9600ae5 2851 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2852 else
2853 {
28514dda 2854 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 2855
75f1d6fc
SN
2856 this_rtx = gen_rtx_REG (Pmode, this_regno);
2857 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2858 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 2859
75f1d6fc
SN
2860 addr = this_rtx;
2861 if (delta != 0)
2862 {
2863 if (delta >= -256 && delta < 256)
2864 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2865 plus_constant (Pmode, this_rtx, delta));
2866 else
d9600ae5 2867 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2868 }
2869
28514dda
YZ
2870 if (Pmode == ptr_mode)
2871 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2872 else
2873 aarch64_emit_move (temp0,
2874 gen_rtx_ZERO_EXTEND (Pmode,
2875 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 2876
28514dda 2877 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 2878 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
2879 else
2880 {
9dfc162c 2881 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
75f1d6fc 2882 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
2883 }
2884
28514dda
YZ
2885 if (Pmode == ptr_mode)
2886 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2887 else
2888 aarch64_emit_move (temp1,
2889 gen_rtx_SIGN_EXTEND (Pmode,
2890 gen_rtx_MEM (ptr_mode, addr)));
2891
75f1d6fc 2892 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
2893 }
2894
75f1d6fc
SN
2895 /* Generate a tail call to the target function. */
2896 if (!TREE_USED (function))
2897 {
2898 assemble_external (function);
2899 TREE_USED (function) = 1;
2900 }
2901 funexp = XEXP (DECL_RTL (function), 0);
2902 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2903 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2904 SIBLING_CALL_P (insn) = 1;
2905
2906 insn = get_insns ();
2907 shorten_branches (insn);
2908 final_start_function (insn, file, 1);
2909 final (insn, file, 1);
43e9d192 2910 final_end_function ();
75f1d6fc
SN
2911
2912 /* Stop pretending to be a post-reload pass. */
2913 reload_completed = 0;
43e9d192
IB
2914}
2915
43e9d192
IB
2916static bool
2917aarch64_tls_referenced_p (rtx x)
2918{
2919 if (!TARGET_HAVE_TLS)
2920 return false;
e7de8563
RS
2921 subrtx_iterator::array_type array;
2922 FOR_EACH_SUBRTX (iter, array, x, ALL)
2923 {
2924 const_rtx x = *iter;
2925 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
2926 return true;
2927 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2928 TLS offsets, not real symbol references. */
2929 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
2930 iter.skip_subrtxes ();
2931 }
2932 return false;
43e9d192
IB
2933}
2934
2935
2936static int
2937aarch64_bitmasks_cmp (const void *i1, const void *i2)
2938{
2939 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2940 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2941
2942 if (*imm1 < *imm2)
2943 return -1;
2944 if (*imm1 > *imm2)
2945 return +1;
2946 return 0;
2947}
2948
2949
2950static void
2951aarch64_build_bitmask_table (void)
2952{
2953 unsigned HOST_WIDE_INT mask, imm;
2954 unsigned int log_e, e, s, r;
2955 unsigned int nimms = 0;
2956
2957 for (log_e = 1; log_e <= 6; log_e++)
2958 {
2959 e = 1 << log_e;
2960 if (e == 64)
2961 mask = ~(HOST_WIDE_INT) 0;
2962 else
2963 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2964 for (s = 1; s < e; s++)
2965 {
2966 for (r = 0; r < e; r++)
2967 {
 2968	      /* Set S consecutive bits to 1 (S < 64).  */
2969 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
 2970	      /* Rotate right by R.  */
2971 if (r != 0)
2972 imm = ((imm >> r) | (imm << (e - r))) & mask;
 2973	      /* Replicate the constant depending on SIMD size; each case falls through.  */
2974 switch (log_e) {
2975 case 1: imm |= (imm << 2);
2976 case 2: imm |= (imm << 4);
2977 case 3: imm |= (imm << 8);
2978 case 4: imm |= (imm << 16);
2979 case 5: imm |= (imm << 32);
2980 case 6:
2981 break;
2982 default:
2983 gcc_unreachable ();
2984 }
2985 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2986 aarch64_bitmasks[nimms++] = imm;
2987 }
2988 }
2989 }
2990
2991 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2992 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2993 aarch64_bitmasks_cmp);
2994}
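
Each table entry is a "rotated run of ones" replicated across the register: pick an element size e in {2, 4, 8, 16, 32, 64}, set s < e consecutive bits, rotate right by r, and replicate the element to 64 bits (the switch fall-through above performs the replication).  The loops enumerate 2*1 + 4*3 + 8*7 + 16*15 + 32*31 + 64*63 = 5334 patterns, which is what the AARCH64_NUM_BITMASKS assertion checks.  A standalone sketch that builds one such pattern, with hypothetical parameters:

/* One bitmask-immediate pattern: element size E = 8, S = 3 consecutive
   ones, rotated right by R = 1, replicated to 64 bits.
   Prints 0x8383838383838383.  */
#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  unsigned e = 8, s = 3, r = 1;
  uint64_t mask = (e == 64) ? ~(uint64_t) 0 : ((uint64_t) 1 << e) - 1;
  uint64_t imm = ((uint64_t) 1 << s) - 1;        /* 0b00000111 */
  imm = ((imm >> r) | (imm << (e - r))) & mask;  /* 0b10000011 */
  for (unsigned width = e; width < 64; width *= 2)
    imm |= imm << width;                         /* replicate element */
  printf ("0x%016llx\n", (unsigned long long) imm);
  return 0;
}
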
2995
2996
2997/* Return true if val can be encoded as a 12-bit unsigned immediate with
2998 a left shift of 0 or 12 bits. */
2999bool
3000aarch64_uimm12_shift (HOST_WIDE_INT val)
3001{
3002 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3003 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3004 );
3005}
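
These are exactly the immediates accepted by ADD/SUB: an unsigned 12-bit value, optionally shifted left by 12.  A tiny standalone copy of the check with a few example values (sketch only):

/* Standalone copy of the uimm12 test.  Prints "1 1 0":
   0xfff fits with shift 0, 0x7ff000 fits with shift 12,
   0x1001 needs bits from both halves and is rejected.  */
#include <stdbool.h>
#include <stdio.h>

static bool
uimm12_shift (long long val)
{
  return (val & 0xfffLL) == val
         || (val & (0xfffLL << 12)) == val;
}

int
main (void)
{
  printf ("%d %d %d\n", uimm12_shift (0xfff), uimm12_shift (0x7ff000),
          uimm12_shift (0x1001));
  return 0;
}
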
3006
3007
3008/* Return true if val is an immediate that can be loaded into a
3009 register by a MOVZ instruction. */
3010static bool
ef4bddc2 3011aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3012{
3013 if (GET_MODE_SIZE (mode) > 4)
3014 {
3015 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3016 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3017 return 1;
3018 }
3019 else
3020 {
3021 /* Ignore sign extension. */
3022 val &= (HOST_WIDE_INT) 0xffffffff;
3023 }
3024 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3025 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3026}
3027
3028
3029/* Return true if val is a valid bitmask immediate. */
3030bool
ef4bddc2 3031aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3032{
3033 if (GET_MODE_SIZE (mode) < 8)
3034 {
3035 /* Replicate bit pattern. */
3036 val &= (HOST_WIDE_INT) 0xffffffff;
3037 val |= val << 32;
3038 }
3039 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
3040 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
3041}
3042
3043
3044/* Return true if val is an immediate that can be loaded into a
3045 register in a single instruction. */
3046bool
ef4bddc2 3047aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3048{
3049 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
3050 return 1;
3051 return aarch64_bitmask_imm (val, mode);
3052}
3053
3054static bool
ef4bddc2 3055aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
3056{
3057 rtx base, offset;
7eda14e1 3058
43e9d192
IB
3059 if (GET_CODE (x) == HIGH)
3060 return true;
3061
3062 split_const (x, &base, &offset);
3063 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 3064 {
f8b756b7 3065 if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
28514dda
YZ
3066 != SYMBOL_FORCE_TO_MEM)
3067 return true;
3068 else
3069 /* Avoid generating a 64-bit relocation in ILP32; leave
3070 to aarch64_expand_mov_immediate to handle it properly. */
3071 return mode != ptr_mode;
3072 }
43e9d192
IB
3073
3074 return aarch64_tls_referenced_p (x);
3075}
3076
3077/* Return true if register REGNO is a valid index register.
3078 STRICT_P is true if REG_OK_STRICT is in effect. */
3079
3080bool
3081aarch64_regno_ok_for_index_p (int regno, bool strict_p)
3082{
3083 if (!HARD_REGISTER_NUM_P (regno))
3084 {
3085 if (!strict_p)
3086 return true;
3087
3088 if (!reg_renumber)
3089 return false;
3090
3091 regno = reg_renumber[regno];
3092 }
3093 return GP_REGNUM_P (regno);
3094}
3095
 3096/* Return true if register REGNO is a valid base register.
3097 STRICT_P is true if REG_OK_STRICT is in effect. */
3098
3099bool
3100aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3101{
3102 if (!HARD_REGISTER_NUM_P (regno))
3103 {
3104 if (!strict_p)
3105 return true;
3106
3107 if (!reg_renumber)
3108 return false;
3109
3110 regno = reg_renumber[regno];
3111 }
3112
3113 /* The fake registers will be eliminated to either the stack or
3114 hard frame pointer, both of which are usually valid base registers.
3115 Reload deals with the cases where the eliminated form isn't valid. */
3116 return (GP_REGNUM_P (regno)
3117 || regno == SP_REGNUM
3118 || regno == FRAME_POINTER_REGNUM
3119 || regno == ARG_POINTER_REGNUM);
3120}
3121
 3122/* Return true if X is a valid base register.
3123 STRICT_P is true if REG_OK_STRICT is in effect. */
3124
3125static bool
3126aarch64_base_register_rtx_p (rtx x, bool strict_p)
3127{
3128 if (!strict_p && GET_CODE (x) == SUBREG)
3129 x = SUBREG_REG (x);
3130
3131 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3132}
3133
3134/* Return true if address offset is a valid index. If it is, fill in INFO
3135 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3136
3137static bool
3138aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 3139 machine_mode mode, bool strict_p)
43e9d192
IB
3140{
3141 enum aarch64_address_type type;
3142 rtx index;
3143 int shift;
3144
3145 /* (reg:P) */
3146 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3147 && GET_MODE (x) == Pmode)
3148 {
3149 type = ADDRESS_REG_REG;
3150 index = x;
3151 shift = 0;
3152 }
3153 /* (sign_extend:DI (reg:SI)) */
3154 else if ((GET_CODE (x) == SIGN_EXTEND
3155 || GET_CODE (x) == ZERO_EXTEND)
3156 && GET_MODE (x) == DImode
3157 && GET_MODE (XEXP (x, 0)) == SImode)
3158 {
3159 type = (GET_CODE (x) == SIGN_EXTEND)
3160 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3161 index = XEXP (x, 0);
3162 shift = 0;
3163 }
3164 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3165 else if (GET_CODE (x) == MULT
3166 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3167 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3168 && GET_MODE (XEXP (x, 0)) == DImode
3169 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3170 && CONST_INT_P (XEXP (x, 1)))
3171 {
3172 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3173 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3174 index = XEXP (XEXP (x, 0), 0);
3175 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3176 }
3177 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3178 else if (GET_CODE (x) == ASHIFT
3179 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3180 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3181 && GET_MODE (XEXP (x, 0)) == DImode
3182 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3183 && CONST_INT_P (XEXP (x, 1)))
3184 {
3185 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3186 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3187 index = XEXP (XEXP (x, 0), 0);
3188 shift = INTVAL (XEXP (x, 1));
3189 }
3190 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3191 else if ((GET_CODE (x) == SIGN_EXTRACT
3192 || GET_CODE (x) == ZERO_EXTRACT)
3193 && GET_MODE (x) == DImode
3194 && GET_CODE (XEXP (x, 0)) == MULT
3195 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3196 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3197 {
3198 type = (GET_CODE (x) == SIGN_EXTRACT)
3199 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3200 index = XEXP (XEXP (x, 0), 0);
3201 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3202 if (INTVAL (XEXP (x, 1)) != 32 + shift
3203 || INTVAL (XEXP (x, 2)) != 0)
3204 shift = -1;
3205 }
3206 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3207 (const_int 0xffffffff<<shift)) */
3208 else if (GET_CODE (x) == AND
3209 && GET_MODE (x) == DImode
3210 && GET_CODE (XEXP (x, 0)) == MULT
3211 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3212 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3213 && CONST_INT_P (XEXP (x, 1)))
3214 {
3215 type = ADDRESS_REG_UXTW;
3216 index = XEXP (XEXP (x, 0), 0);
3217 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3218 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3219 shift = -1;
3220 }
3221 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3222 else if ((GET_CODE (x) == SIGN_EXTRACT
3223 || GET_CODE (x) == ZERO_EXTRACT)
3224 && GET_MODE (x) == DImode
3225 && GET_CODE (XEXP (x, 0)) == ASHIFT
3226 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3227 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3228 {
3229 type = (GET_CODE (x) == SIGN_EXTRACT)
3230 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3231 index = XEXP (XEXP (x, 0), 0);
3232 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3233 if (INTVAL (XEXP (x, 1)) != 32 + shift
3234 || INTVAL (XEXP (x, 2)) != 0)
3235 shift = -1;
3236 }
3237 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3238 (const_int 0xffffffff<<shift)) */
3239 else if (GET_CODE (x) == AND
3240 && GET_MODE (x) == DImode
3241 && GET_CODE (XEXP (x, 0)) == ASHIFT
3242 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3243 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3244 && CONST_INT_P (XEXP (x, 1)))
3245 {
3246 type = ADDRESS_REG_UXTW;
3247 index = XEXP (XEXP (x, 0), 0);
3248 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3249 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3250 shift = -1;
3251 }
3252 /* (mult:P (reg:P) (const_int scale)) */
3253 else if (GET_CODE (x) == MULT
3254 && GET_MODE (x) == Pmode
3255 && GET_MODE (XEXP (x, 0)) == Pmode
3256 && CONST_INT_P (XEXP (x, 1)))
3257 {
3258 type = ADDRESS_REG_REG;
3259 index = XEXP (x, 0);
3260 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3261 }
3262 /* (ashift:P (reg:P) (const_int shift)) */
3263 else if (GET_CODE (x) == ASHIFT
3264 && GET_MODE (x) == Pmode
3265 && GET_MODE (XEXP (x, 0)) == Pmode
3266 && CONST_INT_P (XEXP (x, 1)))
3267 {
3268 type = ADDRESS_REG_REG;
3269 index = XEXP (x, 0);
3270 shift = INTVAL (XEXP (x, 1));
3271 }
3272 else
3273 return false;
3274
3275 if (GET_CODE (index) == SUBREG)
3276 index = SUBREG_REG (index);
3277
3278 if ((shift == 0 ||
3279 (shift > 0 && shift <= 3
3280 && (1 << shift) == GET_MODE_SIZE (mode)))
3281 && REG_P (index)
3282 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3283 {
3284 info->type = type;
3285 info->offset = index;
3286 info->shift = shift;
3287 return true;
3288 }
3289
3290 return false;
3291}
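
The forms accepted above correspond to the register-offset addressing modes of AArch64 loads and stores: a plain X register, an X register shifted by log2 of the access size, or a W register sign- or zero-extended with the same optional shift.  A hedged summary of the main RTL shapes and their approximate assembly spellings (exact syntax depends on the instruction):

/* Approximate correspondence, for illustration only:

     (plus (reg:DI x0) (reg:DI x1))                          [x0, x1]
     (plus (reg:DI x0)
           (ashift (reg:DI x1) (const_int 3)))               [x0, x1, lsl #3]
     (plus (reg:DI x0)
           (sign_extend:DI (reg:SI w1)))                      [x0, w1, sxtw]
     (plus (reg:DI x0)
           (mult (sign_extend:DI (reg:SI w1))
                 (const_int 4)))                              [x0, w1, sxtw #2]

   The shift or scale must be 0 or satisfy 1 << shift ==
   GET_MODE_SIZE (mode), as checked at the end of the function.  */
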
3292
44707478 3293bool
ef4bddc2 3294aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3295{
3296 return (offset >= -64 * GET_MODE_SIZE (mode)
3297 && offset < 64 * GET_MODE_SIZE (mode)
3298 && offset % GET_MODE_SIZE (mode) == 0);
3299}
3300
3301static inline bool
ef4bddc2 3302offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
3303 HOST_WIDE_INT offset)
3304{
3305 return offset >= -256 && offset < 256;
3306}
3307
3308static inline bool
ef4bddc2 3309offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3310{
3311 return (offset >= 0
3312 && offset < 4096 * GET_MODE_SIZE (mode)
3313 && offset % GET_MODE_SIZE (mode) == 0);
3314}
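
For an 8-byte (DImode) access the three predicates above describe the three immediate-offset encodings: load/store-pair offsets in [-512, 504] in steps of 8, unscaled signed 9-bit offsets in [-256, 255], and unsigned scaled 12-bit offsets in [0, 32760] in steps of 8.  A small sketch with the mode size fixed at 8 bytes:

/* Offset ranges for a DImode (8-byte) access, mirroring the three
   predicates above.  */
#include <stdbool.h>

static bool
offset_7bit_signed_scaled_8 (long offset)
{
  return offset >= -64 * 8 && offset < 64 * 8 && offset % 8 == 0;
}

static bool
offset_9bit_signed_unscaled_8 (long offset)
{
  return offset >= -256 && offset < 256;
}

static bool
offset_12bit_unsigned_scaled_8 (long offset)
{
  return offset >= 0 && offset < 4096 * 8 && offset % 8 == 0;
}
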
3315
3316/* Return true if X is a valid address for machine mode MODE. If it is,
3317 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3318 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3319
3320static bool
3321aarch64_classify_address (struct aarch64_address_info *info,
ef4bddc2 3322 rtx x, machine_mode mode,
43e9d192
IB
3323 RTX_CODE outer_code, bool strict_p)
3324{
3325 enum rtx_code code = GET_CODE (x);
3326 rtx op0, op1;
3327 bool allow_reg_index_p =
348d4b0a
BC
3328 outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
3329 || aarch64_vector_mode_supported_p (mode));
43e9d192
IB
3330 /* Don't support anything other than POST_INC or REG addressing for
3331 AdvSIMD. */
348d4b0a 3332 if (aarch64_vect_struct_mode_p (mode)
43e9d192
IB
3333 && (code != POST_INC && code != REG))
3334 return false;
3335
3336 switch (code)
3337 {
3338 case REG:
3339 case SUBREG:
3340 info->type = ADDRESS_REG_IMM;
3341 info->base = x;
3342 info->offset = const0_rtx;
3343 return aarch64_base_register_rtx_p (x, strict_p);
3344
3345 case PLUS:
3346 op0 = XEXP (x, 0);
3347 op1 = XEXP (x, 1);
15c0c5c9
JW
3348
3349 if (! strict_p
4aa81c2e 3350 && REG_P (op0)
15c0c5c9
JW
3351 && (op0 == virtual_stack_vars_rtx
3352 || op0 == frame_pointer_rtx
3353 || op0 == arg_pointer_rtx)
4aa81c2e 3354 && CONST_INT_P (op1))
15c0c5c9
JW
3355 {
3356 info->type = ADDRESS_REG_IMM;
3357 info->base = op0;
3358 info->offset = op1;
3359
3360 return true;
3361 }
3362
43e9d192
IB
3363 if (GET_MODE_SIZE (mode) != 0
3364 && CONST_INT_P (op1)
3365 && aarch64_base_register_rtx_p (op0, strict_p))
3366 {
3367 HOST_WIDE_INT offset = INTVAL (op1);
3368
3369 info->type = ADDRESS_REG_IMM;
3370 info->base = op0;
3371 info->offset = op1;
3372
3373 /* TImode and TFmode values are allowed in both pairs of X
3374 registers and individual Q registers. The available
3375 address modes are:
3376 X,X: 7-bit signed scaled offset
3377 Q: 9-bit signed offset
3378 We conservatively require an offset representable in either mode.
3379 */
3380 if (mode == TImode || mode == TFmode)
44707478 3381 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3382 && offset_9bit_signed_unscaled_p (mode, offset));
3383
3384 if (outer_code == PARALLEL)
3385 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3386 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3387 else
3388 return (offset_9bit_signed_unscaled_p (mode, offset)
3389 || offset_12bit_unsigned_scaled_p (mode, offset));
3390 }
3391
3392 if (allow_reg_index_p)
3393 {
3394 /* Look for base + (scaled/extended) index register. */
3395 if (aarch64_base_register_rtx_p (op0, strict_p)
3396 && aarch64_classify_index (info, op1, mode, strict_p))
3397 {
3398 info->base = op0;
3399 return true;
3400 }
3401 if (aarch64_base_register_rtx_p (op1, strict_p)
3402 && aarch64_classify_index (info, op0, mode, strict_p))
3403 {
3404 info->base = op1;
3405 return true;
3406 }
3407 }
3408
3409 return false;
3410
3411 case POST_INC:
3412 case POST_DEC:
3413 case PRE_INC:
3414 case PRE_DEC:
3415 info->type = ADDRESS_REG_WB;
3416 info->base = XEXP (x, 0);
3417 info->offset = NULL_RTX;
3418 return aarch64_base_register_rtx_p (info->base, strict_p);
3419
3420 case POST_MODIFY:
3421 case PRE_MODIFY:
3422 info->type = ADDRESS_REG_WB;
3423 info->base = XEXP (x, 0);
3424 if (GET_CODE (XEXP (x, 1)) == PLUS
3425 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3426 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3427 && aarch64_base_register_rtx_p (info->base, strict_p))
3428 {
3429 HOST_WIDE_INT offset;
3430 info->offset = XEXP (XEXP (x, 1), 1);
3431 offset = INTVAL (info->offset);
3432
3433 /* TImode and TFmode values are allowed in both pairs of X
3434 registers and individual Q registers. The available
3435 address modes are:
3436 X,X: 7-bit signed scaled offset
3437 Q: 9-bit signed offset
3438 We conservatively require an offset representable in either mode.
3439 */
3440 if (mode == TImode || mode == TFmode)
44707478 3441 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3442 && offset_9bit_signed_unscaled_p (mode, offset));
3443
3444 if (outer_code == PARALLEL)
3445 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3446 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3447 else
3448 return offset_9bit_signed_unscaled_p (mode, offset);
3449 }
3450 return false;
3451
3452 case CONST:
3453 case SYMBOL_REF:
3454 case LABEL_REF:
79517551
SN
3455 /* load literal: pc-relative constant pool entry. Only supported
3456 for SI mode or larger. */
43e9d192 3457 info->type = ADDRESS_SYMBOLIC;
79517551 3458 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
3459 {
3460 rtx sym, addend;
3461
3462 split_const (x, &sym, &addend);
3463 return (GET_CODE (sym) == LABEL_REF
3464 || (GET_CODE (sym) == SYMBOL_REF
3465 && CONSTANT_POOL_ADDRESS_P (sym)));
3466 }
3467 return false;
3468
3469 case LO_SUM:
3470 info->type = ADDRESS_LO_SUM;
3471 info->base = XEXP (x, 0);
3472 info->offset = XEXP (x, 1);
3473 if (allow_reg_index_p
3474 && aarch64_base_register_rtx_p (info->base, strict_p))
3475 {
3476 rtx sym, offs;
3477 split_const (info->offset, &sym, &offs);
3478 if (GET_CODE (sym) == SYMBOL_REF
f8b756b7 3479 && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
43e9d192
IB
3480 == SYMBOL_SMALL_ABSOLUTE))
3481 {
3482 /* The symbol and offset must be aligned to the access size. */
3483 unsigned int align;
3484 unsigned int ref_size;
3485
3486 if (CONSTANT_POOL_ADDRESS_P (sym))
3487 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3488 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3489 {
3490 tree exp = SYMBOL_REF_DECL (sym);
3491 align = TYPE_ALIGN (TREE_TYPE (exp));
3492 align = CONSTANT_ALIGNMENT (exp, align);
3493 }
3494 else if (SYMBOL_REF_DECL (sym))
3495 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
3496 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3497 && SYMBOL_REF_BLOCK (sym) != NULL)
3498 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
3499 else
3500 align = BITS_PER_UNIT;
3501
3502 ref_size = GET_MODE_SIZE (mode);
3503 if (ref_size == 0)
3504 ref_size = GET_MODE_SIZE (DImode);
3505
3506 return ((INTVAL (offs) & (ref_size - 1)) == 0
3507 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3508 }
3509 }
3510 return false;
3511
3512 default:
3513 return false;
3514 }
3515}
3516
3517bool
3518aarch64_symbolic_address_p (rtx x)
3519{
3520 rtx offset;
3521
3522 split_const (x, &x, &offset);
3523 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3524}
3525
3526/* Classify the base of symbolic expression X, given that X appears in
3527 context CONTEXT. */
da4f13a4
MS
3528
3529enum aarch64_symbol_type
3530aarch64_classify_symbolic_expression (rtx x,
3531 enum aarch64_symbol_context context)
43e9d192
IB
3532{
3533 rtx offset;
da4f13a4 3534
43e9d192 3535 split_const (x, &x, &offset);
f8b756b7 3536 return aarch64_classify_symbol (x, offset, context);
43e9d192
IB
3537}
3538
3539
3540/* Return TRUE if X is a legitimate address for accessing memory in
3541 mode MODE. */
3542static bool
ef4bddc2 3543aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
3544{
3545 struct aarch64_address_info addr;
3546
3547 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3548}
3549
3550/* Return TRUE if X is a legitimate address for accessing memory in
3551 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3552 pair operation. */
3553bool
ef4bddc2 3554aarch64_legitimate_address_p (machine_mode mode, rtx x,
aef66c94 3555 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
3556{
3557 struct aarch64_address_info addr;
3558
3559 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3560}
3561
3562/* Return TRUE if rtx X is immediate constant 0.0 */
3563bool
3520f7cc 3564aarch64_float_const_zero_rtx_p (rtx x)
43e9d192
IB
3565{
3566 REAL_VALUE_TYPE r;
3567
3568 if (GET_MODE (x) == VOIDmode)
3569 return false;
3570
3571 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3572 if (REAL_VALUE_MINUS_ZERO (r))
3573 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3574 return REAL_VALUES_EQUAL (r, dconst0);
3575}
3576
70f09188
AP
3577/* Return the fixed registers used for condition codes. */
3578
3579static bool
3580aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3581{
3582 *p1 = CC_REGNUM;
3583 *p2 = INVALID_REGNUM;
3584 return true;
3585}
3586
78607708
TV
3587/* Emit call insn with PAT and do aarch64-specific handling. */
3588
d07a3fed 3589void
78607708
TV
3590aarch64_emit_call_insn (rtx pat)
3591{
3592 rtx insn = emit_call_insn (pat);
3593
3594 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3595 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3596 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3597}
3598
ef4bddc2 3599machine_mode
43e9d192
IB
3600aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3601{
3602 /* All floating point compares return CCFP if it is an equality
3603 comparison, and CCFPE otherwise. */
3604 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3605 {
3606 switch (code)
3607 {
3608 case EQ:
3609 case NE:
3610 case UNORDERED:
3611 case ORDERED:
3612 case UNLT:
3613 case UNLE:
3614 case UNGT:
3615 case UNGE:
3616 case UNEQ:
3617 case LTGT:
3618 return CCFPmode;
3619
3620 case LT:
3621 case LE:
3622 case GT:
3623 case GE:
3624 return CCFPEmode;
3625
3626 default:
3627 gcc_unreachable ();
3628 }
3629 }
3630
3631 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3632 && y == const0_rtx
3633 && (code == EQ || code == NE || code == LT || code == GE)
b056c910
N
3634 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3635 || GET_CODE (x) == NEG))
43e9d192
IB
3636 return CC_NZmode;
3637
1c992d1e 3638 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
3639 the comparison will have to be swapped when we emit the assembly
3640 code. */
3641 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 3642 && (REG_P (y) || GET_CODE (y) == SUBREG)
43e9d192
IB
3643 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3644 || GET_CODE (x) == LSHIFTRT
1c992d1e 3645 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
3646 return CC_SWPmode;
3647
1c992d1e
RE
3648 /* Similarly for a negated operand, but we can only do this for
3649 equalities. */
3650 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 3651 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
3652 && (code == EQ || code == NE)
3653 && GET_CODE (x) == NEG)
3654 return CC_Zmode;
3655
43e9d192
IB
3656 /* A compare of a mode narrower than SI mode against zero can be done
3657 by extending the value in the comparison. */
3658 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3659 && y == const0_rtx)
3660 /* Only use sign-extension if we really need it. */
3661 return ((code == GT || code == GE || code == LE || code == LT)
3662 ? CC_SESWPmode : CC_ZESWPmode);
3663
3664 /* For everything else, return CCmode. */
3665 return CCmode;
3666}
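/* A few illustrative selections made by the function above (worked
   examples, with arbitrary operand registers a, b and shift amount n):

     (compare (plus:DI a b) (const_int 0)) used with EQ  -> CC_NZmode
     (compare (ashift:DI a n) b) with b a register       -> CC_SWPmode
     (compare (neg:DI a) b) with EQ and b a register     -> CC_Zmode
     DFmode (lt a b)                                     -> CCFPEmode
     DFmode (unlt a b)                                   -> CCFPmode
     QImode compare of a against zero used with GT       -> CC_SESWPmode  */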
3667
3dfa7055
ZC
3668static int
3669aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
3670
cd5660ab 3671int
43e9d192
IB
3672aarch64_get_condition_code (rtx x)
3673{
ef4bddc2 3674 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
3675 enum rtx_code comp_code = GET_CODE (x);
3676
3677 if (GET_MODE_CLASS (mode) != MODE_CC)
3678 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
3679 return aarch64_get_condition_code_1 (mode, comp_code);
3680}
43e9d192 3681
3dfa7055
ZC
3682static int
3683aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
3684{
3685 int ne = -1, eq = -1;
43e9d192
IB
3686 switch (mode)
3687 {
3688 case CCFPmode:
3689 case CCFPEmode:
3690 switch (comp_code)
3691 {
3692 case GE: return AARCH64_GE;
3693 case GT: return AARCH64_GT;
3694 case LE: return AARCH64_LS;
3695 case LT: return AARCH64_MI;
3696 case NE: return AARCH64_NE;
3697 case EQ: return AARCH64_EQ;
3698 case ORDERED: return AARCH64_VC;
3699 case UNORDERED: return AARCH64_VS;
3700 case UNLT: return AARCH64_LT;
3701 case UNLE: return AARCH64_LE;
3702 case UNGT: return AARCH64_HI;
3703 case UNGE: return AARCH64_PL;
cd5660ab 3704 default: return -1;
43e9d192
IB
3705 }
3706 break;
3707
3dfa7055
ZC
3708 case CC_DNEmode:
3709 ne = AARCH64_NE;
3710 eq = AARCH64_EQ;
3711 break;
3712
3713 case CC_DEQmode:
3714 ne = AARCH64_EQ;
3715 eq = AARCH64_NE;
3716 break;
3717
3718 case CC_DGEmode:
3719 ne = AARCH64_GE;
3720 eq = AARCH64_LT;
3721 break;
3722
3723 case CC_DLTmode:
3724 ne = AARCH64_LT;
3725 eq = AARCH64_GE;
3726 break;
3727
3728 case CC_DGTmode:
3729 ne = AARCH64_GT;
3730 eq = AARCH64_LE;
3731 break;
3732
3733 case CC_DLEmode:
3734 ne = AARCH64_LE;
3735 eq = AARCH64_GT;
3736 break;
3737
3738 case CC_DGEUmode:
3739 ne = AARCH64_CS;
3740 eq = AARCH64_CC;
3741 break;
3742
3743 case CC_DLTUmode:
3744 ne = AARCH64_CC;
3745 eq = AARCH64_CS;
3746 break;
3747
3748 case CC_DGTUmode:
3749 ne = AARCH64_HI;
3750 eq = AARCH64_LS;
3751 break;
3752
3753 case CC_DLEUmode:
3754 ne = AARCH64_LS;
3755 eq = AARCH64_HI;
3756 break;
3757
43e9d192
IB
3758 case CCmode:
3759 switch (comp_code)
3760 {
3761 case NE: return AARCH64_NE;
3762 case EQ: return AARCH64_EQ;
3763 case GE: return AARCH64_GE;
3764 case GT: return AARCH64_GT;
3765 case LE: return AARCH64_LE;
3766 case LT: return AARCH64_LT;
3767 case GEU: return AARCH64_CS;
3768 case GTU: return AARCH64_HI;
3769 case LEU: return AARCH64_LS;
3770 case LTU: return AARCH64_CC;
cd5660ab 3771 default: return -1;
43e9d192
IB
3772 }
3773 break;
3774
3775 case CC_SWPmode:
3776 case CC_ZESWPmode:
3777 case CC_SESWPmode:
3778 switch (comp_code)
3779 {
3780 case NE: return AARCH64_NE;
3781 case EQ: return AARCH64_EQ;
3782 case GE: return AARCH64_LE;
3783 case GT: return AARCH64_LT;
3784 case LE: return AARCH64_GE;
3785 case LT: return AARCH64_GT;
3786 case GEU: return AARCH64_LS;
3787 case GTU: return AARCH64_CC;
3788 case LEU: return AARCH64_CS;
3789 case LTU: return AARCH64_HI;
cd5660ab 3790 default: return -1;
43e9d192
IB
3791 }
3792 break;
3793
3794 case CC_NZmode:
3795 switch (comp_code)
3796 {
3797 case NE: return AARCH64_NE;
3798 case EQ: return AARCH64_EQ;
3799 case GE: return AARCH64_PL;
3800 case LT: return AARCH64_MI;
cd5660ab 3801 default: return -1;
43e9d192
IB
3802 }
3803 break;
3804
1c992d1e
RE
3805 case CC_Zmode:
3806 switch (comp_code)
3807 {
3808 case NE: return AARCH64_NE;
3809 case EQ: return AARCH64_EQ;
cd5660ab 3810 default: return -1;
1c992d1e
RE
3811 }
3812 break;
3813
43e9d192 3814 default:
cd5660ab 3815 return -1;
43e9d192
IB
3816 break;
3817 }
3dfa7055
ZC
3818
3819 if (comp_code == NE)
3820 return ne;
3821
3822 if (comp_code == EQ)
3823 return eq;
3824
3825 return -1;
43e9d192
IB
3826}
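/* Example of the mapping above: in CC_DGEmode, NE maps to AARCH64_GE and EQ
   to AARCH64_LT, while in CC_SWPmode a GT comparison yields AARCH64_LT
   because the operands were swapped when the comparison was emitted.  */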
3827
ddeabd3e
AL
3828bool
3829aarch64_const_vec_all_same_in_range_p (rtx x,
3830 HOST_WIDE_INT minval,
3831 HOST_WIDE_INT maxval)
3832{
3833 HOST_WIDE_INT firstval;
3834 int count, i;
3835
3836 if (GET_CODE (x) != CONST_VECTOR
3837 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
3838 return false;
3839
3840 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
3841 if (firstval < minval || firstval > maxval)
3842 return false;
3843
3844 count = CONST_VECTOR_NUNITS (x);
3845 for (i = 1; i < count; i++)
3846 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
3847 return false;
3848
3849 return true;
3850}
3851
3852bool
3853aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
3854{
3855 return aarch64_const_vec_all_same_in_range_p (x, val, val);
3856}
3857
43e9d192
IB
3858static unsigned
3859bit_count (unsigned HOST_WIDE_INT value)
3860{
3861 unsigned count = 0;
3862
3863 while (value)
3864 {
3865 count++;
3866 value &= value - 1;
3867 }
3868
3869 return count;
3870}
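/* Worked example: bit_count (0x29) clears the lowest set bit on each
   iteration, 0x29 -> 0x28 -> 0x20 -> 0x0, and so returns 3.  */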
3871
cf670503
ZC
3872/* N Z C V. */
3873#define AARCH64_CC_V 1
3874#define AARCH64_CC_C (1 << 1)
3875#define AARCH64_CC_Z (1 << 2)
3876#define AARCH64_CC_N (1 << 3)
3877
3878/* N Z C V flags for ccmp. The first code is for AND op and the other
3879 is for IOR op. Indexed by AARCH64_COND_CODE. */
3880static const int aarch64_nzcv_codes[][2] =
3881{
3882 {AARCH64_CC_Z, 0}, /* EQ, Z == 1. */
3883 {0, AARCH64_CC_Z}, /* NE, Z == 0. */
3884 {AARCH64_CC_C, 0}, /* CS, C == 1. */
3885 {0, AARCH64_CC_C}, /* CC, C == 0. */
3886 {AARCH64_CC_N, 0}, /* MI, N == 1. */
3887 {0, AARCH64_CC_N}, /* PL, N == 0. */
3888 {AARCH64_CC_V, 0}, /* VS, V == 1. */
3889 {0, AARCH64_CC_V}, /* VC, V == 0. */
 3890 {AARCH64_CC_C, 0}, /* HI, C == 1 && Z == 0. */
3891 {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0). */
3892 {0, AARCH64_CC_V}, /* GE, N == V. */
3893 {AARCH64_CC_V, 0}, /* LT, N != V. */
3894 {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V. */
3895 {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V). */
3896 {0, 0}, /* AL, Any. */
3897 {0, 0}, /* NV, Any. */
3898};
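/* Example of how the table above is consulted (see the 'K' and 'k' operand
   modifiers further down): for a GE condition the entry is
   {0, AARCH64_CC_V}, so 'K' prints 0 when the condition is combined with
   AND and 'k' prints 1 (the V bit) when it is combined with IOR.  */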
3899
3900int
3901aarch64_ccmp_mode_to_code (enum machine_mode mode)
3902{
3903 switch (mode)
3904 {
3905 case CC_DNEmode:
3906 return NE;
3907
3908 case CC_DEQmode:
3909 return EQ;
3910
3911 case CC_DLEmode:
3912 return LE;
3913
3914 case CC_DGTmode:
3915 return GT;
3916
3917 case CC_DLTmode:
3918 return LT;
3919
3920 case CC_DGEmode:
3921 return GE;
3922
3923 case CC_DLEUmode:
3924 return LEU;
3925
3926 case CC_DGTUmode:
3927 return GTU;
3928
3929 case CC_DLTUmode:
3930 return LTU;
3931
3932 case CC_DGEUmode:
3933 return GEU;
3934
3935 default:
3936 gcc_unreachable ();
3937 }
3938}
3939
3940
43e9d192
IB
3941void
3942aarch64_print_operand (FILE *f, rtx x, char code)
3943{
3944 switch (code)
3945 {
f541a481
KT
3946 /* An integer or symbol address without a preceding # sign. */
3947 case 'c':
3948 switch (GET_CODE (x))
3949 {
3950 case CONST_INT:
3951 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3952 break;
3953
3954 case SYMBOL_REF:
3955 output_addr_const (f, x);
3956 break;
3957
3958 case CONST:
3959 if (GET_CODE (XEXP (x, 0)) == PLUS
3960 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3961 {
3962 output_addr_const (f, x);
3963 break;
3964 }
3965 /* Fall through. */
3966
3967 default:
3968 output_operand_lossage ("Unsupported operand for code '%c'", code);
3969 }
3970 break;
3971
43e9d192
IB
3972 case 'e':
3973 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3974 {
3975 int n;
3976
4aa81c2e 3977 if (!CONST_INT_P (x)
43e9d192
IB
3978 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3979 {
3980 output_operand_lossage ("invalid operand for '%%%c'", code);
3981 return;
3982 }
3983
3984 switch (n)
3985 {
3986 case 3:
3987 fputc ('b', f);
3988 break;
3989 case 4:
3990 fputc ('h', f);
3991 break;
3992 case 5:
3993 fputc ('w', f);
3994 break;
3995 default:
3996 output_operand_lossage ("invalid operand for '%%%c'", code);
3997 return;
3998 }
3999 }
4000 break;
4001
4002 case 'p':
4003 {
4004 int n;
4005
4006 /* Print N such that 2^N == X. */
4aa81c2e 4007 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
4008 {
4009 output_operand_lossage ("invalid operand for '%%%c'", code);
4010 return;
4011 }
4012
4013 asm_fprintf (f, "%d", n);
4014 }
4015 break;
4016
4017 case 'P':
4018 /* Print the number of non-zero bits in X (a const_int). */
4aa81c2e 4019 if (!CONST_INT_P (x))
43e9d192
IB
4020 {
4021 output_operand_lossage ("invalid operand for '%%%c'", code);
4022 return;
4023 }
4024
4025 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
4026 break;
4027
4028 case 'H':
4029 /* Print the higher numbered register of a pair (TImode) of regs. */
4aa81c2e 4030 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
4031 {
4032 output_operand_lossage ("invalid operand for '%%%c'", code);
4033 return;
4034 }
4035
01a3a324 4036 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
4037 break;
4038
43e9d192 4039 case 'm':
cd5660ab
KT
4040 {
4041 int cond_code;
4042 /* Print a condition (eq, ne, etc). */
43e9d192 4043
cd5660ab
KT
4044 /* CONST_TRUE_RTX means always -- that's the default. */
4045 if (x == const_true_rtx)
43e9d192 4046 return;
43e9d192 4047
cd5660ab
KT
4048 if (!COMPARISON_P (x))
4049 {
4050 output_operand_lossage ("invalid operand for '%%%c'", code);
4051 return;
4052 }
4053
4054 cond_code = aarch64_get_condition_code (x);
4055 gcc_assert (cond_code >= 0);
4056 fputs (aarch64_condition_codes[cond_code], f);
4057 }
43e9d192
IB
4058 break;
4059
4060 case 'M':
cd5660ab
KT
4061 {
4062 int cond_code;
4063 /* Print the inverse of a condition (eq <-> ne, etc). */
43e9d192 4064
cd5660ab
KT
4065 /* CONST_TRUE_RTX means never -- that's the default. */
4066 if (x == const_true_rtx)
4067 {
4068 fputs ("nv", f);
4069 return;
4070 }
43e9d192 4071
cd5660ab
KT
4072 if (!COMPARISON_P (x))
4073 {
4074 output_operand_lossage ("invalid operand for '%%%c'", code);
4075 return;
4076 }
4077 cond_code = aarch64_get_condition_code (x);
4078 gcc_assert (cond_code >= 0);
4079 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
4080 (cond_code)], f);
4081 }
43e9d192
IB
4082 break;
4083
4084 case 'b':
4085 case 'h':
4086 case 's':
4087 case 'd':
4088 case 'q':
4089 /* Print a scalar FP/SIMD register name. */
4090 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4091 {
4092 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4093 return;
4094 }
50ce6f88 4095 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
4096 break;
4097
4098 case 'S':
4099 case 'T':
4100 case 'U':
4101 case 'V':
4102 /* Print the first FP/SIMD register name in a list. */
4103 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4104 {
4105 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4106 return;
4107 }
50ce6f88 4108 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
4109 break;
4110
a05c0ddf 4111 case 'X':
50d38551 4112 /* Print bottom 16 bits of integer constant in hex. */
4aa81c2e 4113 if (!CONST_INT_P (x))
a05c0ddf
IB
4114 {
4115 output_operand_lossage ("invalid operand for '%%%c'", code);
4116 return;
4117 }
50d38551 4118 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
4119 break;
4120
43e9d192
IB
4121 case 'w':
4122 case 'x':
4123 /* Print a general register name or the zero register (32-bit or
4124 64-bit). */
3520f7cc
JG
4125 if (x == const0_rtx
4126 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 4127 {
50ce6f88 4128 asm_fprintf (f, "%czr", code);
43e9d192
IB
4129 break;
4130 }
4131
4132 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
4133 {
50ce6f88 4134 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
4135 break;
4136 }
4137
4138 if (REG_P (x) && REGNO (x) == SP_REGNUM)
4139 {
50ce6f88 4140 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
4141 break;
4142 }
4143
4144 /* Fall through */
4145
4146 case 0:
 4147 /* Print a normal operand; if it's a general register, then we
 4148 assume DImode. */
4149 if (x == NULL)
4150 {
4151 output_operand_lossage ("missing operand");
4152 return;
4153 }
4154
4155 switch (GET_CODE (x))
4156 {
4157 case REG:
01a3a324 4158 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
4159 break;
4160
4161 case MEM:
4162 aarch64_memory_reference_mode = GET_MODE (x);
4163 output_address (XEXP (x, 0));
4164 break;
4165
4166 case LABEL_REF:
4167 case SYMBOL_REF:
4168 output_addr_const (asm_out_file, x);
4169 break;
4170
4171 case CONST_INT:
4172 asm_fprintf (f, "%wd", INTVAL (x));
4173 break;
4174
4175 case CONST_VECTOR:
3520f7cc
JG
4176 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
4177 {
ddeabd3e
AL
4178 gcc_assert (
4179 aarch64_const_vec_all_same_in_range_p (x,
4180 HOST_WIDE_INT_MIN,
4181 HOST_WIDE_INT_MAX));
3520f7cc
JG
4182 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
4183 }
4184 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
4185 {
4186 fputc ('0', f);
4187 }
4188 else
4189 gcc_unreachable ();
43e9d192
IB
4190 break;
4191
3520f7cc
JG
4192 case CONST_DOUBLE:
4193 /* CONST_DOUBLE can represent a double-width integer.
4194 In this case, the mode of x is VOIDmode. */
4195 if (GET_MODE (x) == VOIDmode)
4196 ; /* Do Nothing. */
4197 else if (aarch64_float_const_zero_rtx_p (x))
4198 {
4199 fputc ('0', f);
4200 break;
4201 }
4202 else if (aarch64_float_const_representable_p (x))
4203 {
4204#define buf_size 20
4205 char float_buf[buf_size] = {'\0'};
4206 REAL_VALUE_TYPE r;
4207 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4208 real_to_decimal_for_mode (float_buf, &r,
4209 buf_size, buf_size,
4210 1, GET_MODE (x));
4211 asm_fprintf (asm_out_file, "%s", float_buf);
4212 break;
4213#undef buf_size
4214 }
4215 output_operand_lossage ("invalid constant");
4216 return;
43e9d192
IB
4217 default:
4218 output_operand_lossage ("invalid operand");
4219 return;
4220 }
4221 break;
4222
4223 case 'A':
4224 if (GET_CODE (x) == HIGH)
4225 x = XEXP (x, 0);
4226
4227 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4228 {
4229 case SYMBOL_SMALL_GOT:
4230 asm_fprintf (asm_out_file, ":got:");
4231 break;
4232
4233 case SYMBOL_SMALL_TLSGD:
4234 asm_fprintf (asm_out_file, ":tlsgd:");
4235 break;
4236
4237 case SYMBOL_SMALL_TLSDESC:
4238 asm_fprintf (asm_out_file, ":tlsdesc:");
4239 break;
4240
4241 case SYMBOL_SMALL_GOTTPREL:
4242 asm_fprintf (asm_out_file, ":gottprel:");
4243 break;
4244
4245 case SYMBOL_SMALL_TPREL:
4246 asm_fprintf (asm_out_file, ":tprel:");
4247 break;
4248
87dd8ab0
MS
4249 case SYMBOL_TINY_GOT:
4250 gcc_unreachable ();
4251 break;
4252
43e9d192
IB
4253 default:
4254 break;
4255 }
4256 output_addr_const (asm_out_file, x);
4257 break;
4258
4259 case 'L':
4260 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4261 {
4262 case SYMBOL_SMALL_GOT:
4263 asm_fprintf (asm_out_file, ":lo12:");
4264 break;
4265
4266 case SYMBOL_SMALL_TLSGD:
4267 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4268 break;
4269
4270 case SYMBOL_SMALL_TLSDESC:
4271 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4272 break;
4273
4274 case SYMBOL_SMALL_GOTTPREL:
4275 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4276 break;
4277
4278 case SYMBOL_SMALL_TPREL:
4279 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4280 break;
4281
87dd8ab0
MS
4282 case SYMBOL_TINY_GOT:
4283 asm_fprintf (asm_out_file, ":got:");
4284 break;
4285
43e9d192
IB
4286 default:
4287 break;
4288 }
4289 output_addr_const (asm_out_file, x);
4290 break;
4291
4292 case 'G':
4293
4294 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4295 {
4296 case SYMBOL_SMALL_TPREL:
4297 asm_fprintf (asm_out_file, ":tprel_hi12:");
4298 break;
4299 default:
4300 break;
4301 }
4302 output_addr_const (asm_out_file, x);
4303 break;
4304
cf670503
ZC
4305 case 'K':
4306 {
4307 int cond_code;
4308 /* Print nzcv. */
4309
4310 if (!COMPARISON_P (x))
4311 {
4312 output_operand_lossage ("invalid operand for '%%%c'", code);
4313 return;
4314 }
4315
4316 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4317 gcc_assert (cond_code >= 0);
4318 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
4319 }
4320 break;
4321
4322 case 'k':
4323 {
4324 int cond_code;
4325 /* Print nzcv. */
4326
4327 if (!COMPARISON_P (x))
4328 {
4329 output_operand_lossage ("invalid operand for '%%%c'", code);
4330 return;
4331 }
4332
4333 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4334 gcc_assert (cond_code >= 0);
4335 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
4336 }
4337 break;
4338
43e9d192
IB
4339 default:
4340 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4341 return;
4342 }
4343}
4344
4345void
4346aarch64_print_operand_address (FILE *f, rtx x)
4347{
4348 struct aarch64_address_info addr;
4349
4350 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4351 MEM, true))
4352 switch (addr.type)
4353 {
4354 case ADDRESS_REG_IMM:
4355 if (addr.offset == const0_rtx)
01a3a324 4356 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 4357 else
16a3246f 4358 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
4359 INTVAL (addr.offset));
4360 return;
4361
4362 case ADDRESS_REG_REG:
4363 if (addr.shift == 0)
16a3246f 4364 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 4365 reg_names [REGNO (addr.offset)]);
43e9d192 4366 else
16a3246f 4367 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 4368 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
4369 return;
4370
4371 case ADDRESS_REG_UXTW:
4372 if (addr.shift == 0)
16a3246f 4373 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4374 REGNO (addr.offset) - R0_REGNUM);
4375 else
16a3246f 4376 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4377 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4378 return;
4379
4380 case ADDRESS_REG_SXTW:
4381 if (addr.shift == 0)
16a3246f 4382 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4383 REGNO (addr.offset) - R0_REGNUM);
4384 else
16a3246f 4385 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4386 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4387 return;
4388
4389 case ADDRESS_REG_WB:
4390 switch (GET_CODE (x))
4391 {
4392 case PRE_INC:
16a3246f 4393 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4394 GET_MODE_SIZE (aarch64_memory_reference_mode));
4395 return;
4396 case POST_INC:
16a3246f 4397 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
43e9d192
IB
4398 GET_MODE_SIZE (aarch64_memory_reference_mode));
4399 return;
4400 case PRE_DEC:
16a3246f 4401 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4402 GET_MODE_SIZE (aarch64_memory_reference_mode));
4403 return;
4404 case POST_DEC:
16a3246f 4405 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
43e9d192
IB
4406 GET_MODE_SIZE (aarch64_memory_reference_mode));
4407 return;
4408 case PRE_MODIFY:
16a3246f 4409 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4410 INTVAL (addr.offset));
4411 return;
4412 case POST_MODIFY:
16a3246f 4413 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
4414 INTVAL (addr.offset));
4415 return;
4416 default:
4417 break;
4418 }
4419 break;
4420
4421 case ADDRESS_LO_SUM:
16a3246f 4422 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
4423 output_addr_const (f, addr.offset);
4424 asm_fprintf (f, "]");
4425 return;
4426
4427 case ADDRESS_SYMBOLIC:
4428 break;
4429 }
4430
4431 output_addr_const (f, x);
4432}
4433
43e9d192
IB
4434bool
4435aarch64_label_mentioned_p (rtx x)
4436{
4437 const char *fmt;
4438 int i;
4439
4440 if (GET_CODE (x) == LABEL_REF)
4441 return true;
4442
4443 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4444 referencing instruction, but they are constant offsets, not
4445 symbols. */
4446 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4447 return false;
4448
4449 fmt = GET_RTX_FORMAT (GET_CODE (x));
4450 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4451 {
4452 if (fmt[i] == 'E')
4453 {
4454 int j;
4455
4456 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4457 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4458 return 1;
4459 }
4460 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4461 return 1;
4462 }
4463
4464 return 0;
4465}
4466
4467/* Implement REGNO_REG_CLASS. */
4468
4469enum reg_class
4470aarch64_regno_regclass (unsigned regno)
4471{
4472 if (GP_REGNUM_P (regno))
a4a182c6 4473 return GENERAL_REGS;
43e9d192
IB
4474
4475 if (regno == SP_REGNUM)
4476 return STACK_REG;
4477
4478 if (regno == FRAME_POINTER_REGNUM
4479 || regno == ARG_POINTER_REGNUM)
f24bb080 4480 return POINTER_REGS;
43e9d192
IB
4481
4482 if (FP_REGNUM_P (regno))
4483 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4484
4485 return NO_REGS;
4486}
4487
0c4ec427 4488static rtx
ef4bddc2 4489aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
4490{
4491 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4492 where mask is selected by alignment and size of the offset.
4493 We try to pick as large a range for the offset as possible to
4494 maximize the chance of a CSE. However, for aligned addresses
4495 we limit the range to 4k so that structures with different sized
4496 elements are likely to use the same base. */
4497
4498 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
4499 {
4500 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
4501 HOST_WIDE_INT base_offset;
4502
4503 /* Does it look like we'll need a load/store-pair operation? */
4504 if (GET_MODE_SIZE (mode) > 16
4505 || mode == TImode)
4506 base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
4507 & ~((128 * GET_MODE_SIZE (mode)) - 1));
 4508 /* For offsets that aren't a multiple of the access size, the limit is
4509 -256...255. */
4510 else if (offset & (GET_MODE_SIZE (mode) - 1))
4511 base_offset = (offset + 0x100) & ~0x1ff;
4512 else
4513 base_offset = offset & ~0xfff;
4514
4515 if (base_offset == 0)
4516 return x;
4517
4518 offset -= base_offset;
4519 rtx base_reg = gen_reg_rtx (Pmode);
4520 rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
4521 NULL_RTX);
4522 emit_move_insn (base_reg, val);
4523 x = plus_constant (Pmode, base_reg, offset);
4524 }
4525
4526 return x;
4527}
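/* Worked example of the split above (illustrative values): for an SImode
   access at x + 0x13004 the offset is a multiple of the access size, so
   base_offset = 0x13004 & ~0xfff = 0x13000; the code emits
   base_reg = x + 0x13000 and rewrites the address as base_reg + 0x4,
   letting nearby accesses share the same base register.  */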
4528
43e9d192
IB
4529/* Try a machine-dependent way of reloading an illegitimate address
4530 operand. If we find one, push the reload and return the new rtx. */
4531
4532rtx
4533aarch64_legitimize_reload_address (rtx *x_p,
ef4bddc2 4534 machine_mode mode,
43e9d192
IB
4535 int opnum, int type,
4536 int ind_levels ATTRIBUTE_UNUSED)
4537{
4538 rtx x = *x_p;
4539
348d4b0a
BC
4540 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4541 if (aarch64_vect_struct_mode_p (mode)
43e9d192
IB
4542 && GET_CODE (x) == PLUS
4543 && REG_P (XEXP (x, 0))
4544 && CONST_INT_P (XEXP (x, 1)))
4545 {
4546 rtx orig_rtx = x;
4547 x = copy_rtx (x);
4548 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4549 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4550 opnum, (enum reload_type) type);
4551 return x;
4552 }
4553
4554 /* We must recognize output that we have already generated ourselves. */
4555 if (GET_CODE (x) == PLUS
4556 && GET_CODE (XEXP (x, 0)) == PLUS
4557 && REG_P (XEXP (XEXP (x, 0), 0))
4558 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4559 && CONST_INT_P (XEXP (x, 1)))
4560 {
4561 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4562 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4563 opnum, (enum reload_type) type);
4564 return x;
4565 }
4566
4567 /* We wish to handle large displacements off a base register by splitting
4568 the addend across an add and the mem insn. This can cut the number of
4569 extra insns needed from 3 to 1. It is only useful for load/store of a
 4570 single register with a 12-bit offset field. */
4571 if (GET_CODE (x) == PLUS
4572 && REG_P (XEXP (x, 0))
4573 && CONST_INT_P (XEXP (x, 1))
4574 && HARD_REGISTER_P (XEXP (x, 0))
4575 && mode != TImode
4576 && mode != TFmode
4577 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4578 {
4579 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4580 HOST_WIDE_INT low = val & 0xfff;
4581 HOST_WIDE_INT high = val - low;
4582 HOST_WIDE_INT offs;
4583 rtx cst;
ef4bddc2 4584 machine_mode xmode = GET_MODE (x);
28514dda
YZ
4585
4586 /* In ILP32, xmode can be either DImode or SImode. */
4587 gcc_assert (xmode == DImode || xmode == SImode);
43e9d192
IB
4588
4589 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4590 BLKmode alignment. */
4591 if (GET_MODE_SIZE (mode) == 0)
4592 return NULL_RTX;
4593
4594 offs = low % GET_MODE_SIZE (mode);
4595
4596 /* Align misaligned offset by adjusting high part to compensate. */
4597 if (offs != 0)
4598 {
4599 if (aarch64_uimm12_shift (high + offs))
4600 {
4601 /* Align down. */
4602 low = low - offs;
4603 high = high + offs;
4604 }
4605 else
4606 {
4607 /* Align up. */
4608 offs = GET_MODE_SIZE (mode) - offs;
4609 low = low + offs;
4610 high = high + (low & 0x1000) - offs;
4611 low &= 0xfff;
4612 }
4613 }
4614
4615 /* Check for overflow. */
4616 if (high + low != val)
4617 return NULL_RTX;
4618
4619 cst = GEN_INT (high);
4620 if (!aarch64_uimm12_shift (high))
28514dda 4621 cst = force_const_mem (xmode, cst);
43e9d192
IB
4622
4623 /* Reload high part into base reg, leaving the low part
acf67b8e
KT
4624 in the mem instruction.
4625 Note that replacing this gen_rtx_PLUS with plus_constant is
4626 wrong in this case because we rely on the
4627 (plus (plus reg c1) c2) structure being preserved so that
4628 XEXP (*p, 0) in push_reload below uses the correct term. */
4d0e5b54
JG
4629 x = gen_rtx_PLUS (xmode,
4630 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4631 GEN_INT (low));
43e9d192
IB
4632
4633 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
28514dda 4634 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
43e9d192
IB
4635 opnum, (enum reload_type) type);
4636 return x;
4637 }
4638
4639 return NULL_RTX;
4640}
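/* Worked example of the high/low split above (illustrative, and assuming
   aarch64_uimm12_shift accepts a 12-bit immediate optionally shifted left
   by 12): reloading an SImode access at reg + 0x13008 gives low = 0x008 and
   high = 0x13000; low is already a multiple of the access size and high is
   0x13 << 12, so the address is rewritten as (plus (plus reg 0x13000) 0x8),
   the inner sum is reloaded into a base register, and the 0x8 is left for
   the load/store itself.  */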
4641
4642
4643static reg_class_t
4644aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4645 reg_class_t rclass,
ef4bddc2 4646 machine_mode mode,
43e9d192
IB
4647 secondary_reload_info *sri)
4648{
43e9d192
IB
4649 /* Without the TARGET_SIMD instructions we cannot move a Q register
4650 to a Q register directly. We need a scratch. */
4651 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4652 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4653 && reg_class_subset_p (rclass, FP_REGS))
4654 {
4655 if (mode == TFmode)
4656 sri->icode = CODE_FOR_aarch64_reload_movtf;
4657 else if (mode == TImode)
4658 sri->icode = CODE_FOR_aarch64_reload_movti;
4659 return NO_REGS;
4660 }
4661
 4662 /* A TFmode or TImode memory access should be handled via FP_REGS
4663 because AArch64 has richer addressing modes for LDR/STR instructions
4664 than LDP/STP instructions. */
a4a182c6 4665 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
43e9d192
IB
4666 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4667 return FP_REGS;
4668
4669 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 4670 return GENERAL_REGS;
43e9d192
IB
4671
4672 return NO_REGS;
4673}
4674
4675static bool
4676aarch64_can_eliminate (const int from, const int to)
4677{
4678 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4679 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4680
4681 if (frame_pointer_needed)
4682 {
4683 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4684 return true;
4685 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4686 return false;
4687 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4688 && !cfun->calls_alloca)
4689 return true;
4690 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4691 return true;
0b7f8166
MS
4692
4693 return false;
43e9d192 4694 }
1c923b60
JW
4695 else
4696 {
4697 /* If we decided that we didn't need a leaf frame pointer but then used
4698 LR in the function, then we'll want a frame pointer after all, so
4699 prevent this elimination to ensure a frame pointer is used. */
4700 if (to == STACK_POINTER_REGNUM
4701 && flag_omit_leaf_frame_pointer
4702 && df_regs_ever_live_p (LR_REGNUM))
4703 return false;
4704 }
777e6976 4705
43e9d192
IB
4706 return true;
4707}
4708
4709HOST_WIDE_INT
4710aarch64_initial_elimination_offset (unsigned from, unsigned to)
4711{
43e9d192 4712 aarch64_layout_frame ();
78c29983
MS
4713
4714 if (to == HARD_FRAME_POINTER_REGNUM)
4715 {
4716 if (from == ARG_POINTER_REGNUM)
1c960e02 4717 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
78c29983
MS
4718
4719 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
4720 return (cfun->machine->frame.hard_fp_offset
4721 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
4722 }
4723
4724 if (to == STACK_POINTER_REGNUM)
4725 {
4726 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
4727 return (cfun->machine->frame.frame_size
4728 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
4729 }
4730
1c960e02 4731 return cfun->machine->frame.frame_size;
43e9d192
IB
4732}
4733
43e9d192
IB
4734/* Implement RETURN_ADDR_RTX. We do not support moving back to a
4735 previous frame. */
4736
4737rtx
4738aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4739{
4740 if (count != 0)
4741 return const0_rtx;
4742 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4743}
4744
4745
4746static void
4747aarch64_asm_trampoline_template (FILE *f)
4748{
28514dda
YZ
4749 if (TARGET_ILP32)
4750 {
4751 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4752 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4753 }
4754 else
4755 {
4756 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4757 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4758 }
01a3a324 4759 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 4760 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
4761 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4762 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
4763}
4764
4765static void
4766aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4767{
4768 rtx fnaddr, mem, a_tramp;
28514dda 4769 const int tramp_code_sz = 16;
43e9d192
IB
4770
4771 /* Don't need to copy the trailing D-words, we fill those in below. */
4772 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
4773 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4774 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 4775 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
4776 if (GET_MODE (fnaddr) != ptr_mode)
4777 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
4778 emit_move_insn (mem, fnaddr);
4779
28514dda 4780 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
4781 emit_move_insn (mem, chain_value);
4782
4783 /* XXX We should really define a "clear_cache" pattern and use
4784 gen_clear_cache(). */
4785 a_tramp = XEXP (m_tramp, 0);
4786 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
4787 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4788 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4789 ptr_mode);
43e9d192
IB
4790}
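/* Sketch of the resulting trampoline layout in the LP64 case (commentary
   derived from the two functions above): 16 bytes of code copied from the
   template, then the target function address at offset 16 and the static
   chain value at offset 16 + POINTER_BYTES; the two LDRs in the template
   load those slots and the final BR jumps through IP1.  */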
4791
4792static unsigned char
ef4bddc2 4793aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192
IB
4794{
4795 switch (regclass)
4796 {
fee9ba42 4797 case CALLER_SAVE_REGS:
43e9d192
IB
4798 case POINTER_REGS:
4799 case GENERAL_REGS:
4800 case ALL_REGS:
4801 case FP_REGS:
4802 case FP_LO_REGS:
4803 return
4804 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
aef66c94 4805 (GET_MODE_SIZE (mode) + 7) / 8;
43e9d192
IB
4806 case STACK_REG:
4807 return 1;
4808
4809 case NO_REGS:
4810 return 0;
4811
4812 default:
4813 break;
4814 }
4815 gcc_unreachable ();
4816}
4817
4818static reg_class_t
78d8b9f0 4819aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 4820{
51bb310d 4821 if (regclass == POINTER_REGS)
78d8b9f0
IB
4822 return GENERAL_REGS;
4823
51bb310d
MS
4824 if (regclass == STACK_REG)
4825 {
4826 if (REG_P(x)
4827 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4828 return regclass;
4829
4830 return NO_REGS;
4831 }
4832
78d8b9f0
IB
4833 /* If it's an integer immediate that MOVI can't handle, then
4834 FP_REGS is not an option, so we return NO_REGS instead. */
4835 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4836 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4837 return NO_REGS;
4838
27bd251b
IB
 4839 /* Register elimination can result in a request for
 4840 SP+constant->FP_REGS. We cannot support such operations which
 4841 use SP as source and an FP_REG as destination, so reject that
 4842 right now. */
4843 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4844 {
4845 rtx lhs = XEXP (x, 0);
4846
4847 /* Look through a possible SUBREG introduced by ILP32. */
4848 if (GET_CODE (lhs) == SUBREG)
4849 lhs = SUBREG_REG (lhs);
4850
4851 gcc_assert (REG_P (lhs));
4852 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4853 POINTER_REGS));
4854 return NO_REGS;
4855 }
4856
78d8b9f0 4857 return regclass;
43e9d192
IB
4858}
4859
4860void
4861aarch64_asm_output_labelref (FILE* f, const char *name)
4862{
4863 asm_fprintf (f, "%U%s", name);
4864}
4865
4866static void
4867aarch64_elf_asm_constructor (rtx symbol, int priority)
4868{
4869 if (priority == DEFAULT_INIT_PRIORITY)
4870 default_ctor_section_asm_out_constructor (symbol, priority);
4871 else
4872 {
4873 section *s;
4874 char buf[18];
4875 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4876 s = get_section (buf, SECTION_WRITE, NULL);
4877 switch_to_section (s);
4878 assemble_align (POINTER_SIZE);
28514dda 4879 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4880 }
4881}
4882
4883static void
4884aarch64_elf_asm_destructor (rtx symbol, int priority)
4885{
4886 if (priority == DEFAULT_INIT_PRIORITY)
4887 default_dtor_section_asm_out_destructor (symbol, priority);
4888 else
4889 {
4890 section *s;
4891 char buf[18];
4892 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4893 s = get_section (buf, SECTION_WRITE, NULL);
4894 switch_to_section (s);
4895 assemble_align (POINTER_SIZE);
28514dda 4896 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4897 }
4898}
4899
4900const char*
4901aarch64_output_casesi (rtx *operands)
4902{
4903 char buf[100];
4904 char label[100];
b32d5189 4905 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
4906 int index;
4907 static const char *const patterns[4][2] =
4908 {
4909 {
4910 "ldrb\t%w3, [%0,%w1,uxtw]",
4911 "add\t%3, %4, %w3, sxtb #2"
4912 },
4913 {
4914 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4915 "add\t%3, %4, %w3, sxth #2"
4916 },
4917 {
4918 "ldr\t%w3, [%0,%w1,uxtw #2]",
4919 "add\t%3, %4, %w3, sxtw #2"
4920 },
4921 /* We assume that DImode is only generated when not optimizing and
4922 that we don't really need 64-bit address offsets. That would
4923 imply an object file with 8GB of code in a single function! */
4924 {
4925 "ldr\t%w3, [%0,%w1,uxtw #2]",
4926 "add\t%3, %4, %w3, sxtw #2"
4927 }
4928 };
4929
4930 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4931
4932 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4933
4934 gcc_assert (index >= 0 && index <= 3);
4935
 4936 /* Need to implement table size reduction, by changing the code below. */
4937 output_asm_insn (patterns[index][0], operands);
4938 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4939 snprintf (buf, sizeof (buf),
4940 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4941 output_asm_insn (buf, operands);
4942 output_asm_insn (patterns[index][1], operands);
4943 output_asm_insn ("br\t%3", operands);
4944 assemble_label (asm_out_file, label);
4945 return "";
4946}
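/* Illustrative output for a HImode dispatch table (register numbers here
   merely stand in for whatever operands 0, 1, 3 and 4 happen to be, and
   <N> for the generated label number):

       ldrh    w3, [x0, w1, uxtw #1]
       adr     x4, .Lrtx<N>
       add     x3, x4, w3, sxth #2
       br      x3
   .Lrtx<N>:                                                              */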
4947
4948
4949/* Return size in bits of an arithmetic operand which is shifted/scaled and
4950 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4951 operator. */
4952
4953int
4954aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4955{
4956 if (shift >= 0 && shift <= 3)
4957 {
4958 int size;
4959 for (size = 8; size <= 32; size *= 2)
4960 {
4961 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4962 if (mask == bits << shift)
4963 return size;
4964 }
4965 }
4966 return 0;
4967}
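/* Worked example: aarch64_uxt_size (1, 0x1fe) returns 8, because
   ((1 << 8) - 1) << 1 == 0x1fe, i.e. the mask selects a byte shifted left
   by one; a mask that is not a shifted 8/16/32-bit field yields 0.  */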
4968
4969static bool
ef4bddc2 4970aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
4971 const_rtx x ATTRIBUTE_UNUSED)
4972{
4973 /* We can't use blocks for constants when we're using a per-function
4974 constant pool. */
4975 return false;
4976}
4977
4978static section *
ef4bddc2 4979aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
4980 rtx x ATTRIBUTE_UNUSED,
4981 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4982{
4983 /* Force all constant pool entries into the current function section. */
4984 return function_section (current_function_decl);
4985}
4986
4987
4988/* Costs. */
4989
4990/* Helper function for rtx cost calculation. Strip a shift expression
4991 from X. Returns the inner operand if successful, or the original
4992 expression on failure. */
4993static rtx
4994aarch64_strip_shift (rtx x)
4995{
4996 rtx op = x;
4997
57b77d46
RE
4998 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4999 we can convert both to ROR during final output. */
43e9d192
IB
5000 if ((GET_CODE (op) == ASHIFT
5001 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
5002 || GET_CODE (op) == LSHIFTRT
5003 || GET_CODE (op) == ROTATERT
5004 || GET_CODE (op) == ROTATE)
43e9d192
IB
5005 && CONST_INT_P (XEXP (op, 1)))
5006 return XEXP (op, 0);
5007
5008 if (GET_CODE (op) == MULT
5009 && CONST_INT_P (XEXP (op, 1))
5010 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
5011 return XEXP (op, 0);
5012
5013 return x;
5014}
5015
4745e701 5016/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
5017 expression from X. Returns the inner operand if successful, or the
5018 original expression on failure. We deal with a number of possible
5019 canonicalization variations here. */
5020static rtx
4745e701 5021aarch64_strip_extend (rtx x)
43e9d192
IB
5022{
5023 rtx op = x;
5024
5025 /* Zero and sign extraction of a widened value. */
5026 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
5027 && XEXP (op, 2) == const0_rtx
4745e701 5028 && GET_CODE (XEXP (op, 0)) == MULT
43e9d192
IB
5029 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
5030 XEXP (op, 1)))
5031 return XEXP (XEXP (op, 0), 0);
5032
5033 /* It can also be represented (for zero-extend) as an AND with an
5034 immediate. */
5035 if (GET_CODE (op) == AND
5036 && GET_CODE (XEXP (op, 0)) == MULT
5037 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
5038 && CONST_INT_P (XEXP (op, 1))
5039 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
5040 INTVAL (XEXP (op, 1))) != 0)
5041 return XEXP (XEXP (op, 0), 0);
5042
5043 /* Now handle extended register, as this may also have an optional
5044 left shift by 1..4. */
5045 if (GET_CODE (op) == ASHIFT
5046 && CONST_INT_P (XEXP (op, 1))
5047 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
5048 op = XEXP (op, 0);
5049
5050 if (GET_CODE (op) == ZERO_EXTEND
5051 || GET_CODE (op) == SIGN_EXTEND)
5052 op = XEXP (op, 0);
5053
5054 if (op != x)
5055 return op;
5056
4745e701
JG
5057 return x;
5058}
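/* Example of the stripping above: for the extended-register form
   (ashift (zero_extend (reg)) (const_int 2)) both the shift and the
   extension are peeled off and the inner (reg) is returned, so the caller
   can cost the operand as if it were a plain register.  */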
5059
5060/* Helper function for rtx cost calculation. Calculate the cost of
5061 a MULT, which may be part of a multiply-accumulate rtx. Return
5062 the calculated cost of the expression, recursing manually in to
5063 operands where needed. */
5064
5065static int
5066aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
5067{
5068 rtx op0, op1;
5069 const struct cpu_cost_table *extra_cost
5070 = aarch64_tune_params->insn_extra_cost;
5071 int cost = 0;
5072 bool maybe_fma = (outer == PLUS || outer == MINUS);
ef4bddc2 5073 machine_mode mode = GET_MODE (x);
4745e701
JG
5074
5075 gcc_checking_assert (code == MULT);
5076
5077 op0 = XEXP (x, 0);
5078 op1 = XEXP (x, 1);
5079
5080 if (VECTOR_MODE_P (mode))
5081 mode = GET_MODE_INNER (mode);
5082
5083 /* Integer multiply/fma. */
5084 if (GET_MODE_CLASS (mode) == MODE_INT)
5085 {
 5086 /* The multiply will be canonicalized as a shift, so cost it as such. */
5087 if (CONST_INT_P (op1)
5088 && exact_log2 (INTVAL (op1)) > 0)
5089 {
5090 if (speed)
5091 {
5092 if (maybe_fma)
5093 /* ADD (shifted register). */
5094 cost += extra_cost->alu.arith_shift;
5095 else
5096 /* LSL (immediate). */
5097 cost += extra_cost->alu.shift;
5098 }
5099
5100 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
5101
5102 return cost;
5103 }
5104
5105 /* Integer multiplies or FMAs have zero/sign extending variants. */
5106 if ((GET_CODE (op0) == ZERO_EXTEND
5107 && GET_CODE (op1) == ZERO_EXTEND)
5108 || (GET_CODE (op0) == SIGN_EXTEND
5109 && GET_CODE (op1) == SIGN_EXTEND))
5110 {
5111 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
5112 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
5113
5114 if (speed)
5115 {
5116 if (maybe_fma)
5117 /* MADD/SMADDL/UMADDL. */
5118 cost += extra_cost->mult[0].extend_add;
5119 else
5120 /* MUL/SMULL/UMULL. */
5121 cost += extra_cost->mult[0].extend;
5122 }
5123
5124 return cost;
5125 }
5126
5127 /* This is either an integer multiply or an FMA. In both cases
5128 we want to recurse and cost the operands. */
5129 cost += rtx_cost (op0, MULT, 0, speed)
5130 + rtx_cost (op1, MULT, 1, speed);
5131
5132 if (speed)
5133 {
5134 if (maybe_fma)
5135 /* MADD. */
5136 cost += extra_cost->mult[mode == DImode].add;
5137 else
5138 /* MUL. */
5139 cost += extra_cost->mult[mode == DImode].simple;
5140 }
5141
5142 return cost;
5143 }
5144 else
5145 {
5146 if (speed)
5147 {
3d840f7d 5148 /* Floating-point FMA/FMUL can also support negations of the
4745e701
JG
5149 operands. */
5150 if (GET_CODE (op0) == NEG)
3d840f7d 5151 op0 = XEXP (op0, 0);
4745e701 5152 if (GET_CODE (op1) == NEG)
3d840f7d 5153 op1 = XEXP (op1, 0);
4745e701
JG
5154
5155 if (maybe_fma)
5156 /* FMADD/FNMADD/FNMSUB/FMSUB. */
5157 cost += extra_cost->fp[mode == DFmode].fma;
5158 else
3d840f7d 5159 /* FMUL/FNMUL. */
4745e701
JG
5160 cost += extra_cost->fp[mode == DFmode].mult;
5161 }
5162
5163 cost += rtx_cost (op0, MULT, 0, speed)
5164 + rtx_cost (op1, MULT, 1, speed);
5165 return cost;
5166 }
43e9d192
IB
5167}
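/* Example of the costing above: (mult:DI reg (const_int 8)) appearing as
   one operand of a PLUS is treated as an ADD (shifted register), so when
   optimizing for speed it adds extra_cost->alu.arith_shift plus the cost of
   the register operand, rather than a full multiply cost.  */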
5168
67747367
JG
5169static int
5170aarch64_address_cost (rtx x,
ef4bddc2 5171 machine_mode mode,
67747367
JG
5172 addr_space_t as ATTRIBUTE_UNUSED,
5173 bool speed)
5174{
5175 enum rtx_code c = GET_CODE (x);
5176 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
5177 struct aarch64_address_info info;
5178 int cost = 0;
5179 info.shift = 0;
5180
5181 if (!aarch64_classify_address (&info, x, mode, c, false))
5182 {
5183 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
5184 {
5185 /* This is a CONST or SYMBOL ref which will be split
5186 in a different way depending on the code model in use.
5187 Cost it through the generic infrastructure. */
5188 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
5189 /* Divide through by the cost of one instruction to
5190 bring it to the same units as the address costs. */
5191 cost_symbol_ref /= COSTS_N_INSNS (1);
5192 /* The cost is then the cost of preparing the address,
5193 followed by an immediate (possibly 0) offset. */
5194 return cost_symbol_ref + addr_cost->imm_offset;
5195 }
5196 else
5197 {
5198 /* This is most likely a jump table from a case
5199 statement. */
5200 return addr_cost->register_offset;
5201 }
5202 }
5203
5204 switch (info.type)
5205 {
5206 case ADDRESS_LO_SUM:
5207 case ADDRESS_SYMBOLIC:
5208 case ADDRESS_REG_IMM:
5209 cost += addr_cost->imm_offset;
5210 break;
5211
5212 case ADDRESS_REG_WB:
5213 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
5214 cost += addr_cost->pre_modify;
5215 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
5216 cost += addr_cost->post_modify;
5217 else
5218 gcc_unreachable ();
5219
5220 break;
5221
5222 case ADDRESS_REG_REG:
5223 cost += addr_cost->register_offset;
5224 break;
5225
5226 case ADDRESS_REG_UXTW:
5227 case ADDRESS_REG_SXTW:
5228 cost += addr_cost->register_extend;
5229 break;
5230
5231 default:
5232 gcc_unreachable ();
5233 }
5234
5235
5236 if (info.shift > 0)
5237 {
5238 /* For the sake of calculating the cost of the shifted register
5239 component, we can treat same sized modes in the same way. */
5240 switch (GET_MODE_BITSIZE (mode))
5241 {
5242 case 16:
5243 cost += addr_cost->addr_scale_costs.hi;
5244 break;
5245
5246 case 32:
5247 cost += addr_cost->addr_scale_costs.si;
5248 break;
5249
5250 case 64:
5251 cost += addr_cost->addr_scale_costs.di;
5252 break;
5253
5254 /* We can't tell, or this is a 128-bit vector. */
5255 default:
5256 cost += addr_cost->addr_scale_costs.ti;
5257 break;
5258 }
5259 }
5260
5261 return cost;
5262}
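/* Example of the address costing above: an SImode access through
   [base, index, sxtw #2], classified as ADDRESS_REG_SXTW with a shift of 2,
   costs addr_cost->register_extend plus addr_cost->addr_scale_costs.si for
   the 32-bit scaled-index component.  */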
5263
7cc2145f
JG
5264/* Return true if the RTX X in mode MODE is a zero or sign extract
5265 usable in an ADD or SUB (extended register) instruction. */
5266static bool
ef4bddc2 5267aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
7cc2145f
JG
5268{
5269 /* Catch add with a sign extract.
5270 This is add_<optab><mode>_multp2. */
5271 if (GET_CODE (x) == SIGN_EXTRACT
5272 || GET_CODE (x) == ZERO_EXTRACT)
5273 {
5274 rtx op0 = XEXP (x, 0);
5275 rtx op1 = XEXP (x, 1);
5276 rtx op2 = XEXP (x, 2);
5277
5278 if (GET_CODE (op0) == MULT
5279 && CONST_INT_P (op1)
5280 && op2 == const0_rtx
5281 && CONST_INT_P (XEXP (op0, 1))
5282 && aarch64_is_extend_from_extract (mode,
5283 XEXP (op0, 1),
5284 op1))
5285 {
5286 return true;
5287 }
5288 }
5289
5290 return false;
5291}
5292
61263118
KT
5293static bool
5294aarch64_frint_unspec_p (unsigned int u)
5295{
5296 switch (u)
5297 {
5298 case UNSPEC_FRINTZ:
5299 case UNSPEC_FRINTP:
5300 case UNSPEC_FRINTM:
5301 case UNSPEC_FRINTA:
5302 case UNSPEC_FRINTN:
5303 case UNSPEC_FRINTX:
5304 case UNSPEC_FRINTI:
5305 return true;
5306
5307 default:
5308 return false;
5309 }
5310}
5311
2d5ffe46
AP
5312/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5313 storing it in *COST. Result is true if the total cost of the operation
5314 has now been calculated. */
5315static bool
5316aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
5317{
b9e3afe9
AP
5318 rtx inner;
5319 rtx comparator;
5320 enum rtx_code cmpcode;
5321
5322 if (COMPARISON_P (op0))
5323 {
5324 inner = XEXP (op0, 0);
5325 comparator = XEXP (op0, 1);
5326 cmpcode = GET_CODE (op0);
5327 }
5328 else
5329 {
5330 inner = op0;
5331 comparator = const0_rtx;
5332 cmpcode = NE;
5333 }
5334
2d5ffe46
AP
5335 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5336 {
5337 /* Conditional branch. */
b9e3afe9 5338 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
5339 return true;
5340 else
5341 {
b9e3afe9 5342 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 5343 {
2d5ffe46
AP
5344 if (comparator == const0_rtx)
5345 {
5346 /* TBZ/TBNZ/CBZ/CBNZ. */
5347 if (GET_CODE (inner) == ZERO_EXTRACT)
5348 /* TBZ/TBNZ. */
5349 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
5350 0, speed);
5351 else
5352 /* CBZ/CBNZ. */
b9e3afe9 5353 *cost += rtx_cost (inner, cmpcode, 0, speed);
2d5ffe46
AP
5354
5355 return true;
5356 }
5357 }
b9e3afe9 5358 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 5359 {
2d5ffe46
AP
5360 /* TBZ/TBNZ. */
5361 if (comparator == const0_rtx)
5362 return true;
5363 }
5364 }
5365 }
b9e3afe9 5366 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
5367 {
5368 /* It's a conditional operation based on the status flags,
5369 so it must be some flavor of CSEL. */
5370
5371 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5372 if (GET_CODE (op1) == NEG
5373 || GET_CODE (op1) == NOT
5374 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5375 op1 = XEXP (op1, 0);
5376
5377 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
5378 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
5379 return true;
5380 }
5381
5382 /* We don't know what this is, cost all operands. */
5383 return false;
5384}
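/* Example of the branch costing above: a conditional branch of the form
   (if_then_else (eq (reg) (const_int 0)) (label_ref) (pc)) matches the
   CBZ/CBNZ case, so only the inner register operand is costed; a CSEL-style
   operation on the flags costs both value operands instead.  */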
5385
43e9d192
IB
5386/* Calculate the cost of calculating X, storing it in *COST. Result
5387 is true if the total cost of the operation has now been calculated. */
5388static bool
5389aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
5390 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5391{
a8eecd00 5392 rtx op0, op1, op2;
73250c4c 5393 const struct cpu_cost_table *extra_cost
43e9d192 5394 = aarch64_tune_params->insn_extra_cost;
ef4bddc2 5395 machine_mode mode = GET_MODE (x);
43e9d192 5396
7fc5ef02
JG
5397 /* By default, assume that everything has equivalent cost to the
5398 cheapest instruction. Any additional costs are applied as a delta
5399 above this default. */
5400 *cost = COSTS_N_INSNS (1);
5401
5402 /* TODO: The cost infrastructure currently does not handle
5403 vector operations. Assume that all vector operations
5404 are equally expensive. */
5405 if (VECTOR_MODE_P (mode))
5406 {
5407 if (speed)
5408 *cost += extra_cost->vect.alu;
5409 return true;
5410 }
5411
43e9d192
IB
5412 switch (code)
5413 {
5414 case SET:
ba123b0d
JG
5415 /* The cost depends entirely on the operands to SET. */
5416 *cost = 0;
43e9d192
IB
5417 op0 = SET_DEST (x);
5418 op1 = SET_SRC (x);
5419
5420 switch (GET_CODE (op0))
5421 {
5422 case MEM:
5423 if (speed)
2961177e
JG
5424 {
5425 rtx address = XEXP (op0, 0);
5426 if (GET_MODE_CLASS (mode) == MODE_INT)
5427 *cost += extra_cost->ldst.store;
5428 else if (mode == SFmode)
5429 *cost += extra_cost->ldst.storef;
5430 else if (mode == DFmode)
5431 *cost += extra_cost->ldst.stored;
5432
5433 *cost +=
5434 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5435 0, speed));
5436 }
43e9d192 5437
ba123b0d 5438 *cost += rtx_cost (op1, SET, 1, speed);
43e9d192
IB
5439 return true;
5440
5441 case SUBREG:
5442 if (! REG_P (SUBREG_REG (op0)))
5443 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
ba123b0d 5444
43e9d192
IB
5445 /* Fall through. */
5446 case REG:
ba123b0d
JG
5447 /* const0_rtx is in general free, but we will use an
5448 instruction to set a register to 0. */
5449 if (REG_P (op1) || op1 == const0_rtx)
5450 {
5451 /* The cost is 1 per register copied. */
5452 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5453 / UNITS_PER_WORD;
5454 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5455 }
5456 else
5457 /* Cost is just the cost of the RHS of the set. */
5458 *cost += rtx_cost (op1, SET, 1, speed);
43e9d192
IB
5459 return true;
5460
ba123b0d 5461 case ZERO_EXTRACT:
43e9d192 5462 case SIGN_EXTRACT:
ba123b0d
JG
5463 /* Bit-field insertion. Strip any redundant widening of
5464 the RHS to meet the width of the target. */
43e9d192
IB
5465 if (GET_CODE (op1) == SUBREG)
5466 op1 = SUBREG_REG (op1);
5467 if ((GET_CODE (op1) == ZERO_EXTEND
5468 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 5469 && CONST_INT_P (XEXP (op0, 1))
43e9d192
IB
5470 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5471 >= INTVAL (XEXP (op0, 1))))
5472 op1 = XEXP (op1, 0);
ba123b0d
JG
5473
5474 if (CONST_INT_P (op1))
5475 {
5476 /* MOV immediate is assumed to always be cheap. */
5477 *cost = COSTS_N_INSNS (1);
5478 }
5479 else
5480 {
5481 /* BFM. */
5482 if (speed)
5483 *cost += extra_cost->alu.bfi;
5484 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5485 }
5486
43e9d192
IB
5487 return true;
5488
5489 default:
ba123b0d
JG
5490 /* We can't make sense of this, assume default cost. */
5491 *cost = COSTS_N_INSNS (1);
61263118 5492 return false;
43e9d192
IB
5493 }
5494 return false;
5495
9dfc162c
JG
5496 case CONST_INT:
5497 /* If an instruction can incorporate a constant within the
5498 instruction, the instruction's expression avoids calling
5499 rtx_cost() on the constant. If rtx_cost() is called on a
5500 constant, then it is usually because the constant must be
5501 moved into a register by one or more instructions.
5502
5503 The exception is constant 0, which can be expressed
5504 as XZR/WZR and is therefore free. The exception to this is
5505 if we have (set (reg) (const0_rtx)) in which case we must cost
5506 the move. However, we can catch that when we cost the SET, so
5507 we don't need to consider that here. */
5508 if (x == const0_rtx)
5509 *cost = 0;
5510 else
5511 {
5512 /* To an approximation, building any other constant is
5513 proportionally expensive to the number of instructions
5514 required to build that constant. This is true whether we
5515 are compiling for SPEED or otherwise. */
82614948
RR
5516 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
5517 (NULL_RTX, x, false, mode));
9dfc162c
JG
5518 }
5519 return true;
5520
5521 case CONST_DOUBLE:
5522 if (speed)
5523 {
5524 /* mov[df,sf]_aarch64. */
5525 if (aarch64_float_const_representable_p (x))
5526 /* FMOV (scalar immediate). */
5527 *cost += extra_cost->fp[mode == DFmode].fpconst;
5528 else if (!aarch64_float_const_zero_rtx_p (x))
5529 {
5530 /* This will be a load from memory. */
5531 if (mode == DFmode)
5532 *cost += extra_cost->ldst.loadd;
5533 else
5534 *cost += extra_cost->ldst.loadf;
5535 }
5536 else
5537 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5538 or MOV v0.s[0], wzr - neither of which are modeled by the
5539 cost tables. Just use the default cost. */
5540 {
5541 }
5542 }
5543
5544 return true;
5545
43e9d192
IB
5546 case MEM:
5547 if (speed)
2961177e
JG
5548 {
5549 /* For loads we want the base cost of a load, plus an
5550 approximation for the additional cost of the addressing
5551 mode. */
5552 rtx address = XEXP (x, 0);
5553 if (GET_MODE_CLASS (mode) == MODE_INT)
5554 *cost += extra_cost->ldst.load;
5555 else if (mode == SFmode)
5556 *cost += extra_cost->ldst.loadf;
5557 else if (mode == DFmode)
5558 *cost += extra_cost->ldst.loadd;
5559
5560 *cost +=
5561 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5562 0, speed));
5563 }
43e9d192
IB
5564
5565 return true;
5566
5567 case NEG:
4745e701
JG
5568 op0 = XEXP (x, 0);
5569
5570 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5571 {
5572 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5573 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5574 {
5575 /* CSETM. */
5576 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5577 return true;
5578 }
5579
5580 /* Cost this as SUB wzr, X. */
5581 op0 = CONST0_RTX (GET_MODE (x));
5582 op1 = XEXP (x, 0);
5583 goto cost_minus;
5584 }
5585
5586 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5587 {
5588 /* Support (neg(fma...)) as a single instruction only if
5589 sign of zeros is unimportant. This matches the decision
5590 making in aarch64.md. */
5591 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5592 {
5593 /* FNMADD. */
5594 *cost = rtx_cost (op0, NEG, 0, speed);
5595 return true;
5596 }
5597 if (speed)
5598 /* FNEG. */
5599 *cost += extra_cost->fp[mode == DFmode].neg;
5600 return false;
5601 }
5602
5603 return false;
43e9d192 5604
781aeb73
KT
5605 case CLRSB:
5606 case CLZ:
5607 if (speed)
5608 *cost += extra_cost->alu.clz;
5609
5610 return false;
5611
43e9d192
IB
5612 case COMPARE:
5613 op0 = XEXP (x, 0);
5614 op1 = XEXP (x, 1);
5615
5616 if (op1 == const0_rtx
5617 && GET_CODE (op0) == AND)
5618 {
5619 x = op0;
5620 goto cost_logic;
5621 }
5622
a8eecd00
JG
5623 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5624 {
5625 /* TODO: A write to the CC flags possibly costs extra, this
5626 needs encoding in the cost tables. */
5627
5628 /* CC_ZESWPmode supports zero extend for free. */
5629 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5630 op0 = XEXP (op0, 0);
5631
5632 /* ANDS. */
5633 if (GET_CODE (op0) == AND)
5634 {
5635 x = op0;
5636 goto cost_logic;
5637 }
5638
5639 if (GET_CODE (op0) == PLUS)
5640 {
5641 /* ADDS (and CMN alias). */
5642 x = op0;
5643 goto cost_plus;
5644 }
5645
5646 if (GET_CODE (op0) == MINUS)
5647 {
5648 /* SUBS. */
5649 x = op0;
5650 goto cost_minus;
5651 }
5652
5653 if (GET_CODE (op1) == NEG)
5654 {
5655 /* CMN. */
5656 if (speed)
5657 *cost += extra_cost->alu.arith;
5658
5659 *cost += rtx_cost (op0, COMPARE, 0, speed);
5660 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5661 return true;
5662 }
5663
5664 /* CMP.
5665
5666 Compare can freely swap the order of operands, and
5667 canonicalization puts the more complex operation first.
5668 But the integer MINUS logic expects the shift/extend
5669 operation in op1. */
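/* For instance, (compare (ashift x 2) y) arrives with the shift first;
   swapping the operands here lets the MINUS logic see the shift in op1,
   matching the CMP (shifted register) form, roughly
   SUBS wzr, y, x, lsl #2. */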
5670 if (! (REG_P (op0)
5671 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5672 {
5673 op0 = XEXP (x, 1);
5674 op1 = XEXP (x, 0);
5675 }
5676 goto cost_minus;
5677 }
5678
5679 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5680 {
5681 /* FCMP. */
5682 if (speed)
5683 *cost += extra_cost->fp[mode == DFmode].compare;
5684
5685 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5686 {
5687 /* FCMP supports constant 0.0 for no extra cost. */
5688 return true;
5689 }
5690 return false;
5691 }
5692
5693 return false;
43e9d192
IB
5694
5695 case MINUS:
4745e701
JG
5696 {
5697 op0 = XEXP (x, 0);
5698 op1 = XEXP (x, 1);
5699
5700cost_minus:
5701 /* Detect valid immediates. */
5702 if ((GET_MODE_CLASS (mode) == MODE_INT
5703 || (GET_MODE_CLASS (mode) == MODE_CC
5704 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5705 && CONST_INT_P (op1)
5706 && aarch64_uimm12_shift (INTVAL (op1)))
5707 {
5708 *cost += rtx_cost (op0, MINUS, 0, speed);
43e9d192 5709
4745e701
JG
5710 if (speed)
5711 /* SUB(S) (immediate). */
5712 *cost += extra_cost->alu.arith;
5713 return true;
5714
5715 }
5716
7cc2145f
JG
5717 /* Look for SUB (extended register). */
5718 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5719 {
5720 if (speed)
5721 *cost += extra_cost->alu.arith_shift;
5722
5723 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5724 (enum rtx_code) GET_CODE (op1),
5725 0, speed);
5726 return true;
5727 }
5728
4745e701
JG
5729 rtx new_op1 = aarch64_strip_extend (op1);
5730
5731 /* Cost this as an FMA-alike operation. */
5732 if ((GET_CODE (new_op1) == MULT
5733 || GET_CODE (new_op1) == ASHIFT)
5734 && code != COMPARE)
5735 {
5736 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5737 (enum rtx_code) code,
5738 speed);
43e9d192 5739 *cost += rtx_cost (op0, MINUS, 0, speed);
4745e701
JG
5740 return true;
5741 }
43e9d192 5742
4745e701 5743 *cost += rtx_cost (new_op1, MINUS, 1, speed);
43e9d192 5744
4745e701
JG
5745 if (speed)
5746 {
5747 if (GET_MODE_CLASS (mode) == MODE_INT)
5748 /* SUB(S). */
5749 *cost += extra_cost->alu.arith;
5750 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5751 /* FSUB. */
5752 *cost += extra_cost->fp[mode == DFmode].addsub;
5753 }
5754 return true;
5755 }
43e9d192
IB
5756
5757 case PLUS:
4745e701
JG
5758 {
5759 rtx new_op0;
43e9d192 5760
4745e701
JG
5761 op0 = XEXP (x, 0);
5762 op1 = XEXP (x, 1);
43e9d192 5763
a8eecd00 5764cost_plus:
4745e701
JG
5765 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5766 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5767 {
5768 /* CSINC. */
5769 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5770 *cost += rtx_cost (op1, PLUS, 1, speed);
5771 return true;
5772 }
43e9d192 5773
4745e701
JG
5774 if (GET_MODE_CLASS (mode) == MODE_INT
5775 && CONST_INT_P (op1)
5776 && aarch64_uimm12_shift (INTVAL (op1)))
5777 {
5778 *cost += rtx_cost (op0, PLUS, 0, speed);
43e9d192 5779
4745e701
JG
5780 if (speed)
5781 /* ADD (immediate). */
5782 *cost += extra_cost->alu.arith;
5783 return true;
5784 }
5785
7cc2145f
JG
5786 /* Look for ADD (extended register). */
5787 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5788 {
5789 if (speed)
5790 *cost += extra_cost->alu.arith_shift;
5791
5792 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5793 (enum rtx_code) GET_CODE (op0),
5794 0, speed);
5795 return true;
5796 }
5797
4745e701
JG
5798 /* Strip any extend, leave shifts behind as we will
5799 cost them through mult_cost. */
5800 new_op0 = aarch64_strip_extend (op0);
5801
5802 if (GET_CODE (new_op0) == MULT
5803 || GET_CODE (new_op0) == ASHIFT)
5804 {
5805 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5806 speed);
5807 *cost += rtx_cost (op1, PLUS, 1, speed);
5808 return true;
5809 }
5810
5811 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5812 + rtx_cost (op1, PLUS, 1, speed));
5813
5814 if (speed)
5815 {
5816 if (GET_MODE_CLASS (mode) == MODE_INT)
5817 /* ADD. */
5818 *cost += extra_cost->alu.arith;
5819 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5820 /* FADD. */
5821 *cost += extra_cost->fp[mode == DFmode].addsub;
5822 }
5823 return true;
5824 }
43e9d192 5825
18b42b2a
KT
5826 case BSWAP:
5827 *cost = COSTS_N_INSNS (1);
5828
5829 if (speed)
5830 *cost += extra_cost->alu.rev;
5831
5832 return false;
5833
43e9d192 5834 case IOR:
f7d5cf8d
KT
5835 if (aarch_rev16_p (x))
5836 {
5837 *cost = COSTS_N_INSNS (1);
5838
5839 if (speed)
5840 *cost += extra_cost->alu.rev;
5841
5842 return true;
5843 }
5844 /* Fall through. */
43e9d192
IB
5845 case XOR:
5846 case AND:
5847 cost_logic:
5848 op0 = XEXP (x, 0);
5849 op1 = XEXP (x, 1);
5850
268c3b47
JG
5851 if (code == AND
5852 && GET_CODE (op0) == MULT
5853 && CONST_INT_P (XEXP (op0, 1))
5854 && CONST_INT_P (op1)
5855 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5856 INTVAL (op1)) != 0)
5857 {
5858 /* This is a UBFM/SBFM. */
5859 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5860 if (speed)
5861 *cost += extra_cost->alu.bfx;
5862 return true;
5863 }
5864
43e9d192
IB
5865 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5866 {
268c3b47
JG
5867 /* We possibly get the immediate for free; this is not
5868 modelled. */
43e9d192
IB
5869 if (CONST_INT_P (op1)
5870 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5871 {
268c3b47
JG
5872 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5873
5874 if (speed)
5875 *cost += extra_cost->alu.logical;
5876
5877 return true;
43e9d192
IB
5878 }
5879 else
5880 {
268c3b47
JG
5881 rtx new_op0 = op0;
5882
5883 /* Handle ORN, EON, or BIC. */
43e9d192
IB
5884 if (GET_CODE (op0) == NOT)
5885 op0 = XEXP (op0, 0);
268c3b47
JG
5886
5887 new_op0 = aarch64_strip_shift (op0);
5888
5889 /* If we had a shift on op0 then this is a logical-shift-
5890 by-register/immediate operation. Otherwise, this is just
5891 a logical operation. */
5892 if (speed)
5893 {
5894 if (new_op0 != op0)
5895 {
5896 /* Shift by immediate. */
5897 if (CONST_INT_P (XEXP (op0, 1)))
5898 *cost += extra_cost->alu.log_shift;
5899 else
5900 *cost += extra_cost->alu.log_shift_reg;
5901 }
5902 else
5903 *cost += extra_cost->alu.logical;
5904 }
5905
5906 /* In both cases we want to cost both operands. */
5907 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5908 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5909
5910 return true;
43e9d192 5911 }
43e9d192
IB
5912 }
5913 return false;
5914
268c3b47
JG
5915 case NOT:
5916 /* MVN. */
5917 if (speed)
5918 *cost += extra_cost->alu.logical;
5919
5920 /* The logical instruction could have the shifted register form,
5921 but the cost is the same if the shift is processed as a separate
5922 instruction, so we don't bother with it here. */
5923 return false;
5924
43e9d192 5925 case ZERO_EXTEND:
b1685e62
JG
5926
5927 op0 = XEXP (x, 0);
5928 /* If a value is written in SI mode, then zero extended to DI
5929 mode, the operation will in general be free as a write to
5930 a 'w' register implicitly zeroes the upper bits of an 'x'
5931 register. However, if this is
5932
5933 (set (reg) (zero_extend (reg)))
5934
5935 we must cost the explicit register move. */
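/* For instance, (zero_extend:DI (plus:SI ...)) is free because the
   32-bit ADD writing the 'w' register already clears bits 63:32,
   whereas a bare (set (reg:DI) (zero_extend:DI (reg:SI))) still needs
   one MOV. */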
5936 if (mode == DImode
5937 && GET_MODE (op0) == SImode
5938 && outer == SET)
5939 {
5940 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5941
5942 if (!op_cost && speed)
5943 /* MOV. */
5944 *cost += extra_cost->alu.extend;
5945 else
5946 /* Free, the cost is that of the SI mode operation. */
5947 *cost = op_cost;
5948
5949 return true;
5950 }
5951 else if (MEM_P (XEXP (x, 0)))
43e9d192 5952 {
b1685e62
JG
5953 /* All loads can zero extend to any size for free. */
5954 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
43e9d192
IB
5955 return true;
5956 }
b1685e62
JG
5957
5958 /* UXTB/UXTH. */
5959 if (speed)
5960 *cost += extra_cost->alu.extend;
5961
43e9d192
IB
5962 return false;
5963
5964 case SIGN_EXTEND:
b1685e62 5965 if (MEM_P (XEXP (x, 0)))
43e9d192 5966 {
b1685e62
JG
5967 /* LDRSH. */
5968 if (speed)
5969 {
5970 rtx address = XEXP (XEXP (x, 0), 0);
5971 *cost += extra_cost->ldst.load_sign_extend;
5972
5973 *cost +=
5974 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5975 0, speed));
5976 }
43e9d192
IB
5977 return true;
5978 }
b1685e62
JG
5979
5980 if (speed)
5981 *cost += extra_cost->alu.extend;
43e9d192
IB
5982 return false;
5983
ba0cfa17
JG
5984 case ASHIFT:
5985 op0 = XEXP (x, 0);
5986 op1 = XEXP (x, 1);
5987
5988 if (CONST_INT_P (op1))
5989 {
5990 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
5991 aliases. */
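/* e.g. (ashift (zero_extend:DI (reg:SI)) (const_int 3)) can be a single
   UBFIZ, which is why any extend below is stripped rather than costed
   separately. */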
5992 if (speed)
5993 *cost += extra_cost->alu.shift;
5994
5995 /* We can incorporate zero/sign extend for free. */
5996 if (GET_CODE (op0) == ZERO_EXTEND
5997 || GET_CODE (op0) == SIGN_EXTEND)
5998 op0 = XEXP (op0, 0);
5999
6000 *cost += rtx_cost (op0, ASHIFT, 0, speed);
6001 return true;
6002 }
6003 else
6004 {
6005 /* LSLV. */
6006 if (speed)
6007 *cost += extra_cost->alu.shift_reg;
6008
6009 return false; /* All arguments need to be in registers. */
6010 }
6011
43e9d192 6012 case ROTATE:
43e9d192
IB
6013 case ROTATERT:
6014 case LSHIFTRT:
43e9d192 6015 case ASHIFTRT:
ba0cfa17
JG
6016 op0 = XEXP (x, 0);
6017 op1 = XEXP (x, 1);
43e9d192 6018
ba0cfa17
JG
6019 if (CONST_INT_P (op1))
6020 {
6021 /* ASR (immediate) and friends. */
6022 if (speed)
6023 *cost += extra_cost->alu.shift;
43e9d192 6024
ba0cfa17
JG
6025 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
6026 return true;
6027 }
6028 else
6029 {
6030
6031 /* ASR (register) and friends. */
6032 if (speed)
6033 *cost += extra_cost->alu.shift_reg;
6034
6035 return false; /* All arguments need to be in registers. */
6036 }
43e9d192 6037
909734be
JG
6038 case SYMBOL_REF:
6039
6040 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6041 {
6042 /* LDR. */
6043 if (speed)
6044 *cost += extra_cost->ldst.load;
6045 }
6046 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
6047 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
6048 {
6049 /* ADRP, followed by ADD. */
6050 *cost += COSTS_N_INSNS (1);
6051 if (speed)
6052 *cost += 2 * extra_cost->alu.arith;
6053 }
6054 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
6055 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
6056 {
6057 /* ADR. */
6058 if (speed)
6059 *cost += extra_cost->alu.arith;
6060 }
6061
6062 if (flag_pic)
6063 {
6064 /* One extra load instruction, after accessing the GOT. */
6065 *cost += COSTS_N_INSNS (1);
6066 if (speed)
6067 *cost += extra_cost->ldst.load;
6068 }
43e9d192
IB
6069 return true;
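/* The sequences costed above are, roughly:
     tiny:   ADR  x0, sym
     small:  ADRP x0, sym; ADD x0, x0, :lo12:sym
     large:  a literal-pool load (LDR)
   with -fpic/-fPIC adding one further load through the GOT. */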
6070
909734be 6071 case HIGH:
43e9d192 6072 case LO_SUM:
909734be
JG
6073 /* ADRP/ADD (immediate). */
6074 if (speed)
6075 *cost += extra_cost->alu.arith;
43e9d192
IB
6076 return true;
6077
6078 case ZERO_EXTRACT:
6079 case SIGN_EXTRACT:
7cc2145f
JG
6080 /* UBFX/SBFX. */
6081 if (speed)
6082 *cost += extra_cost->alu.bfx;
6083
6084 /* We can trust that the immediates used will be correct (there
6085 are no by-register forms), so we need only cost op0. */
6086 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
43e9d192
IB
6087 return true;
6088
6089 case MULT:
4745e701
JG
6090 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
6091 /* aarch64_rtx_mult_cost always handles recursion to its
6092 operands. */
6093 return true;
43e9d192
IB
6094
6095 case MOD:
6096 case UMOD:
43e9d192
IB
6097 if (speed)
6098 {
6099 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
73250c4c
KT
6100 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
6101 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
43e9d192 6102 else if (GET_MODE (x) == DFmode)
73250c4c
KT
6103 *cost += (extra_cost->fp[1].mult
6104 + extra_cost->fp[1].div);
43e9d192 6105 else if (GET_MODE (x) == SFmode)
73250c4c
KT
6106 *cost += (extra_cost->fp[0].mult
6107 + extra_cost->fp[0].div);
43e9d192
IB
6108 }
6109 return false; /* All arguments need to be in registers. */
6110
6111 case DIV:
6112 case UDIV:
4105fe38 6113 case SQRT:
43e9d192
IB
6114 if (speed)
6115 {
4105fe38
JG
6116 if (GET_MODE_CLASS (mode) == MODE_INT)
6117 /* There is no integer SQRT, so only DIV and UDIV can get
6118 here. */
6119 *cost += extra_cost->mult[mode == DImode].idiv;
6120 else
6121 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
6122 }
6123 return false; /* All arguments need to be in registers. */
6124
a8eecd00 6125 case IF_THEN_ELSE:
2d5ffe46
AP
6126 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
6127 XEXP (x, 2), cost, speed);
a8eecd00
JG
6128
6129 case EQ:
6130 case NE:
6131 case GT:
6132 case GTU:
6133 case LT:
6134 case LTU:
6135 case GE:
6136 case GEU:
6137 case LE:
6138 case LEU:
6139
6140 return false; /* All arguments must be in registers. */
6141
b292109f
JG
6142 case FMA:
6143 op0 = XEXP (x, 0);
6144 op1 = XEXP (x, 1);
6145 op2 = XEXP (x, 2);
6146
6147 if (speed)
6148 *cost += extra_cost->fp[mode == DFmode].fma;
6149
6150 /* FMSUB, FNMADD, and FNMSUB are free. */
6151 if (GET_CODE (op0) == NEG)
6152 op0 = XEXP (op0, 0);
6153
6154 if (GET_CODE (op2) == NEG)
6155 op2 = XEXP (op2, 0);
6156
6157 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
6158 and the by-element operand as operand 0. */
6159 if (GET_CODE (op1) == NEG)
6160 op1 = XEXP (op1, 0);
6161
6162 /* Catch vector-by-element operations. The by-element operand can
6163 either be (vec_duplicate (vec_select (x))) or just
6164 (vec_select (x)), depending on whether we are multiplying by
6165 a vector or a scalar.
6166
6167 Canonicalization is not very good in these cases: FMA4 will put the
6168 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
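/* e.g. one multiplicand of the FMA may appear as
   (vec_duplicate (vec_select (reg:V2DF ...) ...)) when the whole
   multiply is a vector-by-lane operation, or as just (vec_select ...)
   when a scalar FMA multiplies by one lane. */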
6169 if (GET_CODE (op0) == VEC_DUPLICATE)
6170 op0 = XEXP (op0, 0);
6171 else if (GET_CODE (op1) == VEC_DUPLICATE)
6172 op1 = XEXP (op1, 0);
6173
6174 if (GET_CODE (op0) == VEC_SELECT)
6175 op0 = XEXP (op0, 0);
6176 else if (GET_CODE (op1) == VEC_SELECT)
6177 op1 = XEXP (op1, 0);
6178
6179 /* If the remaining parameters are not registers,
6180 get the cost to put them into registers. */
6181 *cost += rtx_cost (op0, FMA, 0, speed);
6182 *cost += rtx_cost (op1, FMA, 1, speed);
6183 *cost += rtx_cost (op2, FMA, 2, speed);
6184 return true;
6185
6186 case FLOAT_EXTEND:
6187 if (speed)
6188 *cost += extra_cost->fp[mode == DFmode].widen;
6189 return false;
6190
6191 case FLOAT_TRUNCATE:
6192 if (speed)
6193 *cost += extra_cost->fp[mode == DFmode].narrow;
6194 return false;
6195
61263118
KT
6196 case FIX:
6197 case UNSIGNED_FIX:
6198 x = XEXP (x, 0);
6199 /* Strip the rounding part. They will all be implemented
6200 by the fcvt* family of instructions anyway. */
6201 if (GET_CODE (x) == UNSPEC)
6202 {
6203 unsigned int uns_code = XINT (x, 1);
6204
6205 if (uns_code == UNSPEC_FRINTA
6206 || uns_code == UNSPEC_FRINTM
6207 || uns_code == UNSPEC_FRINTN
6208 || uns_code == UNSPEC_FRINTP
6209 || uns_code == UNSPEC_FRINTZ)
6210 x = XVECEXP (x, 0, 0);
6211 }
6212
6213 if (speed)
6214 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
6215
6216 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
6217 return true;
6218
b292109f
JG
6219 case ABS:
6220 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6221 {
6222 /* FABS and FNEG are analogous. */
6223 if (speed)
6224 *cost += extra_cost->fp[mode == DFmode].neg;
6225 }
6226 else
6227 {
6228 /* Integer ABS will either be split to
6229 two arithmetic instructions, or will be an ABS
6230 (scalar), which we don't model. */
6231 *cost = COSTS_N_INSNS (2);
6232 if (speed)
6233 *cost += 2 * extra_cost->alu.arith;
6234 }
6235 return false;
6236
6237 case SMAX:
6238 case SMIN:
6239 if (speed)
6240 {
6241 /* FMAXNM/FMINNM/FMAX/FMIN.
6242 TODO: This may not be accurate for all implementations, but
6243 we do not model this in the cost tables. */
6244 *cost += extra_cost->fp[mode == DFmode].addsub;
6245 }
6246 return false;
6247
61263118
KT
6248 case UNSPEC:
6249 /* The floating point round to integer frint* instructions. */
6250 if (aarch64_frint_unspec_p (XINT (x, 1)))
6251 {
6252 if (speed)
6253 *cost += extra_cost->fp[mode == DFmode].roundint;
6254
6255 return false;
6256 }
781aeb73
KT
6257
6258 if (XINT (x, 1) == UNSPEC_RBIT)
6259 {
6260 if (speed)
6261 *cost += extra_cost->alu.rev;
6262
6263 return false;
6264 }
61263118
KT
6265 break;
6266
fb620c4a
JG
6267 case TRUNCATE:
6268
6269 /* Decompose <su>muldi3_highpart. */
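/* That is, the high half of a widening multiply, roughly
   (DImode) (((unsigned __int128) a * b) >> 64), which a single UMULH
   (or SMULH for the signed variant) computes. */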
6270 if (/* (truncate:DI */
6271 mode == DImode
6272 /* (lshiftrt:TI */
6273 && GET_MODE (XEXP (x, 0)) == TImode
6274 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6275 /* (mult:TI */
6276 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6277 /* (ANY_EXTEND:TI (reg:DI))
6278 (ANY_EXTEND:TI (reg:DI))) */
6279 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6280 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
6281 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
6282 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
6283 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
6284 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
6285 /* (const_int 64) */
6286 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6287 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
6288 {
6289 /* UMULH/SMULH. */
6290 if (speed)
6291 *cost += extra_cost->mult[mode == DImode].extend;
6292 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
6293 MULT, 0, speed);
6294 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
6295 MULT, 1, speed);
6296 return true;
6297 }
6298
6299 /* Fall through. */
43e9d192 6300 default:
61263118 6301 break;
43e9d192 6302 }
61263118
KT
6303
6304 if (dump_file && (dump_flags & TDF_DETAILS))
6305 fprintf (dump_file,
6306 "\nFailed to cost RTX. Assuming default cost.\n");
6307
6308 return true;
43e9d192
IB
6309}
6310
0ee859b5
JG
6311/* Wrapper around aarch64_rtx_costs; dumps the partial or total cost
6312 calculated for X. This cost is stored in *COST. Returns true
6313 if the total cost of X was calculated. */
6314static bool
6315aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
6316 int param, int *cost, bool speed)
6317{
6318 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
6319
6320 if (dump_file && (dump_flags & TDF_DETAILS))
6321 {
6322 print_rtl_single (dump_file, x);
6323 fprintf (dump_file, "\n%s cost: %d (%s)\n",
6324 speed ? "Hot" : "Cold",
6325 *cost, result ? "final" : "partial");
6326 }
6327
6328 return result;
6329}
6330
43e9d192 6331static int
ef4bddc2 6332aarch64_register_move_cost (machine_mode mode,
8a3a7e67 6333 reg_class_t from_i, reg_class_t to_i)
43e9d192 6334{
8a3a7e67
RH
6335 enum reg_class from = (enum reg_class) from_i;
6336 enum reg_class to = (enum reg_class) to_i;
43e9d192
IB
6337 const struct cpu_regmove_cost *regmove_cost
6338 = aarch64_tune_params->regmove_cost;
6339
3be07662
WD
6340 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
6341 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
6342 to = GENERAL_REGS;
6343
6344 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
6345 from = GENERAL_REGS;
6346
6ee70f81
AP
6347 /* Moving between GPR and stack cost is the same as GP2GP. */
6348 if ((from == GENERAL_REGS && to == STACK_REG)
6349 || (to == GENERAL_REGS && from == STACK_REG))
6350 return regmove_cost->GP2GP;
6351
6352 /* To/From the stack register, we move via the gprs. */
6353 if (to == STACK_REG || from == STACK_REG)
6354 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
6355 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
6356
8919453c
WD
6357 if (GET_MODE_SIZE (mode) == 16)
6358 {
6359 /* 128-bit operations on general registers require 2 instructions. */
6360 if (from == GENERAL_REGS && to == GENERAL_REGS)
6361 return regmove_cost->GP2GP * 2;
6362 else if (from == GENERAL_REGS)
6363 return regmove_cost->GP2FP * 2;
6364 else if (to == GENERAL_REGS)
6365 return regmove_cost->FP2GP * 2;
6366
6367 /* When AdvSIMD instructions are disabled it is not possible to move
6368 a 128-bit value directly between Q registers. This is handled in
6369 secondary reload. A general register is used as a scratch to move
6370 the upper DI value and the lower DI value is moved directly,
6371 hence the cost is the sum of three moves. */
6372 if (! TARGET_SIMD)
6373 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6374
6375 return regmove_cost->FP2FP;
6376 }
6377
43e9d192
IB
6378 if (from == GENERAL_REGS && to == GENERAL_REGS)
6379 return regmove_cost->GP2GP;
6380 else if (from == GENERAL_REGS)
6381 return regmove_cost->GP2FP;
6382 else if (to == GENERAL_REGS)
6383 return regmove_cost->FP2GP;
6384
43e9d192
IB
6385 return regmove_cost->FP2FP;
6386}
6387
6388static int
ef4bddc2 6389aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
6390 reg_class_t rclass ATTRIBUTE_UNUSED,
6391 bool in ATTRIBUTE_UNUSED)
6392{
6393 return aarch64_tune_params->memmov_cost;
6394}
6395
d126a4ae
AP
6396/* Return the number of instructions that can be issued per cycle. */
6397static int
6398aarch64_sched_issue_rate (void)
6399{
6400 return aarch64_tune_params->issue_rate;
6401}
6402
8990e73a
TB
6403/* Vectorizer cost model target hooks. */
6404
6405/* Implement targetm.vectorize.builtin_vectorization_cost. */
6406static int
6407aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6408 tree vectype,
6409 int misalign ATTRIBUTE_UNUSED)
6410{
6411 unsigned elements;
6412
6413 switch (type_of_cost)
6414 {
6415 case scalar_stmt:
6416 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
6417
6418 case scalar_load:
6419 return aarch64_tune_params->vec_costs->scalar_load_cost;
6420
6421 case scalar_store:
6422 return aarch64_tune_params->vec_costs->scalar_store_cost;
6423
6424 case vector_stmt:
6425 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6426
6427 case vector_load:
6428 return aarch64_tune_params->vec_costs->vec_align_load_cost;
6429
6430 case vector_store:
6431 return aarch64_tune_params->vec_costs->vec_store_cost;
6432
6433 case vec_to_scalar:
6434 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6435
6436 case scalar_to_vec:
6437 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6438
6439 case unaligned_load:
6440 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6441
6442 case unaligned_store:
6443 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6444
6445 case cond_branch_taken:
6446 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6447
6448 case cond_branch_not_taken:
6449 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6450
6451 case vec_perm:
6452 case vec_promote_demote:
6453 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6454
6455 case vec_construct:
6456 elements = TYPE_VECTOR_SUBPARTS (vectype);
6457 return elements / 2 + 1;
6458
6459 default:
6460 gcc_unreachable ();
6461 }
6462}
6463
6464/* Implement targetm.vectorize.add_stmt_cost. */
6465static unsigned
6466aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6467 struct _stmt_vec_info *stmt_info, int misalign,
6468 enum vect_cost_model_location where)
6469{
6470 unsigned *cost = (unsigned *) data;
6471 unsigned retval = 0;
6472
6473 if (flag_vect_cost_model)
6474 {
6475 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6476 int stmt_cost =
6477 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6478
6479 /* Statements in an inner loop relative to the loop being
6480 vectorized are weighted more heavily. The value here is
6481 a function (linear for now) of the loop nest level. */
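/* E.g. a statement whose containing loop is at depth 2 has its COUNT
   doubled below before being multiplied by the per-statement cost. */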
6482 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6483 {
6484 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6485 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
6486 unsigned nest_level = loop_depth (loop);
6487
6488 count *= nest_level;
6489 }
6490
6491 retval = (unsigned) (count * stmt_cost);
6492 cost[where] += retval;
6493 }
6494
6495 return retval;
6496}
6497
43e9d192
IB
6498static void initialize_aarch64_code_model (void);
6499
6500/* Parse the architecture extension string. */
6501
6502static void
6503aarch64_parse_extension (char *str)
6504{
6505 /* The extension string is parsed left to right. */
6506 const struct aarch64_option_extension *opt = NULL;
6507
6508 /* Flag to say whether we are adding or removing an extension. */
6509 int adding_ext = -1;
6510
6511 while (str != NULL && *str != 0)
6512 {
6513 char *ext;
6514 size_t len;
6515
6516 str++;
6517 ext = strchr (str, '+');
6518
6519 if (ext != NULL)
6520 len = ext - str;
6521 else
6522 len = strlen (str);
6523
6524 if (len >= 2 && strncmp (str, "no", 2) == 0)
6525 {
6526 adding_ext = 0;
6527 len -= 2;
6528 str += 2;
6529 }
6530 else if (len > 0)
6531 adding_ext = 1;
6532
6533 if (len == 0)
6534 {
6535 error ("missing feature modifier after %qs", "+no");
6536 return;
6537 }
6538
6539 /* Scan over the extensions table trying to find an exact match. */
6540 for (opt = all_extensions; opt->name != NULL; opt++)
6541 {
6542 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6543 {
6544 /* Add or remove the extension. */
6545 if (adding_ext)
6546 aarch64_isa_flags |= opt->flags_on;
6547 else
6548 aarch64_isa_flags &= ~(opt->flags_off);
6549 break;
6550 }
6551 }
6552
6553 if (opt->name == NULL)
6554 {
6555 /* Extension not found in list. */
6556 error ("unknown feature modifier %qs", str);
6557 return;
6558 }
6559
6560 str = ext;
6561 };
6562
6563 return;
6564}
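
/* For example (assuming the usual "crc" and "crypto" entries in
   all_extensions), an extension string such as "+crc+nocrypto" is
   handled in two iterations of the loop above: the first enables the
   CRC flags, the second strips the "no" prefix and clears the crypto
   flags. */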
6565
6566/* Parse the ARCH string. */
6567
6568static void
6569aarch64_parse_arch (void)
6570{
6571 char *ext;
6572 const struct processor *arch;
6573 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6574 size_t len;
6575
6576 strcpy (str, aarch64_arch_string);
6577
6578 ext = strchr (str, '+');
6579
6580 if (ext != NULL)
6581 len = ext - str;
6582 else
6583 len = strlen (str);
6584
6585 if (len == 0)
6586 {
6587 error ("missing arch name in -march=%qs", str);
6588 return;
6589 }
6590
6591 /* Loop through the list of supported ARCHs to find a match. */
6592 for (arch = all_architectures; arch->name != NULL; arch++)
6593 {
6594 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6595 {
6596 selected_arch = arch;
6597 aarch64_isa_flags = selected_arch->flags;
ffee7aa9
JG
6598
6599 if (!selected_cpu)
6600 selected_cpu = &all_cores[selected_arch->core];
43e9d192
IB
6601
6602 if (ext != NULL)
6603 {
6604 /* ARCH string contains at least one extension. */
6605 aarch64_parse_extension (ext);
6606 }
6607
ffee7aa9
JG
6608 if (strcmp (selected_arch->arch, selected_cpu->arch))
6609 {
6610 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6611 selected_cpu->name, selected_arch->name);
6612 }
6613
43e9d192
IB
6614 return;
6615 }
6616 }
6617
6618 /* ARCH name not found in list. */
6619 error ("unknown value %qs for -march", str);
6620 return;
6621}
6622
6623/* Parse the CPU string. */
6624
6625static void
6626aarch64_parse_cpu (void)
6627{
6628 char *ext;
6629 const struct processor *cpu;
6630 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6631 size_t len;
6632
6633 strcpy (str, aarch64_cpu_string);
6634
6635 ext = strchr (str, '+');
6636
6637 if (ext != NULL)
6638 len = ext - str;
6639 else
6640 len = strlen (str);
6641
6642 if (len == 0)
6643 {
6644 error ("missing cpu name in -mcpu=%qs", str);
6645 return;
6646 }
6647
6648 /* Loop through the list of supported CPUs to find a match. */
6649 for (cpu = all_cores; cpu->name != NULL; cpu++)
6650 {
6651 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6652 {
6653 selected_cpu = cpu;
6654 aarch64_isa_flags = selected_cpu->flags;
6655
6656 if (ext != NULL)
6657 {
6658 /* CPU string contains at least one extension. */
6659 aarch64_parse_extension (ext);
6660 }
6661
6662 return;
6663 }
6664 }
6665
6666 /* CPU name not found in list. */
6667 error ("unknown value %qs for -mcpu", str);
6668 return;
6669}
6670
6671/* Parse the TUNE string. */
6672
6673static void
6674aarch64_parse_tune (void)
6675{
6676 const struct processor *cpu;
6677 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6678 strcpy (str, aarch64_tune_string);
6679
6680 /* Loop through the list of supported CPUs to find a match. */
6681 for (cpu = all_cores; cpu->name != NULL; cpu++)
6682 {
6683 if (strcmp (cpu->name, str) == 0)
6684 {
6685 selected_tune = cpu;
6686 return;
6687 }
6688 }
6689
6690 /* CPU name not found in list. */
6691 error ("unknown value %qs for -mtune", str);
6692 return;
6693}
6694
6695
6696/* Implement TARGET_OPTION_OVERRIDE. */
6697
6698static void
6699aarch64_override_options (void)
6700{
ffee7aa9
JG
6701 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6702 If either of -march or -mtune is given, they override their
6703 respective component of -mcpu.
43e9d192 6704
ffee7aa9
JG
6705 So, first parse AARCH64_CPU_STRING, then the others. Be careful
6706 with -march: if -mcpu is not present on the command line, -march
6707 must set a sensible default CPU. */
6708 if (aarch64_cpu_string)
43e9d192 6709 {
ffee7aa9 6710 aarch64_parse_cpu ();
43e9d192
IB
6711 }
6712
ffee7aa9 6713 if (aarch64_arch_string)
43e9d192 6714 {
ffee7aa9 6715 aarch64_parse_arch ();
43e9d192
IB
6716 }
6717
6718 if (aarch64_tune_string)
6719 {
6720 aarch64_parse_tune ();
6721 }
6722
63892fa2
KV
6723#ifndef HAVE_AS_MABI_OPTION
6724 /* The compiler may have been configured with 2.23.* binutils, which does
6725 not have support for ILP32. */
6726 if (TARGET_ILP32)
6727 error ("Assembler does not support -mabi=ilp32");
6728#endif
6729
43e9d192
IB
6730 initialize_aarch64_code_model ();
6731
6732 aarch64_build_bitmask_table ();
6733
6734 /* This target defaults to strict volatile bitfields. */
6735 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6736 flag_strict_volatile_bitfields = 1;
6737
6738 /* If the user did not specify a processor, choose the default
6739 one for them. This will be the CPU set during configuration using
a3cd0246 6740 --with-cpu, otherwise it is "generic". */
43e9d192
IB
6741 if (!selected_cpu)
6742 {
6743 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6744 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6745 }
6746
6747 gcc_assert (selected_cpu);
6748
43e9d192 6749 if (!selected_tune)
3edaf26d 6750 selected_tune = selected_cpu;
43e9d192
IB
6751
6752 aarch64_tune_flags = selected_tune->flags;
6753 aarch64_tune = selected_tune->core;
6754 aarch64_tune_params = selected_tune->tune;
0c6caaf8 6755 aarch64_architecture_version = selected_cpu->architecture_version;
43e9d192 6756
5e396da6
KT
6757 if (aarch64_fix_a53_err835769 == 2)
6758 {
6759#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
6760 aarch64_fix_a53_err835769 = 1;
6761#else
6762 aarch64_fix_a53_err835769 = 0;
6763#endif
6764 }
6765
43e9d192
IB
6766 aarch64_override_options_after_change ();
6767}
6768
6769/* Implement targetm.override_options_after_change. */
6770
6771static void
6772aarch64_override_options_after_change (void)
6773{
0b7f8166
MS
6774 if (flag_omit_frame_pointer)
6775 flag_omit_leaf_frame_pointer = false;
6776 else if (flag_omit_leaf_frame_pointer)
6777 flag_omit_frame_pointer = true;
43e9d192
IB
6778}
6779
6780static struct machine_function *
6781aarch64_init_machine_status (void)
6782{
6783 struct machine_function *machine;
766090c2 6784 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
6785 return machine;
6786}
6787
6788void
6789aarch64_init_expanders (void)
6790{
6791 init_machine_status = aarch64_init_machine_status;
6792}
6793
6794/* A checking mechanism for the implementation of the various code models. */
6795static void
6796initialize_aarch64_code_model (void)
6797{
6798 if (flag_pic)
6799 {
6800 switch (aarch64_cmodel_var)
6801 {
6802 case AARCH64_CMODEL_TINY:
6803 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6804 break;
6805 case AARCH64_CMODEL_SMALL:
6806 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6807 break;
6808 case AARCH64_CMODEL_LARGE:
6809 sorry ("code model %qs with -f%s", "large",
6810 flag_pic > 1 ? "PIC" : "pic");
6811 default:
6812 gcc_unreachable ();
6813 }
6814 }
6815 else
6816 aarch64_cmodel = aarch64_cmodel_var;
6817}
6818
6819/* Return true if SYMBOL_REF X binds locally. */
6820
6821static bool
6822aarch64_symbol_binds_local_p (const_rtx x)
6823{
6824 return (SYMBOL_REF_DECL (x)
6825 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6826 : SYMBOL_REF_LOCAL_P (x));
6827}
6828
6829/* Return true if SYMBOL_REF X is thread local */
6830static bool
6831aarch64_tls_symbol_p (rtx x)
6832{
6833 if (! TARGET_HAVE_TLS)
6834 return false;
6835
6836 if (GET_CODE (x) != SYMBOL_REF)
6837 return false;
6838
6839 return SYMBOL_REF_TLS_MODEL (x) != 0;
6840}
6841
6842/* Classify a TLS symbol into one of the TLS kinds. */
6843enum aarch64_symbol_type
6844aarch64_classify_tls_symbol (rtx x)
6845{
6846 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6847
6848 switch (tls_kind)
6849 {
6850 case TLS_MODEL_GLOBAL_DYNAMIC:
6851 case TLS_MODEL_LOCAL_DYNAMIC:
6852 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6853
6854 case TLS_MODEL_INITIAL_EXEC:
6855 return SYMBOL_SMALL_GOTTPREL;
6856
6857 case TLS_MODEL_LOCAL_EXEC:
6858 return SYMBOL_SMALL_TPREL;
6859
6860 case TLS_MODEL_EMULATED:
6861 case TLS_MODEL_NONE:
6862 return SYMBOL_FORCE_TO_MEM;
6863
6864 default:
6865 gcc_unreachable ();
6866 }
6867}
6868
6869/* Return the method that should be used to access SYMBOL_REF or
6870 LABEL_REF X in context CONTEXT. */
17f4d4bf 6871
43e9d192 6872enum aarch64_symbol_type
f8b756b7 6873aarch64_classify_symbol (rtx x, rtx offset,
43e9d192
IB
6874 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6875{
6876 if (GET_CODE (x) == LABEL_REF)
6877 {
6878 switch (aarch64_cmodel)
6879 {
6880 case AARCH64_CMODEL_LARGE:
6881 return SYMBOL_FORCE_TO_MEM;
6882
6883 case AARCH64_CMODEL_TINY_PIC:
6884 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
6885 return SYMBOL_TINY_ABSOLUTE;
6886
43e9d192
IB
6887 case AARCH64_CMODEL_SMALL_PIC:
6888 case AARCH64_CMODEL_SMALL:
6889 return SYMBOL_SMALL_ABSOLUTE;
6890
6891 default:
6892 gcc_unreachable ();
6893 }
6894 }
6895
17f4d4bf 6896 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 6897 {
4a985a37
MS
6898 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6899 return SYMBOL_FORCE_TO_MEM;
43e9d192
IB
6900
6901 if (aarch64_tls_symbol_p (x))
6902 return aarch64_classify_tls_symbol (x);
6903
17f4d4bf
CSS
6904 switch (aarch64_cmodel)
6905 {
6906 case AARCH64_CMODEL_TINY:
f8b756b7
TB
6907 /* When we retrieve a symbol + offset address, we have to make sure
6908 the offset does not cause overflow of the final address. But
6909 we have no way of knowing the address of the symbol at compile time,
6910 so we can't accurately say whether the distance between the PC and
6911 symbol + offset is outside the addressable range of +/-1M in the
6912 TINY code model. So we rely on images not being greater than
6913 1M, cap the offset at 1M, and anything beyond 1M will have to
6914 be loaded using an alternative mechanism. */
6915 if (SYMBOL_REF_WEAK (x)
6916 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
a5350ddc
CSS
6917 return SYMBOL_FORCE_TO_MEM;
6918 return SYMBOL_TINY_ABSOLUTE;
6919
17f4d4bf 6920 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
6921 /* Same reasoning as the tiny code model, but the offset cap here is
6922 4G. */
6923 if (SYMBOL_REF_WEAK (x)
6924 || INTVAL (offset) < (HOST_WIDE_INT) -4294967263
6925 || INTVAL (offset) > (HOST_WIDE_INT) 4294967264)
17f4d4bf
CSS
6926 return SYMBOL_FORCE_TO_MEM;
6927 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 6928
17f4d4bf 6929 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 6930 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 6931 return SYMBOL_TINY_GOT;
38e6c9a6
MS
6932 return SYMBOL_TINY_ABSOLUTE;
6933
17f4d4bf
CSS
6934 case AARCH64_CMODEL_SMALL_PIC:
6935 if (!aarch64_symbol_binds_local_p (x))
6936 return SYMBOL_SMALL_GOT;
6937 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 6938
17f4d4bf
CSS
6939 default:
6940 gcc_unreachable ();
6941 }
43e9d192 6942 }
17f4d4bf 6943
43e9d192
IB
6944 /* By default push everything into the constant pool. */
6945 return SYMBOL_FORCE_TO_MEM;
6946}
6947
43e9d192
IB
6948bool
6949aarch64_constant_address_p (rtx x)
6950{
6951 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6952}
6953
6954bool
6955aarch64_legitimate_pic_operand_p (rtx x)
6956{
6957 if (GET_CODE (x) == SYMBOL_REF
6958 || (GET_CODE (x) == CONST
6959 && GET_CODE (XEXP (x, 0)) == PLUS
6960 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6961 return false;
6962
6963 return true;
6964}
6965
3520f7cc
JG
6966/* Return true if X holds either a quarter-precision or
6967 floating-point +0.0 constant. */
6968static bool
ef4bddc2 6969aarch64_valid_floating_const (machine_mode mode, rtx x)
3520f7cc
JG
6970{
6971 if (!CONST_DOUBLE_P (x))
6972 return false;
6973
6974 /* TODO: We could handle moving 0.0 to a TFmode register,
6975 but first we would like to refactor the movtf_aarch64
6976 to be more amicable to split moves properly and
6977 correctly gate on TARGET_SIMD. For now - reject all
6978 constants which are not to SFmode or DFmode registers. */
6979 if (!(mode == SFmode || mode == DFmode))
6980 return false;
6981
6982 if (aarch64_float_const_zero_rtx_p (x))
6983 return true;
6984 return aarch64_float_const_representable_p (x);
6985}
6986
43e9d192 6987static bool
ef4bddc2 6988aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192
IB
6989{
6990 /* Do not allow vector struct mode constants. We could support
6991 0 and -1 easily, but they need support in aarch64-simd.md. */
6992 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6993 return false;
6994
6995 /* This could probably go away because
6996 we now decompose CONST_INTs according to expand_mov_immediate. */
6997 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 6998 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
6999 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
7000 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
7001
7002 if (GET_CODE (x) == HIGH
7003 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7004 return true;
7005
7006 return aarch64_constant_address_p (x);
7007}
7008
a5bc806c 7009rtx
43e9d192
IB
7010aarch64_load_tp (rtx target)
7011{
7012 if (!target
7013 || GET_MODE (target) != Pmode
7014 || !register_operand (target, Pmode))
7015 target = gen_reg_rtx (Pmode);
7016
7017 /* Can return in any reg. */
7018 emit_insn (gen_aarch64_load_tp_hard (target));
7019 return target;
7020}
7021
43e9d192
IB
7022/* On AAPCS systems, this is the "struct __va_list". */
7023static GTY(()) tree va_list_type;
7024
7025/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
7026 Return the type to use as __builtin_va_list.
7027
7028 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
7029
7030 struct __va_list
7031 {
7032 void *__stack;
7033 void *__gr_top;
7034 void *__vr_top;
7035 int __gr_offs;
7036 int __vr_offs;
7037 }; */
7038
7039static tree
7040aarch64_build_builtin_va_list (void)
7041{
7042 tree va_list_name;
7043 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7044
7045 /* Create the type. */
7046 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
7047 /* Give it the required name. */
7048 va_list_name = build_decl (BUILTINS_LOCATION,
7049 TYPE_DECL,
7050 get_identifier ("__va_list"),
7051 va_list_type);
7052 DECL_ARTIFICIAL (va_list_name) = 1;
7053 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 7054 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
7055
7056 /* Create the fields. */
7057 f_stack = build_decl (BUILTINS_LOCATION,
7058 FIELD_DECL, get_identifier ("__stack"),
7059 ptr_type_node);
7060 f_grtop = build_decl (BUILTINS_LOCATION,
7061 FIELD_DECL, get_identifier ("__gr_top"),
7062 ptr_type_node);
7063 f_vrtop = build_decl (BUILTINS_LOCATION,
7064 FIELD_DECL, get_identifier ("__vr_top"),
7065 ptr_type_node);
7066 f_groff = build_decl (BUILTINS_LOCATION,
7067 FIELD_DECL, get_identifier ("__gr_offs"),
7068 integer_type_node);
7069 f_vroff = build_decl (BUILTINS_LOCATION,
7070 FIELD_DECL, get_identifier ("__vr_offs"),
7071 integer_type_node);
7072
7073 DECL_ARTIFICIAL (f_stack) = 1;
7074 DECL_ARTIFICIAL (f_grtop) = 1;
7075 DECL_ARTIFICIAL (f_vrtop) = 1;
7076 DECL_ARTIFICIAL (f_groff) = 1;
7077 DECL_ARTIFICIAL (f_vroff) = 1;
7078
7079 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
7080 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
7081 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
7082 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
7083 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
7084
7085 TYPE_FIELDS (va_list_type) = f_stack;
7086 DECL_CHAIN (f_stack) = f_grtop;
7087 DECL_CHAIN (f_grtop) = f_vrtop;
7088 DECL_CHAIN (f_vrtop) = f_groff;
7089 DECL_CHAIN (f_groff) = f_vroff;
7090
7091 /* Compute its layout. */
7092 layout_type (va_list_type);
7093
7094 return va_list_type;
7095}
7096
7097/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
7098static void
7099aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
7100{
7101 const CUMULATIVE_ARGS *cum;
7102 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7103 tree stack, grtop, vrtop, groff, vroff;
7104 tree t;
7105 int gr_save_area_size;
7106 int vr_save_area_size;
7107 int vr_offset;
7108
7109 cum = &crtl->args.info;
7110 gr_save_area_size
7111 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
7112 vr_save_area_size
7113 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
7114
7115 if (TARGET_GENERAL_REGS_ONLY)
7116 {
7117 if (cum->aapcs_nvrn > 0)
7118 sorry ("%qs and floating point or vector arguments",
7119 "-mgeneral-regs-only");
7120 vr_save_area_size = 0;
7121 }
7122
7123 f_stack = TYPE_FIELDS (va_list_type_node);
7124 f_grtop = DECL_CHAIN (f_stack);
7125 f_vrtop = DECL_CHAIN (f_grtop);
7126 f_groff = DECL_CHAIN (f_vrtop);
7127 f_vroff = DECL_CHAIN (f_groff);
7128
7129 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
7130 NULL_TREE);
7131 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
7132 NULL_TREE);
7133 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
7134 NULL_TREE);
7135 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
7136 NULL_TREE);
7137 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
7138 NULL_TREE);
7139
7140 /* Emit code to initialize STACK, which points to the next varargs stack
7141 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
7142 by named arguments. STACK is 8-byte aligned. */
7143 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
7144 if (cum->aapcs_stack_size > 0)
7145 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
7146 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
7147 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7148
7149 /* Emit code to initialize GRTOP, the top of the GR save area.
7150 virtual_incoming_args_rtx should have been 16 byte aligned. */
7151 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
7152 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
7153 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7154
7155 /* Emit code to initialize VRTOP, the top of the VR save area.
7156 This address is gr_save_area_bytes below GRTOP, rounded
7157 down to the next 16-byte boundary. */
7158 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
7159 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
7160 STACK_BOUNDARY / BITS_PER_UNIT);
7161
7162 if (vr_offset)
7163 t = fold_build_pointer_plus_hwi (t, -vr_offset);
7164 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
7165 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7166
7167 /* Emit code to initialize GROFF, the offset from GRTOP of the
7168 next GPR argument. */
7169 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
7170 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
7171 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7172
7173 /* Likewise emit code to initialize VROFF, the offset from FTOP
7174 of the next VR argument. */
7175 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
7176 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
7177 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7178}
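
/* To sketch the result (roughly): after va_start,
     __stack points at the first anonymous argument passed on the stack,
     __gr_top points just past the general-register save area,
     __vr_top points just past the vector-register save area,
     __gr_offs is -8 * <number of unused GP argument registers>,
     __vr_offs is -16 * <number of unused FP/SIMD argument registers>,
   so va_arg consumes register slots by walking the negative offsets up
   towards zero and falls back to __stack once they are exhausted. */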
7179
7180/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
7181
7182static tree
7183aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7184 gimple_seq *post_p ATTRIBUTE_UNUSED)
7185{
7186 tree addr;
7187 bool indirect_p;
7188 bool is_ha; /* is HFA or HVA. */
7189 bool dw_align; /* double-word align. */
ef4bddc2 7190 machine_mode ag_mode = VOIDmode;
43e9d192 7191 int nregs;
ef4bddc2 7192 machine_mode mode;
43e9d192
IB
7193
7194 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7195 tree stack, f_top, f_off, off, arg, roundup, on_stack;
7196 HOST_WIDE_INT size, rsize, adjust, align;
7197 tree t, u, cond1, cond2;
7198
7199 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7200 if (indirect_p)
7201 type = build_pointer_type (type);
7202
7203 mode = TYPE_MODE (type);
7204
7205 f_stack = TYPE_FIELDS (va_list_type_node);
7206 f_grtop = DECL_CHAIN (f_stack);
7207 f_vrtop = DECL_CHAIN (f_grtop);
7208 f_groff = DECL_CHAIN (f_vrtop);
7209 f_vroff = DECL_CHAIN (f_groff);
7210
7211 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
7212 f_stack, NULL_TREE);
7213 size = int_size_in_bytes (type);
7214 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
7215
7216 dw_align = false;
7217 adjust = 0;
7218 if (aarch64_vfp_is_call_or_return_candidate (mode,
7219 type,
7220 &ag_mode,
7221 &nregs,
7222 &is_ha))
7223 {
7224 /* TYPE passed in fp/simd registers. */
7225 if (TARGET_GENERAL_REGS_ONLY)
7226 sorry ("%qs and floating point or vector arguments",
7227 "-mgeneral-regs-only");
7228
7229 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
7230 unshare_expr (valist), f_vrtop, NULL_TREE);
7231 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
7232 unshare_expr (valist), f_vroff, NULL_TREE);
7233
7234 rsize = nregs * UNITS_PER_VREG;
7235
7236 if (is_ha)
7237 {
7238 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
7239 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
7240 }
7241 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
7242 && size < UNITS_PER_VREG)
7243 {
7244 adjust = UNITS_PER_VREG - size;
7245 }
7246 }
7247 else
7248 {
7249 /* TYPE passed in general registers. */
7250 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
7251 unshare_expr (valist), f_grtop, NULL_TREE);
7252 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
7253 unshare_expr (valist), f_groff, NULL_TREE);
7254 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7255 nregs = rsize / UNITS_PER_WORD;
7256
7257 if (align > 8)
7258 dw_align = true;
7259
7260 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7261 && size < UNITS_PER_WORD)
7262 {
7263 adjust = UNITS_PER_WORD - size;
7264 }
7265 }
7266
7267 /* Get a local temporary for the field value. */
7268 off = get_initialized_tmp_var (f_off, pre_p, NULL);
7269
7270 /* Emit code to branch if off >= 0. */
7271 t = build2 (GE_EXPR, boolean_type_node, off,
7272 build_int_cst (TREE_TYPE (off), 0));
7273 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
7274
7275 if (dw_align)
7276 {
7277 /* Emit: offs = (offs + 15) & -16. */
7278 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7279 build_int_cst (TREE_TYPE (off), 15));
7280 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
7281 build_int_cst (TREE_TYPE (off), -16));
7282 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
7283 }
7284 else
7285 roundup = NULL;
7286
7287 /* Update ap.__[g|v]r_offs */
7288 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7289 build_int_cst (TREE_TYPE (off), rsize));
7290 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
7291
7292 /* String up. */
7293 if (roundup)
7294 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7295
7296 /* [cond2] if (ap.__[g|v]r_offs > 0) */
7297 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
7298 build_int_cst (TREE_TYPE (f_off), 0));
7299 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
7300
7301 /* String up: make sure the assignment happens before the use. */
7302 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
7303 COND_EXPR_ELSE (cond1) = t;
7304
7305 /* Prepare the trees handling the argument that is passed on the stack;
7306 the top level node will store in ON_STACK. */
7307 arg = get_initialized_tmp_var (stack, pre_p, NULL);
7308 if (align > 8)
7309 {
7310 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
7311 t = fold_convert (intDI_type_node, arg);
7312 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7313 build_int_cst (TREE_TYPE (t), 15));
7314 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7315 build_int_cst (TREE_TYPE (t), -16));
7316 t = fold_convert (TREE_TYPE (arg), t);
7317 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
7318 }
7319 else
7320 roundup = NULL;
7321 /* Advance ap.__stack */
7322 t = fold_convert (intDI_type_node, arg);
7323 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7324 build_int_cst (TREE_TYPE (t), size + 7));
7325 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7326 build_int_cst (TREE_TYPE (t), -8));
7327 t = fold_convert (TREE_TYPE (arg), t);
7328 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
7329 /* String up roundup and advance. */
7330 if (roundup)
7331 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7332 /* String up with arg */
7333 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
7334 /* Big-endianness related address adjustment. */
7335 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7336 && size < UNITS_PER_WORD)
7337 {
7338 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
7339 size_int (UNITS_PER_WORD - size));
7340 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
7341 }
7342
7343 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
7344 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
7345
7346 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
7347 t = off;
7348 if (adjust)
7349 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
7350 build_int_cst (TREE_TYPE (off), adjust));
7351
7352 t = fold_convert (sizetype, t);
7353 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
7354
7355 if (is_ha)
7356 {
7357 /* type ha; // treat as "struct {ftype field[n];}"
7358 ... [computing offs]
7359 for (i = 0; i <nregs; ++i, offs += 16)
7360 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
7361 return ha; */
7362 int i;
7363 tree tmp_ha, field_t, field_ptr_t;
7364
7365 /* Declare a local variable. */
7366 tmp_ha = create_tmp_var_raw (type, "ha");
7367 gimple_add_tmp_var (tmp_ha);
7368
7369 /* Establish the base type. */
7370 switch (ag_mode)
7371 {
7372 case SFmode:
7373 field_t = float_type_node;
7374 field_ptr_t = float_ptr_type_node;
7375 break;
7376 case DFmode:
7377 field_t = double_type_node;
7378 field_ptr_t = double_ptr_type_node;
7379 break;
7380 case TFmode:
7381 field_t = long_double_type_node;
7382 field_ptr_t = long_double_ptr_type_node;
7383 break;
7384/* Half-precision and quad-precision types are not fully supported yet.
7385 Enable the following code once that support is complete; we still need
7386 to find the correct type node for __fp16 *. */
7387#if 0
7388 case HFmode:
7389 field_t = float_type_node;
7390 field_ptr_t = float_ptr_type_node;
7391 break;
7392#endif
7393 case V2SImode:
7394 case V4SImode:
7395 {
7396 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
7397 field_t = build_vector_type_for_mode (innertype, ag_mode);
7398 field_ptr_t = build_pointer_type (field_t);
7399 }
7400 break;
7401 default:
7402 gcc_assert (0);
7403 }
7404
7405 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
7406 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
7407 addr = t;
7408 t = fold_convert (field_ptr_t, addr);
7409 t = build2 (MODIFY_EXPR, field_t,
7410 build1 (INDIRECT_REF, field_t, tmp_ha),
7411 build1 (INDIRECT_REF, field_t, t));
7412
7413 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
7414 for (i = 1; i < nregs; ++i)
7415 {
7416 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
7417 u = fold_convert (field_ptr_t, addr);
7418 u = build2 (MODIFY_EXPR, field_t,
7419 build2 (MEM_REF, field_t, tmp_ha,
7420 build_int_cst (field_ptr_t,
7421 (i *
7422 int_size_in_bytes (field_t)))),
7423 build1 (INDIRECT_REF, field_t, u));
7424 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
7425 }
7426
7427 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
7428 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
7429 }
7430
7431 COND_EXPR_ELSE (cond2) = t;
7432 addr = fold_convert (build_pointer_type (type), cond1);
7433 addr = build_va_arg_indirect_ref (addr);
7434
7435 if (indirect_p)
7436 addr = build_va_arg_indirect_ref (addr);
7437
7438 return addr;
7439}
7440
7441/* Implement TARGET_SETUP_INCOMING_VARARGS. */
7442
7443static void
ef4bddc2 7444aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
7445 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7446 int no_rtl)
7447{
7448 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7449 CUMULATIVE_ARGS local_cum;
7450 int gr_saved, vr_saved;
7451
7452 /* The caller has advanced CUM up to, but not beyond, the last named
7453 argument. Advance a local copy of CUM past the last "real" named
7454 argument, to find out how many registers are left over. */
7455 local_cum = *cum;
7456 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
7457
7458 /* Find out how many registers we need to save. */
7459 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7460 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7461
7462 if (TARGET_GENERAL_REGS_ONLY)
7463 {
7464 if (local_cum.aapcs_nvrn > 0)
7465 sorry ("%qs and floating point or vector arguments",
7466 "-mgeneral-regs-only");
7467 vr_saved = 0;
7468 }
7469
7470 if (!no_rtl)
7471 {
7472 if (gr_saved > 0)
7473 {
7474 rtx ptr, mem;
7475
7476 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7477 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7478 - gr_saved * UNITS_PER_WORD);
7479 mem = gen_frame_mem (BLKmode, ptr);
7480 set_mem_alias_set (mem, get_varargs_alias_set ());
7481
7482 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7483 mem, gr_saved);
7484 }
7485 if (vr_saved > 0)
7486 {
7487 /* We can't use move_block_from_reg, because it will use
7488 the wrong mode, storing D regs only. */
ef4bddc2 7489 machine_mode mode = TImode;
43e9d192
IB
7490 int off, i;
7491
7492 /* Set OFF to the offset from virtual_incoming_args_rtx of
7493 the first vector register. The VR save area lies below
7494 the GR one, and is aligned to 16 bytes. */
7495 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7496 STACK_BOUNDARY / BITS_PER_UNIT);
7497 off -= vr_saved * UNITS_PER_VREG;
7498
7499 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7500 {
7501 rtx ptr, mem;
7502
7503 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7504 mem = gen_frame_mem (mode, ptr);
7505 set_mem_alias_set (mem, get_varargs_alias_set ());
7506 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7507 off += UNITS_PER_VREG;
7508 }
7509 }
7510 }
7511
7512 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7513 any complication of having crtl->args.pretend_args_size changed. */
8799637a 7514 cfun->machine->frame.saved_varargs_size
43e9d192
IB
7515 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7516 STACK_BOUNDARY / BITS_PER_UNIT)
7517 + vr_saved * UNITS_PER_VREG);
7518}
7519
7520static void
7521aarch64_conditional_register_usage (void)
7522{
7523 int i;
7524 if (!TARGET_FLOAT)
7525 {
7526 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7527 {
7528 fixed_regs[i] = 1;
7529 call_used_regs[i] = 1;
7530 }
7531 }
7532}
7533
7534/* Walk down the type tree of TYPE counting consecutive base elements.
7535 If *MODEP is VOIDmode, then set it to the first valid floating point
7536 type. If a non-floating point type is found, or if a floating point
7537 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7538 otherwise return the count in the sub-tree. */
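/* For example (types shown for illustration only):
   struct { double x, y, z; } gives a count of 3 with *MODEP set to DFmode,
   float[4] gives 4 with *MODEP set to SFmode, and
   struct { double d; int i; } gives -1 because the int field does not match
   the floating-point base type.  */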
7539static int
ef4bddc2 7540aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 7541{
ef4bddc2 7542 machine_mode mode;
43e9d192
IB
7543 HOST_WIDE_INT size;
7544
7545 switch (TREE_CODE (type))
7546 {
7547 case REAL_TYPE:
7548 mode = TYPE_MODE (type);
7549 if (mode != DFmode && mode != SFmode && mode != TFmode)
7550 return -1;
7551
7552 if (*modep == VOIDmode)
7553 *modep = mode;
7554
7555 if (*modep == mode)
7556 return 1;
7557
7558 break;
7559
7560 case COMPLEX_TYPE:
7561 mode = TYPE_MODE (TREE_TYPE (type));
7562 if (mode != DFmode && mode != SFmode && mode != TFmode)
7563 return -1;
7564
7565 if (*modep == VOIDmode)
7566 *modep = mode;
7567
7568 if (*modep == mode)
7569 return 2;
7570
7571 break;
7572
7573 case VECTOR_TYPE:
7574 /* Use V2SImode and V4SImode as representatives of all 64-bit
7575 and 128-bit vector types. */
7576 size = int_size_in_bytes (type);
7577 switch (size)
7578 {
7579 case 8:
7580 mode = V2SImode;
7581 break;
7582 case 16:
7583 mode = V4SImode;
7584 break;
7585 default:
7586 return -1;
7587 }
7588
7589 if (*modep == VOIDmode)
7590 *modep = mode;
7591
7592 /* Vector modes are considered to be opaque: two vectors are
7593 equivalent for the purposes of being homogeneous aggregates
7594 if they are the same size. */
7595 if (*modep == mode)
7596 return 1;
7597
7598 break;
7599
7600 case ARRAY_TYPE:
7601 {
7602 int count;
7603 tree index = TYPE_DOMAIN (type);
7604
807e902e
KZ
7605 /* Can't handle incomplete types nor sizes that are not
7606 fixed. */
7607 if (!COMPLETE_TYPE_P (type)
7608 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7609 return -1;
7610
7611 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7612 if (count == -1
7613 || !index
7614 || !TYPE_MAX_VALUE (index)
cc269bb6 7615 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 7616 || !TYPE_MIN_VALUE (index)
cc269bb6 7617 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
7618 || count < 0)
7619 return -1;
7620
ae7e9ddd
RS
7621 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7622 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
7623
7624 /* There must be no padding. */
807e902e 7625 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7626 return -1;
7627
7628 return count;
7629 }
7630
7631 case RECORD_TYPE:
7632 {
7633 int count = 0;
7634 int sub_count;
7635 tree field;
7636
807e902e
KZ
7637 /* Can't handle incomplete types nor sizes that are not
7638 fixed. */
7639 if (!COMPLETE_TYPE_P (type)
7640 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7641 return -1;
7642
7643 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7644 {
7645 if (TREE_CODE (field) != FIELD_DECL)
7646 continue;
7647
7648 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7649 if (sub_count < 0)
7650 return -1;
7651 count += sub_count;
7652 }
7653
7654 /* There must be no padding. */
807e902e 7655 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7656 return -1;
7657
7658 return count;
7659 }
7660
7661 case UNION_TYPE:
7662 case QUAL_UNION_TYPE:
7663 {
7664 /* These aren't very interesting except in a degenerate case. */
7665 int count = 0;
7666 int sub_count;
7667 tree field;
7668
807e902e
KZ
7669 /* Can't handle incomplete types nor sizes that are not
7670 fixed. */
7671 if (!COMPLETE_TYPE_P (type)
7672 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7673 return -1;
7674
7675 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7676 {
7677 if (TREE_CODE (field) != FIELD_DECL)
7678 continue;
7679
7680 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7681 if (sub_count < 0)
7682 return -1;
7683 count = count > sub_count ? count : sub_count;
7684 }
7685
7686 /* There must be no padding. */
807e902e 7687 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7688 return -1;
7689
7690 return count;
7691 }
7692
7693 default:
7694 break;
7695 }
7696
7697 return -1;
7698}
7699
38e8f663
YR
7700/* Return true if we use LRA instead of reload pass. */
7701static bool
7702aarch64_lra_p (void)
7703{
7704 return aarch64_lra_flag;
7705}
7706
43e9d192
IB
7707/* Return TRUE if the type, as described by TYPE and MODE, is a composite
7708 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7709 array types. The C99 floating-point complex types are also considered
7710 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7711 types, which are GCC extensions and out of the scope of AAPCS64, are
7712 treated as composite types here as well.
7713
7714 Note that MODE itself is not sufficient in determining whether a type
7715 is such a composite type or not. This is because
7716 stor-layout.c:compute_record_mode may have already changed the MODE
7717 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7718 structure with only one field may have its MODE set to the mode of the
7719 field. Also an integer mode whose size matches the size of the
7720 RECORD_TYPE type may be used to substitute the original mode
7721 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7722 solely relied on. */
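/* For instance, struct { double d; } (an illustrative type) may be assigned
   DFmode by compute_record_mode, yet it is still a composite type under the
   AAPCS64, which is why the TYPE-based check below is done first.  */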
7723
7724static bool
7725aarch64_composite_type_p (const_tree type,
ef4bddc2 7726 machine_mode mode)
43e9d192
IB
7727{
7728 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7729 return true;
7730
7731 if (mode == BLKmode
7732 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7733 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7734 return true;
7735
7736 return false;
7737}
7738
7739/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7740 type as described in AAPCS64 \S 4.1.2.
7741
7742 See the comment above aarch64_composite_type_p for the notes on MODE. */
7743
7744static bool
7745aarch64_short_vector_p (const_tree type,
ef4bddc2 7746 machine_mode mode)
43e9d192
IB
7747{
7748 HOST_WIDE_INT size = -1;
7749
7750 if (type && TREE_CODE (type) == VECTOR_TYPE)
7751 size = int_size_in_bytes (type);
7752 else if (!aarch64_composite_type_p (type, mode)
7753 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7754 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7755 size = GET_MODE_SIZE (mode);
7756
7757 return (size == 8 || size == 16);
7758}
7759
7760/* Return TRUE if an argument, whose type is described by TYPE and MODE,
7761 shall be passed or returned in simd/fp register(s) (providing these
7762 parameter passing registers are available).
7763
7764 Upon successful return, *COUNT returns the number of needed registers,
7765 *BASE_MODE returns the mode of the individual register and when IS_HA
7766 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7767 floating-point aggregate or a homogeneous short-vector aggregate. */
7768
7769static bool
ef4bddc2 7770aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 7771 const_tree type,
ef4bddc2 7772 machine_mode *base_mode,
43e9d192
IB
7773 int *count,
7774 bool *is_ha)
7775{
ef4bddc2 7776 machine_mode new_mode = VOIDmode;
43e9d192
IB
7777 bool composite_p = aarch64_composite_type_p (type, mode);
7778
7779 if (is_ha != NULL) *is_ha = false;
7780
7781 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7782 || aarch64_short_vector_p (type, mode))
7783 {
7784 *count = 1;
7785 new_mode = mode;
7786 }
7787 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7788 {
7789 if (is_ha != NULL) *is_ha = true;
7790 *count = 2;
7791 new_mode = GET_MODE_INNER (mode);
7792 }
7793 else if (type && composite_p)
7794 {
7795 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7796
7797 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7798 {
7799 if (is_ha != NULL) *is_ha = true;
7800 *count = ag_count;
7801 }
7802 else
7803 return false;
7804 }
7805 else
7806 return false;
7807
7808 *base_mode = new_mode;
7809 return true;
7810}
7811
7812/* Implement TARGET_STRUCT_VALUE_RTX. */
7813
7814static rtx
7815aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7816 int incoming ATTRIBUTE_UNUSED)
7817{
7818 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7819}
7820
7821/* Implements target hook vector_mode_supported_p. */
7822static bool
ef4bddc2 7823aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192
IB
7824{
7825 if (TARGET_SIMD
7826 && (mode == V4SImode || mode == V8HImode
7827 || mode == V16QImode || mode == V2DImode
7828 || mode == V2SImode || mode == V4HImode
7829 || mode == V8QImode || mode == V2SFmode
ad7d90cc
AL
7830 || mode == V4SFmode || mode == V2DFmode
7831 || mode == V1DFmode))
43e9d192
IB
7832 return true;
7833
7834 return false;
7835}
7836
b7342d25
IB
7837/* Return appropriate SIMD container
7838 for MODE within a vector of WIDTH bits. */
ef4bddc2
RS
7839static machine_mode
7840aarch64_simd_container_mode (machine_mode mode, unsigned width)
43e9d192 7841{
b7342d25 7842 gcc_assert (width == 64 || width == 128);
43e9d192 7843 if (TARGET_SIMD)
b7342d25
IB
7844 {
7845 if (width == 128)
7846 switch (mode)
7847 {
7848 case DFmode:
7849 return V2DFmode;
7850 case SFmode:
7851 return V4SFmode;
7852 case SImode:
7853 return V4SImode;
7854 case HImode:
7855 return V8HImode;
7856 case QImode:
7857 return V16QImode;
7858 case DImode:
7859 return V2DImode;
7860 default:
7861 break;
7862 }
7863 else
7864 switch (mode)
7865 {
7866 case SFmode:
7867 return V2SFmode;
7868 case SImode:
7869 return V2SImode;
7870 case HImode:
7871 return V4HImode;
7872 case QImode:
7873 return V8QImode;
7874 default:
7875 break;
7876 }
7877 }
43e9d192
IB
7878 return word_mode;
7879}
7880
b7342d25 7881/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2
RS
7882static machine_mode
7883aarch64_preferred_simd_mode (machine_mode mode)
b7342d25
IB
7884{
7885 return aarch64_simd_container_mode (mode, 128);
7886}
7887
3b357264
JG
7888/* Return the bitmask of possible vector sizes for the vectorizer
7889 to iterate over. */
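/* Both Advanced SIMD widths are advertised; together with the 128-bit
   preferred SIMD mode above, this lets the vectorizer try 16-byte vectors
   first and fall back to 8-byte ones.  */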
7890static unsigned int
7891aarch64_autovectorize_vector_sizes (void)
7892{
7893 return (16 | 8);
7894}
7895
ac2b960f
YZ
7896/* Implement TARGET_MANGLE_TYPE. */
7897
6f549691 7898static const char *
ac2b960f
YZ
7899aarch64_mangle_type (const_tree type)
7900{
7901 /* The AArch64 ABI documents say that "__va_list" has to be
7902 mangled as if it is in the "std" namespace. */
7903 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7904 return "St9__va_list";
7905
f9d53c27
TB
7906 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
7907 builtin types. */
7908 if (TYPE_NAME (type) != NULL)
7909 return aarch64_mangle_builtin_type (type);
c6fc9e43 7910
ac2b960f
YZ
7911 /* Use the default mangling. */
7912 return NULL;
7913}
7914
8baff86e
KT
7915
7916/* Return true if the rtx_insn contains a MEM RTX somewhere
7917 in it. */
75cf1494
KT
7918
7919static bool
8baff86e 7920has_memory_op (rtx_insn *mem_insn)
75cf1494 7921{
8baff86e
KT
7922 subrtx_iterator::array_type array;
7923 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
7924 if (MEM_P (*iter))
7925 return true;
7926
7927 return false;
75cf1494
KT
7928}
7929
7930/* Find the first rtx_insn before insn that will generate an assembly
7931 instruction. */
7932
7933static rtx_insn *
7934aarch64_prev_real_insn (rtx_insn *insn)
7935{
7936 if (!insn)
7937 return NULL;
7938
7939 do
7940 {
7941 insn = prev_real_insn (insn);
7942 }
7943 while (insn && recog_memoized (insn) < 0);
7944
7945 return insn;
7946}
7947
7948static bool
7949is_madd_op (enum attr_type t1)
7950{
7951 unsigned int i;
7952 /* A number of these may be AArch32 only. */
7953 enum attr_type mlatypes[] = {
7954 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
7955 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
7956 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
7957 };
7958
7959 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
7960 {
7961 if (t1 == mlatypes[i])
7962 return true;
7963 }
7964
7965 return false;
7966}
7967
7968/* Check if there is a register dependency between a load and the insn
7969 for which we hold recog_data. */
7970
7971static bool
7972dep_between_memop_and_curr (rtx memop)
7973{
7974 rtx load_reg;
7975 int opno;
7976
8baff86e 7977 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
7978
7979 if (!REG_P (SET_DEST (memop)))
7980 return false;
7981
7982 load_reg = SET_DEST (memop);
8baff86e 7983 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
7984 {
7985 rtx operand = recog_data.operand[opno];
7986 if (REG_P (operand)
7987 && reg_overlap_mentioned_p (load_reg, operand))
7988 return true;
7989
7990 }
7991 return false;
7992}
7993
8baff86e
KT
7994
7995/* When working around the Cortex-A53 erratum 835769,
7996 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
7997 instruction and has a preceding memory instruction such that a NOP
7998 should be inserted between them. */
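/* When the workaround fires, the final output contains a sequence such as
   (illustrative registers only):

	ldr	x1, [x2]
	nop	// between mem op and mult-accumulate
	madd	x0, x3, x4, x5  */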
7999
75cf1494
KT
8000bool
8001aarch64_madd_needs_nop (rtx_insn* insn)
8002{
8003 enum attr_type attr_type;
8004 rtx_insn *prev;
8005 rtx body;
8006
8007 if (!aarch64_fix_a53_err835769)
8008 return false;
8009
8010 if (recog_memoized (insn) < 0)
8011 return false;
8012
8013 attr_type = get_attr_type (insn);
8014 if (!is_madd_op (attr_type))
8015 return false;
8016
8017 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
8018 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
8019 Restore recog state to INSN to avoid state corruption. */
8020 extract_constrain_insn_cached (insn);
8021
8baff86e 8022 if (!prev || !has_memory_op (prev))
75cf1494
KT
8023 return false;
8024
8025 body = single_set (prev);
8026
8027 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
8028 it and the DImode madd, emit a NOP between them. If body is NULL then we
8029 have a complex memory operation, probably a load/store pair.
8030 Be conservative for now and emit a NOP. */
8031 if (GET_MODE (recog_data.operand[0]) == DImode
8032 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
8033 return true;
8034
8035 return false;
8036
8037}
8038
8baff86e
KT
8039
8040/* Implement FINAL_PRESCAN_INSN. */
8041
75cf1494
KT
8042void
8043aarch64_final_prescan_insn (rtx_insn *insn)
8044{
8045 if (aarch64_madd_needs_nop (insn))
8046 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
8047}
8048
8049
43e9d192 8050/* Return the equivalent letter for size. */
81c2dfb9 8051static char
43e9d192
IB
8052sizetochar (int size)
8053{
8054 switch (size)
8055 {
8056 case 64: return 'd';
8057 case 32: return 's';
8058 case 16: return 'h';
8059 case 8 : return 'b';
8060 default: gcc_unreachable ();
8061 }
8062}
8063
3520f7cc
JG
8064/* Return true iff x is a uniform vector of floating-point
8065 constants, and the constant can be represented in
8066 quarter-precision form. Note, as aarch64_float_const_representable_p
8067 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
8068static bool
8069aarch64_vect_float_const_representable_p (rtx x)
8070{
8071 int i = 0;
8072 REAL_VALUE_TYPE r0, ri;
8073 rtx x0, xi;
8074
8075 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
8076 return false;
8077
8078 x0 = CONST_VECTOR_ELT (x, 0);
8079 if (!CONST_DOUBLE_P (x0))
8080 return false;
8081
8082 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
8083
8084 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
8085 {
8086 xi = CONST_VECTOR_ELT (x, i);
8087 if (!CONST_DOUBLE_P (xi))
8088 return false;
8089
8090 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
8091 if (!REAL_VALUES_EQUAL (r0, ri))
8092 return false;
8093 }
8094
8095 return aarch64_float_const_representable_p (x0);
8096}
8097
d8edd899 8098/* Return true for valid and false for invalid. */
3ea63f60 8099bool
ef4bddc2 8100aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
48063b9d 8101 struct simd_immediate_info *info)
43e9d192
IB
8102{
8103#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
8104 matches = 1; \
8105 for (i = 0; i < idx; i += (STRIDE)) \
8106 if (!(TEST)) \
8107 matches = 0; \
8108 if (matches) \
8109 { \
8110 immtype = (CLASS); \
8111 elsize = (ELSIZE); \
43e9d192
IB
8112 eshift = (SHIFT); \
8113 emvn = (NEG); \
8114 break; \
8115 }
8116
8117 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8118 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8119 unsigned char bytes[16];
43e9d192
IB
8120 int immtype = -1, matches;
8121 unsigned int invmask = inverse ? 0xff : 0;
8122 int eshift, emvn;
8123
43e9d192 8124 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 8125 {
81c2dfb9
IB
8126 if (! (aarch64_simd_imm_zero_p (op, mode)
8127 || aarch64_vect_float_const_representable_p (op)))
d8edd899 8128 return false;
3520f7cc 8129
48063b9d
IB
8130 if (info)
8131 {
8132 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 8133 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
8134 info->mvn = false;
8135 info->shift = 0;
8136 }
3520f7cc 8137
d8edd899 8138 return true;
3520f7cc 8139 }
43e9d192
IB
8140
8141 /* Splat vector constant out into a byte vector. */
8142 for (i = 0; i < n_elts; i++)
8143 {
4b1e108c
AL
8144 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
8145 it must be laid out in the vector register in reverse order. */
8146 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192
IB
8147 unsigned HOST_WIDE_INT elpart;
8148 unsigned int part, parts;
8149
4aa81c2e 8150 if (CONST_INT_P (el))
43e9d192
IB
8151 {
8152 elpart = INTVAL (el);
8153 parts = 1;
8154 }
8155 else if (GET_CODE (el) == CONST_DOUBLE)
8156 {
8157 elpart = CONST_DOUBLE_LOW (el);
8158 parts = 2;
8159 }
8160 else
8161 gcc_unreachable ();
8162
8163 for (part = 0; part < parts; part++)
8164 {
8165 unsigned int byte;
8166 for (byte = 0; byte < innersize; byte++)
8167 {
8168 bytes[idx++] = (elpart & 0xff) ^ invmask;
8169 elpart >>= BITS_PER_UNIT;
8170 }
8171 if (GET_CODE (el) == CONST_DOUBLE)
8172 elpart = CONST_DOUBLE_HIGH (el);
8173 }
8174 }
8175
8176 /* Sanity check. */
8177 gcc_assert (idx == GET_MODE_SIZE (mode));
8178
8179 do
8180 {
8181 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8182 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
8183
8184 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8185 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
8186
8187 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8188 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
8189
8190 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8191 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
8192
8193 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
8194
8195 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
8196
8197 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8198 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
8199
8200 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8201 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
8202
8203 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8204 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
8205
8206 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8207 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
8208
8209 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
8210
8211 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
8212
8213 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 8214 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
8215
8216 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 8217 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
8218
8219 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 8220 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
8221
8222 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 8223 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
8224
8225 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
8226
8227 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8228 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
8229 }
8230 while (0);
8231
e4f0f84d 8232 if (immtype == -1)
d8edd899 8233 return false;
43e9d192 8234
48063b9d 8235 if (info)
43e9d192 8236 {
48063b9d 8237 info->element_width = elsize;
48063b9d
IB
8238 info->mvn = emvn != 0;
8239 info->shift = eshift;
8240
43e9d192
IB
8241 unsigned HOST_WIDE_INT imm = 0;
8242
e4f0f84d
TB
8243 if (immtype >= 12 && immtype <= 15)
8244 info->msl = true;
8245
43e9d192
IB
8246 /* Un-invert bytes of recognized vector, if necessary. */
8247 if (invmask != 0)
8248 for (i = 0; i < idx; i++)
8249 bytes[i] ^= invmask;
8250
8251 if (immtype == 17)
8252 {
8253 /* FIXME: Broken on 32-bit H_W_I hosts. */
8254 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8255
8256 for (i = 0; i < 8; i++)
8257 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8258 << (i * BITS_PER_UNIT);
8259
43e9d192 8260
48063b9d
IB
8261 info->value = GEN_INT (imm);
8262 }
8263 else
8264 {
8265 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8266 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
8267
8268 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
8269 generic constants. */
8270 if (info->mvn)
43e9d192 8271 imm = ~imm;
48063b9d
IB
8272 imm = (imm >> info->shift) & 0xff;
8273 info->value = GEN_INT (imm);
8274 }
43e9d192
IB
8275 }
8276
48063b9d 8277 return true;
43e9d192
IB
8278#undef CHECK
8279}
8280
43e9d192
IB
8281/* Check whether immediate shift constants are within range. */
8282bool
ef4bddc2 8283aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
8284{
8285 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
8286 if (left)
ddeabd3e 8287 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 8288 else
ddeabd3e 8289 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
8290}
8291
3520f7cc
JG
8292/* Return true if X is a uniform vector where all elements
8293 are either the floating-point constant 0.0 or the
8294 integer constant 0. */
43e9d192 8295bool
ef4bddc2 8296aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
43e9d192 8297{
3520f7cc 8298 return x == CONST0_RTX (mode);
43e9d192
IB
8299}
8300
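/* Return true if X is a 64-bit immediate in which each byte is either 0x00
   or 0xff, e.g. 0xff00ff0000ff00ff; this is the form accepted by the 64-bit
   variant of the MOVI instruction.  */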
8301bool
ef4bddc2 8302aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
43e9d192
IB
8303{
8304 HOST_WIDE_INT imm = INTVAL (x);
8305 int i;
8306
8307 for (i = 0; i < 8; i++)
8308 {
8309 unsigned int byte = imm & 0xff;
8310 if (byte != 0xff && byte != 0)
8311 return false;
8312 imm >>= 8;
8313 }
8314
8315 return true;
8316}
8317
83f8c414
CSS
8318bool
8319aarch64_mov_operand_p (rtx x,
a5350ddc 8320 enum aarch64_symbol_context context,
ef4bddc2 8321 machine_mode mode)
83f8c414 8322{
83f8c414
CSS
8323 if (GET_CODE (x) == HIGH
8324 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
8325 return true;
8326
82614948 8327 if (CONST_INT_P (x))
83f8c414
CSS
8328 return true;
8329
8330 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
8331 return true;
8332
a5350ddc
CSS
8333 return aarch64_classify_symbolic_expression (x, context)
8334 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
8335}
8336
43e9d192
IB
8337/* Return a const_int vector of VAL. */
8338rtx
ef4bddc2 8339aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
43e9d192
IB
8340{
8341 int nunits = GET_MODE_NUNITS (mode);
8342 rtvec v = rtvec_alloc (nunits);
8343 int i;
8344
8345 for (i = 0; i < nunits; i++)
8346 RTVEC_ELT (v, i) = GEN_INT (val);
8347
8348 return gen_rtx_CONST_VECTOR (mode, v);
8349}
8350
051d0e2f
SN
8351/* Check OP is a legal scalar immediate for the MOVI instruction. */
8352
8353bool
ef4bddc2 8354aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
051d0e2f 8355{
ef4bddc2 8356 machine_mode vmode;
051d0e2f
SN
8357
8358 gcc_assert (!VECTOR_MODE_P (mode));
8359 vmode = aarch64_preferred_simd_mode (mode);
8360 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 8361 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
8362}
8363
988fa693
JG
8364/* Construct and return a PARALLEL RTX vector with elements numbering the
8365 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
8366 the vector - from the perspective of the architecture. This does not
8367 line up with GCC's perspective on lane numbers, so we end up with
8368 different masks depending on our target endian-ness. The diagram
8369 below may help. We must draw the distinction when building masks
8370 which select one half of the vector. An instruction selecting
8371 architectural low-lanes for a big-endian target, must be described using
8372 a mask selecting GCC high-lanes.
8373
8374 Big-Endian Little-Endian
8375
8376GCC 0 1 2 3 3 2 1 0
8377 | x | x | x | x | | x | x | x | x |
8378Architecture 3 2 1 0 3 2 1 0
8379
8380Low Mask: { 2, 3 } { 0, 1 }
8381High Mask: { 0, 1 } { 2, 3 }
8382*/
8383
43e9d192 8384rtx
ef4bddc2 8385aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
43e9d192
IB
8386{
8387 int nunits = GET_MODE_NUNITS (mode);
8388 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
8389 int high_base = nunits / 2;
8390 int low_base = 0;
8391 int base;
43e9d192
IB
8392 rtx t1;
8393 int i;
8394
988fa693
JG
8395 if (BYTES_BIG_ENDIAN)
8396 base = high ? low_base : high_base;
8397 else
8398 base = high ? high_base : low_base;
8399
8400 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
8401 RTVEC_ELT (v, i) = GEN_INT (base + i);
8402
8403 t1 = gen_rtx_PARALLEL (mode, v);
8404 return t1;
8405}
8406
988fa693
JG
8407/* Check OP for validity as a PARALLEL RTX vector with elements
8408 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
8409 from the perspective of the architecture. See the diagram above
8410 aarch64_simd_vect_par_cnst_half for more details. */
8411
8412bool
ef4bddc2 8413aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
8414 bool high)
8415{
8416 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
8417 HOST_WIDE_INT count_op = XVECLEN (op, 0);
8418 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
8419 int i = 0;
8420
8421 if (!VECTOR_MODE_P (mode))
8422 return false;
8423
8424 if (count_op != count_ideal)
8425 return false;
8426
8427 for (i = 0; i < count_ideal; i++)
8428 {
8429 rtx elt_op = XVECEXP (op, 0, i);
8430 rtx elt_ideal = XVECEXP (ideal, 0, i);
8431
4aa81c2e 8432 if (!CONST_INT_P (elt_op)
988fa693
JG
8433 || INTVAL (elt_ideal) != INTVAL (elt_op))
8434 return false;
8435 }
8436 return true;
8437}
8438
43e9d192
IB
8439/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
8440 HIGH (exclusive). */
8441void
46ed6024
CB
8442aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8443 const_tree exp)
43e9d192
IB
8444{
8445 HOST_WIDE_INT lane;
4aa81c2e 8446 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
8447 lane = INTVAL (operand);
8448
8449 if (lane < low || lane >= high)
46ed6024
CB
8450 {
8451 if (exp)
8452 error ("%Klane %ld out of range %ld - %ld", exp, lane, low, high - 1);
8453 else
8454 error ("lane %ld out of range %ld - %ld", lane, low, high - 1);
8455 }
43e9d192
IB
8456}
8457
43e9d192
IB
8458/* Emit code to place a AdvSIMD pair result in memory locations (with equal
8459 registers). */
8460void
ef4bddc2 8461aarch64_simd_emit_pair_result_insn (machine_mode mode,
43e9d192
IB
8462 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
8463 rtx op1)
8464{
8465 rtx mem = gen_rtx_MEM (mode, destaddr);
8466 rtx tmp1 = gen_reg_rtx (mode);
8467 rtx tmp2 = gen_reg_rtx (mode);
8468
8469 emit_insn (intfn (tmp1, op1, tmp2));
8470
8471 emit_move_insn (mem, tmp1);
8472 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
8473 emit_move_insn (mem, tmp2);
8474}
8475
8476/* Return TRUE if OP is a valid vector addressing mode. */
8477bool
8478aarch64_simd_mem_operand_p (rtx op)
8479{
8480 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 8481 || REG_P (XEXP (op, 0)));
43e9d192
IB
8482}
8483
8484/* Set up OPERANDS for a register copy from SRC to DEST, taking care
8485 not to early-clobber SRC registers in the process.
8486
8487 We assume that the operands described by SRC and DEST represent a
8488 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
8489 number of components into which the copy has been decomposed. */
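/* For example, copying {V1, V2} into {V2, V3} (illustrative registers) must
   emit the highest-numbered move first (V3 <- V2, then V2 <- V1) so that V2
   is read before it is overwritten; non-overlapping or downward copies keep
   the natural order.  */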
8490void
8491aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
8492 rtx *src, unsigned int count)
8493{
8494 unsigned int i;
8495
8496 if (!reg_overlap_mentioned_p (operands[0], operands[1])
8497 || REGNO (operands[0]) < REGNO (operands[1]))
8498 {
8499 for (i = 0; i < count; i++)
8500 {
8501 operands[2 * i] = dest[i];
8502 operands[2 * i + 1] = src[i];
8503 }
8504 }
8505 else
8506 {
8507 for (i = 0; i < count; i++)
8508 {
8509 operands[2 * i] = dest[count - i - 1];
8510 operands[2 * i + 1] = src[count - i - 1];
8511 }
8512 }
8513}
8514
8515/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8516 one of VSTRUCT modes: OI, CI or XI. */
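/* A register-to-register VSTRUCT move is split into one 4-byte move per
   128-bit register, i.e. 8, 12 or 16 bytes for OImode, CImode and XImode
   respectively; a move involving memory is a single load/store-multiple
   instruction and therefore 4 bytes.  */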
8517int
647d790d 8518aarch64_simd_attr_length_move (rtx_insn *insn)
43e9d192 8519{
ef4bddc2 8520 machine_mode mode;
43e9d192
IB
8521
8522 extract_insn_cached (insn);
8523
8524 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
8525 {
8526 mode = GET_MODE (recog_data.operand[0]);
8527 switch (mode)
8528 {
8529 case OImode:
8530 return 8;
8531 case CImode:
8532 return 12;
8533 case XImode:
8534 return 16;
8535 default:
8536 gcc_unreachable ();
8537 }
8538 }
8539 return 4;
8540}
8541
db0253a4
TB
8542/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8543 alignment of a vector to 128 bits. */
8544static HOST_WIDE_INT
8545aarch64_simd_vector_alignment (const_tree type)
8546{
9439e9a1 8547 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
8548 return MIN (align, 128);
8549}
8550
8551/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8552static bool
8553aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8554{
8555 if (is_packed)
8556 return false;
8557
8558 /* We guarantee alignment for vectors up to 128-bits. */
8559 if (tree_int_cst_compare (TYPE_SIZE (type),
8560 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8561 return false;
8562
8563 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8564 return true;
8565}
8566
4369c11e
TB
8567/* If VALS is a vector constant that can be loaded into a register
8568 using DUP, generate instructions to do so and return an RTX to
8569 assign to the register. Otherwise return NULL_RTX. */
8570static rtx
8571aarch64_simd_dup_constant (rtx vals)
8572{
ef4bddc2
RS
8573 machine_mode mode = GET_MODE (vals);
8574 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e
TB
8575 int n_elts = GET_MODE_NUNITS (mode);
8576 bool all_same = true;
8577 rtx x;
8578 int i;
8579
8580 if (GET_CODE (vals) != CONST_VECTOR)
8581 return NULL_RTX;
8582
8583 for (i = 1; i < n_elts; ++i)
8584 {
8585 x = CONST_VECTOR_ELT (vals, i);
8586 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8587 all_same = false;
8588 }
8589
8590 if (!all_same)
8591 return NULL_RTX;
8592
8593 /* We can load this constant by using DUP and a constant in a
8594 single ARM register. This will be cheaper than a vector
8595 load. */
8596 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8597 return gen_rtx_VEC_DUPLICATE (mode, x);
8598}
8599
8600
8601/* Generate code to load VALS, which is a PARALLEL containing only
8602 constants (for vec_init) or CONST_VECTOR, efficiently into a
8603 register. Returns an RTX to copy into the register, or NULL_RTX
8604 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 8605static rtx
4369c11e
TB
8606aarch64_simd_make_constant (rtx vals)
8607{
ef4bddc2 8608 machine_mode mode = GET_MODE (vals);
4369c11e
TB
8609 rtx const_dup;
8610 rtx const_vec = NULL_RTX;
8611 int n_elts = GET_MODE_NUNITS (mode);
8612 int n_const = 0;
8613 int i;
8614
8615 if (GET_CODE (vals) == CONST_VECTOR)
8616 const_vec = vals;
8617 else if (GET_CODE (vals) == PARALLEL)
8618 {
8619 /* A CONST_VECTOR must contain only CONST_INTs and
8620 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8621 Only store valid constants in a CONST_VECTOR. */
8622 for (i = 0; i < n_elts; ++i)
8623 {
8624 rtx x = XVECEXP (vals, 0, i);
8625 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8626 n_const++;
8627 }
8628 if (n_const == n_elts)
8629 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8630 }
8631 else
8632 gcc_unreachable ();
8633
8634 if (const_vec != NULL_RTX
48063b9d 8635 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
8636 /* Load using MOVI/MVNI. */
8637 return const_vec;
8638 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8639 /* Loaded using DUP. */
8640 return const_dup;
8641 else if (const_vec != NULL_RTX)
8642 /* Load from constant pool. We can not take advantage of single-cycle
8643 LD1 because we need a PC-relative addressing mode. */
8644 return const_vec;
8645 else
8646 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8647 We can not construct an initializer. */
8648 return NULL_RTX;
8649}
8650
8651void
8652aarch64_expand_vector_init (rtx target, rtx vals)
8653{
ef4bddc2
RS
8654 machine_mode mode = GET_MODE (target);
8655 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e
TB
8656 int n_elts = GET_MODE_NUNITS (mode);
8657 int n_var = 0, one_var = -1;
8658 bool all_same = true;
8659 rtx x, mem;
8660 int i;
8661
8662 x = XVECEXP (vals, 0, 0);
8663 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8664 n_var = 1, one_var = 0;
8665
8666 for (i = 1; i < n_elts; ++i)
8667 {
8668 x = XVECEXP (vals, 0, i);
8669 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8670 ++n_var, one_var = i;
8671
8672 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8673 all_same = false;
8674 }
8675
8676 if (n_var == 0)
8677 {
8678 rtx constant = aarch64_simd_make_constant (vals);
8679 if (constant != NULL_RTX)
8680 {
8681 emit_move_insn (target, constant);
8682 return;
8683 }
8684 }
8685
8686 /* Splat a single non-constant element if we can. */
8687 if (all_same)
8688 {
8689 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8690 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8691 return;
8692 }
8693
8694 /* One field is non-constant. Load constant then overwrite varying
8695 field. This is more efficient than using the stack. */
8696 if (n_var == 1)
8697 {
8698 rtx copy = copy_rtx (vals);
8699 rtx index = GEN_INT (one_var);
8700 enum insn_code icode;
8701
8702 /* Load constant part of vector, substitute neighboring value for
8703 varying element. */
8704 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8705 aarch64_expand_vector_init (target, copy);
8706
8707 /* Insert variable. */
8708 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8709 icode = optab_handler (vec_set_optab, mode);
8710 gcc_assert (icode != CODE_FOR_nothing);
8711 emit_insn (GEN_FCN (icode) (target, x, index));
8712 return;
8713 }
8714
8715 /* Construct the vector in memory one field at a time
8716 and load the whole vector. */
8717 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8718 for (i = 0; i < n_elts; i++)
8719 emit_move_insn (adjust_address_nv (mem, inner_mode,
8720 i * GET_MODE_SIZE (inner_mode)),
8721 XVECEXP (vals, 0, i));
8722 emit_move_insn (target, mem);
8723
8724}
8725
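/* Implement TARGET_SHIFT_TRUNCATION_MASK.  Scalar shifts truncate the count
   to the operand width, so the full mask may be assumed; vector and
   vector-structure shifts do not, so return 0 for those modes.  */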
43e9d192 8726static unsigned HOST_WIDE_INT
ef4bddc2 8727aarch64_shift_truncation_mask (machine_mode mode)
43e9d192
IB
8728{
8729 return
8730 (aarch64_vector_mode_supported_p (mode)
8731 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8732}
8733
8734#ifndef TLS_SECTION_ASM_FLAG
8735#define TLS_SECTION_ASM_FLAG 'T'
8736#endif
8737
8738void
8739aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8740 tree decl ATTRIBUTE_UNUSED)
8741{
8742 char flagchars[10], *f = flagchars;
8743
8744 /* If we have already declared this section, we can use an
8745 abbreviated form to switch back to it -- unless this section is
8746 part of a COMDAT groups, in which case GAS requires the full
8747 declaration every time. */
8748 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8749 && (flags & SECTION_DECLARED))
8750 {
8751 fprintf (asm_out_file, "\t.section\t%s\n", name);
8752 return;
8753 }
8754
8755 if (!(flags & SECTION_DEBUG))
8756 *f++ = 'a';
8757 if (flags & SECTION_WRITE)
8758 *f++ = 'w';
8759 if (flags & SECTION_CODE)
8760 *f++ = 'x';
8761 if (flags & SECTION_SMALL)
8762 *f++ = 's';
8763 if (flags & SECTION_MERGE)
8764 *f++ = 'M';
8765 if (flags & SECTION_STRINGS)
8766 *f++ = 'S';
8767 if (flags & SECTION_TLS)
8768 *f++ = TLS_SECTION_ASM_FLAG;
8769 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8770 *f++ = 'G';
8771 *f = '\0';
8772
8773 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8774
8775 if (!(flags & SECTION_NOTYPE))
8776 {
8777 const char *type;
8778 const char *format;
8779
8780 if (flags & SECTION_BSS)
8781 type = "nobits";
8782 else
8783 type = "progbits";
8784
8785#ifdef TYPE_OPERAND_FMT
8786 format = "," TYPE_OPERAND_FMT;
8787#else
8788 format = ",@%s";
8789#endif
8790
8791 fprintf (asm_out_file, format, type);
8792
8793 if (flags & SECTION_ENTSIZE)
8794 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8795 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8796 {
8797 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8798 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8799 else
8800 fprintf (asm_out_file, ",%s,comdat",
8801 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8802 }
8803 }
8804
8805 putc ('\n', asm_out_file);
8806}
8807
8808/* Select a format to encode pointers in exception handling data. */
8809int
8810aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8811{
8812 int type;
8813 switch (aarch64_cmodel)
8814 {
8815 case AARCH64_CMODEL_TINY:
8816 case AARCH64_CMODEL_TINY_PIC:
8817 case AARCH64_CMODEL_SMALL:
8818 case AARCH64_CMODEL_SMALL_PIC:
8819 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8820 for everything. */
8821 type = DW_EH_PE_sdata4;
8822 break;
8823 default:
8824 /* No assumptions here. 8-byte relocs required. */
8825 type = DW_EH_PE_sdata8;
8826 break;
8827 }
8828 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8829}
8830
0462169c
SN
8831/* Emit load exclusive. */
8832
8833static void
ef4bddc2 8834aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
8835 rtx mem, rtx model_rtx)
8836{
8837 rtx (*gen) (rtx, rtx, rtx);
8838
8839 switch (mode)
8840 {
8841 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8842 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8843 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8844 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8845 default:
8846 gcc_unreachable ();
8847 }
8848
8849 emit_insn (gen (rval, mem, model_rtx));
8850}
8851
8852/* Emit store exclusive. */
8853
8854static void
ef4bddc2 8855aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
8856 rtx rval, rtx mem, rtx model_rtx)
8857{
8858 rtx (*gen) (rtx, rtx, rtx, rtx);
8859
8860 switch (mode)
8861 {
8862 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8863 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8864 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8865 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8866 default:
8867 gcc_unreachable ();
8868 }
8869
8870 emit_insn (gen (bval, rval, mem, model_rtx));
8871}
8872
8873/* Emit jump instruction INSN and mark it as very unlikely to be taken. */
8874
8875static void
8876aarch64_emit_unlikely_jump (rtx insn)
8877{
e5af9ddd 8878 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c
SN
8879
8880 insn = emit_jump_insn (insn);
e5af9ddd 8881 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
0462169c
SN
8882}
8883
8884/* Expand a compare and swap pattern. */
8885
8886void
8887aarch64_expand_compare_and_swap (rtx operands[])
8888{
8889 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
ef4bddc2 8890 machine_mode mode, cmp_mode;
0462169c
SN
8891 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8892
8893 bval = operands[0];
8894 rval = operands[1];
8895 mem = operands[2];
8896 oldval = operands[3];
8897 newval = operands[4];
8898 is_weak = operands[5];
8899 mod_s = operands[6];
8900 mod_f = operands[7];
8901 mode = GET_MODE (mem);
8902 cmp_mode = mode;
8903
8904 /* Normally the succ memory model must be stronger than fail, but in the
8905 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8906 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8907
8908 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8909 && INTVAL (mod_s) == MEMMODEL_RELEASE)
8910 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8911
8912 switch (mode)
8913 {
8914 case QImode:
8915 case HImode:
8916 /* For short modes, we're going to perform the comparison in SImode,
8917 so do the zero-extension now. */
8918 cmp_mode = SImode;
8919 rval = gen_reg_rtx (SImode);
8920 oldval = convert_modes (SImode, mode, oldval, true);
8921 /* Fall through. */
8922
8923 case SImode:
8924 case DImode:
8925 /* Force the value into a register if needed. */
8926 if (!aarch64_plus_operand (oldval, mode))
8927 oldval = force_reg (cmp_mode, oldval);
8928 break;
8929
8930 default:
8931 gcc_unreachable ();
8932 }
8933
8934 switch (mode)
8935 {
8936 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8937 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8938 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8939 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8940 default:
8941 gcc_unreachable ();
8942 }
8943
8944 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8945
8946 if (mode == QImode || mode == HImode)
8947 emit_move_insn (operands[1], gen_lowpart (mode, rval));
8948
8949 x = gen_rtx_REG (CCmode, CC_REGNUM);
8950 x = gen_rtx_EQ (SImode, x, const0_rtx);
8951 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8952}
8953
8954/* Split a compare and swap pattern. */
8955
8956void
8957aarch64_split_compare_and_swap (rtx operands[])
8958{
8959 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 8960 machine_mode mode;
0462169c 8961 bool is_weak;
5d8a22a5
DM
8962 rtx_code_label *label1, *label2;
8963 rtx x, cond;
0462169c
SN
8964
8965 rval = operands[0];
8966 mem = operands[1];
8967 oldval = operands[2];
8968 newval = operands[3];
8969 is_weak = (operands[4] != const0_rtx);
0462169c
SN
8970 scratch = operands[7];
8971 mode = GET_MODE (mem);
8972
5d8a22a5 8973 label1 = NULL;
0462169c
SN
8974 if (!is_weak)
8975 {
8976 label1 = gen_label_rtx ();
8977 emit_label (label1);
8978 }
8979 label2 = gen_label_rtx ();
8980
8981 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8982
8983 cond = aarch64_gen_compare_reg (NE, rval, oldval);
8984 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8985 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8986 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8987 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8988
8989 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8990
8991 if (!is_weak)
8992 {
8993 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8994 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8995 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8996 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8997 }
8998 else
8999 {
9000 cond = gen_rtx_REG (CCmode, CC_REGNUM);
9001 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
9002 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
9003 }
9004
9005 emit_label (label2);
9006}
9007
9008/* Split an atomic operation. */
9009
9010void
9011aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9012 rtx value, rtx model_rtx, rtx cond)
9013{
ef4bddc2
RS
9014 machine_mode mode = GET_MODE (mem);
9015 machine_mode wmode = (mode == DImode ? DImode : SImode);
5d8a22a5
DM
9016 rtx_code_label *label;
9017 rtx x;
0462169c
SN
9018
9019 label = gen_label_rtx ();
9020 emit_label (label);
9021
9022 if (new_out)
9023 new_out = gen_lowpart (wmode, new_out);
9024 if (old_out)
9025 old_out = gen_lowpart (wmode, old_out);
9026 else
9027 old_out = new_out;
9028 value = simplify_gen_subreg (wmode, value, mode, 0);
9029
9030 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
9031
9032 switch (code)
9033 {
9034 case SET:
9035 new_out = value;
9036 break;
9037
9038 case NOT:
9039 x = gen_rtx_AND (wmode, old_out, value);
9040 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9041 x = gen_rtx_NOT (wmode, new_out);
9042 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9043 break;
9044
9045 case MINUS:
9046 if (CONST_INT_P (value))
9047 {
9048 value = GEN_INT (-INTVAL (value));
9049 code = PLUS;
9050 }
9051 /* Fall through. */
9052
9053 default:
9054 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
9055 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9056 break;
9057 }
9058
9059 aarch64_emit_store_exclusive (mode, cond, mem,
9060 gen_lowpart (mode, new_out), model_rtx);
9061
9062 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9063 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9064 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
9065 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9066}
9067
95ca411e
YZ
9068static void
9069aarch64_print_extension (void)
9070{
9071 const struct aarch64_option_extension *opt = NULL;
9072
9073 for (opt = all_extensions; opt->name != NULL; opt++)
9074 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
9075 asm_fprintf (asm_out_file, "+%s", opt->name);
9076
9077 asm_fprintf (asm_out_file, "\n");
9078}
9079
43e9d192
IB
9080static void
9081aarch64_start_file (void)
9082{
9083 if (selected_arch)
95ca411e
YZ
9084 {
9085 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
9086 aarch64_print_extension ();
9087 }
43e9d192 9088 else if (selected_cpu)
95ca411e 9089 {
682287fb
JG
9090 const char *truncated_name
9091 = aarch64_rewrite_selected_cpu (selected_cpu->name);
9092 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
95ca411e
YZ
9093 aarch64_print_extension ();
9094 }
43e9d192
IB
9095 default_file_start ();
9096}
9097
9098/* Target hook for c_mode_for_suffix. */
ef4bddc2 9099static machine_mode
43e9d192
IB
9100aarch64_c_mode_for_suffix (char suffix)
9101{
9102 if (suffix == 'q')
9103 return TFmode;
9104
9105 return VOIDmode;
9106}
9107
3520f7cc
JG
9108/* We can only represent floating point constants which will fit in
9109 "quarter-precision" values. These values are characterised by
9110 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
9111 by:
9112
9113 (-1)^s * (n/16) * 2^r
9114
9115 Where:
9116 's' is the sign bit.
9117 'n' is an integer in the range 16 <= n <= 31.
9118 'r' is an integer in the range -3 <= r <= 4. */
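/* For example, 1.0 = (16/16) * 2^0 and 0.25 = (16/16) * 2^-2 are
   representable, whereas 0.1 is not: 0.1 = 1.6 * 2^-4 would need r = -4,
   outside the permitted range.  */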
9119
9120/* Return true iff X can be represented by a quarter-precision
9121 floating point immediate operand. Note, we cannot represent 0.0. */
9122bool
9123aarch64_float_const_representable_p (rtx x)
9124{
9125 /* This represents our current view of how many bits
9126 make up the mantissa. */
9127 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 9128 int exponent;
3520f7cc 9129 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 9130 REAL_VALUE_TYPE r, m;
807e902e 9131 bool fail;
3520f7cc
JG
9132
9133 if (!CONST_DOUBLE_P (x))
9134 return false;
9135
94bfa2da
TV
9136 if (GET_MODE (x) == VOIDmode)
9137 return false;
9138
3520f7cc
JG
9139 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9140
9141 /* We cannot represent infinities, NaNs or +/-zero. We won't
9142 know if we have +zero until we analyse the mantissa, but we
9143 can reject the other invalid values. */
9144 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
9145 || REAL_VALUE_MINUS_ZERO (r))
9146 return false;
9147
ba96cdfb 9148 /* Extract exponent. */
3520f7cc
JG
9149 r = real_value_abs (&r);
9150 exponent = REAL_EXP (&r);
9151
9152 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9153 highest (sign) bit, with a fixed binary point at bit point_pos.
9154 w.elt (0) holds the low part of the mantissa, w.elt (1) the high part.
9155 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
9156 bits for the mantissa, this can fail (low bits will be lost). */
9157 real_ldexp (&m, &r, point_pos - exponent);
807e902e 9158 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
9159
9160 /* If the low part of the mantissa has bits set we cannot represent
9161 the value. */
807e902e 9162 if (w.elt (0) != 0)
3520f7cc
JG
9163 return false;
9164 /* We have rejected the lower HOST_WIDE_INT, so update our
9165 understanding of how many bits lie in the mantissa and
9166 look only at the high HOST_WIDE_INT. */
807e902e 9167 mantissa = w.elt (1);
3520f7cc
JG
9168 point_pos -= HOST_BITS_PER_WIDE_INT;
9169
9170 /* We can only represent values with a mantissa of the form 1.xxxx. */
9171 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
9172 if ((mantissa & mask) != 0)
9173 return false;
9174
9175 /* Having filtered unrepresentable values, we may now remove all
9176 but the highest 5 bits. */
9177 mantissa >>= point_pos - 5;
9178
9179 /* We cannot represent the value 0.0, so reject it. This is handled
9180 elsewhere. */
9181 if (mantissa == 0)
9182 return false;
9183
9184 /* Then, as bit 4 is always set, we can mask it off, leaving
9185 the mantissa in the range [0, 15]. */
9186 mantissa &= ~(1 << 4);
9187 gcc_assert (mantissa <= 15);
9188
9189 /* GCC internally does not use IEEE754-like encoding (where normalized
9190 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
9191 Our mantissa values are shifted 4 places to the left relative to
9192 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
9193 by 5 places to correct for GCC's representation. */
9194 exponent = 5 - exponent;
9195
9196 return (exponent >= 0 && exponent <= 7);
9197}
9198
9199char*
81c2dfb9 9200aarch64_output_simd_mov_immediate (rtx const_vector,
ef4bddc2 9201 machine_mode mode,
3520f7cc
JG
9202 unsigned width)
9203{
3ea63f60 9204 bool is_valid;
3520f7cc 9205 static char templ[40];
3520f7cc 9206 const char *mnemonic;
e4f0f84d 9207 const char *shift_op;
3520f7cc 9208 unsigned int lane_count = 0;
81c2dfb9 9209 char element_char;
3520f7cc 9210
e4f0f84d 9211 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
9212
9213 /* This will return true to show const_vector is legal for use as either
9214 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
9215 also update INFO to show how the immediate should be generated. */
81c2dfb9 9216 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
9217 gcc_assert (is_valid);
9218
81c2dfb9 9219 element_char = sizetochar (info.element_width);
48063b9d
IB
9220 lane_count = width / info.element_width;
9221
3520f7cc
JG
9222 mode = GET_MODE_INNER (mode);
9223 if (mode == SFmode || mode == DFmode)
9224 {
48063b9d
IB
9225 gcc_assert (info.shift == 0 && ! info.mvn);
9226 if (aarch64_float_const_zero_rtx_p (info.value))
9227 info.value = GEN_INT (0);
9228 else
9229 {
9230#define buf_size 20
9231 REAL_VALUE_TYPE r;
9232 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
9233 char float_buf[buf_size] = {'\0'};
9234 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
9235#undef buf_size
9236
9237 if (lane_count == 1)
9238 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
9239 else
9240 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 9241 lane_count, element_char, float_buf);
48063b9d
IB
9242 return templ;
9243 }
3520f7cc 9244 }
3520f7cc 9245
48063b9d 9246 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 9247 shift_op = info.msl ? "msl" : "lsl";
3520f7cc
JG
9248
9249 if (lane_count == 1)
48063b9d
IB
9250 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
9251 mnemonic, UINTVAL (info.value));
9252 else if (info.shift)
9253 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
9254 ", %s %d", mnemonic, lane_count, element_char,
9255 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 9256 else
48063b9d 9257 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 9258 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
9259 return templ;
9260}
9261
b7342d25
IB
9262char*
9263aarch64_output_scalar_simd_mov_immediate (rtx immediate,
ef4bddc2 9264 machine_mode mode)
b7342d25 9265{
ef4bddc2 9266 machine_mode vmode;
b7342d25
IB
9267
9268 gcc_assert (!VECTOR_MODE_P (mode));
9269 vmode = aarch64_simd_container_mode (mode, 64);
9270 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
9271 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
9272}
9273
88b08073
JG
9274/* Split operands into moves from op[1] + op[2] into op[0]. */
9275
9276void
9277aarch64_split_combinev16qi (rtx operands[3])
9278{
9279 unsigned int dest = REGNO (operands[0]);
9280 unsigned int src1 = REGNO (operands[1]);
9281 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 9282 machine_mode halfmode = GET_MODE (operands[1]);
88b08073
JG
9283 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
9284 rtx destlo, desthi;
9285
9286 gcc_assert (halfmode == V16QImode);
9287
9288 if (src1 == dest && src2 == dest + halfregs)
9289 {
9290 /* No-op move. Can't split to nothing; emit something. */
9291 emit_note (NOTE_INSN_DELETED);
9292 return;
9293 }
9294
9295 /* Preserve register attributes for variable tracking. */
9296 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
9297 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
9298 GET_MODE_SIZE (halfmode));
9299
9300 /* Special case of reversed high/low parts. */
9301 if (reg_overlap_mentioned_p (operands[2], destlo)
9302 && reg_overlap_mentioned_p (operands[1], desthi))
9303 {
9304 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9305 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
9306 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9307 }
9308 else if (!reg_overlap_mentioned_p (operands[2], destlo))
9309 {
9310 /* Try to avoid unnecessary moves if part of the result
9311 is in the right place already. */
9312 if (src1 != dest)
9313 emit_move_insn (destlo, operands[1]);
9314 if (src2 != dest + halfregs)
9315 emit_move_insn (desthi, operands[2]);
9316 }
9317 else
9318 {
9319 if (src2 != dest + halfregs)
9320 emit_move_insn (desthi, operands[2]);
9321 if (src1 != dest)
9322 emit_move_insn (destlo, operands[1]);
9323 }
9324}
9325
9326/* vec_perm support. */
9327
9328#define MAX_VECT_LEN 16
9329
9330struct expand_vec_perm_d
9331{
9332 rtx target, op0, op1;
9333 unsigned char perm[MAX_VECT_LEN];
ef4bddc2 9334 machine_mode vmode;
88b08073
JG
9335 unsigned char nelt;
9336 bool one_vector_p;
9337 bool testing_p;
9338};
9339
9340/* Generate a variable permutation. */
9341
9342static void
9343aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
9344{
ef4bddc2 9345 machine_mode vmode = GET_MODE (target);
88b08073
JG
9346 bool one_vector_p = rtx_equal_p (op0, op1);
9347
9348 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
9349 gcc_checking_assert (GET_MODE (op0) == vmode);
9350 gcc_checking_assert (GET_MODE (op1) == vmode);
9351 gcc_checking_assert (GET_MODE (sel) == vmode);
9352 gcc_checking_assert (TARGET_SIMD);
9353
9354 if (one_vector_p)
9355 {
9356 if (vmode == V8QImode)
9357 {
9358 /* Expand the argument to a V16QI mode by duplicating it. */
9359 rtx pair = gen_reg_rtx (V16QImode);
9360 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
9361 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9362 }
9363 else
9364 {
9365 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
9366 }
9367 }
9368 else
9369 {
9370 rtx pair;
9371
9372 if (vmode == V8QImode)
9373 {
9374 pair = gen_reg_rtx (V16QImode);
9375 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
9376 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9377 }
9378 else
9379 {
9380 pair = gen_reg_rtx (OImode);
9381 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
9382 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
9383 }
9384 }
9385}
9386
9387void
9388aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
9389{
ef4bddc2 9390 machine_mode vmode = GET_MODE (target);
c9d1a16a 9391 unsigned int nelt = GET_MODE_NUNITS (vmode);
88b08073 9392 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 9393 rtx mask;
88b08073
JG
9394
9395 /* The TBL instruction does not use a modulo index, so we must take care
9396 of that ourselves. */
f7c4e5b8
AL
9397 mask = aarch64_simd_gen_const_vector_dup (vmode,
9398 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
9399 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
9400
f7c4e5b8
AL
9401 /* For big-endian, we also need to reverse the index within the vector
9402 (but not which vector). */
9403 if (BYTES_BIG_ENDIAN)
9404 {
9405 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
9406 if (!one_vector_p)
9407 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
9408 sel = expand_simple_binop (vmode, XOR, sel, mask,
9409 NULL, 0, OPTAB_LIB_WIDEN);
9410 }
88b08073
JG
9411 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
9412}
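/* Worked example (illustrative, not part of the original source): for a
   two-vector V16QImode permute, nelt is 16, so the mask above is
   2 * 16 - 1 = 31 and a selector value of 37 is reduced to 37 & 31 = 5.
   On big-endian the additional XOR with nelt - 1 = 15 flips the index
   within its vector, so 5 becomes 5 ^ 15 = 10, still selecting from the
   same input vector.  */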
9413
cc4d934f
JG
9414/* Recognize patterns suitable for the TRN instructions. */
9415static bool
9416aarch64_evpc_trn (struct expand_vec_perm_d *d)
9417{
9418 unsigned int i, odd, mask, nelt = d->nelt;
9419 rtx out, in0, in1, x;
9420 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 9421 machine_mode vmode = d->vmode;
cc4d934f
JG
9422
9423 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9424 return false;
9425
9426 /* Note that these are little-endian tests.
9427 We correct for big-endian later. */
9428 if (d->perm[0] == 0)
9429 odd = 0;
9430 else if (d->perm[0] == 1)
9431 odd = 1;
9432 else
9433 return false;
9434 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9435
9436 for (i = 0; i < nelt; i += 2)
9437 {
9438 if (d->perm[i] != i + odd)
9439 return false;
9440 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
9441 return false;
9442 }
9443
9444 /* Success! */
9445 if (d->testing_p)
9446 return true;
9447
9448 in0 = d->op0;
9449 in1 = d->op1;
9450 if (BYTES_BIG_ENDIAN)
9451 {
9452 x = in0, in0 = in1, in1 = x;
9453 odd = !odd;
9454 }
9455 out = d->target;
9456
9457 if (odd)
9458 {
9459 switch (vmode)
9460 {
9461 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
9462 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
9463 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
9464 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
9465 case V4SImode: gen = gen_aarch64_trn2v4si; break;
9466 case V2SImode: gen = gen_aarch64_trn2v2si; break;
9467 case V2DImode: gen = gen_aarch64_trn2v2di; break;
9468 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
9469 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
9470 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
9471 default:
9472 return false;
9473 }
9474 }
9475 else
9476 {
9477 switch (vmode)
9478 {
9479 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
9480 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
9481 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
9482 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
9483 case V4SImode: gen = gen_aarch64_trn1v4si; break;
9484 case V2SImode: gen = gen_aarch64_trn1v2si; break;
9485 case V2DImode: gen = gen_aarch64_trn1v2di; break;
9486 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
9487 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
9488 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
9489 default:
9490 return false;
9491 }
9492 }
9493
9494 emit_insn (gen (out, in0, in1));
9495 return true;
9496}
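/* Illustrative sketch, not part of the original source: with GCC's generic
   vector extensions, the index patterns accepted above map onto TRN1/TRN2.
   The type and function names below are assumptions for the example; on a
   little-endian target at -O2 the first shuffle would typically become
   "trn1 v0.4s, v0.4s, v1.4s" and the second the corresponding "trn2".  */

typedef int ex_trn_v4si __attribute__ ((vector_size (16)));

ex_trn_v4si
example_trn1 (ex_trn_v4si a, ex_trn_v4si b)
{
  /* perm[0] == 0, perm[i] == i, perm[i + 1] == i + nelt.  */
  return __builtin_shuffle (a, b, (ex_trn_v4si) { 0, 4, 2, 6 });
}

ex_trn_v4si
example_trn2 (ex_trn_v4si a, ex_trn_v4si b)
{
  /* perm[0] == 1, so the "odd" variant is selected.  */
  return __builtin_shuffle (a, b, (ex_trn_v4si) { 1, 5, 3, 7 });
}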
9497
9498/* Recognize patterns suitable for the UZP instructions. */
9499static bool
9500aarch64_evpc_uzp (struct expand_vec_perm_d *d)
9501{
9502 unsigned int i, odd, mask, nelt = d->nelt;
9503 rtx out, in0, in1, x;
9504 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 9505 machine_mode vmode = d->vmode;
cc4d934f
JG
9506
9507 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9508 return false;
9509
9510 /* Note that these are little-endian tests.
9511 We correct for big-endian later. */
9512 if (d->perm[0] == 0)
9513 odd = 0;
9514 else if (d->perm[0] == 1)
9515 odd = 1;
9516 else
9517 return false;
9518 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9519
9520 for (i = 0; i < nelt; i++)
9521 {
9522 unsigned elt = (i * 2 + odd) & mask;
9523 if (d->perm[i] != elt)
9524 return false;
9525 }
9526
9527 /* Success! */
9528 if (d->testing_p)
9529 return true;
9530
9531 in0 = d->op0;
9532 in1 = d->op1;
9533 if (BYTES_BIG_ENDIAN)
9534 {
9535 x = in0, in0 = in1, in1 = x;
9536 odd = !odd;
9537 }
9538 out = d->target;
9539
9540 if (odd)
9541 {
9542 switch (vmode)
9543 {
9544 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9545 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9546 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9547 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9548 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9549 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9550 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9551 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9552 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9553 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9554 default:
9555 return false;
9556 }
9557 }
9558 else
9559 {
9560 switch (vmode)
9561 {
9562 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9563 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9564 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9565 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9566 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9567 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9568 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9569 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9570 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9571 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9572 default:
9573 return false;
9574 }
9575 }
9576
9577 emit_insn (gen (out, in0, in1));
9578 return true;
9579}
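/* Illustrative sketch, not part of the original source: the element pattern
   accepted above, (i * 2 + odd) & mask, selects the even-numbered or
   odd-numbered elements of the concatenated inputs, which is exactly what
   UZP1/UZP2 do.  Names below are assumptions for the example.  */

typedef int ex_uzp_v4si __attribute__ ((vector_size (16)));

ex_uzp_v4si
example_uzp1 (ex_uzp_v4si a, ex_uzp_v4si b)
{
  return __builtin_shuffle (a, b, (ex_uzp_v4si) { 0, 2, 4, 6 });
}

ex_uzp_v4si
example_uzp2 (ex_uzp_v4si a, ex_uzp_v4si b)
{
  return __builtin_shuffle (a, b, (ex_uzp_v4si) { 1, 3, 5, 7 });
}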
9580
9581/* Recognize patterns suitable for the ZIP instructions. */
9582static bool
9583aarch64_evpc_zip (struct expand_vec_perm_d *d)
9584{
9585 unsigned int i, high, mask, nelt = d->nelt;
9586 rtx out, in0, in1, x;
9587 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 9588 machine_mode vmode = d->vmode;
cc4d934f
JG
9589
9590 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9591 return false;
9592
9593 /* Note that these are little-endian tests.
9594 We correct for big-endian later. */
9595 high = nelt / 2;
9596 if (d->perm[0] == high)
9597 /* Do Nothing. */
9598 ;
9599 else if (d->perm[0] == 0)
9600 high = 0;
9601 else
9602 return false;
9603 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9604
9605 for (i = 0; i < nelt / 2; i++)
9606 {
9607 unsigned elt = (i + high) & mask;
9608 if (d->perm[i * 2] != elt)
9609 return false;
9610 elt = (elt + nelt) & mask;
9611 if (d->perm[i * 2 + 1] != elt)
9612 return false;
9613 }
9614
9615 /* Success! */
9616 if (d->testing_p)
9617 return true;
9618
9619 in0 = d->op0;
9620 in1 = d->op1;
9621 if (BYTES_BIG_ENDIAN)
9622 {
9623 x = in0, in0 = in1, in1 = x;
9624 high = !high;
9625 }
9626 out = d->target;
9627
9628 if (high)
9629 {
9630 switch (vmode)
9631 {
9632 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9633 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9634 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9635 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9636 case V4SImode: gen = gen_aarch64_zip2v4si; break;
9637 case V2SImode: gen = gen_aarch64_zip2v2si; break;
9638 case V2DImode: gen = gen_aarch64_zip2v2di; break;
9639 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9640 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9641 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9642 default:
9643 return false;
9644 }
9645 }
9646 else
9647 {
9648 switch (vmode)
9649 {
9650 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9651 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9652 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9653 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9654 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9655 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9656 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9657 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9658 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9659 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9660 default:
9661 return false;
9662 }
9663 }
9664
9665 emit_insn (gen (out, in0, in1));
9666 return true;
9667}
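/* Illustrative sketch, not part of the original source: ZIP1 interleaves the
   low halves of the two inputs and ZIP2 the high halves, which is the
   pattern checked above (perm[2i] == i + high, perm[2i + 1] == that + nelt).
   Names below are assumptions for the example.  */

typedef int ex_zip_v4si __attribute__ ((vector_size (16)));

ex_zip_v4si
example_zip1 (ex_zip_v4si a, ex_zip_v4si b)
{
  return __builtin_shuffle (a, b, (ex_zip_v4si) { 0, 4, 1, 5 });
}

ex_zip_v4si
example_zip2 (ex_zip_v4si a, ex_zip_v4si b)
{
  return __builtin_shuffle (a, b, (ex_zip_v4si) { 2, 6, 3, 7 });
}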
9668
ae0533da
AL
9669/* Recognize patterns for the EXT insn. */
9670
9671static bool
9672aarch64_evpc_ext (struct expand_vec_perm_d *d)
9673{
9674 unsigned int i, nelt = d->nelt;
9675 rtx (*gen) (rtx, rtx, rtx, rtx);
9676 rtx offset;
9677
9678 unsigned int location = d->perm[0]; /* Always < nelt. */
9679
9680 /* Check if the extracted indices are increasing by one. */
9681 for (i = 1; i < nelt; i++)
9682 {
9683 unsigned int required = location + i;
9684 if (d->one_vector_p)
9685 {
9686 /* We'll pass the same vector in twice, so allow indices to wrap. */
9687 required &= (nelt - 1);
9688 }
9689 if (d->perm[i] != required)
9690 return false;
9691 }
9692
ae0533da
AL
9693 switch (d->vmode)
9694 {
9695 case V16QImode: gen = gen_aarch64_extv16qi; break;
9696 case V8QImode: gen = gen_aarch64_extv8qi; break;
9697 case V4HImode: gen = gen_aarch64_extv4hi; break;
9698 case V8HImode: gen = gen_aarch64_extv8hi; break;
9699 case V2SImode: gen = gen_aarch64_extv2si; break;
9700 case V4SImode: gen = gen_aarch64_extv4si; break;
9701 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9702 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9703 case V2DImode: gen = gen_aarch64_extv2di; break;
9704 case V2DFmode: gen = gen_aarch64_extv2df; break;
9705 default:
9706 return false;
9707 }
9708
9709 /* Success! */
9710 if (d->testing_p)
9711 return true;
9712
b31e65bb
AL
9713 /* The case where (location == 0) is a no-op for both big- and little-endian,
9714 and is removed by the mid-end at optimization levels -O1 and higher. */
9715
9716 if (BYTES_BIG_ENDIAN && (location != 0))
ae0533da
AL
9717 {
9718 /* After setup, we want the high elements of the first vector (stored
9719 at the LSB end of the register), and the low elements of the second
9720 vector (stored at the MSB end of the register). So swap. */
9721 rtx temp = d->op0;
9722 d->op0 = d->op1;
9723 d->op1 = temp;
9724 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9725 location = nelt - location;
9726 }
9727
9728 offset = GEN_INT (location);
9729 emit_insn (gen (d->target, d->op0, d->op1, offset));
9730 return true;
9731}
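/* Illustrative sketch, not part of the original source: a run of consecutive
   indices starting at LOCATION, as matched above, is an EXT of the two
   concatenated inputs.  Names are assumptions; on little-endian the shuffle
   below (LOCATION == 1, 4-byte elements) would typically become
   "ext v0.16b, v0.16b, v1.16b, #4".  */

typedef int ex_ext_v4si __attribute__ ((vector_size (16)));

ex_ext_v4si
example_ext (ex_ext_v4si a, ex_ext_v4si b)
{
  return __builtin_shuffle (a, b, (ex_ext_v4si) { 1, 2, 3, 4 });
}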
9732
923fcec3
AL
9733/* Recognize patterns for the REV insns. */
9734
9735static bool
9736aarch64_evpc_rev (struct expand_vec_perm_d *d)
9737{
9738 unsigned int i, j, diff, nelt = d->nelt;
9739 rtx (*gen) (rtx, rtx);
9740
9741 if (!d->one_vector_p)
9742 return false;
9743
9744 diff = d->perm[0];
9745 switch (diff)
9746 {
9747 case 7:
9748 switch (d->vmode)
9749 {
9750 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9751 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
9752 default:
9753 return false;
9754 }
9755 break;
9756 case 3:
9757 switch (d->vmode)
9758 {
9759 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9760 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
9761 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
9762 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
9763 default:
9764 return false;
9765 }
9766 break;
9767 case 1:
9768 switch (d->vmode)
9769 {
9770 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9771 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
9772 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
9773 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
9774 case V4SImode: gen = gen_aarch64_rev64v4si; break;
9775 case V2SImode: gen = gen_aarch64_rev64v2si; break;
9776 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
9777 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
9778 default:
9779 return false;
9780 }
9781 break;
9782 default:
9783 return false;
9784 }
9785
9786 for (i = 0; i < nelt ; i += diff + 1)
9787 for (j = 0; j <= diff; j += 1)
9788 {
9789 /* This is guaranteed to be true as the value of diff
 9790	 is 7, 3 or 1 and we should have enough elements in the
9791 queue to generate this. Getting a vector mask with a
9792 value of diff other than these values implies that
9793 something is wrong by the time we get here. */
9794 gcc_assert (i + j < nelt);
9795 if (d->perm[i + j] != i + diff - j)
9796 return false;
9797 }
9798
9799 /* Success! */
9800 if (d->testing_p)
9801 return true;
9802
9803 emit_insn (gen (d->target, d->op0));
9804 return true;
9805}
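/* Illustrative sketch, not part of the original source: DIFF selects the
   container being reversed - 7 means REV64 on bytes, 3 means REV32 on bytes
   or REV64 on halfwords, and 1 means REV16/REV32/REV64 on the respective
   element sizes.  The single-vector shuffle below reverses halfword pairs
   (DIFF == 1 for V4HImode, i.e. REV32); names are assumptions.  */

typedef short ex_rev_v4hi __attribute__ ((vector_size (8)));

ex_rev_v4hi
example_rev32 (ex_rev_v4hi a)
{
  return __builtin_shuffle (a, (ex_rev_v4hi) { 1, 0, 3, 2 });
}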
9806
91bd4114
JG
9807static bool
9808aarch64_evpc_dup (struct expand_vec_perm_d *d)
9809{
9810 rtx (*gen) (rtx, rtx, rtx);
9811 rtx out = d->target;
9812 rtx in0;
ef4bddc2 9813 machine_mode vmode = d->vmode;
91bd4114
JG
9814 unsigned int i, elt, nelt = d->nelt;
9815 rtx lane;
9816
91bd4114
JG
9817 elt = d->perm[0];
9818 for (i = 1; i < nelt; i++)
9819 {
9820 if (elt != d->perm[i])
9821 return false;
9822 }
9823
9824 /* The generic preparation in aarch64_expand_vec_perm_const_1
9825 swaps the operand order and the permute indices if it finds
9826 d->perm[0] to be in the second operand. Thus, we can always
9827 use d->op0 and need not do any extra arithmetic to get the
9828 correct lane number. */
9829 in0 = d->op0;
f901401e 9830 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114
JG
9831
9832 switch (vmode)
9833 {
9834 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9835 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9836 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9837 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9838 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9839 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9840 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9841 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9842 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9843 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9844 default:
9845 return false;
9846 }
9847
9848 emit_insn (gen (out, in0, lane));
9849 return true;
9850}
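/* Illustrative sketch, not part of the original source: a constant
   permutation whose indices are all equal, as checked above, is a DUP of a
   single lane.  Names are assumptions; the shuffle below would typically
   become "dup v0.4s, v0.s[2]".  */

typedef int ex_dup_v4si __attribute__ ((vector_size (16)));

ex_dup_v4si
example_dup_lane (ex_dup_v4si a)
{
  return __builtin_shuffle (a, (ex_dup_v4si) { 2, 2, 2, 2 });
}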
9851
88b08073
JG
9852static bool
9853aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9854{
9855 rtx rperm[MAX_VECT_LEN], sel;
ef4bddc2 9856 machine_mode vmode = d->vmode;
88b08073
JG
9857 unsigned int i, nelt = d->nelt;
9858
88b08073
JG
9859 if (d->testing_p)
9860 return true;
9861
9862 /* Generic code will try constant permutation twice. Once with the
9863 original mode and again with the elements lowered to QImode.
9864 So wait and don't do the selector expansion ourselves. */
9865 if (vmode != V8QImode && vmode != V16QImode)
9866 return false;
9867
9868 for (i = 0; i < nelt; ++i)
bbcc9c00
TB
9869 {
9870 int nunits = GET_MODE_NUNITS (vmode);
9871
9872 /* If big-endian and two vectors we end up with a weird mixed-endian
9873 mode on NEON. Reverse the index within each word but not the word
9874 itself. */
9875 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9876 : d->perm[i]);
9877 }
88b08073
JG
9878 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9879 sel = force_reg (vmode, sel);
9880
9881 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9882 return true;
9883}
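/* Illustrative sketch, not part of the original source: a constant
   permutation that matches none of the structured patterns falls through to
   this TBL expansion, with the selector forced into a register.  Names are
   assumptions for the example.  */

typedef unsigned char ex_tbl_v16qi __attribute__ ((vector_size (16)));

ex_tbl_v16qi
example_tbl (ex_tbl_v16qi a)
{
  /* A full 16-byte reversal is not a TRN/UZP/ZIP/EXT/REV/DUP (REV stops at
     64-bit containers), so it is expanded via TBL with this constant byte
     selector.  */
  return __builtin_shuffle (a, (ex_tbl_v16qi) { 15, 14, 13, 12, 11, 10, 9, 8,
						7, 6, 5, 4, 3, 2, 1, 0 });
}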
9884
9885static bool
9886aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9887{
9888 /* The pattern matching functions above are written to look for a small
9889 number to begin the sequence (0, 1, N/2). If we begin with an index
9890 from the second operand, we can swap the operands. */
9891 if (d->perm[0] >= d->nelt)
9892 {
9893 unsigned i, nelt = d->nelt;
9894 rtx x;
9895
0696116a 9896 gcc_assert (nelt == (nelt & -nelt));
88b08073 9897 for (i = 0; i < nelt; ++i)
0696116a 9898 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
88b08073
JG
9899
9900 x = d->op0;
9901 d->op0 = d->op1;
9902 d->op1 = x;
9903 }
9904
9905 if (TARGET_SIMD)
cc4d934f 9906 {
923fcec3
AL
9907 if (aarch64_evpc_rev (d))
9908 return true;
9909 else if (aarch64_evpc_ext (d))
ae0533da 9910 return true;
f901401e
AL
9911 else if (aarch64_evpc_dup (d))
9912 return true;
ae0533da 9913 else if (aarch64_evpc_zip (d))
cc4d934f
JG
9914 return true;
9915 else if (aarch64_evpc_uzp (d))
9916 return true;
9917 else if (aarch64_evpc_trn (d))
9918 return true;
9919 return aarch64_evpc_tbl (d);
9920 }
88b08073
JG
9921 return false;
9922}
9923
9924/* Expand a vec_perm_const pattern. */
9925
9926bool
9927aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9928{
9929 struct expand_vec_perm_d d;
9930 int i, nelt, which;
9931
9932 d.target = target;
9933 d.op0 = op0;
9934 d.op1 = op1;
9935
9936 d.vmode = GET_MODE (target);
9937 gcc_assert (VECTOR_MODE_P (d.vmode));
9938 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9939 d.testing_p = false;
9940
9941 for (i = which = 0; i < nelt; ++i)
9942 {
9943 rtx e = XVECEXP (sel, 0, i);
9944 int ei = INTVAL (e) & (2 * nelt - 1);
9945 which |= (ei < nelt ? 1 : 2);
9946 d.perm[i] = ei;
9947 }
9948
9949 switch (which)
9950 {
9951 default:
9952 gcc_unreachable ();
9953
9954 case 3:
9955 d.one_vector_p = false;
9956 if (!rtx_equal_p (op0, op1))
9957 break;
9958
9959 /* The elements of PERM do not suggest that only the first operand
9960 is used, but both operands are identical. Allow easier matching
9961 of the permutation by folding the permutation into the single
9962 input vector. */
9963 /* Fall Through. */
9964 case 2:
9965 for (i = 0; i < nelt; ++i)
9966 d.perm[i] &= nelt - 1;
9967 d.op0 = op1;
9968 d.one_vector_p = true;
9969 break;
9970
9971 case 1:
9972 d.op1 = op0;
9973 d.one_vector_p = true;
9974 break;
9975 }
9976
9977 return aarch64_expand_vec_perm_const_1 (&d);
9978}
9979
9980static bool
ef4bddc2 9981aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
88b08073
JG
9982 const unsigned char *sel)
9983{
9984 struct expand_vec_perm_d d;
9985 unsigned int i, nelt, which;
9986 bool ret;
9987
9988 d.vmode = vmode;
9989 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9990 d.testing_p = true;
9991 memcpy (d.perm, sel, nelt);
9992
9993 /* Calculate whether all elements are in one vector. */
9994 for (i = which = 0; i < nelt; ++i)
9995 {
9996 unsigned char e = d.perm[i];
9997 gcc_assert (e < 2 * nelt);
9998 which |= (e < nelt ? 1 : 2);
9999 }
10000
10001 /* If all elements are from the second vector, reindex as if from the
10002 first vector. */
10003 if (which == 2)
10004 for (i = 0; i < nelt; ++i)
10005 d.perm[i] -= nelt;
10006
10007 /* Check whether the mask can be applied to a single vector. */
10008 d.one_vector_p = (which != 3);
10009
10010 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
10011 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
10012 if (!d.one_vector_p)
10013 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
10014
10015 start_sequence ();
10016 ret = aarch64_expand_vec_perm_const_1 (&d);
10017 end_sequence ();
10018
10019 return ret;
10020}
10021
69675d50
TB
10022/* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
10023bool
ef4bddc2
RS
10024aarch64_cannot_change_mode_class (machine_mode from,
10025 machine_mode to,
69675d50
TB
10026 enum reg_class rclass)
10027{
10028 /* Full-reg subregs are allowed on general regs or any class if they are
10029 the same size. */
10030 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
10031 || !reg_classes_intersect_p (FP_REGS, rclass))
10032 return false;
10033
10034 /* Limited combinations of subregs are safe on FPREGs. Particularly,
10035 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
10036 2. Scalar to Scalar for integer modes or same size float modes.
97e1ad78
JG
10037 3. Vector to Vector modes.
10038 4. On little-endian only, Vector-Structure to Vector modes. */
69675d50
TB
10039 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
10040 {
10041 if (aarch64_vector_mode_supported_p (from)
10042 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
10043 return false;
10044
10045 if (GET_MODE_NUNITS (from) == 1
10046 && GET_MODE_NUNITS (to) == 1
10047 && (GET_MODE_CLASS (from) == MODE_INT
10048 || from == to))
10049 return false;
10050
10051 if (aarch64_vector_mode_supported_p (from)
10052 && aarch64_vector_mode_supported_p (to))
10053 return false;
97e1ad78
JG
10054
 10055	 /* Within a vector structure straddling multiple vector registers
10056 we are in a mixed-endian representation. As such, we can't
10057 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
10058 switch between vectors and vector structures cheaply. */
10059 if (!BYTES_BIG_ENDIAN)
10060 if ((aarch64_vector_mode_supported_p (from)
10061 && aarch64_vect_struct_mode_p (to))
10062 || (aarch64_vector_mode_supported_p (to)
10063 && aarch64_vect_struct_mode_p (from)))
10064 return false;
69675d50
TB
10065 }
10066
10067 return true;
10068}
10069
97e1ad78
JG
10070/* Implement MODES_TIEABLE_P. */
10071
10072bool
ef4bddc2 10073aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
10074{
10075 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
10076 return true;
10077
10078 /* We specifically want to allow elements of "structure" modes to
10079 be tieable to the structure. This more general condition allows
10080 other rarer situations too. */
10081 if (TARGET_SIMD
10082 && aarch64_vector_mode_p (mode1)
10083 && aarch64_vector_mode_p (mode2))
10084 return true;
10085
10086 return false;
10087}
10088
e2c75eea
JG
10089/* Return a new RTX holding the result of moving POINTER forward by
10090 AMOUNT bytes. */
10091
10092static rtx
10093aarch64_move_pointer (rtx pointer, int amount)
10094{
10095 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
10096
10097 return adjust_automodify_address (pointer, GET_MODE (pointer),
10098 next, amount);
10099}
10100
10101/* Return a new RTX holding the result of moving POINTER forward by the
10102 size of the mode it points to. */
10103
10104static rtx
10105aarch64_progress_pointer (rtx pointer)
10106{
10107 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
10108
10109 return aarch64_move_pointer (pointer, amount);
10110}
10111
10112/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
10113 MODE bytes. */
10114
10115static void
10116aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 10117 machine_mode mode)
e2c75eea
JG
10118{
10119 rtx reg = gen_reg_rtx (mode);
10120
10121 /* "Cast" the pointers to the correct mode. */
10122 *src = adjust_address (*src, mode, 0);
10123 *dst = adjust_address (*dst, mode, 0);
10124 /* Emit the memcpy. */
10125 emit_move_insn (reg, *src);
10126 emit_move_insn (*dst, reg);
10127 /* Move the pointers forward. */
10128 *src = aarch64_progress_pointer (*src);
10129 *dst = aarch64_progress_pointer (*dst);
10130}
10131
10132/* Expand movmem, as if from a __builtin_memcpy. Return true if
10133 we succeed, otherwise return false. */
10134
10135bool
10136aarch64_expand_movmem (rtx *operands)
10137{
10138 unsigned int n;
10139 rtx dst = operands[0];
10140 rtx src = operands[1];
10141 rtx base;
10142 bool speed_p = !optimize_function_for_size_p (cfun);
10143
10144 /* When optimizing for size, give a better estimate of the length of a
10145 memcpy call, but use the default otherwise. */
10146 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
10147
10148 /* We can't do anything smart if the amount to copy is not constant. */
10149 if (!CONST_INT_P (operands[2]))
10150 return false;
10151
10152 n = UINTVAL (operands[2]);
10153
10154 /* Try to keep the number of instructions low. For cases below 16 bytes we
10155 need to make at most two moves. For cases above 16 bytes it will be one
10156 move for each 16 byte chunk, then at most two additional moves. */
10157 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
10158 return false;
10159
10160 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10161 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
10162
10163 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
10164 src = adjust_automodify_address (src, VOIDmode, base, 0);
10165
10166 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
10167 1-byte chunk. */
10168 if (n < 4)
10169 {
10170 if (n >= 2)
10171 {
10172 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10173 n -= 2;
10174 }
10175
10176 if (n == 1)
10177 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10178
10179 return true;
10180 }
10181
10182 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
10183 4-byte chunk, partially overlapping with the previously copied chunk. */
10184 if (n < 8)
10185 {
10186 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10187 n -= 4;
10188 if (n > 0)
10189 {
10190 int move = n - 4;
10191
10192 src = aarch64_move_pointer (src, move);
10193 dst = aarch64_move_pointer (dst, move);
10194 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10195 }
10196 return true;
10197 }
10198
10199 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
10200 them, then (if applicable) an 8-byte chunk. */
10201 while (n >= 8)
10202 {
10203 if (n / 16)
10204 {
10205 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
10206 n -= 16;
10207 }
10208 else
10209 {
10210 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10211 n -= 8;
10212 }
10213 }
10214
10215 /* Finish the final bytes of the copy. We can always do this in one
10216 instruction. We either copy the exact amount we need, or partially
 10217	 overlap with the previous chunk we copied and copy 4 or 8 bytes. */
10218 if (n == 0)
10219 return true;
10220 else if (n == 1)
10221 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10222 else if (n == 2)
10223 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10224 else if (n == 4)
10225 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10226 else
10227 {
10228 if (n == 3)
10229 {
10230 src = aarch64_move_pointer (src, -1);
10231 dst = aarch64_move_pointer (dst, -1);
10232 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10233 }
10234 else
10235 {
10236 int move = n - 8;
10237
10238 src = aarch64_move_pointer (src, move);
10239 dst = aarch64_move_pointer (dst, move);
10240 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10241 }
10242 }
10243
10244 return true;
10245}
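/* Worked example (illustrative, not part of the original source): for a
   7-byte copy the code above emits one 4-byte block for bytes 0-3 and then
   re-anchors the pointers so a second 4-byte block covers bytes 3-6,
   overlapping byte 3 rather than falling back to smaller accesses.  In plain
   C the same trick looks like the hypothetical helper below.  */

static void
example_copy7 (unsigned char *dst, const unsigned char *src)
{
  __builtin_memcpy (dst, src, 4);          /* Bytes 0-3.  */
  __builtin_memcpy (dst + 3, src + 3, 4);  /* Bytes 3-6, overlapping byte 3.  */
}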
10246
a3125fc2
CL
10247/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
10248
10249static unsigned HOST_WIDE_INT
10250aarch64_asan_shadow_offset (void)
10251{
10252 return (HOST_WIDE_INT_1 << 36);
10253}
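/* Illustrative note, not part of the original source: AddressSanitizer maps
   an application address to its shadow as
   shadow = (addr >> SHADOW_SCALE) + SHADOW_OFFSET, with a default scale of 3,
   so the value returned above means shadow = (addr >> 3) + (1 << 36) on this
   target.  */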
10254
d3006da6 10255static bool
445d7826 10256aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
d3006da6
JG
10257 unsigned int align,
10258 enum by_pieces_operation op,
10259 bool speed_p)
10260{
10261 /* STORE_BY_PIECES can be used when copying a constant string, but
10262 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
10263 For now we always fail this and let the move_by_pieces code copy
10264 the string from read-only memory. */
10265 if (op == STORE_BY_PIECES)
10266 return false;
10267
10268 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
10269}
10270
6a569cdd
KT
10271/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
10272 instruction fusion of some sort. */
10273
10274static bool
10275aarch64_macro_fusion_p (void)
10276{
10277 return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
10278}
10279
10280
10281/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
10282 should be kept together during scheduling. */
10283
10284static bool
10285aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
10286{
10287 rtx set_dest;
10288 rtx prev_set = single_set (prev);
10289 rtx curr_set = single_set (curr);
10290 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
10291 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
10292
10293 if (!aarch64_macro_fusion_p ())
10294 return false;
10295
10296 if (simple_sets_p
10297 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
10298 {
10299 /* We are trying to match:
10300 prev (mov) == (set (reg r0) (const_int imm16))
10301 curr (movk) == (set (zero_extract (reg r0)
10302 (const_int 16)
10303 (const_int 16))
10304 (const_int imm16_1)) */
10305
10306 set_dest = SET_DEST (curr_set);
10307
10308 if (GET_CODE (set_dest) == ZERO_EXTRACT
10309 && CONST_INT_P (SET_SRC (curr_set))
10310 && CONST_INT_P (SET_SRC (prev_set))
10311 && CONST_INT_P (XEXP (set_dest, 2))
10312 && INTVAL (XEXP (set_dest, 2)) == 16
10313 && REG_P (XEXP (set_dest, 0))
10314 && REG_P (SET_DEST (prev_set))
10315 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
10316 {
10317 return true;
10318 }
10319 }
10320
9bbe08fe
KT
10321 if (simple_sets_p
10322 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
10323 {
10324
10325 /* We're trying to match:
10326 prev (adrp) == (set (reg r1)
10327 (high (symbol_ref ("SYM"))))
10328 curr (add) == (set (reg r0)
10329 (lo_sum (reg r1)
10330 (symbol_ref ("SYM"))))
10331 Note that r0 need not necessarily be the same as r1, especially
10332 during pre-regalloc scheduling. */
10333
10334 if (satisfies_constraint_Ush (SET_SRC (prev_set))
10335 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10336 {
10337 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
10338 && REG_P (XEXP (SET_SRC (curr_set), 0))
10339 && REGNO (XEXP (SET_SRC (curr_set), 0))
10340 == REGNO (SET_DEST (prev_set))
10341 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
10342 XEXP (SET_SRC (curr_set), 1)))
10343 return true;
10344 }
10345 }
10346
cd0cb232
KT
10347 if (simple_sets_p
10348 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK))
10349 {
10350
10351 /* We're trying to match:
10352 prev (movk) == (set (zero_extract (reg r0)
10353 (const_int 16)
10354 (const_int 32))
10355 (const_int imm16_1))
10356 curr (movk) == (set (zero_extract (reg r0)
10357 (const_int 16)
10358 (const_int 48))
10359 (const_int imm16_2)) */
10360
10361 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
10362 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
10363 && REG_P (XEXP (SET_DEST (prev_set), 0))
10364 && REG_P (XEXP (SET_DEST (curr_set), 0))
10365 && REGNO (XEXP (SET_DEST (prev_set), 0))
10366 == REGNO (XEXP (SET_DEST (curr_set), 0))
10367 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
10368 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
10369 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
10370 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
10371 && CONST_INT_P (SET_SRC (prev_set))
10372 && CONST_INT_P (SET_SRC (curr_set)))
10373 return true;
10374
10375 }
d8354ad7
KT
10376 if (simple_sets_p
10377 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_LDR))
10378 {
10379 /* We're trying to match:
10380 prev (adrp) == (set (reg r0)
10381 (high (symbol_ref ("SYM"))))
10382 curr (ldr) == (set (reg r1)
10383 (mem (lo_sum (reg r0)
10384 (symbol_ref ("SYM")))))
10385 or
10386 curr (ldr) == (set (reg r1)
10387 (zero_extend (mem
10388 (lo_sum (reg r0)
10389 (symbol_ref ("SYM")))))) */
10390 if (satisfies_constraint_Ush (SET_SRC (prev_set))
10391 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10392 {
10393 rtx curr_src = SET_SRC (curr_set);
10394
10395 if (GET_CODE (curr_src) == ZERO_EXTEND)
10396 curr_src = XEXP (curr_src, 0);
10397
10398 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
10399 && REG_P (XEXP (XEXP (curr_src, 0), 0))
10400 && REGNO (XEXP (XEXP (curr_src, 0), 0))
10401 == REGNO (SET_DEST (prev_set))
10402 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
10403 XEXP (SET_SRC (prev_set), 0)))
10404 return true;
10405 }
10406 }
cd0cb232 10407
3759108f
AP
10408 if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
10409 && any_condjump_p (curr))
10410 {
10411 enum attr_type prev_type = get_attr_type (prev);
10412
 10413	 /* FIXME: this misses some instructions that are considered simple
 10414	 arithmetic for ThunderX. Simple shifts are missed here. */
10415 if (prev_type == TYPE_ALUS_SREG
10416 || prev_type == TYPE_ALUS_IMM
10417 || prev_type == TYPE_LOGICS_REG
10418 || prev_type == TYPE_LOGICS_IMM)
10419 return true;
10420 }
10421
6a569cdd
KT
10422 return false;
10423}
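/* Illustrative sketch, not part of the original source: the ADRP/ADD case
   above corresponds to the usual small-code-model address materialisation,
   e.g. taking the address of a global.  The names below are assumptions; on
   a tuning that enables AARCH64_FUSE_ADRP_ADD the function would typically
   compile to
       adrp  x0, example_global
       add   x0, x0, :lo12:example_global
   and this hook asks the scheduler to keep that pair back to back.  */

extern int example_global;

int *
example_take_address (void)
{
  return &example_global;
}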
10424
350013bc
BC
 10425/* If MEM is in the form of [base+offset], extract the two parts
 10426 of the address into BASE and OFFSET; otherwise return false
 10427 after clearing BASE and OFFSET. */
10428
10429bool
10430extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
10431{
10432 rtx addr;
10433
10434 gcc_assert (MEM_P (mem));
10435
10436 addr = XEXP (mem, 0);
10437
10438 if (REG_P (addr))
10439 {
10440 *base = addr;
10441 *offset = const0_rtx;
10442 return true;
10443 }
10444
10445 if (GET_CODE (addr) == PLUS
10446 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
10447 {
10448 *base = XEXP (addr, 0);
10449 *offset = XEXP (addr, 1);
10450 return true;
10451 }
10452
10453 *base = NULL_RTX;
10454 *offset = NULL_RTX;
10455
10456 return false;
10457}
10458
10459/* Types for scheduling fusion. */
10460enum sched_fusion_type
10461{
10462 SCHED_FUSION_NONE = 0,
10463 SCHED_FUSION_LD_SIGN_EXTEND,
10464 SCHED_FUSION_LD_ZERO_EXTEND,
10465 SCHED_FUSION_LD,
10466 SCHED_FUSION_ST,
10467 SCHED_FUSION_NUM
10468};
10469
 10470/* If INSN is a load or store with an address in the form of [base+offset],
 10471 extract the two parts into BASE and OFFSET. Return the scheduling
 10472 fusion type of this INSN. */
10473
10474static enum sched_fusion_type
10475fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
10476{
10477 rtx x, dest, src;
10478 enum sched_fusion_type fusion = SCHED_FUSION_LD;
10479
10480 gcc_assert (INSN_P (insn));
10481 x = PATTERN (insn);
10482 if (GET_CODE (x) != SET)
10483 return SCHED_FUSION_NONE;
10484
10485 src = SET_SRC (x);
10486 dest = SET_DEST (x);
10487
10488 if (GET_MODE (src) != SImode && GET_MODE (src) != DImode
10489 && GET_MODE (src) != SFmode && GET_MODE (src) != DFmode)
10490 return SCHED_FUSION_NONE;
10491
10492 if (GET_CODE (src) == SIGN_EXTEND)
10493 {
10494 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
10495 src = XEXP (src, 0);
10496 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10497 return SCHED_FUSION_NONE;
10498 }
10499 else if (GET_CODE (src) == ZERO_EXTEND)
10500 {
10501 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
10502 src = XEXP (src, 0);
10503 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10504 return SCHED_FUSION_NONE;
10505 }
10506
10507 if (GET_CODE (src) == MEM && REG_P (dest))
10508 extract_base_offset_in_addr (src, base, offset);
10509 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
10510 {
10511 fusion = SCHED_FUSION_ST;
10512 extract_base_offset_in_addr (dest, base, offset);
10513 }
10514 else
10515 return SCHED_FUSION_NONE;
10516
10517 if (*base == NULL_RTX || *offset == NULL_RTX)
10518 fusion = SCHED_FUSION_NONE;
10519
10520 return fusion;
10521}
10522
10523/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
10524
 10525 Currently we only support fusing ldr or str instructions, so FUSION_PRI
 10526 and PRI are only calculated for these instructions. For other instructions,
 10527 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
 10528 types of instruction fusion can be added by returning different priorities.
10529
10530 It's important that irrelevant instructions get the largest FUSION_PRI. */
10531
10532static void
10533aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
10534 int *fusion_pri, int *pri)
10535{
10536 int tmp, off_val;
10537 rtx base, offset;
10538 enum sched_fusion_type fusion;
10539
10540 gcc_assert (INSN_P (insn));
10541
10542 tmp = max_pri - 1;
10543 fusion = fusion_load_store (insn, &base, &offset);
10544 if (fusion == SCHED_FUSION_NONE)
10545 {
10546 *pri = tmp;
10547 *fusion_pri = tmp;
10548 return;
10549 }
10550
10551 /* Set FUSION_PRI according to fusion type and base register. */
10552 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
10553
10554 /* Calculate PRI. */
10555 tmp /= 2;
10556
10557 /* INSN with smaller offset goes first. */
10558 off_val = (int)(INTVAL (offset));
10559 if (off_val >= 0)
10560 tmp -= (off_val & 0xfffff);
10561 else
10562 tmp += ((- off_val) & 0xfffff);
10563
10564 *pri = tmp;
10565 return;
10566}
10567
10568/* Given OPERANDS of consecutive load/store, check if we can merge
10569 them into ldp/stp. LOAD is true if they are load instructions.
10570 MODE is the mode of memory operands. */
10571
10572bool
10573aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
10574 enum machine_mode mode)
10575{
10576 HOST_WIDE_INT offval_1, offval_2, msize;
10577 enum reg_class rclass_1, rclass_2;
10578 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
10579
10580 if (load)
10581 {
10582 mem_1 = operands[1];
10583 mem_2 = operands[3];
10584 reg_1 = operands[0];
10585 reg_2 = operands[2];
10586 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
10587 if (REGNO (reg_1) == REGNO (reg_2))
10588 return false;
10589 }
10590 else
10591 {
10592 mem_1 = operands[0];
10593 mem_2 = operands[2];
10594 reg_1 = operands[1];
10595 reg_2 = operands[3];
10596 }
10597
10598 /* Check if the addresses are in the form of [base+offset]. */
10599 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
10600 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
10601 return false;
10602 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
10603 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
10604 return false;
10605
10606 /* Check if the bases are same. */
10607 if (!rtx_equal_p (base_1, base_2))
10608 return false;
10609
10610 offval_1 = INTVAL (offset_1);
10611 offval_2 = INTVAL (offset_2);
10612 msize = GET_MODE_SIZE (mode);
10613 /* Check if the offsets are consecutive. */
10614 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
10615 return false;
10616
10617 /* Check if the addresses are clobbered by load. */
10618 if (load)
10619 {
10620 if (reg_mentioned_p (reg_1, mem_1))
10621 return false;
10622
10623 /* In increasing order, the last load can clobber the address. */
10624 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
10625 return false;
10626 }
10627
10628 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
10629 rclass_1 = FP_REGS;
10630 else
10631 rclass_1 = GENERAL_REGS;
10632
10633 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
10634 rclass_2 = FP_REGS;
10635 else
10636 rclass_2 = GENERAL_REGS;
10637
10638 /* Check if the registers are of same class. */
10639 if (rclass_1 != rclass_2)
10640 return false;
10641
10642 return true;
10643}
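/* Illustrative sketch, not part of the original source: the kind of operand
   pair this check accepts comes from two adjacent same-class loads off one
   base register, e.g. the function below (names are assumptions), which the
   ldp/stp peepholes can then turn into something like
       ldp  w0, w1, [x0]
   instead of two separate ldr instructions.  */

int
example_sum_pair (const int *p)
{
  return p[0] + p[1];
}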
10644
10645/* Given OPERANDS of consecutive load/store, check if we can merge
10646 them into ldp/stp by adjusting the offset. LOAD is true if they
10647 are load instructions. MODE is the mode of memory operands.
10648
 10649 Given the consecutive stores below:
10650
10651 str w1, [xb, 0x100]
10652 str w1, [xb, 0x104]
10653 str w1, [xb, 0x108]
10654 str w1, [xb, 0x10c]
10655
10656 Though the offsets are out of the range supported by stp, we can
10657 still pair them after adjusting the offset, like:
10658
10659 add scratch, xb, 0x100
10660 stp w1, w1, [scratch]
10661 stp w1, w1, [scratch, 0x8]
10662
10663 The peephole patterns detecting this opportunity should guarantee
 10664 the scratch register is available. */
10665
10666bool
10667aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
10668 enum machine_mode mode)
10669{
10670 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
10671 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
10672 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
10673 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
10674
10675 if (load)
10676 {
10677 reg_1 = operands[0];
10678 mem_1 = operands[1];
10679 reg_2 = operands[2];
10680 mem_2 = operands[3];
10681 reg_3 = operands[4];
10682 mem_3 = operands[5];
10683 reg_4 = operands[6];
10684 mem_4 = operands[7];
10685 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
10686 && REG_P (reg_3) && REG_P (reg_4));
10687 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
10688 return false;
10689 }
10690 else
10691 {
10692 mem_1 = operands[0];
10693 reg_1 = operands[1];
10694 mem_2 = operands[2];
10695 reg_2 = operands[3];
10696 mem_3 = operands[4];
10697 reg_3 = operands[5];
10698 mem_4 = operands[6];
10699 reg_4 = operands[7];
10700 }
 10701 /* Skip if the memory operand is by itself valid for ldp/stp. */
10702 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
10703 return false;
10704
10705 /* Check if the addresses are in the form of [base+offset]. */
10706 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
10707 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
10708 return false;
10709 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
10710 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
10711 return false;
10712 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
10713 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
10714 return false;
10715 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
10716 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
10717 return false;
10718
10719 /* Check if the bases are same. */
10720 if (!rtx_equal_p (base_1, base_2)
10721 || !rtx_equal_p (base_2, base_3)
10722 || !rtx_equal_p (base_3, base_4))
10723 return false;
10724
10725 offval_1 = INTVAL (offset_1);
10726 offval_2 = INTVAL (offset_2);
10727 offval_3 = INTVAL (offset_3);
10728 offval_4 = INTVAL (offset_4);
10729 msize = GET_MODE_SIZE (mode);
10730 /* Check if the offsets are consecutive. */
10731 if ((offval_1 != (offval_2 + msize)
10732 || offval_1 != (offval_3 + msize * 2)
10733 || offval_1 != (offval_4 + msize * 3))
10734 && (offval_4 != (offval_3 + msize)
10735 || offval_4 != (offval_2 + msize * 2)
10736 || offval_4 != (offval_1 + msize * 3)))
10737 return false;
10738
10739 /* Check if the addresses are clobbered by load. */
10740 if (load)
10741 {
10742 if (reg_mentioned_p (reg_1, mem_1)
10743 || reg_mentioned_p (reg_2, mem_2)
10744 || reg_mentioned_p (reg_3, mem_3))
10745 return false;
10746
10747 /* In increasing order, the last load can clobber the address. */
10748 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
10749 return false;
10750 }
10751
10752 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
10753 rclass_1 = FP_REGS;
10754 else
10755 rclass_1 = GENERAL_REGS;
10756
10757 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
10758 rclass_2 = FP_REGS;
10759 else
10760 rclass_2 = GENERAL_REGS;
10761
10762 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
10763 rclass_3 = FP_REGS;
10764 else
10765 rclass_3 = GENERAL_REGS;
10766
10767 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
10768 rclass_4 = FP_REGS;
10769 else
10770 rclass_4 = GENERAL_REGS;
10771
10772 /* Check if the registers are of same class. */
10773 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
10774 return false;
10775
10776 return true;
10777}
10778
10779/* Given OPERANDS of consecutive load/store, this function pairs them
10780 into ldp/stp after adjusting the offset. It depends on the fact
10781 that addresses of load/store instructions are in increasing order.
10782 MODE is the mode of memory operands. CODE is the rtl operator
 10783 which should be applied to all memory operands; it is SIGN_EXTEND,
10784 ZERO_EXTEND or UNKNOWN. */
10785
10786bool
10787aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
10788 enum machine_mode mode, RTX_CODE code)
10789{
10790 rtx base, offset, t1, t2;
10791 rtx mem_1, mem_2, mem_3, mem_4;
10792 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
10793
10794 if (load)
10795 {
10796 mem_1 = operands[1];
10797 mem_2 = operands[3];
10798 mem_3 = operands[5];
10799 mem_4 = operands[7];
10800 }
10801 else
10802 {
10803 mem_1 = operands[0];
10804 mem_2 = operands[2];
10805 mem_3 = operands[4];
10806 mem_4 = operands[6];
10807 gcc_assert (code == UNKNOWN);
10808 }
10809
10810 extract_base_offset_in_addr (mem_1, &base, &offset);
10811 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
10812
 10813 /* Adjust the offset so that it fits in an ldp/stp instruction. */
10814 msize = GET_MODE_SIZE (mode);
10815 stp_off_limit = msize * 0x40;
10816 off_val = INTVAL (offset);
10817 abs_off = (off_val < 0) ? -off_val : off_val;
10818 new_off = abs_off % stp_off_limit;
10819 adj_off = abs_off - new_off;
10820
10821 /* Further adjust to make sure all offsets are OK. */
10822 if ((new_off + msize * 2) >= stp_off_limit)
10823 {
10824 adj_off += stp_off_limit;
10825 new_off -= stp_off_limit;
10826 }
10827
10828 /* Make sure the adjustment can be done with ADD/SUB instructions. */
10829 if (adj_off >= 0x1000)
10830 return false;
10831
10832 if (off_val < 0)
10833 {
10834 adj_off = -adj_off;
10835 new_off = -new_off;
10836 }
10837
10838 /* Create new memory references. */
10839 mem_1 = change_address (mem_1, VOIDmode,
10840 plus_constant (DImode, operands[8], new_off));
10841
10842 /* Check if the adjusted address is OK for ldp/stp. */
10843 if (!aarch64_mem_pair_operand (mem_1, mode))
10844 return false;
10845
10846 msize = GET_MODE_SIZE (mode);
10847 mem_2 = change_address (mem_2, VOIDmode,
10848 plus_constant (DImode,
10849 operands[8],
10850 new_off + msize));
10851 mem_3 = change_address (mem_3, VOIDmode,
10852 plus_constant (DImode,
10853 operands[8],
10854 new_off + msize * 2));
10855 mem_4 = change_address (mem_4, VOIDmode,
10856 plus_constant (DImode,
10857 operands[8],
10858 new_off + msize * 3));
10859
10860 if (code == ZERO_EXTEND)
10861 {
10862 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
10863 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
10864 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
10865 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
10866 }
10867 else if (code == SIGN_EXTEND)
10868 {
10869 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
10870 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
10871 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
10872 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
10873 }
10874
10875 if (load)
10876 {
10877 operands[1] = mem_1;
10878 operands[3] = mem_2;
10879 operands[5] = mem_3;
10880 operands[7] = mem_4;
10881 }
10882 else
10883 {
10884 operands[0] = mem_1;
10885 operands[2] = mem_2;
10886 operands[4] = mem_3;
10887 operands[6] = mem_4;
10888 }
10889
10890 /* Emit adjusting instruction. */
10891 emit_insn (gen_rtx_SET (VOIDmode, operands[8],
10892 plus_constant (DImode, base, adj_off)));
10893 /* Emit ldp/stp instructions. */
10894 t1 = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
10895 t2 = gen_rtx_SET (VOIDmode, operands[2], operands[3]);
10896 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
10897 t1 = gen_rtx_SET (VOIDmode, operands[4], operands[5]);
10898 t2 = gen_rtx_SET (VOIDmode, operands[6], operands[7]);
10899 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
10900 return true;
10901}
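/* Worked example (illustrative, not part of the original source): for the
   four SImode stores at offsets 0x100..0x10c quoted before
   aarch64_operands_adjust_ok_for_ldpstp, msize is 4, so stp_off_limit is
   4 * 0x40 = 0x100; off_val is 0x100, giving new_off = 0x100 % 0x100 = 0 and
   adj_off = 0x100.  The emitted sequence is therefore one ADD of 0x100 into
   the scratch register followed by two STPs at offsets 0 and 8 from it.  */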
10902
43e9d192
IB
10903#undef TARGET_ADDRESS_COST
10904#define TARGET_ADDRESS_COST aarch64_address_cost
10905
 10906/* This hook determines whether unnamed bitfields affect the alignment
10907 of the containing structure. The hook returns true if the structure
10908 should inherit the alignment requirements of an unnamed bitfield's
10909 type. */
10910#undef TARGET_ALIGN_ANON_BITFIELD
10911#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
10912
10913#undef TARGET_ASM_ALIGNED_DI_OP
10914#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
10915
10916#undef TARGET_ASM_ALIGNED_HI_OP
10917#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
10918
10919#undef TARGET_ASM_ALIGNED_SI_OP
10920#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
10921
10922#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10923#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
10924 hook_bool_const_tree_hwi_hwi_const_tree_true
10925
10926#undef TARGET_ASM_FILE_START
10927#define TARGET_ASM_FILE_START aarch64_start_file
10928
10929#undef TARGET_ASM_OUTPUT_MI_THUNK
10930#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
10931
10932#undef TARGET_ASM_SELECT_RTX_SECTION
10933#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
10934
10935#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
10936#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
10937
10938#undef TARGET_BUILD_BUILTIN_VA_LIST
10939#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
10940
10941#undef TARGET_CALLEE_COPIES
10942#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
10943
10944#undef TARGET_CAN_ELIMINATE
10945#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
10946
10947#undef TARGET_CANNOT_FORCE_CONST_MEM
10948#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
10949
10950#undef TARGET_CONDITIONAL_REGISTER_USAGE
10951#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
10952
10953/* Only the least significant bit is used for initialization guard
10954 variables. */
10955#undef TARGET_CXX_GUARD_MASK_BIT
10956#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
10957
10958#undef TARGET_C_MODE_FOR_SUFFIX
10959#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
10960
10961#ifdef TARGET_BIG_ENDIAN_DEFAULT
10962#undef TARGET_DEFAULT_TARGET_FLAGS
10963#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
10964#endif
10965
10966#undef TARGET_CLASS_MAX_NREGS
10967#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
10968
119103ca
JG
10969#undef TARGET_BUILTIN_DECL
10970#define TARGET_BUILTIN_DECL aarch64_builtin_decl
10971
43e9d192
IB
10972#undef TARGET_EXPAND_BUILTIN
10973#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
10974
10975#undef TARGET_EXPAND_BUILTIN_VA_START
10976#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
10977
9697e620
JG
10978#undef TARGET_FOLD_BUILTIN
10979#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
10980
43e9d192
IB
10981#undef TARGET_FUNCTION_ARG
10982#define TARGET_FUNCTION_ARG aarch64_function_arg
10983
10984#undef TARGET_FUNCTION_ARG_ADVANCE
10985#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
10986
10987#undef TARGET_FUNCTION_ARG_BOUNDARY
10988#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
10989
10990#undef TARGET_FUNCTION_OK_FOR_SIBCALL
10991#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
10992
10993#undef TARGET_FUNCTION_VALUE
10994#define TARGET_FUNCTION_VALUE aarch64_function_value
10995
10996#undef TARGET_FUNCTION_VALUE_REGNO_P
10997#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
10998
10999#undef TARGET_FRAME_POINTER_REQUIRED
11000#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
11001
fc72cba7
AL
11002#undef TARGET_GIMPLE_FOLD_BUILTIN
11003#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 11004
43e9d192
IB
11005#undef TARGET_GIMPLIFY_VA_ARG_EXPR
11006#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
11007
11008#undef TARGET_INIT_BUILTINS
11009#define TARGET_INIT_BUILTINS aarch64_init_builtins
11010
11011#undef TARGET_LEGITIMATE_ADDRESS_P
11012#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
11013
11014#undef TARGET_LEGITIMATE_CONSTANT_P
11015#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
11016
11017#undef TARGET_LIBGCC_CMP_RETURN_MODE
11018#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
11019
38e8f663
YR
11020#undef TARGET_LRA_P
11021#define TARGET_LRA_P aarch64_lra_p
11022
ac2b960f
YZ
11023#undef TARGET_MANGLE_TYPE
11024#define TARGET_MANGLE_TYPE aarch64_mangle_type
11025
43e9d192
IB
11026#undef TARGET_MEMORY_MOVE_COST
11027#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
11028
11029#undef TARGET_MUST_PASS_IN_STACK
11030#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
11031
11032/* This target hook should return true if accesses to volatile bitfields
11033 should use the narrowest mode possible. It should return false if these
11034 accesses should use the bitfield container type. */
11035#undef TARGET_NARROW_VOLATILE_BITFIELD
11036#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
11037
11038#undef TARGET_OPTION_OVERRIDE
11039#define TARGET_OPTION_OVERRIDE aarch64_override_options
11040
11041#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
11042#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
11043 aarch64_override_options_after_change
11044
11045#undef TARGET_PASS_BY_REFERENCE
11046#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
11047
11048#undef TARGET_PREFERRED_RELOAD_CLASS
11049#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
11050
cee66c68
WD
11051#undef TARGET_SCHED_REASSOCIATION_WIDTH
11052#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
11053
43e9d192
IB
11054#undef TARGET_SECONDARY_RELOAD
11055#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
11056
11057#undef TARGET_SHIFT_TRUNCATION_MASK
11058#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
11059
11060#undef TARGET_SETUP_INCOMING_VARARGS
11061#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
11062
11063#undef TARGET_STRUCT_VALUE_RTX
11064#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
11065
11066#undef TARGET_REGISTER_MOVE_COST
11067#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
11068
11069#undef TARGET_RETURN_IN_MEMORY
11070#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
11071
11072#undef TARGET_RETURN_IN_MSB
11073#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
11074
11075#undef TARGET_RTX_COSTS
7cc2145f 11076#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 11077
d126a4ae
AP
11078#undef TARGET_SCHED_ISSUE_RATE
11079#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
11080
43e9d192
IB
11081#undef TARGET_TRAMPOLINE_INIT
11082#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
11083
11084#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
11085#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
11086
11087#undef TARGET_VECTOR_MODE_SUPPORTED_P
11088#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
11089
11090#undef TARGET_ARRAY_MODE_SUPPORTED_P
11091#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
11092
8990e73a
TB
11093#undef TARGET_VECTORIZE_ADD_STMT_COST
11094#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
11095
11096#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
11097#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
11098 aarch64_builtin_vectorization_cost
11099
43e9d192
IB
11100#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
11101#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
11102
42fc9a7f
JG
11103#undef TARGET_VECTORIZE_BUILTINS
11104#define TARGET_VECTORIZE_BUILTINS
11105
11106#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
11107#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
11108 aarch64_builtin_vectorized_function
11109
3b357264
JG
11110#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
11111#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
11112 aarch64_autovectorize_vector_sizes
11113
aa87aced
KV
11114#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
11115#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
11116 aarch64_atomic_assign_expand_fenv
11117
43e9d192
IB
11118/* Section anchor support. */
11119
11120#undef TARGET_MIN_ANCHOR_OFFSET
11121#define TARGET_MIN_ANCHOR_OFFSET -256
11122
11123/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
11124 byte offset; we can do much more for larger data types, but have no way
11125 to determine the size of the access. We assume accesses are aligned. */
11126#undef TARGET_MAX_ANCHOR_OFFSET
11127#define TARGET_MAX_ANCHOR_OFFSET 4095
11128
db0253a4
TB
11129#undef TARGET_VECTOR_ALIGNMENT
11130#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
11131
11132#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
11133#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
11134 aarch64_simd_vector_alignment_reachable
11135
88b08073
JG
11136/* vec_perm support. */
11137
11138#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
11139#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
11140 aarch64_vectorize_vec_perm_const_ok
11141
70f09188 11142
706b2314 11143#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
11144#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
11145
5cb74e90
RR
11146#undef TARGET_FLAGS_REGNUM
11147#define TARGET_FLAGS_REGNUM CC_REGNUM
11148
78607708
TV
11149#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
11150#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
11151
a3125fc2
CL
11152#undef TARGET_ASAN_SHADOW_OFFSET
11153#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
11154
0c4ec427
RE
11155#undef TARGET_LEGITIMIZE_ADDRESS
11156#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
11157
d3006da6
JG
11158#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
11159#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
11160 aarch64_use_by_pieces_infrastructure_p
11161
594bdd53
FY
11162#undef TARGET_CAN_USE_DOLOOP_P
11163#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
11164
6a569cdd
KT
11165#undef TARGET_SCHED_MACRO_FUSION_P
11166#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
11167
11168#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
11169#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
11170
350013bc
BC
11171#undef TARGET_SCHED_FUSION_PRIORITY
11172#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
11173
43e9d192
IB
11174struct gcc_target targetm = TARGET_INITIALIZER;
11175
11176#include "gt-aarch64.h"