/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2015 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-codes.h"
#include "rtl.h"
#include "insn-attr.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "regs.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "predict.h"
#include "basic-block.h"
#include "df.h"
#include "hard-reg-set.h"
#include "output.h"
#include "hashtab.h"
#include "function.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "reload.h"
#include "toplev.h"
#include "target.h"
#include "target-def.h"
#include "targhooks.h"
#include "ggc.h"
#include "tm_p.h"
#include "recog.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "optabs.h"
#include "dwarf2.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "cortex-a57-fma-steering.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC:
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
						     const_tree,
						     machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
						 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
  {
    0, /* hi  */
    0, /* si  */
    0, /* di  */
    0, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_extend  */
  0 /* imm_offset  */
};

static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_extend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  1, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  1, /* register_extend  */
  0, /* imm_offset  */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP  */
  2, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  2 /* FP2FP  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_stmt_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_stmt_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_stmt_cost  */
  4, /* vec_to_scalar_cost  */
  4, /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2, /* vec_unalign_store_cost  */
  2, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

#define AARCH64_FUSE_NOTHING	(0)
#define AARCH64_FUSE_MOV_MOVK	(1 << 0)
#define AARCH64_FUSE_ADRP_ADD	(1 << 1)
#define AARCH64_FUSE_MOVK_MOVK	(1 << 2)
#define AARCH64_FUSE_ADRP_LDR	(1 << 3)
#define AARCH64_FUSE_CMP_BRANCH	(1 << 4)

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fuseable_ops  */
  8,	/* function_align.  */
  8,	/* jump_align.  */
  4,	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1	/* vec_reassoc_width.  */
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fuseable_ops  */
  8,	/* function_align.  */
  8,	/* jump_align.  */
  4,	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1	/* vec_reassoc_width.  */
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops  */
  16,	/* function_align.  */
  8,	/* jump_align.  */
  4,	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1	/* vec_reassoc_width.  */
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &generic_vector_cost,
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fuseable_ops  */
  8,	/* function_align.  */
  8,	/* jump_align.  */
  8,	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1	/* vec_reassoc_width.  */
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fuseable_ops  */
  16,	/* function_align.  */
  8,	/* jump_align.  */
  16,	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1	/* vec_reassoc_width.  */
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
  {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, NULL, 0, 0, NULL}
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, NULL, 0, 0, NULL}
};

/* Target specification.  These are populated as commandline arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};

488/* Used to track the size of an address when generating a pre/post
489 increment address. */
ef4bddc2 490static machine_mode aarch64_memory_reference_mode;
43e9d192 491
43e9d192
IB
492/* A table of valid AArch64 "bitmask immediate" values for
493 logical instructions. */
494
495#define AARCH64_NUM_BITMASKS 5334
496static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
497
43e9d192
IB
498typedef enum aarch64_cond_code
499{
500 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
501 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
502 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
503}
504aarch64_cc;
505
506#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
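/* Flipping the low bit of a condition code gives its inverse; for example
   AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ) is AARCH64_NE and
   AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) is AARCH64_LT, since the
   codes above are laid out in inverse pairs.  */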
507
508/* The condition codes of the processor, and the inverse function. */
509static const char * const aarch64_condition_codes[] =
510{
511 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
512 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
513};
514
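/* Implement TARGET_MIN_DIVISIONS_FOR_RECIP_MUL: require at least two
   divisions by the same divisor before the reciprocal-multiplication
   optimization is applied, regardless of the mode.  */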
515static unsigned int
516aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
517{
518 return 2;
519}
520
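/* Implement TARGET_SCHED_REASSOCIATION_WIDTH: return the number of parallel
   reassociation chains to expose for operation OPC in MODE, taken from the
   current tuning parameters.  */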
521static int
522aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
523 enum machine_mode mode)
524{
525 if (VECTOR_MODE_P (mode))
526 return aarch64_tune_params->vec_reassoc_width;
527 if (INTEGRAL_MODE_P (mode))
528 return aarch64_tune_params->int_reassoc_width;
529 if (FLOAT_MODE_P (mode))
530 return aarch64_tune_params->fp_reassoc_width;
531 return 1;
532}
533
43e9d192
IB
534/* Provide a mapping from gcc register numbers to dwarf register numbers. */
535unsigned
536aarch64_dbx_register_number (unsigned regno)
537{
538 if (GP_REGNUM_P (regno))
539 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
540 else if (regno == SP_REGNUM)
541 return AARCH64_DWARF_SP;
542 else if (FP_REGNUM_P (regno))
543 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
544
545 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
546 equivalent DWARF register. */
547 return DWARF_FRAME_REGISTERS;
548}
549
550/* Return TRUE if MODE is any of the large INT modes. */
551static bool
ef4bddc2 552aarch64_vect_struct_mode_p (machine_mode mode)
43e9d192
IB
553{
554 return mode == OImode || mode == CImode || mode == XImode;
555}
556
557/* Return TRUE if MODE is any of the vector modes. */
558static bool
ef4bddc2 559aarch64_vector_mode_p (machine_mode mode)
43e9d192
IB
560{
561 return aarch64_vector_mode_supported_p (mode)
562 || aarch64_vect_struct_mode_p (mode);
563}
564
565/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
566static bool
ef4bddc2 567aarch64_array_mode_supported_p (machine_mode mode,
43e9d192
IB
568 unsigned HOST_WIDE_INT nelems)
569{
570 if (TARGET_SIMD
571 && AARCH64_VALID_SIMD_QREG_MODE (mode)
572 && (nelems >= 2 && nelems <= 4))
573 return true;
574
575 return false;
576}
577
578/* Implement HARD_REGNO_NREGS. */
579
580int
ef4bddc2 581aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
43e9d192
IB
582{
583 switch (aarch64_regno_regclass (regno))
584 {
585 case FP_REGS:
586 case FP_LO_REGS:
587 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
588 default:
589 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
590 }
591 gcc_unreachable ();
592}
593
594/* Implement HARD_REGNO_MODE_OK. */
595
596int
ef4bddc2 597aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
43e9d192
IB
598{
599 if (GET_MODE_CLASS (mode) == MODE_CC)
600 return regno == CC_REGNUM;
601
9259db42
YZ
602 if (regno == SP_REGNUM)
603 /* The purpose of comparing with ptr_mode is to support the
604 global register variable associated with the stack pointer
605 register via the syntax of asm ("wsp") in ILP32. */
606 return mode == Pmode || mode == ptr_mode;
607
608 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
43e9d192
IB
609 return mode == Pmode;
610
611 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
612 return 1;
613
614 if (FP_REGNUM_P (regno))
615 {
616 if (aarch64_vect_struct_mode_p (mode))
617 return
618 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
619 else
620 return 1;
621 }
622
623 return 0;
624}
625
73d9ac6a 626/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
ef4bddc2 627machine_mode
73d9ac6a 628aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
ef4bddc2 629 machine_mode mode)
73d9ac6a
IB
630{
631 /* Handle modes that fit within single registers. */
632 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
633 {
634 if (GET_MODE_SIZE (mode) >= 4)
635 return mode;
636 else
637 return SImode;
638 }
639 /* Fall back to generic for multi-reg and very large modes. */
640 else
641 return choose_hard_reg_mode (regno, nregs, false);
642}
643
43e9d192
IB
644/* Return true if calls to DECL should be treated as
645 long-calls (ie called via a register). */
646static bool
647aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
648{
649 return false;
650}
651
652/* Return true if calls to symbol-ref SYM should be treated as
653 long-calls (ie called via a register). */
654bool
655aarch64_is_long_call_p (rtx sym)
656{
657 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
658}
659
660/* Return true if the offsets to a zero/sign-extract operation
661 represent an expression that matches an extend operation. The
   operands represent the parameters from
663
4745e701 664 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
43e9d192 665bool
ef4bddc2 666aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
43e9d192
IB
667 rtx extract_imm)
668{
669 HOST_WIDE_INT mult_val, extract_val;
670
671 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
672 return false;
673
674 mult_val = INTVAL (mult_imm);
675 extract_val = INTVAL (extract_imm);
676
677 if (extract_val > 8
678 && extract_val < GET_MODE_BITSIZE (mode)
679 && exact_log2 (extract_val & ~7) > 0
680 && (extract_val & 7) <= 4
681 && mult_val == (1 << (extract_val & 7)))
682 return true;
683
684 return false;
685}
686
687/* Emit an insn that's a simple single-set. Both the operands must be
688 known to be valid. */
689inline static rtx
690emit_set_insn (rtx x, rtx y)
691{
692 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
693}
694
695/* X and Y are two things to compare using CODE. Emit the compare insn and
696 return the rtx for register 0 in the proper mode. */
697rtx
698aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
699{
ef4bddc2 700 machine_mode mode = SELECT_CC_MODE (code, x, y);
43e9d192
IB
701 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
702
703 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
704 return cc_reg;
705}
706
707/* Build the SYMBOL_REF for __tls_get_addr. */
708
709static GTY(()) rtx tls_get_addr_libfunc;
710
711rtx
712aarch64_tls_get_addr (void)
713{
714 if (!tls_get_addr_libfunc)
715 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
716 return tls_get_addr_libfunc;
717}
718
719/* Return the TLS model to use for ADDR. */
720
721static enum tls_model
722tls_symbolic_operand_type (rtx addr)
723{
724 enum tls_model tls_kind = TLS_MODEL_NONE;
725 rtx sym, addend;
726
727 if (GET_CODE (addr) == CONST)
728 {
729 split_const (addr, &sym, &addend);
730 if (GET_CODE (sym) == SYMBOL_REF)
731 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
732 }
733 else if (GET_CODE (addr) == SYMBOL_REF)
734 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
735
736 return tls_kind;
737}
738
739/* We'll allow lo_sum's in addresses in our legitimate addresses
740 so that combine would take care of combining addresses where
741 necessary, but for generation purposes, we'll generate the address
742 as :
743 RTL Absolute
744 tmp = hi (symbol_ref); adrp x1, foo
745 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
746 nop
747
748 PIC TLS
749 adrp x1, :got:foo adrp tmp, :tlsgd:foo
750 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
751 bl __tls_get_addr
752 nop
753
754 Load TLS symbol, depending on TLS mechanism and TLS access model.
755
756 Global Dynamic - Traditional TLS:
757 adrp tmp, :tlsgd:imm
758 add dest, tmp, #:tlsgd_lo12:imm
759 bl __tls_get_addr
760
761 Global Dynamic - TLS Descriptors:
762 adrp dest, :tlsdesc:imm
763 ldr tmp, [dest, #:tlsdesc_lo12:imm]
764 add dest, dest, #:tlsdesc_lo12:imm
765 blr tmp
766 mrs tp, tpidr_el0
767 add dest, dest, tp
768
769 Initial Exec:
770 mrs tp, tpidr_el0
771 adrp tmp, :gottprel:imm
772 ldr dest, [tmp, #:gottprel_lo12:imm]
773 add dest, dest, tp
774
775 Local Exec:
776 mrs tp, tpidr_el0
0699caae
RL
777 add t0, tp, #:tprel_hi12:imm, lsl #12
778 add t0, t0, #:tprel_lo12_nc:imm
43e9d192
IB
779*/
780
781static void
782aarch64_load_symref_appropriately (rtx dest, rtx imm,
783 enum aarch64_symbol_type type)
784{
785 switch (type)
786 {
787 case SYMBOL_SMALL_ABSOLUTE:
788 {
28514dda 789 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 790 rtx tmp_reg = dest;
ef4bddc2 791 machine_mode mode = GET_MODE (dest);
28514dda
YZ
792
793 gcc_assert (mode == Pmode || mode == ptr_mode);
794
43e9d192 795 if (can_create_pseudo_p ())
28514dda 796 tmp_reg = gen_reg_rtx (mode);
43e9d192 797
28514dda 798 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
43e9d192
IB
799 emit_insn (gen_add_losym (dest, tmp_reg, imm));
800 return;
801 }
802
a5350ddc
CSS
803 case SYMBOL_TINY_ABSOLUTE:
804 emit_insn (gen_rtx_SET (Pmode, dest, imm));
805 return;
806
43e9d192
IB
807 case SYMBOL_SMALL_GOT:
808 {
28514dda
YZ
809 /* In ILP32, the mode of dest can be either SImode or DImode,
810 while the got entry is always of SImode size. The mode of
811 dest depends on how dest is used: if dest is assigned to a
812 pointer (e.g. in the memory), it has SImode; it may have
	 DImode if dest is dereferenced to access the memory.
814 This is why we have to handle three different ldr_got_small
815 patterns here (two patterns for ILP32). */
43e9d192 816 rtx tmp_reg = dest;
ef4bddc2 817 machine_mode mode = GET_MODE (dest);
28514dda 818
43e9d192 819 if (can_create_pseudo_p ())
28514dda
YZ
820 tmp_reg = gen_reg_rtx (mode);
821
822 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
823 if (mode == ptr_mode)
824 {
825 if (mode == DImode)
826 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
827 else
828 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
829 }
830 else
831 {
832 gcc_assert (mode == Pmode);
833 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
834 }
835
43e9d192
IB
836 return;
837 }
838
839 case SYMBOL_SMALL_TLSGD:
840 {
5d8a22a5 841 rtx_insn *insns;
43e9d192
IB
842 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
843
844 start_sequence ();
78607708 845 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
43e9d192
IB
846 insns = get_insns ();
847 end_sequence ();
848
849 RTL_CONST_CALL_P (insns) = 1;
850 emit_libcall_block (insns, dest, result, imm);
851 return;
852 }
853
854 case SYMBOL_SMALL_TLSDESC:
855 {
ef4bddc2 856 machine_mode mode = GET_MODE (dest);
621ad2de 857 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
858 rtx tp;
859
621ad2de
AP
860 gcc_assert (mode == Pmode || mode == ptr_mode);
861
862 /* In ILP32, the got entry is always of SImode size. Unlike
863 small GOT, the dest is fixed at reg 0. */
864 if (TARGET_ILP32)
865 emit_insn (gen_tlsdesc_small_si (imm));
866 else
867 emit_insn (gen_tlsdesc_small_di (imm));
43e9d192 868 tp = aarch64_load_tp (NULL);
621ad2de
AP
869
870 if (mode != Pmode)
871 tp = gen_lowpart (mode, tp);
872
873 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
43e9d192
IB
874 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
875 return;
876 }
877
878 case SYMBOL_SMALL_GOTTPREL:
879 {
621ad2de
AP
880 /* In ILP32, the mode of dest can be either SImode or DImode,
881 while the got entry is always of SImode size. The mode of
882 dest depends on how dest is used: if dest is assigned to a
883 pointer (e.g. in the memory), it has SImode; it may have
	 DImode if dest is dereferenced to access the memory.
885 This is why we have to handle three different tlsie_small
886 patterns here (two patterns for ILP32). */
ef4bddc2 887 machine_mode mode = GET_MODE (dest);
621ad2de 888 rtx tmp_reg = gen_reg_rtx (mode);
43e9d192 889 rtx tp = aarch64_load_tp (NULL);
621ad2de
AP
890
891 if (mode == ptr_mode)
892 {
893 if (mode == DImode)
894 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
895 else
896 {
897 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
898 tp = gen_lowpart (mode, tp);
899 }
900 }
901 else
902 {
903 gcc_assert (mode == Pmode);
904 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
905 }
906
907 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
43e9d192
IB
908 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
909 return;
910 }
911
912 case SYMBOL_SMALL_TPREL:
913 {
914 rtx tp = aarch64_load_tp (NULL);
e6f7f0e9
AP
915
916 if (GET_MODE (dest) != Pmode)
917 tp = gen_lowpart (GET_MODE (dest), tp);
918
43e9d192
IB
919 emit_insn (gen_tlsle_small (dest, tp, imm));
920 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
921 return;
922 }
923
87dd8ab0
MS
924 case SYMBOL_TINY_GOT:
925 emit_insn (gen_ldr_got_tiny (dest, imm));
926 return;
927
43e9d192
IB
928 default:
929 gcc_unreachable ();
930 }
931}
932
933/* Emit a move from SRC to DEST. Assume that the move expanders can
934 handle all moves if !can_create_pseudo_p (). The distinction is
935 important because, unlike emit_move_insn, the move expanders know
936 how to force Pmode objects into the constant pool even when the
937 constant pool address is not itself legitimate. */
938static rtx
939aarch64_emit_move (rtx dest, rtx src)
940{
941 return (can_create_pseudo_p ()
942 ? emit_move_insn (dest, src)
943 : emit_move_insn_1 (dest, src));
944}
945
030d03b8
RE
946/* Split a 128-bit move operation into two 64-bit move operations,
947 taking care to handle partial overlap of register to register
948 copies. Special cases are needed when moving between GP regs and
949 FP regs. SRC can be a register, constant or memory; DST a register
950 or memory. If either operand is memory it must not have any side
951 effects. */
43e9d192
IB
952void
953aarch64_split_128bit_move (rtx dst, rtx src)
954{
030d03b8
RE
955 rtx dst_lo, dst_hi;
956 rtx src_lo, src_hi;
43e9d192 957
ef4bddc2 958 machine_mode mode = GET_MODE (dst);
12dc6974 959
030d03b8
RE
960 gcc_assert (mode == TImode || mode == TFmode);
961 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
962 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
43e9d192
IB
963
964 if (REG_P (dst) && REG_P (src))
965 {
030d03b8
RE
966 int src_regno = REGNO (src);
967 int dst_regno = REGNO (dst);
43e9d192 968
030d03b8 969 /* Handle FP <-> GP regs. */
43e9d192
IB
970 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
971 {
030d03b8
RE
972 src_lo = gen_lowpart (word_mode, src);
973 src_hi = gen_highpart (word_mode, src);
974
975 if (mode == TImode)
976 {
977 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
978 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
979 }
980 else
981 {
982 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
983 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
984 }
985 return;
43e9d192
IB
986 }
987 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
988 {
030d03b8
RE
989 dst_lo = gen_lowpart (word_mode, dst);
990 dst_hi = gen_highpart (word_mode, dst);
991
992 if (mode == TImode)
993 {
994 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
995 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
996 }
997 else
998 {
999 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
1000 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
1001 }
1002 return;
43e9d192 1003 }
43e9d192
IB
1004 }
1005
030d03b8
RE
1006 dst_lo = gen_lowpart (word_mode, dst);
1007 dst_hi = gen_highpart (word_mode, dst);
1008 src_lo = gen_lowpart (word_mode, src);
1009 src_hi = gen_highpart_mode (word_mode, mode, src);
1010
1011 /* At most one pairing may overlap. */
1012 if (reg_overlap_mentioned_p (dst_lo, src_hi))
1013 {
1014 aarch64_emit_move (dst_hi, src_hi);
1015 aarch64_emit_move (dst_lo, src_lo);
1016 }
1017 else
1018 {
1019 aarch64_emit_move (dst_lo, src_lo);
1020 aarch64_emit_move (dst_hi, src_hi);
1021 }
43e9d192
IB
1022}
1023
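/* Return true if a 128-bit move from SRC to DST should be split into a pair
   of word-sized moves; a direct copy between two FP/SIMD registers does not
   need splitting.  */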
1024bool
1025aarch64_split_128bit_move_p (rtx dst, rtx src)
1026{
1027 return (! REG_P (src)
1028 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
1029}
1030
8b033a8a
SN
1031/* Split a complex SIMD combine. */
1032
1033void
1034aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
1035{
ef4bddc2
RS
1036 machine_mode src_mode = GET_MODE (src1);
1037 machine_mode dst_mode = GET_MODE (dst);
8b033a8a
SN
1038
1039 gcc_assert (VECTOR_MODE_P (dst_mode));
1040
1041 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
1042 {
1043 rtx (*gen) (rtx, rtx, rtx);
1044
1045 switch (src_mode)
1046 {
1047 case V8QImode:
1048 gen = gen_aarch64_simd_combinev8qi;
1049 break;
1050 case V4HImode:
1051 gen = gen_aarch64_simd_combinev4hi;
1052 break;
1053 case V2SImode:
1054 gen = gen_aarch64_simd_combinev2si;
1055 break;
1056 case V2SFmode:
1057 gen = gen_aarch64_simd_combinev2sf;
1058 break;
1059 case DImode:
1060 gen = gen_aarch64_simd_combinedi;
1061 break;
1062 case DFmode:
1063 gen = gen_aarch64_simd_combinedf;
1064 break;
1065 default:
1066 gcc_unreachable ();
1067 }
1068
1069 emit_insn (gen (dst, src1, src2));
1070 return;
1071 }
1072}
1073
fd4842cd
SN
1074/* Split a complex SIMD move. */
1075
1076void
1077aarch64_split_simd_move (rtx dst, rtx src)
1078{
ef4bddc2
RS
1079 machine_mode src_mode = GET_MODE (src);
1080 machine_mode dst_mode = GET_MODE (dst);
fd4842cd
SN
1081
1082 gcc_assert (VECTOR_MODE_P (dst_mode));
1083
1084 if (REG_P (dst) && REG_P (src))
1085 {
c59b7e28
SN
1086 rtx (*gen) (rtx, rtx);
1087
fd4842cd
SN
1088 gcc_assert (VECTOR_MODE_P (src_mode));
1089
1090 switch (src_mode)
1091 {
1092 case V16QImode:
c59b7e28 1093 gen = gen_aarch64_split_simd_movv16qi;
fd4842cd
SN
1094 break;
1095 case V8HImode:
c59b7e28 1096 gen = gen_aarch64_split_simd_movv8hi;
fd4842cd
SN
1097 break;
1098 case V4SImode:
c59b7e28 1099 gen = gen_aarch64_split_simd_movv4si;
fd4842cd
SN
1100 break;
1101 case V2DImode:
c59b7e28 1102 gen = gen_aarch64_split_simd_movv2di;
fd4842cd
SN
1103 break;
1104 case V4SFmode:
c59b7e28 1105 gen = gen_aarch64_split_simd_movv4sf;
fd4842cd
SN
1106 break;
1107 case V2DFmode:
c59b7e28 1108 gen = gen_aarch64_split_simd_movv2df;
fd4842cd
SN
1109 break;
1110 default:
1111 gcc_unreachable ();
1112 }
c59b7e28
SN
1113
1114 emit_insn (gen (dst, src));
fd4842cd
SN
1115 return;
1116 }
1117}
1118
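/* Return a register holding VALUE in MODE: a fresh pseudo when one can be
   created, otherwise X after VALUE has been moved into it.  */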
43e9d192 1119static rtx
ef4bddc2 1120aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
43e9d192
IB
1121{
1122 if (can_create_pseudo_p ())
e18b4a81 1123 return force_reg (mode, value);
43e9d192
IB
1124 else
1125 {
1126 x = aarch64_emit_move (x, value);
1127 return x;
1128 }
1129}
1130
1131
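/* Return an rtx equivalent to REG plus OFFSET in MODE.  If OFFSET is not a
   valid addition immediate, first force it into a temporary (TEMP, or a
   fresh pseudo when available) and add it to REG there.  */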
1132static rtx
ef4bddc2 1133aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
43e9d192 1134{
9c023bf0 1135 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
43e9d192
IB
1136 {
1137 rtx high;
1138 /* Load the full offset into a register. This
1139 might be improvable in the future. */
1140 high = GEN_INT (offset);
1141 offset = 0;
e18b4a81
YZ
1142 high = aarch64_force_temporary (mode, temp, high);
1143 reg = aarch64_force_temporary (mode, temp,
1144 gen_rtx_PLUS (mode, high, reg));
43e9d192
IB
1145 }
1146 return plus_constant (mode, reg, offset);
1147}
1148
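/* Set DEST to the integer constant IMM, using a sequence of moves, bitfield
   inserts and arithmetic/logical operations, and return the number of
   instructions in that sequence.  The instructions are only emitted when
   GENERATE is true; with GENERATE false the function just counts them, so it
   can also be used to cost an immediate.  */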
1149static int
1150aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
1151 machine_mode mode)
43e9d192 1152{
43e9d192
IB
1153 unsigned HOST_WIDE_INT mask;
1154 int i;
1155 bool first;
1156 unsigned HOST_WIDE_INT val;
1157 bool subtargets;
1158 rtx subtarget;
c747993a 1159 int one_match, zero_match, first_not_ffff_match;
82614948 1160 int num_insns = 0;
43e9d192
IB
1161
1162 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1163 {
82614948 1164 if (generate)
43e9d192 1165 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
82614948
RR
1166 num_insns++;
1167 return num_insns;
43e9d192
IB
1168 }
1169
1170 if (mode == SImode)
1171 {
1172 /* We know we can't do this in 1 insn, and we must be able to do it
1173 in two; so don't mess around looking for sequences that don't buy
1174 us anything. */
82614948
RR
1175 if (generate)
1176 {
1177 emit_insn (gen_rtx_SET (VOIDmode, dest,
1178 GEN_INT (INTVAL (imm) & 0xffff)));
1179 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1180 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1181 }
1182 num_insns += 2;
1183 return num_insns;
43e9d192
IB
1184 }
1185
1186 /* Remaining cases are all for DImode. */
1187
1188 val = INTVAL (imm);
1189 subtargets = optimize && can_create_pseudo_p ();
1190
1191 one_match = 0;
1192 zero_match = 0;
1193 mask = 0xffff;
c747993a 1194 first_not_ffff_match = -1;
43e9d192
IB
1195
1196 for (i = 0; i < 64; i += 16, mask <<= 16)
1197 {
c747993a 1198 if ((val & mask) == mask)
43e9d192 1199 one_match++;
c747993a
IB
1200 else
1201 {
1202 if (first_not_ffff_match < 0)
1203 first_not_ffff_match = i;
1204 if ((val & mask) == 0)
1205 zero_match++;
1206 }
43e9d192
IB
1207 }
1208
1209 if (one_match == 2)
1210 {
c747993a
IB
1211 /* Set one of the quarters and then insert back into result. */
1212 mask = 0xffffll << first_not_ffff_match;
82614948
RR
1213 if (generate)
1214 {
1215 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1216 emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
1217 GEN_INT ((val >> first_not_ffff_match)
1218 & 0xffff)));
1219 }
1220 num_insns += 2;
1221 return num_insns;
c747993a
IB
1222 }
1223
43e9d192
IB
1224 if (zero_match == 2)
1225 goto simple_sequence;
1226
1227 mask = 0x0ffff0000UL;
1228 for (i = 16; i < 64; i += 16, mask <<= 16)
1229 {
1230 HOST_WIDE_INT comp = mask & ~(mask - 1);
1231
1232 if (aarch64_uimm12_shift (val - (val & mask)))
1233 {
82614948
RR
1234 if (generate)
1235 {
1236 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1237 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1238 GEN_INT (val & mask)));
1239 emit_insn (gen_adddi3 (dest, subtarget,
1240 GEN_INT (val - (val & mask))));
1241 }
1242 num_insns += 2;
1243 return num_insns;
43e9d192
IB
1244 }
1245 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1246 {
82614948
RR
1247 if (generate)
1248 {
1249 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1250 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1251 GEN_INT ((val + comp) & mask)));
1252 emit_insn (gen_adddi3 (dest, subtarget,
1253 GEN_INT (val - ((val + comp) & mask))));
1254 }
1255 num_insns += 2;
1256 return num_insns;
43e9d192
IB
1257 }
1258 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1259 {
82614948
RR
1260 if (generate)
1261 {
1262 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1263 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1264 GEN_INT ((val - comp) | ~mask)));
1265 emit_insn (gen_adddi3 (dest, subtarget,
1266 GEN_INT (val - ((val - comp) | ~mask))));
1267 }
1268 num_insns += 2;
1269 return num_insns;
43e9d192
IB
1270 }
1271 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1272 {
82614948
RR
1273 if (generate)
1274 {
1275 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1276 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1277 GEN_INT (val | ~mask)));
1278 emit_insn (gen_adddi3 (dest, subtarget,
1279 GEN_INT (val - (val | ~mask))));
1280 }
1281 num_insns += 2;
1282 return num_insns;
43e9d192
IB
1283 }
1284 }
1285
1286 /* See if we can do it by arithmetically combining two
1287 immediates. */
1288 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1289 {
1290 int j;
1291 mask = 0xffff;
1292
1293 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1294 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1295 {
82614948
RR
1296 if (generate)
1297 {
1298 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1299 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1300 GEN_INT (aarch64_bitmasks[i])));
1301 emit_insn (gen_adddi3 (dest, subtarget,
1302 GEN_INT (val - aarch64_bitmasks[i])));
1303 }
1304 num_insns += 2;
1305 return num_insns;
43e9d192
IB
1306 }
1307
1308 for (j = 0; j < 64; j += 16, mask <<= 16)
1309 {
1310 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1311 {
82614948
RR
1312 if (generate)
1313 {
1314 emit_insn (gen_rtx_SET (VOIDmode, dest,
1315 GEN_INT (aarch64_bitmasks[i])));
1316 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1317 GEN_INT ((val >> j) & 0xffff)));
1318 }
1319 num_insns += 2;
1320 return num_insns;
43e9d192
IB
1321 }
1322 }
1323 }
1324
1325 /* See if we can do it by logically combining two immediates. */
1326 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1327 {
1328 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1329 {
1330 int j;
1331
1332 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1333 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1334 {
82614948
RR
1335 if (generate)
1336 {
1337 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1338 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1339 GEN_INT (aarch64_bitmasks[i])));
1340 emit_insn (gen_iordi3 (dest, subtarget,
1341 GEN_INT (aarch64_bitmasks[j])));
1342 }
1343 num_insns += 2;
1344 return num_insns;
43e9d192
IB
1345 }
1346 }
1347 else if ((val & aarch64_bitmasks[i]) == val)
1348 {
1349 int j;
1350
1351 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1352 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1353 {
82614948
RR
1354 if (generate)
1355 {
1356 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1357 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1358 GEN_INT (aarch64_bitmasks[j])));
1359 emit_insn (gen_anddi3 (dest, subtarget,
1360 GEN_INT (aarch64_bitmasks[i])));
1361 }
1362 num_insns += 2;
1363 return num_insns;
43e9d192
IB
1364 }
1365 }
1366 }
1367
2c274197
KT
1368 if (one_match > zero_match)
1369 {
1370 /* Set either first three quarters or all but the third. */
1371 mask = 0xffffll << (16 - first_not_ffff_match);
82614948
RR
1372 if (generate)
1373 emit_insn (gen_rtx_SET (VOIDmode, dest,
1374 GEN_INT (val | mask | 0xffffffff00000000ull)));
1375 num_insns ++;
2c274197
KT
1376
1377 /* Now insert other two quarters. */
1378 for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
1379 i < 64; i += 16, mask <<= 16)
1380 {
1381 if ((val & mask) != mask)
82614948
RR
1382 {
1383 if (generate)
1384 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1385 GEN_INT ((val >> i) & 0xffff)));
1386 num_insns ++;
1387 }
2c274197 1388 }
82614948 1389 return num_insns;
2c274197
KT
1390 }
1391
43e9d192
IB
1392 simple_sequence:
1393 first = true;
1394 mask = 0xffff;
1395 for (i = 0; i < 64; i += 16, mask <<= 16)
1396 {
1397 if ((val & mask) != 0)
1398 {
1399 if (first)
1400 {
82614948
RR
1401 if (generate)
1402 emit_insn (gen_rtx_SET (VOIDmode, dest,
1403 GEN_INT (val & mask)));
1404 num_insns ++;
43e9d192
IB
1405 first = false;
1406 }
1407 else
82614948
RR
1408 {
1409 if (generate)
1410 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1411 GEN_INT ((val >> i) & 0xffff)));
1412 num_insns ++;
1413 }
1414 }
1415 }
1416
1417 return num_insns;
1418}
1419
1420
1421void
1422aarch64_expand_mov_immediate (rtx dest, rtx imm)
1423{
1424 machine_mode mode = GET_MODE (dest);
1425
1426 gcc_assert (mode == SImode || mode == DImode);
1427
1428 /* Check on what type of symbol it is. */
1429 if (GET_CODE (imm) == SYMBOL_REF
1430 || GET_CODE (imm) == LABEL_REF
1431 || GET_CODE (imm) == CONST)
1432 {
1433 rtx mem, base, offset;
1434 enum aarch64_symbol_type sty;
1435
1436 /* If we have (const (plus symbol offset)), separate out the offset
1437 before we start classifying the symbol. */
1438 split_const (imm, &base, &offset);
1439
f8b756b7 1440 sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
82614948
RR
1441 switch (sty)
1442 {
1443 case SYMBOL_FORCE_TO_MEM:
1444 if (offset != const0_rtx
1445 && targetm.cannot_force_const_mem (mode, imm))
1446 {
1447 gcc_assert (can_create_pseudo_p ());
1448 base = aarch64_force_temporary (mode, dest, base);
1449 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1450 aarch64_emit_move (dest, base);
1451 return;
1452 }
1453 mem = force_const_mem (ptr_mode, imm);
1454 gcc_assert (mem);
1455 if (mode != ptr_mode)
1456 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1457 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1458 return;
1459
1460 case SYMBOL_SMALL_TLSGD:
1461 case SYMBOL_SMALL_TLSDESC:
1462 case SYMBOL_SMALL_GOTTPREL:
1463 case SYMBOL_SMALL_GOT:
1464 case SYMBOL_TINY_GOT:
1465 if (offset != const0_rtx)
1466 {
1467 gcc_assert(can_create_pseudo_p ());
1468 base = aarch64_force_temporary (mode, dest, base);
1469 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1470 aarch64_emit_move (dest, base);
1471 return;
1472 }
1473 /* FALLTHRU */
1474
1475 case SYMBOL_SMALL_TPREL:
1476 case SYMBOL_SMALL_ABSOLUTE:
1477 case SYMBOL_TINY_ABSOLUTE:
1478 aarch64_load_symref_appropriately (dest, imm, sty);
1479 return;
1480
1481 default:
1482 gcc_unreachable ();
1483 }
1484 }
1485
1486 if (!CONST_INT_P (imm))
1487 {
1488 if (GET_CODE (imm) == HIGH)
1489 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1490 else
1491 {
1492 rtx mem = force_const_mem (mode, imm);
1493 gcc_assert (mem);
1494 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
43e9d192 1495 }
82614948
RR
1496
1497 return;
43e9d192 1498 }
82614948
RR
1499
1500 aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
43e9d192
IB
1501}
1502
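/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  */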
1503static bool
1504aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1505 tree exp ATTRIBUTE_UNUSED)
43e9d192 1506{
fee9ba42 1507 /* Currently, always true. */
43e9d192
IB
1508 return true;
1509}
1510
1511/* Implement TARGET_PASS_BY_REFERENCE. */
1512
1513static bool
1514aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 1515 machine_mode mode,
43e9d192
IB
1516 const_tree type,
1517 bool named ATTRIBUTE_UNUSED)
1518{
1519 HOST_WIDE_INT size;
ef4bddc2 1520 machine_mode dummymode;
43e9d192
IB
1521 int nregs;
1522
1523 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1524 size = (mode == BLKmode && type)
1525 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1526
aadc1c43
MHD
1527 /* Aggregates are passed by reference based on their size. */
1528 if (type && AGGREGATE_TYPE_P (type))
43e9d192 1529 {
aadc1c43 1530 size = int_size_in_bytes (type);
43e9d192
IB
1531 }
1532
1533 /* Variable sized arguments are always returned by reference. */
1534 if (size < 0)
1535 return true;
1536
1537 /* Can this be a candidate to be passed in fp/simd register(s)? */
1538 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1539 &dummymode, &nregs,
1540 NULL))
1541 return false;
1542
1543 /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating-point
1545 aggregate. */
1546 return size > 2 * UNITS_PER_WORD;
1547}
1548
1549/* Return TRUE if VALTYPE is padded to its least significant bits. */
1550static bool
1551aarch64_return_in_msb (const_tree valtype)
1552{
ef4bddc2 1553 machine_mode dummy_mode;
43e9d192
IB
1554 int dummy_int;
1555
1556 /* Never happens in little-endian mode. */
1557 if (!BYTES_BIG_ENDIAN)
1558 return false;
1559
1560 /* Only composite types smaller than or equal to 16 bytes can
1561 be potentially returned in registers. */
1562 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1563 || int_size_in_bytes (valtype) <= 0
1564 || int_size_in_bytes (valtype) > 16)
1565 return false;
1566
1567 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1568 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1569 is always passed/returned in the least significant bits of fp/simd
1570 register(s). */
1571 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1572 &dummy_mode, &dummy_int, NULL))
1573 return false;
1574
1575 return true;
1576}
1577
1578/* Implement TARGET_FUNCTION_VALUE.
1579 Define how to find the value returned by a function. */
1580
1581static rtx
1582aarch64_function_value (const_tree type, const_tree func,
1583 bool outgoing ATTRIBUTE_UNUSED)
1584{
ef4bddc2 1585 machine_mode mode;
43e9d192
IB
1586 int unsignedp;
1587 int count;
ef4bddc2 1588 machine_mode ag_mode;
43e9d192
IB
1589
1590 mode = TYPE_MODE (type);
1591 if (INTEGRAL_TYPE_P (type))
1592 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1593
1594 if (aarch64_return_in_msb (type))
1595 {
1596 HOST_WIDE_INT size = int_size_in_bytes (type);
1597
1598 if (size % UNITS_PER_WORD != 0)
1599 {
1600 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1601 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1602 }
1603 }
1604
1605 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1606 &ag_mode, &count, NULL))
1607 {
1608 if (!aarch64_composite_type_p (type, mode))
1609 {
1610 gcc_assert (count == 1 && mode == ag_mode);
1611 return gen_rtx_REG (mode, V0_REGNUM);
1612 }
1613 else
1614 {
1615 int i;
1616 rtx par;
1617
1618 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1619 for (i = 0; i < count; i++)
1620 {
1621 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1622 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1623 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1624 XVECEXP (par, 0, i) = tmp;
1625 }
1626 return par;
1627 }
1628 }
1629 else
1630 return gen_rtx_REG (mode, R0_REGNUM);
1631}
1632
1633/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1634 Return true if REGNO is the number of a hard register in which the values
1635 of called function may come back. */
1636
1637static bool
1638aarch64_function_value_regno_p (const unsigned int regno)
1639{
1640 /* Maximum of 16 bytes can be returned in the general registers. Examples
1641 of 16-byte return values are: 128-bit integers and 16-byte small
1642 structures (excluding homogeneous floating-point aggregates). */
1643 if (regno == R0_REGNUM || regno == R1_REGNUM)
1644 return true;
1645
1646 /* Up to four fp/simd registers can return a function value, e.g. a
1647 homogeneous floating-point aggregate having four members. */
1648 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1649 return !TARGET_GENERAL_REGS_ONLY;
1650
1651 return false;
1652}
1653
1654/* Implement TARGET_RETURN_IN_MEMORY.
1655
1656 If the type T of the result of a function is such that
1657 void func (T arg)
1658 would require that arg be passed as a value in a register (or set of
1659 registers) according to the parameter passing rules, then the result
1660 is returned in the same registers as would be used for such an
1661 argument. */
1662
1663static bool
1664aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1665{
1666 HOST_WIDE_INT size;
ef4bddc2 1667 machine_mode ag_mode;
43e9d192
IB
1668 int count;
1669
1670 if (!AGGREGATE_TYPE_P (type)
1671 && TREE_CODE (type) != COMPLEX_TYPE
1672 && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types are always returned in registers.  */
1674 return false;
1675
1676 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1677 type,
1678 &ag_mode,
1679 &count,
1680 NULL))
1681 return false;
1682
  /* Types larger than 2 registers are returned in memory.  */
1684 size = int_size_in_bytes (type);
1685 return (size < 0 || size > 2 * UNITS_PER_WORD);
1686}
1687
1688static bool
ef4bddc2 1689aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
1690 const_tree type, int *nregs)
1691{
1692 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1693 return aarch64_vfp_is_call_or_return_candidate (mode,
1694 type,
1695 &pcum->aapcs_vfp_rmode,
1696 nregs,
1697 NULL);
1698}
1699
1700/* Given MODE and TYPE of a function argument, return the alignment in
1701 bits. The idea is to suppress any stronger alignment requested by
1702 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1703 This is a helper function for local use only. */
1704
1705static unsigned int
ef4bddc2 1706aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192
IB
1707{
1708 unsigned int alignment;
1709
1710 if (type)
1711 {
1712 if (!integer_zerop (TYPE_SIZE (type)))
1713 {
1714 if (TYPE_MODE (type) == mode)
1715 alignment = TYPE_ALIGN (type);
1716 else
1717 alignment = GET_MODE_ALIGNMENT (mode);
1718 }
1719 else
1720 alignment = 0;
1721 }
1722 else
1723 alignment = GET_MODE_ALIGNMENT (mode);
1724
1725 return alignment;
1726}
1727
1728/* Layout a function argument according to the AAPCS64 rules. The rule
1729 numbers refer to the rule numbers in the AAPCS64. */
1730
1731static void
ef4bddc2 1732aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
1733 const_tree type,
1734 bool named ATTRIBUTE_UNUSED)
1735{
1736 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1737 int ncrn, nvrn, nregs;
1738 bool allocate_ncrn, allocate_nvrn;
3abf17cf 1739 HOST_WIDE_INT size;
43e9d192
IB
1740
1741 /* We need to do this once per argument. */
1742 if (pcum->aapcs_arg_processed)
1743 return;
1744
1745 pcum->aapcs_arg_processed = true;
1746
3abf17cf
YZ
1747 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1748 size
1749 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1750 UNITS_PER_WORD);
1751
43e9d192
IB
1752 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1753 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1754 mode,
1755 type,
1756 &nregs);
1757
1758 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1759 The following code thus handles passing by SIMD/FP registers first. */
1760
1761 nvrn = pcum->aapcs_nvrn;
1762
  /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
1765 if (allocate_nvrn)
1766 {
1767 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1768 {
1769 pcum->aapcs_nextnvrn = nvrn + nregs;
1770 if (!aarch64_composite_type_p (type, mode))
1771 {
1772 gcc_assert (nregs == 1);
1773 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1774 }
1775 else
1776 {
1777 rtx par;
1778 int i;
1779 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1780 for (i = 0; i < nregs; i++)
1781 {
1782 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1783 V0_REGNUM + nvrn + i);
1784 tmp = gen_rtx_EXPR_LIST
1785 (VOIDmode, tmp,
1786 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1787 XVECEXP (par, 0, i) = tmp;
1788 }
1789 pcum->aapcs_reg = par;
1790 }
1791 return;
1792 }
1793 else
1794 {
1795 /* C.3 NSRN is set to 8. */
1796 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1797 goto on_stack;
1798 }
1799 }
1800
1801 ncrn = pcum->aapcs_ncrn;
3abf17cf 1802 nregs = size / UNITS_PER_WORD;
43e9d192
IB
1803
  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
1807 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1808 {
1809 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1810
1811 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1812
1813 /* C.8 if the argument has an alignment of 16 then the NGRN is
1814 rounded up to the next even number. */
1815 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1816 {
1817 ++ncrn;
1818 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1819 }
1820 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1821 A reg is still generated for it, but the caller should be smart
1822 enough not to use it. */
1823 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1824 {
1825 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1826 }
1827 else
1828 {
1829 rtx par;
1830 int i;
1831
1832 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1833 for (i = 0; i < nregs; i++)
1834 {
1835 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1836 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1837 GEN_INT (i * UNITS_PER_WORD));
1838 XVECEXP (par, 0, i) = tmp;
1839 }
1840 pcum->aapcs_reg = par;
1841 }
1842
1843 pcum->aapcs_nextncrn = ncrn + nregs;
1844 return;
1845 }
1846
1847 /* C.11 */
1848 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1849
1850 /* The argument is passed on stack; record the needed number of words for
3abf17cf 1851 this argument and align the total size if necessary. */
43e9d192 1852on_stack:
3abf17cf 1853 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
43e9d192
IB
1854 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1855 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
3abf17cf 1856 16 / UNITS_PER_WORD);
43e9d192
IB
1857 return;
1858}
1859
1860/* Implement TARGET_FUNCTION_ARG. */
1861
1862static rtx
ef4bddc2 1863aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
1864 const_tree type, bool named)
1865{
1866 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1867 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1868
1869 if (mode == VOIDmode)
1870 return NULL_RTX;
1871
1872 aarch64_layout_arg (pcum_v, mode, type, named);
1873 return pcum->aapcs_reg;
1874}
1875
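/* Initialize the cumulative argument state PCUM ready for laying out the
   arguments of a call.  */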
1876void
1877aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1878 const_tree fntype ATTRIBUTE_UNUSED,
1879 rtx libname ATTRIBUTE_UNUSED,
1880 const_tree fndecl ATTRIBUTE_UNUSED,
1881 unsigned n_named ATTRIBUTE_UNUSED)
1882{
1883 pcum->aapcs_ncrn = 0;
1884 pcum->aapcs_nvrn = 0;
1885 pcum->aapcs_nextncrn = 0;
1886 pcum->aapcs_nextnvrn = 0;
1887 pcum->pcs_variant = ARM_PCS_AAPCS64;
1888 pcum->aapcs_reg = NULL_RTX;
1889 pcum->aapcs_arg_processed = false;
1890 pcum->aapcs_stack_words = 0;
1891 pcum->aapcs_stack_size = 0;
1892
1893 return;
1894}
1895
1896static void
1897aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 1898 machine_mode mode,
43e9d192
IB
1899 const_tree type,
1900 bool named)
1901{
1902 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1903 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1904 {
1905 aarch64_layout_arg (pcum_v, mode, type, named);
1906 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1907 != (pcum->aapcs_stack_words != 0));
1908 pcum->aapcs_arg_processed = false;
1909 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1910 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1911 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1912 pcum->aapcs_stack_words = 0;
1913 pcum->aapcs_reg = NULL_RTX;
1914 }
1915}
1916
1917bool
1918aarch64_function_arg_regno_p (unsigned regno)
1919{
1920 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1921 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1922}
1923
1924/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1925 PARM_BOUNDARY bits of alignment, but will be given anything up
1926 to STACK_BOUNDARY bits if the type requires it. This makes sure
1927 that both before and after the layout of each argument, the Next
1928 Stacked Argument Address (NSAA) will have a minimum alignment of
1929 8 bytes. */
1930
1931static unsigned int
ef4bddc2 1932aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192
IB
1933{
1934 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1935
1936 if (alignment < PARM_BOUNDARY)
1937 alignment = PARM_BOUNDARY;
1938 if (alignment > STACK_BOUNDARY)
1939 alignment = STACK_BOUNDARY;
1940 return alignment;
1941}
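/* For illustration only: with the usual AArch64 values PARM_BOUNDARY == 64
   and STACK_BOUNDARY == 128 (an assumption of this example, not restated
   from this file), the clamping above behaves like the hypothetical helper
   below: an under-aligned argument is promoted to 64 bits and an
   over-aligned one is capped at 128 bits.  */

static unsigned int
arg_boundary_sketch (unsigned int type_alignment_in_bits)
{
  const unsigned int parm_boundary = 64;    /* Assumed PARM_BOUNDARY.  */
  const unsigned int stack_boundary = 128;  /* Assumed STACK_BOUNDARY.  */

  if (type_alignment_in_bits < parm_boundary)
    return parm_boundary;                   /* E.g. char: 8 -> 64 bits.  */
  if (type_alignment_in_bits > stack_boundary)
    return stack_boundary;                  /* E.g. 256-bit aligned -> 128 bits.  */
  return type_alignment_in_bits;
}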
1942
1943/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1944
1945 Return true if an argument passed on the stack should be padded upwards,
1946 i.e. if the least-significant byte of the stack slot has useful data.
1947
 1948 Small aggregate types are placed at the lowest memory address.
1949
1950 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1951
1952bool
ef4bddc2 1953aarch64_pad_arg_upward (machine_mode mode, const_tree type)
43e9d192
IB
1954{
1955 /* On little-endian targets, the least significant byte of every stack
1956 argument is passed at the lowest byte address of the stack slot. */
1957 if (!BYTES_BIG_ENDIAN)
1958 return true;
1959
00edcfbe 1960 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
1961 the least significant byte of a stack argument is passed at the highest
1962 byte address of the stack slot. */
1963 if (type
00edcfbe
YZ
1964 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1965 || POINTER_TYPE_P (type))
43e9d192
IB
1966 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1967 return false;
1968
1969 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1970 return true;
1971}
1972
1973/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1974
 1975 It specifies padding for the last (and possibly the only)
 1976 element of a block move between registers and memory. Assuming
 1977 the block is in memory, padding upward means that the last
 1978 element is padded after its most significant byte, while with
 1979 downward padding the last element is padded at its least
 1980 significant byte side.
1981
1982 Small aggregates and small complex types are always padded
1983 upwards.
1984
1985 We don't need to worry about homogeneous floating-point or
1986 short-vector aggregates; their move is not affected by the
1987 padding direction determined here. Regardless of endianness,
1988 each element of such an aggregate is put in the least
1989 significant bits of a fp/simd register.
1990
1991 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1992 register has useful data, and return the opposite if the most
1993 significant byte does. */
1994
1995bool
ef4bddc2 1996aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
1997 bool first ATTRIBUTE_UNUSED)
1998{
1999
2000 /* Small composite types are always padded upward. */
2001 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2002 {
2003 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2004 : GET_MODE_SIZE (mode));
2005 if (size < 2 * UNITS_PER_WORD)
2006 return true;
2007 }
2008
2009 /* Otherwise, use the default padding. */
2010 return !BYTES_BIG_ENDIAN;
2011}
2012
ef4bddc2 2013static machine_mode
43e9d192
IB
2014aarch64_libgcc_cmp_return_mode (void)
2015{
2016 return SImode;
2017}
2018
2019static bool
2020aarch64_frame_pointer_required (void)
2021{
0b7f8166
MS
2022 /* In aarch64_override_options_after_change
2023 flag_omit_leaf_frame_pointer turns off the frame pointer by
2024 default. Turn it back on now if we've not got a leaf
2025 function. */
2026 if (flag_omit_leaf_frame_pointer
2027 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2028 return true;
43e9d192 2029
0b7f8166 2030 return false;
43e9d192
IB
2031}
2032
2033/* Mark the registers that need to be saved by the callee and calculate
2034 the size of the callee-saved registers area and frame record (both FP
2035 and LR may be omitted). */
2036static void
2037aarch64_layout_frame (void)
2038{
2039 HOST_WIDE_INT offset = 0;
2040 int regno;
2041
2042 if (reload_completed && cfun->machine->frame.laid_out)
2043 return;
2044
97826595
MS
2045#define SLOT_NOT_REQUIRED (-2)
2046#define SLOT_REQUIRED (-1)
2047
363ffa50
JW
2048 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
2049 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
2050
43e9d192
IB
2051 /* First mark all the registers that really need to be saved... */
2052 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2053 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2054
2055 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2056 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2057
2058 /* ... that includes the eh data registers (if needed)... */
2059 if (crtl->calls_eh_return)
2060 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
2061 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2062 = SLOT_REQUIRED;
43e9d192
IB
2063
2064 /* ... and any callee saved register that dataflow says is live. */
2065 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2066 if (df_regs_ever_live_p (regno)
1c923b60
JW
2067 && (regno == R30_REGNUM
2068 || !call_used_regs[regno]))
97826595 2069 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2070
2071 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2072 if (df_regs_ever_live_p (regno)
2073 && !call_used_regs[regno])
97826595 2074 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2075
2076 if (frame_pointer_needed)
2077 {
2e1cdae5 2078 /* FP and LR are placed in the linkage record. */
43e9d192 2079 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 2080 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 2081 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 2082 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
43e9d192 2083 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
2e1cdae5 2084 offset += 2 * UNITS_PER_WORD;
43e9d192
IB
2085 }
2086
2087 /* Now assign stack slots for them. */
2e1cdae5 2088 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2089 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2090 {
2091 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
2092 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2093 cfun->machine->frame.wb_candidate1 = regno;
2094 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
2095 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2096 offset += UNITS_PER_WORD;
2097 }
2098
2099 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2100 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2101 {
2102 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
2103 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2104 cfun->machine->frame.wb_candidate1 = regno;
2105 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
2106 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2107 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2108 offset += UNITS_PER_WORD;
2109 }
2110
43e9d192
IB
2111 cfun->machine->frame.padding0 =
2112 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
2113 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2114
2115 cfun->machine->frame.saved_regs_size = offset;
1c960e02
MS
2116
2117 cfun->machine->frame.hard_fp_offset
2118 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
2119 + get_frame_size ()
2120 + cfun->machine->frame.saved_regs_size,
2121 STACK_BOUNDARY / BITS_PER_UNIT);
2122
2123 cfun->machine->frame.frame_size
2124 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
2125 + crtl->outgoing_args_size,
2126 STACK_BOUNDARY / BITS_PER_UNIT);
2127
43e9d192
IB
2128 cfun->machine->frame.laid_out = true;
2129}
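/* A worked example of the final size computation above, using a
   hypothetical helper that does not exist in GCC: with
   STACK_BOUNDARY / BITS_PER_UNIT == 16, a function with 24 bytes of
   locals, no varargs save area and a saved-register area already rounded
   to 32 bytes gets hard_fp_offset = ROUND_UP (0 + 24 + 32, 16) == 64,
   and with 48 bytes of outgoing arguments
   frame_size = ROUND_UP (64 + 48, 16) == 112.  */

static unsigned long long
frame_size_sketch (unsigned long long varargs_save_area,
                   unsigned long long locals,
                   unsigned long long saved_regs_size,
                   unsigned long long outgoing_args_size)
{
  unsigned long long hard_fp_offset
    = (varargs_save_area + locals + saved_regs_size + 15) & ~15ull;
  return (hard_fp_offset + outgoing_args_size + 15) & ~15ull;
}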
2130
43e9d192
IB
2131static bool
2132aarch64_register_saved_on_entry (int regno)
2133{
97826595 2134 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
2135}
2136
64dedd72
JW
2137static unsigned
2138aarch64_next_callee_save (unsigned regno, unsigned limit)
2139{
2140 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2141 regno ++;
2142 return regno;
2143}
43e9d192 2144
c5e1f66e 2145static void
ef4bddc2 2146aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
2147 HOST_WIDE_INT adjustment)
2148 {
2149 rtx base_rtx = stack_pointer_rtx;
2150 rtx insn, reg, mem;
2151
2152 reg = gen_rtx_REG (mode, regno);
2153 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2154 plus_constant (Pmode, base_rtx, -adjustment));
2155 mem = gen_rtx_MEM (mode, mem);
2156
2157 insn = emit_move_insn (mem, reg);
2158 RTX_FRAME_RELATED_P (insn) = 1;
2159}
2160
80c11907 2161static rtx
ef4bddc2 2162aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
2163 HOST_WIDE_INT adjustment)
2164{
2165 switch (mode)
2166 {
2167 case DImode:
2168 return gen_storewb_pairdi_di (base, base, reg, reg2,
2169 GEN_INT (-adjustment),
2170 GEN_INT (UNITS_PER_WORD - adjustment));
2171 case DFmode:
2172 return gen_storewb_pairdf_di (base, base, reg, reg2,
2173 GEN_INT (-adjustment),
2174 GEN_INT (UNITS_PER_WORD - adjustment));
2175 default:
2176 gcc_unreachable ();
2177 }
2178}
2179
2180static void
ef4bddc2 2181aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
80c11907
JW
2182 unsigned regno2, HOST_WIDE_INT adjustment)
2183{
5d8a22a5 2184 rtx_insn *insn;
80c11907
JW
2185 rtx reg1 = gen_rtx_REG (mode, regno1);
2186 rtx reg2 = gen_rtx_REG (mode, regno2);
2187
2188 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2189 reg2, adjustment));
2190 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
2191 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2192 RTX_FRAME_RELATED_P (insn) = 1;
2193}
2194
159313d9 2195static rtx
ef4bddc2 2196aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
2197 HOST_WIDE_INT adjustment)
2198{
2199 switch (mode)
2200 {
2201 case DImode:
2202 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2203 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2204 case DFmode:
2205 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2206 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2207 default:
2208 gcc_unreachable ();
2209 }
2210}
2211
72df5c1f 2212static rtx
ef4bddc2 2213aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
2214 rtx reg2)
2215{
2216 switch (mode)
2217 {
2218 case DImode:
2219 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2220
2221 case DFmode:
2222 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2223
2224 default:
2225 gcc_unreachable ();
2226 }
2227}
2228
2229static rtx
ef4bddc2 2230aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
2231 rtx mem2)
2232{
2233 switch (mode)
2234 {
2235 case DImode:
2236 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2237
2238 case DFmode:
2239 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2240
2241 default:
2242 gcc_unreachable ();
2243 }
2244}
2245
43e9d192 2246
43e9d192 2247static void
ef4bddc2 2248aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
ae13fce3 2249 unsigned start, unsigned limit, bool skip_wb)
43e9d192 2250{
5d8a22a5 2251 rtx_insn *insn;
ef4bddc2 2252 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
a007a21c 2253 ? gen_frame_mem : gen_rtx_MEM);
43e9d192
IB
2254 unsigned regno;
2255 unsigned regno2;
2256
0ec74a1e 2257 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
2258 regno <= limit;
2259 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 2260 {
ae13fce3
JW
2261 rtx reg, mem;
2262 HOST_WIDE_INT offset;
64dedd72 2263
ae13fce3
JW
2264 if (skip_wb
2265 && (regno == cfun->machine->frame.wb_candidate1
2266 || regno == cfun->machine->frame.wb_candidate2))
2267 continue;
2268
2269 reg = gen_rtx_REG (mode, regno);
2270 offset = start_offset + cfun->machine->frame.reg_offset[regno];
0ec74a1e
JW
2271 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2272 offset));
64dedd72
JW
2273
2274 regno2 = aarch64_next_callee_save (regno + 1, limit);
2275
2276 if (regno2 <= limit
2277 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2278 == cfun->machine->frame.reg_offset[regno2]))
2279
43e9d192 2280 {
0ec74a1e 2281 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
2282 rtx mem2;
2283
2284 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
8ed2fc62
JW
2285 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2286 offset));
2287 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2288 reg2));
0b4a9743 2289
64dedd72
JW
2290 /* The first part of a frame-related parallel insn is
2291 always assumed to be relevant to the frame
 2292 calculations; subsequent parts are only
2293 frame-related if explicitly marked. */
2294 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2295 regno = regno2;
2296 }
2297 else
8ed2fc62
JW
2298 insn = emit_move_insn (mem, reg);
2299
2300 RTX_FRAME_RELATED_P (insn) = 1;
2301 }
2302}
2303
2304static void
ef4bddc2 2305aarch64_restore_callee_saves (machine_mode mode,
8ed2fc62 2306 HOST_WIDE_INT start_offset, unsigned start,
dd991abb 2307 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 2308{
8ed2fc62 2309 rtx base_rtx = stack_pointer_rtx;
ef4bddc2 2310 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
8ed2fc62
JW
2311 ? gen_frame_mem : gen_rtx_MEM);
2312 unsigned regno;
2313 unsigned regno2;
2314 HOST_WIDE_INT offset;
2315
2316 for (regno = aarch64_next_callee_save (start, limit);
2317 regno <= limit;
2318 regno = aarch64_next_callee_save (regno + 1, limit))
2319 {
ae13fce3 2320 rtx reg, mem;
8ed2fc62 2321
ae13fce3
JW
2322 if (skip_wb
2323 && (regno == cfun->machine->frame.wb_candidate1
2324 || regno == cfun->machine->frame.wb_candidate2))
2325 continue;
2326
2327 reg = gen_rtx_REG (mode, regno);
8ed2fc62
JW
2328 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2329 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2330
2331 regno2 = aarch64_next_callee_save (regno + 1, limit);
2332
2333 if (regno2 <= limit
2334 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2335 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 2336 {
8ed2fc62
JW
2337 rtx reg2 = gen_rtx_REG (mode, regno2);
2338 rtx mem2;
2339
2340 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2341 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 2342 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 2343
dd991abb 2344 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 2345 regno = regno2;
43e9d192 2346 }
8ed2fc62 2347 else
dd991abb
RH
2348 emit_move_insn (reg, mem);
2349 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 2350 }
43e9d192
IB
2351}
2352
2353/* AArch64 stack frames generated by this compiler look like:
2354
2355 +-------------------------------+
2356 | |
2357 | incoming stack arguments |
2358 | |
34834420
MS
2359 +-------------------------------+
2360 | | <-- incoming stack pointer (aligned)
43e9d192
IB
2361 | callee-allocated save area |
2362 | for register varargs |
2363 | |
34834420
MS
2364 +-------------------------------+
2365 | local variables | <-- frame_pointer_rtx
43e9d192
IB
2366 | |
2367 +-------------------------------+
454fdba9
RL
2368 | padding0 | \
2369 +-------------------------------+ |
454fdba9 2370 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
2371 +-------------------------------+ |
2372 | LR' | |
2373 +-------------------------------+ |
34834420
MS
2374 | FP' | / <- hard_frame_pointer_rtx (aligned)
2375 +-------------------------------+
43e9d192
IB
2376 | dynamic allocation |
2377 +-------------------------------+
34834420
MS
2378 | padding |
2379 +-------------------------------+
2380 | outgoing stack arguments | <-- arg_pointer
2381 | |
2382 +-------------------------------+
2383 | | <-- stack_pointer_rtx (aligned)
43e9d192 2384
34834420
MS
2385 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2386 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2387 unchanged. */
43e9d192
IB
2388
2389/* Generate the prologue instructions for entry into a function.
 2390 Establish the stack frame by decreasing the stack pointer by a
 2391 properly calculated size and, if necessary, create a frame record
 2392 filled with the values of LR and the previous frame pointer. The
6991c977 2393 current FP is also set up if it is in use. */
43e9d192
IB
2394
2395void
2396aarch64_expand_prologue (void)
2397{
2398 /* sub sp, sp, #<frame_size>
2399 stp {fp, lr}, [sp, #<frame_size> - 16]
2400 add fp, sp, #<frame_size> - hardfp_offset
2401 stp {cs_reg}, [fp, #-16] etc.
2402
2403 sub sp, sp, <final_adjustment_if_any>
2404 */
43e9d192 2405 HOST_WIDE_INT frame_size, offset;
1c960e02 2406 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
dd991abb 2407 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2408 rtx_insn *insn;
43e9d192
IB
2409
2410 aarch64_layout_frame ();
43e9d192 2411
dd991abb
RH
2412 offset = frame_size = cfun->machine->frame.frame_size;
2413 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2414 fp_offset = frame_size - hard_fp_offset;
43e9d192 2415
dd991abb
RH
2416 if (flag_stack_usage_info)
2417 current_function_static_stack_size = frame_size;
43e9d192 2418
44c0e7b9 2419 /* Store pairs and load pairs have an offset range of only -512 to 504. */
43e9d192
IB
2420 if (offset >= 512)
2421 {
2422 /* When the frame has a large size, an initial decrease is done on
2423 the stack pointer to jump over the callee-allocated save area for
2424 register varargs, the local variable area and/or the callee-saved
2425 register area. This will allow the pre-index write-back
2426 store pair instructions to be used for setting up the stack frame
2427 efficiently. */
dd991abb 2428 offset = hard_fp_offset;
43e9d192
IB
2429 if (offset >= 512)
2430 offset = cfun->machine->frame.saved_regs_size;
2431
2432 frame_size -= (offset + crtl->outgoing_args_size);
2433 fp_offset = 0;
2434
2435 if (frame_size >= 0x1000000)
2436 {
2437 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2438 emit_move_insn (op0, GEN_INT (-frame_size));
dd991abb
RH
2439 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2440
2441 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2442 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2443 plus_constant (Pmode, stack_pointer_rtx,
2444 -frame_size)));
2445 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
2446 }
2447 else if (frame_size > 0)
2448 {
dd991abb
RH
2449 int hi_ofs = frame_size & 0xfff000;
2450 int lo_ofs = frame_size & 0x000fff;
2451
2452 if (hi_ofs)
43e9d192
IB
2453 {
2454 insn = emit_insn (gen_add2_insn
dd991abb 2455 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
43e9d192
IB
2456 RTX_FRAME_RELATED_P (insn) = 1;
2457 }
dd991abb 2458 if (lo_ofs)
43e9d192
IB
2459 {
2460 insn = emit_insn (gen_add2_insn
dd991abb 2461 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
43e9d192
IB
2462 RTX_FRAME_RELATED_P (insn) = 1;
2463 }
2464 }
2465 }
2466 else
2467 frame_size = -1;
2468
2469 if (offset > 0)
2470 {
ae13fce3
JW
2471 bool skip_wb = false;
2472
43e9d192
IB
2473 if (frame_pointer_needed)
2474 {
c5e1f66e
JW
2475 skip_wb = true;
2476
43e9d192
IB
2477 if (fp_offset)
2478 {
2479 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2480 GEN_INT (-offset)));
2481 RTX_FRAME_RELATED_P (insn) = 1;
80c11907
JW
2482
2483 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
c5e1f66e 2484 R30_REGNUM, false);
43e9d192
IB
2485 }
2486 else
80c11907 2487 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
43e9d192
IB
2488
2489 /* Set up frame pointer to point to the location of the
2490 previous frame pointer on the stack. */
2491 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2492 stack_pointer_rtx,
2493 GEN_INT (fp_offset)));
43e9d192 2494 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 2495 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192
IB
2496 }
2497 else
2498 {
c5e1f66e
JW
2499 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2500 unsigned reg2 = cfun->machine->frame.wb_candidate2;
80c11907 2501
c5e1f66e
JW
2502 if (fp_offset
2503 || reg1 == FIRST_PSEUDO_REGISTER
2504 || (reg2 == FIRST_PSEUDO_REGISTER
2505 && offset >= 256))
2506 {
2507 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2508 GEN_INT (-offset)));
2509 RTX_FRAME_RELATED_P (insn) = 1;
2510 }
2511 else
2512 {
ef4bddc2 2513 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
c5e1f66e
JW
2514
2515 skip_wb = true;
2516
2517 if (reg2 == FIRST_PSEUDO_REGISTER)
2518 aarch64_pushwb_single_reg (mode1, reg1, offset);
2519 else
2520 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2521 }
43e9d192
IB
2522 }
2523
c5e1f66e
JW
2524 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2525 skip_wb);
ae13fce3
JW
2526 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2527 skip_wb);
43e9d192
IB
2528 }
2529
 2530 /* When offset >= 512,
2531 sub sp, sp, #<outgoing_args_size> */
2532 if (frame_size > -1)
2533 {
2534 if (crtl->outgoing_args_size > 0)
2535 {
2536 insn = emit_insn (gen_add2_insn
2537 (stack_pointer_rtx,
2538 GEN_INT (- crtl->outgoing_args_size)));
2539 RTX_FRAME_RELATED_P (insn) = 1;
2540 }
2541 }
2542}
2543
4f942779
RL
2544/* Return TRUE if we can use a simple_return insn.
2545
 2546 This function checks whether the callee-saved stack is empty, which
 2547 means no restore actions are needed. The pro_and_epilogue pass will use
 2548 this to check whether the shrink-wrapping optimization is feasible. */
2549
2550bool
2551aarch64_use_return_insn_p (void)
2552{
2553 if (!reload_completed)
2554 return false;
2555
2556 if (crtl->profile)
2557 return false;
2558
2559 aarch64_layout_frame ();
2560
2561 return cfun->machine->frame.frame_size == 0;
2562}
2563
43e9d192
IB
2564/* Generate the epilogue instructions for returning from a function. */
2565void
2566aarch64_expand_epilogue (bool for_sibcall)
2567{
1c960e02 2568 HOST_WIDE_INT frame_size, offset;
43e9d192 2569 HOST_WIDE_INT fp_offset;
dd991abb 2570 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2571 rtx_insn *insn;
7e8c2bd5
JW
 2572 /* We need to add a memory barrier to prevent reads from the deallocated stack. */
2573 bool need_barrier_p = (get_frame_size () != 0
2574 || cfun->machine->frame.saved_varargs_size);
43e9d192
IB
2575
2576 aarch64_layout_frame ();
43e9d192 2577
1c960e02 2578 offset = frame_size = cfun->machine->frame.frame_size;
dd991abb
RH
2579 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2580 fp_offset = frame_size - hard_fp_offset;
44c0e7b9
YZ
2581
 2582 /* Store pairs and load pairs have an offset range of only -512 to 504. */
43e9d192
IB
2583 if (offset >= 512)
2584 {
dd991abb 2585 offset = hard_fp_offset;
43e9d192
IB
2586 if (offset >= 512)
2587 offset = cfun->machine->frame.saved_regs_size;
2588
2589 frame_size -= (offset + crtl->outgoing_args_size);
2590 fp_offset = 0;
2591 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2592 {
2593 insn = emit_insn (gen_add2_insn
2594 (stack_pointer_rtx,
2595 GEN_INT (crtl->outgoing_args_size)));
2596 RTX_FRAME_RELATED_P (insn) = 1;
2597 }
2598 }
2599 else
2600 frame_size = -1;
2601
2602 /* If there were outgoing arguments or we've done dynamic stack
2603 allocation, then restore the stack pointer from the frame
2604 pointer. This is at most one insn and more efficient than using
2605 GCC's internal mechanism. */
2606 if (frame_pointer_needed
2607 && (crtl->outgoing_args_size || cfun->calls_alloca))
2608 {
7e8c2bd5
JW
2609 if (cfun->calls_alloca)
2610 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2611
43e9d192
IB
2612 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2613 hard_frame_pointer_rtx,
8f454e9f
JW
2614 GEN_INT (0)));
2615 offset = offset - fp_offset;
43e9d192
IB
2616 }
2617
43e9d192
IB
2618 if (offset > 0)
2619 {
4b92caa1
JW
2620 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2621 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2622 bool skip_wb = true;
dd991abb 2623 rtx cfi_ops = NULL;
4b92caa1 2624
43e9d192 2625 if (frame_pointer_needed)
4b92caa1
JW
2626 fp_offset = 0;
2627 else if (fp_offset
2628 || reg1 == FIRST_PSEUDO_REGISTER
2629 || (reg2 == FIRST_PSEUDO_REGISTER
2630 && offset >= 256))
2631 skip_wb = false;
2632
2633 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
dd991abb 2634 skip_wb, &cfi_ops);
4b92caa1 2635 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
dd991abb 2636 skip_wb, &cfi_ops);
4b92caa1 2637
7e8c2bd5
JW
2638 if (need_barrier_p)
2639 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2640
4b92caa1 2641 if (skip_wb)
43e9d192 2642 {
ef4bddc2 2643 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
dd991abb 2644 rtx rreg1 = gen_rtx_REG (mode1, reg1);
4b92caa1 2645
dd991abb 2646 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
4b92caa1 2647 if (reg2 == FIRST_PSEUDO_REGISTER)
dd991abb
RH
2648 {
2649 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2650 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2651 mem = gen_rtx_MEM (mode1, mem);
2652 insn = emit_move_insn (rreg1, mem);
2653 }
4b92caa1
JW
2654 else
2655 {
dd991abb 2656 rtx rreg2 = gen_rtx_REG (mode1, reg2);
4b92caa1 2657
dd991abb
RH
2658 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2659 insn = emit_insn (aarch64_gen_loadwb_pair
2660 (mode1, stack_pointer_rtx, rreg1,
2661 rreg2, offset));
4b92caa1 2662 }
43e9d192 2663 }
43e9d192
IB
2664 else
2665 {
2666 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2667 GEN_INT (offset)));
43e9d192 2668 }
43e9d192 2669
dd991abb
RH
2670 /* Reset the CFA to be SP + FRAME_SIZE. */
2671 rtx new_cfa = stack_pointer_rtx;
2672 if (frame_size > 0)
2673 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2674 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2675 REG_NOTES (insn) = cfi_ops;
43e9d192 2676 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
2677 }
2678
dd991abb 2679 if (frame_size > 0)
43e9d192 2680 {
7e8c2bd5
JW
2681 if (need_barrier_p)
2682 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2683
43e9d192
IB
2684 if (frame_size >= 0x1000000)
2685 {
2686 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2687 emit_move_insn (op0, GEN_INT (frame_size));
dd991abb 2688 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
43e9d192 2689 }
dd991abb 2690 else
43e9d192 2691 {
dd991abb
RH
2692 int hi_ofs = frame_size & 0xfff000;
2693 int lo_ofs = frame_size & 0x000fff;
2694
2695 if (hi_ofs && lo_ofs)
43e9d192
IB
2696 {
2697 insn = emit_insn (gen_add2_insn
dd991abb 2698 (stack_pointer_rtx, GEN_INT (hi_ofs)));
43e9d192 2699 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 2700 frame_size = lo_ofs;
43e9d192 2701 }
dd991abb
RH
2702 insn = emit_insn (gen_add2_insn
2703 (stack_pointer_rtx, GEN_INT (frame_size)));
43e9d192
IB
2704 }
2705
dd991abb
RH
2706 /* Reset the CFA to be SP + 0. */
2707 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2708 RTX_FRAME_RELATED_P (insn) = 1;
2709 }
2710
2711 /* Stack adjustment for exception handler. */
2712 if (crtl->calls_eh_return)
2713 {
2714 /* We need to unwind the stack by the offset computed by
2715 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2716 to be SP; letting the CFA move during this adjustment
2717 is just as correct as retaining the CFA from the body
2718 of the function. Therefore, do nothing special. */
2719 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
2720 }
2721
2722 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2723 if (!for_sibcall)
2724 emit_jump_insn (ret_rtx);
2725}
2726
2727/* Return the place to copy the exception unwinding return address to.
 2728 This will probably be a stack slot, but could (in theory) be the
 2729 return register. */
2730rtx
2731aarch64_final_eh_return_addr (void)
2732{
1c960e02
MS
2733 HOST_WIDE_INT fp_offset;
2734
43e9d192 2735 aarch64_layout_frame ();
1c960e02
MS
2736
2737 fp_offset = cfun->machine->frame.frame_size
2738 - cfun->machine->frame.hard_fp_offset;
43e9d192
IB
2739
2740 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2741 return gen_rtx_REG (DImode, LR_REGNUM);
2742
2743 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2744 result in a store to save LR introduced by builtin_eh_return () being
2745 incorrectly deleted because the alias is not detected.
2746 So in the calculation of the address to copy the exception unwinding
 2747 return address to, we distinguish two cases.
2748 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2749 we return a SP-relative location since all the addresses are SP-relative
2750 in this case. This prevents the store from being optimized away.
2751 If the fp_offset is not 0, then the addresses will be FP-relative and
2752 therefore we return a FP-relative location. */
2753
2754 if (frame_pointer_needed)
2755 {
2756 if (fp_offset)
2757 return gen_frame_mem (DImode,
2758 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2759 else
2760 return gen_frame_mem (DImode,
2761 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2762 }
2763
2764 /* If FP is not needed, we calculate the location of LR, which would be
2765 at the top of the saved registers block. */
2766
2767 return gen_frame_mem (DImode,
2768 plus_constant (Pmode,
2769 stack_pointer_rtx,
2770 fp_offset
2771 + cfun->machine->frame.saved_regs_size
2772 - 2 * UNITS_PER_WORD));
2773}
2774
9dfc162c
JG
2775/* Possibly output code to build up a constant in a register. For
2776 the benefit of the costs infrastructure, returns the number of
2777 instructions which would be emitted. GENERATE inhibits or
2778 enables code generation. */
2779
2780static int
2781aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
43e9d192 2782{
9dfc162c
JG
2783 int insns = 0;
2784
43e9d192 2785 if (aarch64_bitmask_imm (val, DImode))
9dfc162c
JG
2786 {
2787 if (generate)
2788 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2789 insns = 1;
2790 }
43e9d192
IB
2791 else
2792 {
2793 int i;
2794 int ncount = 0;
2795 int zcount = 0;
2796 HOST_WIDE_INT valp = val >> 16;
2797 HOST_WIDE_INT valm;
2798 HOST_WIDE_INT tval;
2799
2800 for (i = 16; i < 64; i += 16)
2801 {
2802 valm = (valp & 0xffff);
2803
2804 if (valm != 0)
2805 ++ zcount;
2806
2807 if (valm != 0xffff)
2808 ++ ncount;
2809
2810 valp >>= 16;
2811 }
2812
2813 /* zcount contains the number of additional MOVK instructions
2814 required if the constant is built up with an initial MOVZ instruction,
2815 while ncount is the number of MOVK instructions required if starting
 2816 with a MOVN instruction. Choose the sequence that needs fewer
 2817 instructions, preferring the MOVZ sequence when both counts are
 2818 the same. */
2819 if (ncount < zcount)
2820 {
9dfc162c
JG
2821 if (generate)
2822 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2823 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
43e9d192 2824 tval = 0xffff;
9dfc162c 2825 insns++;
43e9d192
IB
2826 }
2827 else
2828 {
9dfc162c
JG
2829 if (generate)
2830 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2831 GEN_INT (val & 0xffff));
43e9d192 2832 tval = 0;
9dfc162c 2833 insns++;
43e9d192
IB
2834 }
2835
2836 val >>= 16;
2837
2838 for (i = 16; i < 64; i += 16)
2839 {
2840 if ((val & 0xffff) != tval)
9dfc162c
JG
2841 {
2842 if (generate)
2843 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2844 GEN_INT (i),
2845 GEN_INT (val & 0xffff)));
2846 insns++;
2847 }
43e9d192
IB
2848 val >>= 16;
2849 }
2850 }
9dfc162c 2851 return insns;
43e9d192
IB
2852}
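/* Worked example for the MOVZ/MOVN choice above: for
   val == 0xffffffff00001234 the three upper 16-bit chunks are 0x0000,
   0xffff and 0xffff, so zcount == 2 and ncount == 1; ncount < zcount
   selects the MOVN route and the constant costs two instructions (in
   effect a MOVN followed by one MOVK).  The hypothetical helper below
   repeats only the counting step (ignoring the bitmask-immediate
   shortcut) and is not part of GCC.  */

static int
build_constant_cost_sketch (unsigned long long val)
{
  int movz_cost = 1, movn_cost = 1;
  int i;

  for (i = 16; i < 64; i += 16)
    {
      unsigned long long chunk = (val >> i) & 0xffff;
      if (chunk != 0)
        movz_cost++;        /* A MOVK is needed after an initial MOVZ.  */
      if (chunk != 0xffff)
        movn_cost++;        /* A MOVK is needed after an initial MOVN.  */
    }
  return movz_cost <= movn_cost ? movz_cost : movn_cost;
}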
2853
2854static void
d9600ae5 2855aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
43e9d192
IB
2856{
2857 HOST_WIDE_INT mdelta = delta;
d9600ae5
SN
2858 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2859 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
43e9d192
IB
2860
2861 if (mdelta < 0)
2862 mdelta = -mdelta;
2863
2864 if (mdelta >= 4096 * 4096)
2865 {
9dfc162c 2866 (void) aarch64_build_constant (scratchreg, delta, true);
d9600ae5 2867 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
43e9d192
IB
2868 }
2869 else if (mdelta > 0)
2870 {
43e9d192 2871 if (mdelta >= 4096)
d9600ae5
SN
2872 {
2873 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2874 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2875 if (delta < 0)
2876 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2877 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2878 else
2879 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2880 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2881 }
43e9d192 2882 if (mdelta % 4096 != 0)
d9600ae5
SN
2883 {
2884 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2885 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2886 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2887 }
43e9d192
IB
2888 }
2889}
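/* For deltas below 4096 * 4096 the code above splits the adjustment into
   a shifted "add #imm, lsl #12" part and a plain 12-bit add; for example
   delta == 5000 becomes an add of 4096 followed by an add of
   5000 % 4096 == 904.  The hypothetical helper below (not in GCC) shows
   only that decomposition.  */

static void
add_constant_split_sketch (long long delta, long long *hi, long long *lo)
{
  long long mdelta = delta < 0 ? -delta : delta;

  *hi = (mdelta / 4096) * 4096;     /* Added via "add ..., lsl #12".  */
  *lo = mdelta % 4096;              /* Added via a plain 12-bit add.  */
  if (delta < 0)
    {
      *hi = -*hi;
      *lo = -*lo;
    }
}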
2890
2891/* Output code to add DELTA to the first argument, and then jump
2892 to FUNCTION. Used for C++ multiple inheritance. */
2893static void
2894aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2895 HOST_WIDE_INT delta,
2896 HOST_WIDE_INT vcall_offset,
2897 tree function)
2898{
2899 /* The this pointer is always in x0. Note that this differs from
 2900 Arm, where the this pointer may be bumped to r1 if r0 is required
2901 to return a pointer to an aggregate. On AArch64 a result value
2902 pointer will be in x8. */
2903 int this_regno = R0_REGNUM;
5d8a22a5
DM
2904 rtx this_rtx, temp0, temp1, addr, funexp;
2905 rtx_insn *insn;
43e9d192 2906
75f1d6fc
SN
2907 reload_completed = 1;
2908 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
2909
2910 if (vcall_offset == 0)
d9600ae5 2911 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2912 else
2913 {
28514dda 2914 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 2915
75f1d6fc
SN
2916 this_rtx = gen_rtx_REG (Pmode, this_regno);
2917 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2918 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 2919
75f1d6fc
SN
2920 addr = this_rtx;
2921 if (delta != 0)
2922 {
2923 if (delta >= -256 && delta < 256)
2924 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2925 plus_constant (Pmode, this_rtx, delta));
2926 else
d9600ae5 2927 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2928 }
2929
28514dda
YZ
2930 if (Pmode == ptr_mode)
2931 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2932 else
2933 aarch64_emit_move (temp0,
2934 gen_rtx_ZERO_EXTEND (Pmode,
2935 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 2936
28514dda 2937 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 2938 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
2939 else
2940 {
9dfc162c 2941 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
75f1d6fc 2942 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
2943 }
2944
28514dda
YZ
2945 if (Pmode == ptr_mode)
2946 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2947 else
2948 aarch64_emit_move (temp1,
2949 gen_rtx_SIGN_EXTEND (Pmode,
2950 gen_rtx_MEM (ptr_mode, addr)));
2951
75f1d6fc 2952 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
2953 }
2954
75f1d6fc
SN
2955 /* Generate a tail call to the target function. */
2956 if (!TREE_USED (function))
2957 {
2958 assemble_external (function);
2959 TREE_USED (function) = 1;
2960 }
2961 funexp = XEXP (DECL_RTL (function), 0);
2962 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2963 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2964 SIBLING_CALL_P (insn) = 1;
2965
2966 insn = get_insns ();
2967 shorten_branches (insn);
2968 final_start_function (insn, file, 1);
2969 final (insn, file, 1);
43e9d192 2970 final_end_function ();
75f1d6fc
SN
2971
2972 /* Stop pretending to be a post-reload pass. */
2973 reload_completed = 0;
43e9d192
IB
2974}
2975
43e9d192
IB
2976static bool
2977aarch64_tls_referenced_p (rtx x)
2978{
2979 if (!TARGET_HAVE_TLS)
2980 return false;
e7de8563
RS
2981 subrtx_iterator::array_type array;
2982 FOR_EACH_SUBRTX (iter, array, x, ALL)
2983 {
2984 const_rtx x = *iter;
2985 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
2986 return true;
2987 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2988 TLS offsets, not real symbol references. */
2989 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
2990 iter.skip_subrtxes ();
2991 }
2992 return false;
43e9d192
IB
2993}
2994
2995
2996static int
2997aarch64_bitmasks_cmp (const void *i1, const void *i2)
2998{
2999 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
3000 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
3001
3002 if (*imm1 < *imm2)
3003 return -1;
3004 if (*imm1 > *imm2)
3005 return +1;
3006 return 0;
3007}
3008
3009
3010static void
3011aarch64_build_bitmask_table (void)
3012{
3013 unsigned HOST_WIDE_INT mask, imm;
3014 unsigned int log_e, e, s, r;
3015 unsigned int nimms = 0;
3016
3017 for (log_e = 1; log_e <= 6; log_e++)
3018 {
3019 e = 1 << log_e;
3020 if (e == 64)
3021 mask = ~(HOST_WIDE_INT) 0;
3022 else
3023 mask = ((HOST_WIDE_INT) 1 << e) - 1;
3024 for (s = 1; s < e; s++)
3025 {
3026 for (r = 0; r < e; r++)
3027 {
3028 /* set s consecutive bits to 1 (s < 64) */
3029 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
3030 /* rotate right by r */
3031 if (r != 0)
3032 imm = ((imm >> r) | (imm << (e - r))) & mask;
3033 /* replicate the constant depending on SIMD size */
3034 switch (log_e) {
3035 case 1: imm |= (imm << 2);
3036 case 2: imm |= (imm << 4);
3037 case 3: imm |= (imm << 8);
3038 case 4: imm |= (imm << 16);
3039 case 5: imm |= (imm << 32);
3040 case 6:
3041 break;
3042 default:
3043 gcc_unreachable ();
3044 }
3045 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
3046 aarch64_bitmasks[nimms++] = imm;
3047 }
3048 }
3049 }
3050
3051 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
3052 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
3053 aarch64_bitmasks_cmp);
3054}
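/* Worked example of one table entry: with element size e == 8, s == 3
   consecutive ones and rotation r == 1, the loops above form 0b00000111,
   rotate it right by one bit to 0b10000011 (0x83) and replicate the byte
   across the register, giving the bitmask immediate 0x8383838383838383.
   The helper below is a hypothetical restatement for 8-bit elements only
   (0 < s < 8) and is not part of GCC.  */

static unsigned long long
bitmask_imm_example (unsigned int s, unsigned int r)
{
  const unsigned int e = 8;                     /* Element size for the example.  */
  unsigned long long imm = (1ull << s) - 1;     /* s consecutive ones.  */

  if (r != 0)
    imm = ((imm >> r) | (imm << (e - r))) & 0xff;   /* Rotate right by r.  */
  imm |= imm << 8;                              /* Replicate to 16, 32, 64 bits.  */
  imm |= imm << 16;
  imm |= imm << 32;
  return imm;
}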
3055
3056
3057/* Return true if val can be encoded as a 12-bit unsigned immediate with
3058 a left shift of 0 or 12 bits. */
3059bool
3060aarch64_uimm12_shift (HOST_WIDE_INT val)
3061{
3062 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3063 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3064 );
3065}
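/* Examples for the predicate above: 0xfff is accepted with shift 0 and
   0x123000 with shift 12, while 0x1001 is rejected because its set bits
   straddle the two encodable positions.  The hypothetical restatement
   below (not in GCC) makes the two masks explicit.  */

static int
uimm12_shift_sketch (unsigned long long val)
{
  return (val & 0xfffull) == val         /* 12-bit immediate, LSL #0.  */
         || (val & 0xfff000ull) == val;  /* 12-bit immediate, LSL #12.  */
}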
3066
3067
3068/* Return true if val is an immediate that can be loaded into a
3069 register by a MOVZ instruction. */
3070static bool
ef4bddc2 3071aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3072{
3073 if (GET_MODE_SIZE (mode) > 4)
3074 {
3075 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3076 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3077 return 1;
3078 }
3079 else
3080 {
3081 /* Ignore sign extension. */
3082 val &= (HOST_WIDE_INT) 0xffffffff;
3083 }
3084 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3085 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3086}
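/* Illustration for the DImode case above: a value is MOVZ-loadable when
   all of its set bits fall within a single 16-bit chunk, e.g. 0x12340000
   or 0xabcd000000000000, whereas 0x12345678 spans two chunks and is not.
   The hypothetical helper below (not in GCC) checks the DImode case only;
   callers such as aarch64_move_imm below also try the complemented value
   to cover MOVN.  */

static int
movw_imm_di_sketch (unsigned long long val)
{
  int shift;

  for (shift = 0; shift < 64; shift += 16)
    if ((val & (0xffffull << shift)) == val)
      return 1;                 /* All set bits sit in one 16-bit chunk.  */
  return 0;
}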
3087
3088
3089/* Return true if val is a valid bitmask immediate. */
3090bool
ef4bddc2 3091aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3092{
3093 if (GET_MODE_SIZE (mode) < 8)
3094 {
3095 /* Replicate bit pattern. */
3096 val &= (HOST_WIDE_INT) 0xffffffff;
3097 val |= val << 32;
3098 }
3099 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
3100 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
3101}
3102
3103
3104/* Return true if val is an immediate that can be loaded into a
3105 register in a single instruction. */
3106bool
ef4bddc2 3107aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3108{
3109 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
3110 return 1;
3111 return aarch64_bitmask_imm (val, mode);
3112}
3113
3114static bool
ef4bddc2 3115aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
3116{
3117 rtx base, offset;
7eda14e1 3118
43e9d192
IB
3119 if (GET_CODE (x) == HIGH)
3120 return true;
3121
3122 split_const (x, &base, &offset);
3123 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 3124 {
f8b756b7 3125 if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
28514dda
YZ
3126 != SYMBOL_FORCE_TO_MEM)
3127 return true;
3128 else
3129 /* Avoid generating a 64-bit relocation in ILP32; leave
3130 to aarch64_expand_mov_immediate to handle it properly. */
3131 return mode != ptr_mode;
3132 }
43e9d192
IB
3133
3134 return aarch64_tls_referenced_p (x);
3135}
3136
3137/* Return true if register REGNO is a valid index register.
3138 STRICT_P is true if REG_OK_STRICT is in effect. */
3139
3140bool
3141aarch64_regno_ok_for_index_p (int regno, bool strict_p)
3142{
3143 if (!HARD_REGISTER_NUM_P (regno))
3144 {
3145 if (!strict_p)
3146 return true;
3147
3148 if (!reg_renumber)
3149 return false;
3150
3151 regno = reg_renumber[regno];
3152 }
3153 return GP_REGNUM_P (regno);
3154}
3155
3156/* Return true if register REGNO is a valid base register for mode MODE.
3157 STRICT_P is true if REG_OK_STRICT is in effect. */
3158
3159bool
3160aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3161{
3162 if (!HARD_REGISTER_NUM_P (regno))
3163 {
3164 if (!strict_p)
3165 return true;
3166
3167 if (!reg_renumber)
3168 return false;
3169
3170 regno = reg_renumber[regno];
3171 }
3172
3173 /* The fake registers will be eliminated to either the stack or
3174 hard frame pointer, both of which are usually valid base registers.
3175 Reload deals with the cases where the eliminated form isn't valid. */
3176 return (GP_REGNUM_P (regno)
3177 || regno == SP_REGNUM
3178 || regno == FRAME_POINTER_REGNUM
3179 || regno == ARG_POINTER_REGNUM);
3180}
3181
3182/* Return true if X is a valid base register for mode MODE.
3183 STRICT_P is true if REG_OK_STRICT is in effect. */
3184
3185static bool
3186aarch64_base_register_rtx_p (rtx x, bool strict_p)
3187{
3188 if (!strict_p && GET_CODE (x) == SUBREG)
3189 x = SUBREG_REG (x);
3190
3191 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3192}
3193
3194/* Return true if address offset is a valid index. If it is, fill in INFO
3195 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3196
3197static bool
3198aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 3199 machine_mode mode, bool strict_p)
43e9d192
IB
3200{
3201 enum aarch64_address_type type;
3202 rtx index;
3203 int shift;
3204
3205 /* (reg:P) */
3206 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3207 && GET_MODE (x) == Pmode)
3208 {
3209 type = ADDRESS_REG_REG;
3210 index = x;
3211 shift = 0;
3212 }
3213 /* (sign_extend:DI (reg:SI)) */
3214 else if ((GET_CODE (x) == SIGN_EXTEND
3215 || GET_CODE (x) == ZERO_EXTEND)
3216 && GET_MODE (x) == DImode
3217 && GET_MODE (XEXP (x, 0)) == SImode)
3218 {
3219 type = (GET_CODE (x) == SIGN_EXTEND)
3220 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3221 index = XEXP (x, 0);
3222 shift = 0;
3223 }
3224 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3225 else if (GET_CODE (x) == MULT
3226 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3227 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3228 && GET_MODE (XEXP (x, 0)) == DImode
3229 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3230 && CONST_INT_P (XEXP (x, 1)))
3231 {
3232 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3233 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3234 index = XEXP (XEXP (x, 0), 0);
3235 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3236 }
3237 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3238 else if (GET_CODE (x) == ASHIFT
3239 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3240 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3241 && GET_MODE (XEXP (x, 0)) == DImode
3242 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3243 && CONST_INT_P (XEXP (x, 1)))
3244 {
3245 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3246 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3247 index = XEXP (XEXP (x, 0), 0);
3248 shift = INTVAL (XEXP (x, 1));
3249 }
3250 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3251 else if ((GET_CODE (x) == SIGN_EXTRACT
3252 || GET_CODE (x) == ZERO_EXTRACT)
3253 && GET_MODE (x) == DImode
3254 && GET_CODE (XEXP (x, 0)) == MULT
3255 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3256 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3257 {
3258 type = (GET_CODE (x) == SIGN_EXTRACT)
3259 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3260 index = XEXP (XEXP (x, 0), 0);
3261 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3262 if (INTVAL (XEXP (x, 1)) != 32 + shift
3263 || INTVAL (XEXP (x, 2)) != 0)
3264 shift = -1;
3265 }
3266 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3267 (const_int 0xffffffff<<shift)) */
3268 else if (GET_CODE (x) == AND
3269 && GET_MODE (x) == DImode
3270 && GET_CODE (XEXP (x, 0)) == MULT
3271 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3272 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3273 && CONST_INT_P (XEXP (x, 1)))
3274 {
3275 type = ADDRESS_REG_UXTW;
3276 index = XEXP (XEXP (x, 0), 0);
3277 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3278 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3279 shift = -1;
3280 }
3281 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3282 else if ((GET_CODE (x) == SIGN_EXTRACT
3283 || GET_CODE (x) == ZERO_EXTRACT)
3284 && GET_MODE (x) == DImode
3285 && GET_CODE (XEXP (x, 0)) == ASHIFT
3286 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3287 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3288 {
3289 type = (GET_CODE (x) == SIGN_EXTRACT)
3290 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3291 index = XEXP (XEXP (x, 0), 0);
3292 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3293 if (INTVAL (XEXP (x, 1)) != 32 + shift
3294 || INTVAL (XEXP (x, 2)) != 0)
3295 shift = -1;
3296 }
3297 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3298 (const_int 0xffffffff<<shift)) */
3299 else if (GET_CODE (x) == AND
3300 && GET_MODE (x) == DImode
3301 && GET_CODE (XEXP (x, 0)) == ASHIFT
3302 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3303 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3304 && CONST_INT_P (XEXP (x, 1)))
3305 {
3306 type = ADDRESS_REG_UXTW;
3307 index = XEXP (XEXP (x, 0), 0);
3308 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3309 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3310 shift = -1;
3311 }
3312 /* (mult:P (reg:P) (const_int scale)) */
3313 else if (GET_CODE (x) == MULT
3314 && GET_MODE (x) == Pmode
3315 && GET_MODE (XEXP (x, 0)) == Pmode
3316 && CONST_INT_P (XEXP (x, 1)))
3317 {
3318 type = ADDRESS_REG_REG;
3319 index = XEXP (x, 0);
3320 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3321 }
3322 /* (ashift:P (reg:P) (const_int shift)) */
3323 else if (GET_CODE (x) == ASHIFT
3324 && GET_MODE (x) == Pmode
3325 && GET_MODE (XEXP (x, 0)) == Pmode
3326 && CONST_INT_P (XEXP (x, 1)))
3327 {
3328 type = ADDRESS_REG_REG;
3329 index = XEXP (x, 0);
3330 shift = INTVAL (XEXP (x, 1));
3331 }
3332 else
3333 return false;
3334
3335 if (GET_CODE (index) == SUBREG)
3336 index = SUBREG_REG (index);
3337
3338 if ((shift == 0 ||
3339 (shift > 0 && shift <= 3
3340 && (1 << shift) == GET_MODE_SIZE (mode)))
3341 && REG_P (index)
3342 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3343 {
3344 info->type = type;
3345 info->offset = index;
3346 info->shift = shift;
3347 return true;
3348 }
3349
3350 return false;
3351}
3352
44707478 3353bool
ef4bddc2 3354aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3355{
3356 return (offset >= -64 * GET_MODE_SIZE (mode)
3357 && offset < 64 * GET_MODE_SIZE (mode)
3358 && offset % GET_MODE_SIZE (mode) == 0);
3359}
3360
3361static inline bool
ef4bddc2 3362offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
3363 HOST_WIDE_INT offset)
3364{
3365 return offset >= -256 && offset < 256;
3366}
3367
3368static inline bool
ef4bddc2 3369offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3370{
3371 return (offset >= 0
3372 && offset < 4096 * GET_MODE_SIZE (mode)
3373 && offset % GET_MODE_SIZE (mode) == 0);
3374}
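/* Worked DImode example for the three predicates above: 8-byte accesses
   accept 7-bit signed scaled offsets of -512 .. 504 in steps of 8
   (ldp/stp), 9-bit signed unscaled offsets of -256 .. 255 (ldur/stur) and
   12-bit unsigned scaled offsets of 0 .. 32760 in steps of 8 (ldr/str).
   The hypothetical helper below (not in GCC) just evaluates the DImode
   bounds explicitly.  */

static void
dimode_offset_bounds_sketch (long long *min7, long long *max7, long long *max12)
{
  const long long size = 8;         /* GET_MODE_SIZE (DImode).  */

  *min7 = -64 * size;               /* 7-bit scaled minimum:   -512.  */
  *max7 = 64 * size - size;         /* 7-bit scaled maximum:    504.  */
  *max12 = 4096 * size - size;      /* 12-bit scaled maximum: 32760.  */
}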
3375
3376/* Return true if X is a valid address for machine mode MODE. If it is,
3377 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3378 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3379
3380static bool
3381aarch64_classify_address (struct aarch64_address_info *info,
ef4bddc2 3382 rtx x, machine_mode mode,
43e9d192
IB
3383 RTX_CODE outer_code, bool strict_p)
3384{
3385 enum rtx_code code = GET_CODE (x);
3386 rtx op0, op1;
2d8c6dc1
AH
3387
3388 /* On BE, we use load/store pair for all large int mode load/stores. */
3389 bool load_store_pair_p = (outer_code == PARALLEL
3390 || (BYTES_BIG_ENDIAN
3391 && aarch64_vect_struct_mode_p (mode)));
3392
43e9d192 3393 bool allow_reg_index_p =
2d8c6dc1
AH
3394 !load_store_pair_p
3395 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
3396 && !aarch64_vect_struct_mode_p (mode);
3397
3398 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
3399 REG addressing. */
3400 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
43e9d192
IB
3401 && (code != POST_INC && code != REG))
3402 return false;
3403
3404 switch (code)
3405 {
3406 case REG:
3407 case SUBREG:
3408 info->type = ADDRESS_REG_IMM;
3409 info->base = x;
3410 info->offset = const0_rtx;
3411 return aarch64_base_register_rtx_p (x, strict_p);
3412
3413 case PLUS:
3414 op0 = XEXP (x, 0);
3415 op1 = XEXP (x, 1);
15c0c5c9
JW
3416
3417 if (! strict_p
4aa81c2e 3418 && REG_P (op0)
15c0c5c9
JW
3419 && (op0 == virtual_stack_vars_rtx
3420 || op0 == frame_pointer_rtx
3421 || op0 == arg_pointer_rtx)
4aa81c2e 3422 && CONST_INT_P (op1))
15c0c5c9
JW
3423 {
3424 info->type = ADDRESS_REG_IMM;
3425 info->base = op0;
3426 info->offset = op1;
3427
3428 return true;
3429 }
3430
43e9d192
IB
3431 if (GET_MODE_SIZE (mode) != 0
3432 && CONST_INT_P (op1)
3433 && aarch64_base_register_rtx_p (op0, strict_p))
3434 {
3435 HOST_WIDE_INT offset = INTVAL (op1);
3436
3437 info->type = ADDRESS_REG_IMM;
3438 info->base = op0;
3439 info->offset = op1;
3440
3441 /* TImode and TFmode values are allowed in both pairs of X
3442 registers and individual Q registers. The available
3443 address modes are:
3444 X,X: 7-bit signed scaled offset
3445 Q: 9-bit signed offset
3446 We conservatively require an offset representable in either mode.
3447 */
3448 if (mode == TImode || mode == TFmode)
44707478 3449 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3450 && offset_9bit_signed_unscaled_p (mode, offset));
3451
2d8c6dc1
AH
 3452 /* A 7-bit offset check because OImode will emit an ldp/stp
3453 instruction (only big endian will get here).
3454 For ldp/stp instructions, the offset is scaled for the size of a
3455 single element of the pair. */
3456 if (mode == OImode)
3457 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
3458
 3459 /* Three 9/12-bit offset checks because CImode will emit three
3460 ldr/str instructions (only big endian will get here). */
3461 if (mode == CImode)
3462 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3463 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
3464 || offset_12bit_unsigned_scaled_p (V16QImode,
3465 offset + 32)));
3466
 3467 /* Two 7-bit offset checks because XImode will emit two ldp/stp
3468 instructions (only big endian will get here). */
3469 if (mode == XImode)
3470 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3471 && aarch64_offset_7bit_signed_scaled_p (TImode,
3472 offset + 32));
3473
3474 if (load_store_pair_p)
43e9d192 3475 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3476 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3477 else
3478 return (offset_9bit_signed_unscaled_p (mode, offset)
3479 || offset_12bit_unsigned_scaled_p (mode, offset));
3480 }
3481
3482 if (allow_reg_index_p)
3483 {
3484 /* Look for base + (scaled/extended) index register. */
3485 if (aarch64_base_register_rtx_p (op0, strict_p)
3486 && aarch64_classify_index (info, op1, mode, strict_p))
3487 {
3488 info->base = op0;
3489 return true;
3490 }
3491 if (aarch64_base_register_rtx_p (op1, strict_p)
3492 && aarch64_classify_index (info, op0, mode, strict_p))
3493 {
3494 info->base = op1;
3495 return true;
3496 }
3497 }
3498
3499 return false;
3500
3501 case POST_INC:
3502 case POST_DEC:
3503 case PRE_INC:
3504 case PRE_DEC:
3505 info->type = ADDRESS_REG_WB;
3506 info->base = XEXP (x, 0);
3507 info->offset = NULL_RTX;
3508 return aarch64_base_register_rtx_p (info->base, strict_p);
3509
3510 case POST_MODIFY:
3511 case PRE_MODIFY:
3512 info->type = ADDRESS_REG_WB;
3513 info->base = XEXP (x, 0);
3514 if (GET_CODE (XEXP (x, 1)) == PLUS
3515 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3516 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3517 && aarch64_base_register_rtx_p (info->base, strict_p))
3518 {
3519 HOST_WIDE_INT offset;
3520 info->offset = XEXP (XEXP (x, 1), 1);
3521 offset = INTVAL (info->offset);
3522
3523 /* TImode and TFmode values are allowed in both pairs of X
3524 registers and individual Q registers. The available
3525 address modes are:
3526 X,X: 7-bit signed scaled offset
3527 Q: 9-bit signed offset
3528 We conservatively require an offset representable in either mode.
3529 */
3530 if (mode == TImode || mode == TFmode)
44707478 3531 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3532 && offset_9bit_signed_unscaled_p (mode, offset));
3533
2d8c6dc1 3534 if (load_store_pair_p)
43e9d192 3535 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3536 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3537 else
3538 return offset_9bit_signed_unscaled_p (mode, offset);
3539 }
3540 return false;
3541
3542 case CONST:
3543 case SYMBOL_REF:
3544 case LABEL_REF:
79517551
SN
3545 /* load literal: pc-relative constant pool entry. Only supported
3546 for SI mode or larger. */
43e9d192 3547 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1
AH
3548
3549 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
3550 {
3551 rtx sym, addend;
3552
3553 split_const (x, &sym, &addend);
3554 return (GET_CODE (sym) == LABEL_REF
3555 || (GET_CODE (sym) == SYMBOL_REF
3556 && CONSTANT_POOL_ADDRESS_P (sym)));
3557 }
3558 return false;
3559
3560 case LO_SUM:
3561 info->type = ADDRESS_LO_SUM;
3562 info->base = XEXP (x, 0);
3563 info->offset = XEXP (x, 1);
3564 if (allow_reg_index_p
3565 && aarch64_base_register_rtx_p (info->base, strict_p))
3566 {
3567 rtx sym, offs;
3568 split_const (info->offset, &sym, &offs);
3569 if (GET_CODE (sym) == SYMBOL_REF
f8b756b7 3570 && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
43e9d192
IB
3571 == SYMBOL_SMALL_ABSOLUTE))
3572 {
3573 /* The symbol and offset must be aligned to the access size. */
3574 unsigned int align;
3575 unsigned int ref_size;
3576
3577 if (CONSTANT_POOL_ADDRESS_P (sym))
3578 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3579 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3580 {
3581 tree exp = SYMBOL_REF_DECL (sym);
3582 align = TYPE_ALIGN (TREE_TYPE (exp));
3583 align = CONSTANT_ALIGNMENT (exp, align);
3584 }
3585 else if (SYMBOL_REF_DECL (sym))
3586 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
3587 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3588 && SYMBOL_REF_BLOCK (sym) != NULL)
3589 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
3590 else
3591 align = BITS_PER_UNIT;
3592
3593 ref_size = GET_MODE_SIZE (mode);
3594 if (ref_size == 0)
3595 ref_size = GET_MODE_SIZE (DImode);
3596
3597 return ((INTVAL (offs) & (ref_size - 1)) == 0
3598 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3599 }
3600 }
3601 return false;
3602
3603 default:
3604 return false;
3605 }
3606}
3607
3608bool
3609aarch64_symbolic_address_p (rtx x)
3610{
3611 rtx offset;
3612
3613 split_const (x, &x, &offset);
3614 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3615}
3616
3617/* Classify the base of symbolic expression X, given that X appears in
3618 context CONTEXT. */
da4f13a4
MS
3619
3620enum aarch64_symbol_type
3621aarch64_classify_symbolic_expression (rtx x,
3622 enum aarch64_symbol_context context)
43e9d192
IB
3623{
3624 rtx offset;
da4f13a4 3625
43e9d192 3626 split_const (x, &x, &offset);
f8b756b7 3627 return aarch64_classify_symbol (x, offset, context);
43e9d192
IB
3628}
3629
3630
3631/* Return TRUE if X is a legitimate address for accessing memory in
3632 mode MODE. */
3633static bool
ef4bddc2 3634aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
3635{
3636 struct aarch64_address_info addr;
3637
3638 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3639}
3640
3641/* Return TRUE if X is a legitimate address for accessing memory in
3642 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3643 pair operation. */
3644bool
ef4bddc2 3645aarch64_legitimate_address_p (machine_mode mode, rtx x,
aef66c94 3646 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
3647{
3648 struct aarch64_address_info addr;
3649
3650 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3651}
3652
3653/* Return TRUE if rtx X is immediate constant 0.0 */
3654bool
3520f7cc 3655aarch64_float_const_zero_rtx_p (rtx x)
43e9d192
IB
3656{
3657 REAL_VALUE_TYPE r;
3658
3659 if (GET_MODE (x) == VOIDmode)
3660 return false;
3661
3662 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3663 if (REAL_VALUE_MINUS_ZERO (r))
3664 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3665 return REAL_VALUES_EQUAL (r, dconst0);
3666}
3667
70f09188
AP
3668/* Return the fixed registers used for condition codes. */
3669
3670static bool
3671aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3672{
3673 *p1 = CC_REGNUM;
3674 *p2 = INVALID_REGNUM;
3675 return true;
3676}
3677
78607708
TV
3678/* Emit call insn with PAT and do aarch64-specific handling. */
3679
d07a3fed 3680void
78607708
TV
3681aarch64_emit_call_insn (rtx pat)
3682{
3683 rtx insn = emit_call_insn (pat);
3684
3685 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3686 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3687 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3688}
3689
ef4bddc2 3690machine_mode
43e9d192
IB
3691aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3692{
3693 /* All floating point compares return CCFP if it is an equality
3694 comparison, and CCFPE otherwise. */
3695 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3696 {
3697 switch (code)
3698 {
3699 case EQ:
3700 case NE:
3701 case UNORDERED:
3702 case ORDERED:
3703 case UNLT:
3704 case UNLE:
3705 case UNGT:
3706 case UNGE:
3707 case UNEQ:
3708 case LTGT:
3709 return CCFPmode;
3710
3711 case LT:
3712 case LE:
3713 case GT:
3714 case GE:
3715 return CCFPEmode;
3716
3717 default:
3718 gcc_unreachable ();
3719 }
3720 }
3721
3722 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3723 && y == const0_rtx
3724 && (code == EQ || code == NE || code == LT || code == GE)
b056c910
N
3725 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3726 || GET_CODE (x) == NEG))
43e9d192
IB
3727 return CC_NZmode;
3728
1c992d1e 3729 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
3730 the comparison will have to be swapped when we emit the assembly
3731 code. */
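  /* For instance, a comparison such as (compare (ashift x 2) y) is
     typically output as "cmp y, x, lsl #2", so the consumer must use the
     swapped condition (GT becomes LT, and so on).  */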
3732 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 3733 && (REG_P (y) || GET_CODE (y) == SUBREG)
43e9d192
IB
3734 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3735 || GET_CODE (x) == LSHIFTRT
1c992d1e 3736 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
3737 return CC_SWPmode;
3738
1c992d1e
RE
3739 /* Similarly for a negated operand, but we can only do this for
3740 equalities. */
3741 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 3742 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
3743 && (code == EQ || code == NE)
3744 && GET_CODE (x) == NEG)
3745 return CC_Zmode;
3746
43e9d192
IB
3747 /* A compare of a mode narrower than SI mode against zero can be done
3748 by extending the value in the comparison. */
3749 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3750 && y == const0_rtx)
3751 /* Only use sign-extension if we really need it. */
3752 return ((code == GT || code == GE || code == LE || code == LT)
3753 ? CC_SESWPmode : CC_ZESWPmode);
3754
3755 /* For everything else, return CCmode. */
3756 return CCmode;
3757}
3758
3dfa7055
ZC
3759static int
3760aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
3761
cd5660ab 3762int
43e9d192
IB
3763aarch64_get_condition_code (rtx x)
3764{
ef4bddc2 3765 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
3766 enum rtx_code comp_code = GET_CODE (x);
3767
3768 if (GET_MODE_CLASS (mode) != MODE_CC)
3769 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
3770 return aarch64_get_condition_code_1 (mode, comp_code);
3771}
43e9d192 3772
3dfa7055
ZC
3773static int
3774aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
3775{
3776 int ne = -1, eq = -1;
43e9d192
IB
3777 switch (mode)
3778 {
3779 case CCFPmode:
3780 case CCFPEmode:
3781 switch (comp_code)
3782 {
3783 case GE: return AARCH64_GE;
3784 case GT: return AARCH64_GT;
3785 case LE: return AARCH64_LS;
3786 case LT: return AARCH64_MI;
3787 case NE: return AARCH64_NE;
3788 case EQ: return AARCH64_EQ;
3789 case ORDERED: return AARCH64_VC;
3790 case UNORDERED: return AARCH64_VS;
3791 case UNLT: return AARCH64_LT;
3792 case UNLE: return AARCH64_LE;
3793 case UNGT: return AARCH64_HI;
3794 case UNGE: return AARCH64_PL;
cd5660ab 3795 default: return -1;
43e9d192
IB
3796 }
3797 break;
3798
3dfa7055
ZC
3799 case CC_DNEmode:
3800 ne = AARCH64_NE;
3801 eq = AARCH64_EQ;
3802 break;
3803
3804 case CC_DEQmode:
3805 ne = AARCH64_EQ;
3806 eq = AARCH64_NE;
3807 break;
3808
3809 case CC_DGEmode:
3810 ne = AARCH64_GE;
3811 eq = AARCH64_LT;
3812 break;
3813
3814 case CC_DLTmode:
3815 ne = AARCH64_LT;
3816 eq = AARCH64_GE;
3817 break;
3818
3819 case CC_DGTmode:
3820 ne = AARCH64_GT;
3821 eq = AARCH64_LE;
3822 break;
3823
3824 case CC_DLEmode:
3825 ne = AARCH64_LE;
3826 eq = AARCH64_GT;
3827 break;
3828
3829 case CC_DGEUmode:
3830 ne = AARCH64_CS;
3831 eq = AARCH64_CC;
3832 break;
3833
3834 case CC_DLTUmode:
3835 ne = AARCH64_CC;
3836 eq = AARCH64_CS;
3837 break;
3838
3839 case CC_DGTUmode:
3840 ne = AARCH64_HI;
3841 eq = AARCH64_LS;
3842 break;
3843
3844 case CC_DLEUmode:
3845 ne = AARCH64_LS;
3846 eq = AARCH64_HI;
3847 break;
3848
43e9d192
IB
3849 case CCmode:
3850 switch (comp_code)
3851 {
3852 case NE: return AARCH64_NE;
3853 case EQ: return AARCH64_EQ;
3854 case GE: return AARCH64_GE;
3855 case GT: return AARCH64_GT;
3856 case LE: return AARCH64_LE;
3857 case LT: return AARCH64_LT;
3858 case GEU: return AARCH64_CS;
3859 case GTU: return AARCH64_HI;
3860 case LEU: return AARCH64_LS;
3861 case LTU: return AARCH64_CC;
cd5660ab 3862 default: return -1;
43e9d192
IB
3863 }
3864 break;
3865
3866 case CC_SWPmode:
3867 case CC_ZESWPmode:
3868 case CC_SESWPmode:
3869 switch (comp_code)
3870 {
3871 case NE: return AARCH64_NE;
3872 case EQ: return AARCH64_EQ;
3873 case GE: return AARCH64_LE;
3874 case GT: return AARCH64_LT;
3875 case LE: return AARCH64_GE;
3876 case LT: return AARCH64_GT;
3877 case GEU: return AARCH64_LS;
3878 case GTU: return AARCH64_CC;
3879 case LEU: return AARCH64_CS;
3880 case LTU: return AARCH64_HI;
cd5660ab 3881 default: return -1;
43e9d192
IB
3882 }
3883 break;
3884
3885 case CC_NZmode:
3886 switch (comp_code)
3887 {
3888 case NE: return AARCH64_NE;
3889 case EQ: return AARCH64_EQ;
3890 case GE: return AARCH64_PL;
3891 case LT: return AARCH64_MI;
cd5660ab 3892 default: return -1;
43e9d192
IB
3893 }
3894 break;
3895
1c992d1e
RE
3896 case CC_Zmode:
3897 switch (comp_code)
3898 {
3899 case NE: return AARCH64_NE;
3900 case EQ: return AARCH64_EQ;
cd5660ab 3901 default: return -1;
1c992d1e
RE
3902 }
3903 break;
3904
43e9d192 3905 default:
cd5660ab 3906 return -1;
43e9d192
IB
3907 break;
3908 }
3dfa7055
ZC
3909
3910 if (comp_code == NE)
3911 return ne;
3912
3913 if (comp_code == EQ)
3914 return eq;
3915
3916 return -1;
43e9d192
IB
3917}
3918
ddeabd3e
AL
3919bool
3920aarch64_const_vec_all_same_in_range_p (rtx x,
3921 HOST_WIDE_INT minval,
3922 HOST_WIDE_INT maxval)
3923{
3924 HOST_WIDE_INT firstval;
3925 int count, i;
3926
3927 if (GET_CODE (x) != CONST_VECTOR
3928 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
3929 return false;
3930
3931 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
3932 if (firstval < minval || firstval > maxval)
3933 return false;
3934
3935 count = CONST_VECTOR_NUNITS (x);
3936 for (i = 1; i < count; i++)
3937 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
3938 return false;
3939
3940 return true;
3941}
3942
3943bool
3944aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
3945{
3946 return aarch64_const_vec_all_same_in_range_p (x, val, val);
3947}
3948
43e9d192
IB
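/* Return the number of set bits in VALUE.  Each iteration of the loop
   below clears the lowest set bit (value &= value - 1), so for example
   bit_count (0xf0) is 4.  */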
3949static unsigned
3950bit_count (unsigned HOST_WIDE_INT value)
3951{
3952 unsigned count = 0;
3953
3954 while (value)
3955 {
3956 count++;
3957 value &= value - 1;
3958 }
3959
3960 return count;
3961}
3962
cf670503
ZC
3963/* N Z C V. */
3964#define AARCH64_CC_V 1
3965#define AARCH64_CC_C (1 << 1)
3966#define AARCH64_CC_Z (1 << 2)
3967#define AARCH64_CC_N (1 << 3)
3968
3969/* N Z C V flags for ccmp. The first code is for AND op and the other
3970 is for IOR op. Indexed by AARCH64_COND_CODE. */
3971static const int aarch64_nzcv_codes[][2] =
3972{
3973 {AARCH64_CC_Z, 0}, /* EQ, Z == 1. */
3974 {0, AARCH64_CC_Z}, /* NE, Z == 0. */
3975 {AARCH64_CC_C, 0}, /* CS, C == 1. */
3976 {0, AARCH64_CC_C}, /* CC, C == 0. */
3977 {AARCH64_CC_N, 0}, /* MI, N == 1. */
3978 {0, AARCH64_CC_N}, /* PL, N == 0. */
3979 {AARCH64_CC_V, 0}, /* VS, V == 1. */
3980 {0, AARCH64_CC_V}, /* VC, V == 0. */
3981  {AARCH64_CC_C, 0}, /* HI, C == 1 && Z == 0. */
3982 {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0). */
3983 {0, AARCH64_CC_V}, /* GE, N == V. */
3984 {AARCH64_CC_V, 0}, /* LT, N != V. */
3985 {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V. */
3986 {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V). */
3987 {0, 0}, /* AL, Any. */
3988 {0, 0}, /* NV, Any. */
3989};
3990
3991int
3992aarch64_ccmp_mode_to_code (enum machine_mode mode)
3993{
3994 switch (mode)
3995 {
3996 case CC_DNEmode:
3997 return NE;
3998
3999 case CC_DEQmode:
4000 return EQ;
4001
4002 case CC_DLEmode:
4003 return LE;
4004
4005 case CC_DGTmode:
4006 return GT;
4007
4008 case CC_DLTmode:
4009 return LT;
4010
4011 case CC_DGEmode:
4012 return GE;
4013
4014 case CC_DLEUmode:
4015 return LEU;
4016
4017 case CC_DGTUmode:
4018 return GTU;
4019
4020 case CC_DLTUmode:
4021 return LTU;
4022
4023 case CC_DGEUmode:
4024 return GEU;
4025
4026 default:
4027 gcc_unreachable ();
4028 }
4029}
4030
4031
43e9d192
IB
4032void
4033aarch64_print_operand (FILE *f, rtx x, char code)
4034{
4035 switch (code)
4036 {
f541a481
KT
4037 /* An integer or symbol address without a preceding # sign. */
4038 case 'c':
4039 switch (GET_CODE (x))
4040 {
4041 case CONST_INT:
4042 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4043 break;
4044
4045 case SYMBOL_REF:
4046 output_addr_const (f, x);
4047 break;
4048
4049 case CONST:
4050 if (GET_CODE (XEXP (x, 0)) == PLUS
4051 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4052 {
4053 output_addr_const (f, x);
4054 break;
4055 }
4056 /* Fall through. */
4057
4058 default:
4059 output_operand_lossage ("Unsupported operand for code '%c'", code);
4060 }
4061 break;
4062
43e9d192
IB
4063 case 'e':
4064 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
4065 {
4066 int n;
4067
4aa81c2e 4068 if (!CONST_INT_P (x)
43e9d192
IB
4069 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
4070 {
4071 output_operand_lossage ("invalid operand for '%%%c'", code);
4072 return;
4073 }
4074
4075 switch (n)
4076 {
4077 case 3:
4078 fputc ('b', f);
4079 break;
4080 case 4:
4081 fputc ('h', f);
4082 break;
4083 case 5:
4084 fputc ('w', f);
4085 break;
4086 default:
4087 output_operand_lossage ("invalid operand for '%%%c'", code);
4088 return;
4089 }
4090 }
4091 break;
4092
4093 case 'p':
4094 {
4095 int n;
4096
4097 /* Print N such that 2^N == X. */
4aa81c2e 4098 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
4099 {
4100 output_operand_lossage ("invalid operand for '%%%c'", code);
4101 return;
4102 }
4103
4104 asm_fprintf (f, "%d", n);
4105 }
4106 break;
4107
4108 case 'P':
4109 /* Print the number of non-zero bits in X (a const_int). */
4aa81c2e 4110 if (!CONST_INT_P (x))
43e9d192
IB
4111 {
4112 output_operand_lossage ("invalid operand for '%%%c'", code);
4113 return;
4114 }
4115
4116 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
4117 break;
4118
4119 case 'H':
4120 /* Print the higher numbered register of a pair (TImode) of regs. */
4aa81c2e 4121 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
4122 {
4123 output_operand_lossage ("invalid operand for '%%%c'", code);
4124 return;
4125 }
4126
01a3a324 4127 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
4128 break;
4129
43e9d192 4130 case 'm':
cd5660ab
KT
4131 {
4132 int cond_code;
4133 /* Print a condition (eq, ne, etc). */
43e9d192 4134
cd5660ab
KT
4135 /* CONST_TRUE_RTX means always -- that's the default. */
4136 if (x == const_true_rtx)
43e9d192 4137 return;
43e9d192 4138
cd5660ab
KT
4139 if (!COMPARISON_P (x))
4140 {
4141 output_operand_lossage ("invalid operand for '%%%c'", code);
4142 return;
4143 }
4144
4145 cond_code = aarch64_get_condition_code (x);
4146 gcc_assert (cond_code >= 0);
4147 fputs (aarch64_condition_codes[cond_code], f);
4148 }
43e9d192
IB
4149 break;
4150
4151 case 'M':
cd5660ab
KT
4152 {
4153 int cond_code;
4154 /* Print the inverse of a condition (eq <-> ne, etc). */
43e9d192 4155
cd5660ab
KT
4156 /* CONST_TRUE_RTX means never -- that's the default. */
4157 if (x == const_true_rtx)
4158 {
4159 fputs ("nv", f);
4160 return;
4161 }
43e9d192 4162
cd5660ab
KT
4163 if (!COMPARISON_P (x))
4164 {
4165 output_operand_lossage ("invalid operand for '%%%c'", code);
4166 return;
4167 }
4168 cond_code = aarch64_get_condition_code (x);
4169 gcc_assert (cond_code >= 0);
4170 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
4171 (cond_code)], f);
4172 }
43e9d192
IB
4173 break;
4174
4175 case 'b':
4176 case 'h':
4177 case 's':
4178 case 'd':
4179 case 'q':
4180 /* Print a scalar FP/SIMD register name. */
4181 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4182 {
4183 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4184 return;
4185 }
50ce6f88 4186 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
4187 break;
4188
4189 case 'S':
4190 case 'T':
4191 case 'U':
4192 case 'V':
4193 /* Print the first FP/SIMD register name in a list. */
4194 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4195 {
4196 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4197 return;
4198 }
50ce6f88 4199 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
4200 break;
4201
2d8c6dc1
AH
4202 case 'R':
4203 /* Print a scalar FP/SIMD register name + 1. */
4204 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4205 {
4206 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4207 return;
4208 }
4209 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
4210 break;
4211
a05c0ddf 4212 case 'X':
50d38551 4213 /* Print bottom 16 bits of integer constant in hex. */
4aa81c2e 4214 if (!CONST_INT_P (x))
a05c0ddf
IB
4215 {
4216 output_operand_lossage ("invalid operand for '%%%c'", code);
4217 return;
4218 }
50d38551 4219 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
4220 break;
4221
43e9d192
IB
4222 case 'w':
4223 case 'x':
4224 /* Print a general register name or the zero register (32-bit or
4225 64-bit). */
3520f7cc
JG
4226 if (x == const0_rtx
4227 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 4228 {
50ce6f88 4229 asm_fprintf (f, "%czr", code);
43e9d192
IB
4230 break;
4231 }
4232
4233 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
4234 {
50ce6f88 4235 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
4236 break;
4237 }
4238
4239 if (REG_P (x) && REGNO (x) == SP_REGNUM)
4240 {
50ce6f88 4241 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
4242 break;
4243 }
4244
4245 /* Fall through */
4246
4247 case 0:
4248    /* Print a normal operand.  If it's a general register, then we
4249       assume DImode. */
4250 if (x == NULL)
4251 {
4252 output_operand_lossage ("missing operand");
4253 return;
4254 }
4255
4256 switch (GET_CODE (x))
4257 {
4258 case REG:
01a3a324 4259 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
4260 break;
4261
4262 case MEM:
4263 aarch64_memory_reference_mode = GET_MODE (x);
4264 output_address (XEXP (x, 0));
4265 break;
4266
4267 case LABEL_REF:
4268 case SYMBOL_REF:
4269 output_addr_const (asm_out_file, x);
4270 break;
4271
4272 case CONST_INT:
4273 asm_fprintf (f, "%wd", INTVAL (x));
4274 break;
4275
4276 case CONST_VECTOR:
3520f7cc
JG
4277 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
4278 {
ddeabd3e
AL
4279 gcc_assert (
4280 aarch64_const_vec_all_same_in_range_p (x,
4281 HOST_WIDE_INT_MIN,
4282 HOST_WIDE_INT_MAX));
3520f7cc
JG
4283 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
4284 }
4285 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
4286 {
4287 fputc ('0', f);
4288 }
4289 else
4290 gcc_unreachable ();
43e9d192
IB
4291 break;
4292
3520f7cc
JG
4293 case CONST_DOUBLE:
4294 /* CONST_DOUBLE can represent a double-width integer.
4295 In this case, the mode of x is VOIDmode. */
4296 if (GET_MODE (x) == VOIDmode)
4297 ; /* Do Nothing. */
4298 else if (aarch64_float_const_zero_rtx_p (x))
4299 {
4300 fputc ('0', f);
4301 break;
4302 }
4303 else if (aarch64_float_const_representable_p (x))
4304 {
4305#define buf_size 20
4306 char float_buf[buf_size] = {'\0'};
4307 REAL_VALUE_TYPE r;
4308 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4309 real_to_decimal_for_mode (float_buf, &r,
4310 buf_size, buf_size,
4311 1, GET_MODE (x));
4312 asm_fprintf (asm_out_file, "%s", float_buf);
4313 break;
4314#undef buf_size
4315 }
4316 output_operand_lossage ("invalid constant");
4317 return;
43e9d192
IB
4318 default:
4319 output_operand_lossage ("invalid operand");
4320 return;
4321 }
4322 break;
4323
4324 case 'A':
4325 if (GET_CODE (x) == HIGH)
4326 x = XEXP (x, 0);
4327
4328 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4329 {
4330 case SYMBOL_SMALL_GOT:
4331 asm_fprintf (asm_out_file, ":got:");
4332 break;
4333
4334 case SYMBOL_SMALL_TLSGD:
4335 asm_fprintf (asm_out_file, ":tlsgd:");
4336 break;
4337
4338 case SYMBOL_SMALL_TLSDESC:
4339 asm_fprintf (asm_out_file, ":tlsdesc:");
4340 break;
4341
4342 case SYMBOL_SMALL_GOTTPREL:
4343 asm_fprintf (asm_out_file, ":gottprel:");
4344 break;
4345
4346 case SYMBOL_SMALL_TPREL:
4347 asm_fprintf (asm_out_file, ":tprel:");
4348 break;
4349
87dd8ab0
MS
4350 case SYMBOL_TINY_GOT:
4351 gcc_unreachable ();
4352 break;
4353
43e9d192
IB
4354 default:
4355 break;
4356 }
4357 output_addr_const (asm_out_file, x);
4358 break;
4359
4360 case 'L':
4361 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4362 {
4363 case SYMBOL_SMALL_GOT:
4364 asm_fprintf (asm_out_file, ":lo12:");
4365 break;
4366
4367 case SYMBOL_SMALL_TLSGD:
4368 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4369 break;
4370
4371 case SYMBOL_SMALL_TLSDESC:
4372 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4373 break;
4374
4375 case SYMBOL_SMALL_GOTTPREL:
4376 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4377 break;
4378
4379 case SYMBOL_SMALL_TPREL:
4380 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4381 break;
4382
87dd8ab0
MS
4383 case SYMBOL_TINY_GOT:
4384 asm_fprintf (asm_out_file, ":got:");
4385 break;
4386
43e9d192
IB
4387 default:
4388 break;
4389 }
4390 output_addr_const (asm_out_file, x);
4391 break;
4392
4393 case 'G':
4394
4395 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4396 {
4397 case SYMBOL_SMALL_TPREL:
4398 asm_fprintf (asm_out_file, ":tprel_hi12:");
4399 break;
4400 default:
4401 break;
4402 }
4403 output_addr_const (asm_out_file, x);
4404 break;
4405
cf670503
ZC
4406 case 'K':
4407 {
4408 int cond_code;
4409 /* Print nzcv. */
4410
4411 if (!COMPARISON_P (x))
4412 {
4413 output_operand_lossage ("invalid operand for '%%%c'", code);
4414 return;
4415 }
4416
4417 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4418 gcc_assert (cond_code >= 0);
4419 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
4420 }
4421 break;
4422
4423 case 'k':
4424 {
4425 int cond_code;
4426 /* Print nzcv. */
4427
4428 if (!COMPARISON_P (x))
4429 {
4430 output_operand_lossage ("invalid operand for '%%%c'", code);
4431 return;
4432 }
4433
4434 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4435 gcc_assert (cond_code >= 0);
4436 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
4437 }
4438 break;
4439
43e9d192
IB
4440 default:
4441 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4442 return;
4443 }
4444}
4445
4446void
4447aarch64_print_operand_address (FILE *f, rtx x)
4448{
4449 struct aarch64_address_info addr;
4450
4451 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4452 MEM, true))
4453 switch (addr.type)
4454 {
4455 case ADDRESS_REG_IMM:
4456 if (addr.offset == const0_rtx)
01a3a324 4457 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 4458 else
16a3246f 4459 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
4460 INTVAL (addr.offset));
4461 return;
4462
4463 case ADDRESS_REG_REG:
4464 if (addr.shift == 0)
16a3246f 4465 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 4466 reg_names [REGNO (addr.offset)]);
43e9d192 4467 else
16a3246f 4468 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 4469 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
4470 return;
4471
4472 case ADDRESS_REG_UXTW:
4473 if (addr.shift == 0)
16a3246f 4474 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4475 REGNO (addr.offset) - R0_REGNUM);
4476 else
16a3246f 4477 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4478 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4479 return;
4480
4481 case ADDRESS_REG_SXTW:
4482 if (addr.shift == 0)
16a3246f 4483 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4484 REGNO (addr.offset) - R0_REGNUM);
4485 else
16a3246f 4486 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4487 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4488 return;
4489
4490 case ADDRESS_REG_WB:
4491 switch (GET_CODE (x))
4492 {
4493 case PRE_INC:
16a3246f 4494 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4495 GET_MODE_SIZE (aarch64_memory_reference_mode));
4496 return;
4497 case POST_INC:
16a3246f 4498 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
43e9d192
IB
4499 GET_MODE_SIZE (aarch64_memory_reference_mode));
4500 return;
4501 case PRE_DEC:
16a3246f 4502 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4503 GET_MODE_SIZE (aarch64_memory_reference_mode));
4504 return;
4505 case POST_DEC:
16a3246f 4506 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
43e9d192
IB
4507 GET_MODE_SIZE (aarch64_memory_reference_mode));
4508 return;
4509 case PRE_MODIFY:
16a3246f 4510 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4511 INTVAL (addr.offset));
4512 return;
4513 case POST_MODIFY:
16a3246f 4514 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
4515 INTVAL (addr.offset));
4516 return;
4517 default:
4518 break;
4519 }
4520 break;
4521
4522 case ADDRESS_LO_SUM:
16a3246f 4523 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
4524 output_addr_const (f, addr.offset);
4525 asm_fprintf (f, "]");
4526 return;
4527
4528 case ADDRESS_SYMBOLIC:
4529 break;
4530 }
4531
4532 output_addr_const (f, x);
4533}
4534
43e9d192
IB
4535bool
4536aarch64_label_mentioned_p (rtx x)
4537{
4538 const char *fmt;
4539 int i;
4540
4541 if (GET_CODE (x) == LABEL_REF)
4542 return true;
4543
4544 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4545 referencing instruction, but they are constant offsets, not
4546 symbols. */
4547 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4548 return false;
4549
4550 fmt = GET_RTX_FORMAT (GET_CODE (x));
4551 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4552 {
4553 if (fmt[i] == 'E')
4554 {
4555 int j;
4556
4557 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4558 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4559 return 1;
4560 }
4561 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4562 return 1;
4563 }
4564
4565 return 0;
4566}
4567
4568/* Implement REGNO_REG_CLASS. */
4569
4570enum reg_class
4571aarch64_regno_regclass (unsigned regno)
4572{
4573 if (GP_REGNUM_P (regno))
a4a182c6 4574 return GENERAL_REGS;
43e9d192
IB
4575
4576 if (regno == SP_REGNUM)
4577 return STACK_REG;
4578
4579 if (regno == FRAME_POINTER_REGNUM
4580 || regno == ARG_POINTER_REGNUM)
f24bb080 4581 return POINTER_REGS;
43e9d192
IB
4582
4583 if (FP_REGNUM_P (regno))
4584 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4585
4586 return NO_REGS;
4587}
4588
0c4ec427 4589static rtx
ef4bddc2 4590aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
4591{
4592 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4593 where mask is selected by alignment and size of the offset.
4594 We try to pick as large a range for the offset as possible to
4595 maximize the chance of a CSE. However, for aligned addresses
4596 we limit the range to 4k so that structures with different sized
4597 elements are likely to use the same base. */
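  /* For instance, an aligned SImode access at X + 0x3204 can be rewritten
     as TMP = X + 0x3000 followed by TMP + 0x204, so that a neighbouring
     access such as X + 0x3208 is likely to reuse the same TMP.  */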
4598
4599 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
4600 {
4601 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
4602 HOST_WIDE_INT base_offset;
4603
4604 /* Does it look like we'll need a load/store-pair operation? */
4605 if (GET_MODE_SIZE (mode) > 16
4606 || mode == TImode)
4607 base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
4608 & ~((128 * GET_MODE_SIZE (mode)) - 1));
4609      /* For offsets that aren't a multiple of the access size, the limit is
4610 -256...255. */
4611 else if (offset & (GET_MODE_SIZE (mode) - 1))
4612 base_offset = (offset + 0x100) & ~0x1ff;
4613 else
4614 base_offset = offset & ~0xfff;
4615
4616 if (base_offset == 0)
4617 return x;
4618
4619 offset -= base_offset;
4620 rtx base_reg = gen_reg_rtx (Pmode);
4621 rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
4622 NULL_RTX);
4623 emit_move_insn (base_reg, val);
4624 x = plus_constant (Pmode, base_reg, offset);
4625 }
4626
4627 return x;
4628}
4629
43e9d192
IB
4630/* Try a machine-dependent way of reloading an illegitimate address
4631 operand. If we find one, push the reload and return the new rtx. */
4632
4633rtx
4634aarch64_legitimize_reload_address (rtx *x_p,
ef4bddc2 4635 machine_mode mode,
43e9d192
IB
4636 int opnum, int type,
4637 int ind_levels ATTRIBUTE_UNUSED)
4638{
4639 rtx x = *x_p;
4640
348d4b0a
BC
4641 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4642 if (aarch64_vect_struct_mode_p (mode)
43e9d192
IB
4643 && GET_CODE (x) == PLUS
4644 && REG_P (XEXP (x, 0))
4645 && CONST_INT_P (XEXP (x, 1)))
4646 {
4647 rtx orig_rtx = x;
4648 x = copy_rtx (x);
4649 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4650 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4651 opnum, (enum reload_type) type);
4652 return x;
4653 }
4654
4655 /* We must recognize output that we have already generated ourselves. */
4656 if (GET_CODE (x) == PLUS
4657 && GET_CODE (XEXP (x, 0)) == PLUS
4658 && REG_P (XEXP (XEXP (x, 0), 0))
4659 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4660 && CONST_INT_P (XEXP (x, 1)))
4661 {
4662 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4663 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4664 opnum, (enum reload_type) type);
4665 return x;
4666 }
4667
4668 /* We wish to handle large displacements off a base register by splitting
4669 the addend across an add and the mem insn. This can cut the number of
4670 extra insns needed from 3 to 1. It is only useful for load/store of a
4671 single register with 12 bit offset field. */
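  /* For instance, an SImode load at BASE + 0x12340 can become
     "add tmp, base, #0x12, lsl #12" followed by "ldr w0, [tmp, #0x340]",
     rather than materializing the full constant first.  */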
4672 if (GET_CODE (x) == PLUS
4673 && REG_P (XEXP (x, 0))
4674 && CONST_INT_P (XEXP (x, 1))
4675 && HARD_REGISTER_P (XEXP (x, 0))
4676 && mode != TImode
4677 && mode != TFmode
4678 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4679 {
4680 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4681 HOST_WIDE_INT low = val & 0xfff;
4682 HOST_WIDE_INT high = val - low;
4683 HOST_WIDE_INT offs;
4684 rtx cst;
ef4bddc2 4685 machine_mode xmode = GET_MODE (x);
28514dda
YZ
4686
4687 /* In ILP32, xmode can be either DImode or SImode. */
4688 gcc_assert (xmode == DImode || xmode == SImode);
43e9d192
IB
4689
4690 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4691 BLKmode alignment. */
4692 if (GET_MODE_SIZE (mode) == 0)
4693 return NULL_RTX;
4694
4695 offs = low % GET_MODE_SIZE (mode);
4696
4697 /* Align misaligned offset by adjusting high part to compensate. */
4698 if (offs != 0)
4699 {
4700 if (aarch64_uimm12_shift (high + offs))
4701 {
4702 /* Align down. */
4703 low = low - offs;
4704 high = high + offs;
4705 }
4706 else
4707 {
4708 /* Align up. */
4709 offs = GET_MODE_SIZE (mode) - offs;
4710 low = low + offs;
4711 high = high + (low & 0x1000) - offs;
4712 low &= 0xfff;
4713 }
4714 }
4715
4716 /* Check for overflow. */
4717 if (high + low != val)
4718 return NULL_RTX;
4719
4720 cst = GEN_INT (high);
4721 if (!aarch64_uimm12_shift (high))
28514dda 4722 cst = force_const_mem (xmode, cst);
43e9d192
IB
4723
4724 /* Reload high part into base reg, leaving the low part
acf67b8e
KT
4725 in the mem instruction.
4726 Note that replacing this gen_rtx_PLUS with plus_constant is
4727 wrong in this case because we rely on the
4728 (plus (plus reg c1) c2) structure being preserved so that
4729 XEXP (*p, 0) in push_reload below uses the correct term. */
4d0e5b54
JG
4730 x = gen_rtx_PLUS (xmode,
4731 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4732 GEN_INT (low));
43e9d192
IB
4733
4734 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
28514dda 4735 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
43e9d192
IB
4736 opnum, (enum reload_type) type);
4737 return x;
4738 }
4739
4740 return NULL_RTX;
4741}
4742
4743
4744static reg_class_t
4745aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4746 reg_class_t rclass,
ef4bddc2 4747 machine_mode mode,
43e9d192
IB
4748 secondary_reload_info *sri)
4749{
43e9d192
IB
4750 /* Without the TARGET_SIMD instructions we cannot move a Q register
4751 to a Q register directly. We need a scratch. */
4752 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4753 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4754 && reg_class_subset_p (rclass, FP_REGS))
4755 {
4756 if (mode == TFmode)
4757 sri->icode = CODE_FOR_aarch64_reload_movtf;
4758 else if (mode == TImode)
4759 sri->icode = CODE_FOR_aarch64_reload_movti;
4760 return NO_REGS;
4761 }
4762
4763 /* A TFmode or TImode memory access should be handled via an FP_REGS
4764 because AArch64 has richer addressing modes for LDR/STR instructions
4765 than LDP/STP instructions. */
a4a182c6 4766 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
43e9d192
IB
4767 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4768 return FP_REGS;
4769
4770 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 4771 return GENERAL_REGS;
43e9d192
IB
4772
4773 return NO_REGS;
4774}
4775
4776static bool
4777aarch64_can_eliminate (const int from, const int to)
4778{
4779 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4780 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4781
4782 if (frame_pointer_needed)
4783 {
4784 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4785 return true;
4786 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4787 return false;
4788 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4789 && !cfun->calls_alloca)
4790 return true;
4791 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4792 return true;
0b7f8166
MS
4793
4794 return false;
43e9d192 4795 }
1c923b60
JW
4796 else
4797 {
4798 /* If we decided that we didn't need a leaf frame pointer but then used
4799 LR in the function, then we'll want a frame pointer after all, so
4800 prevent this elimination to ensure a frame pointer is used. */
4801 if (to == STACK_POINTER_REGNUM
4802 && flag_omit_leaf_frame_pointer
4803 && df_regs_ever_live_p (LR_REGNUM))
4804 return false;
4805 }
777e6976 4806
43e9d192
IB
4807 return true;
4808}
4809
4810HOST_WIDE_INT
4811aarch64_initial_elimination_offset (unsigned from, unsigned to)
4812{
43e9d192 4813 aarch64_layout_frame ();
78c29983
MS
4814
4815 if (to == HARD_FRAME_POINTER_REGNUM)
4816 {
4817 if (from == ARG_POINTER_REGNUM)
1c960e02 4818 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
78c29983
MS
4819
4820 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
4821 return (cfun->machine->frame.hard_fp_offset
4822 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
4823 }
4824
4825 if (to == STACK_POINTER_REGNUM)
4826 {
4827 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
4828 return (cfun->machine->frame.frame_size
4829 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
4830 }
4831
1c960e02 4832 return cfun->machine->frame.frame_size;
43e9d192
IB
4833}
4834
43e9d192
IB
4835/* Implement RETURN_ADDR_RTX. We do not support moving back to a
4836 previous frame. */
4837
4838rtx
4839aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4840{
4841 if (count != 0)
4842 return const0_rtx;
4843 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4844}
4845
4846
4847static void
4848aarch64_asm_trampoline_template (FILE *f)
4849{
28514dda
YZ
4850 if (TARGET_ILP32)
4851 {
4852 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4853 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4854 }
4855 else
4856 {
4857 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4858 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4859 }
01a3a324 4860 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 4861 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
4862 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4863 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
4864}
4865
4866static void
4867aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4868{
4869 rtx fnaddr, mem, a_tramp;
28514dda 4870 const int tramp_code_sz = 16;
43e9d192
IB
4871
4872 /* Don't need to copy the trailing D-words, we fill those in below. */
4873 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
4874 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4875 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 4876 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
4877 if (GET_MODE (fnaddr) != ptr_mode)
4878 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
4879 emit_move_insn (mem, fnaddr);
4880
28514dda 4881 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
4882 emit_move_insn (mem, chain_value);
4883
4884 /* XXX We should really define a "clear_cache" pattern and use
4885 gen_clear_cache(). */
4886 a_tramp = XEXP (m_tramp, 0);
4887 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
4888 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4889 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4890 ptr_mode);
43e9d192
IB
4891}
4892
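/* Return how many registers of class REGCLASS are needed to hold a value
   of mode MODE: 128-bit (Q register) chunks for vector modes and 64-bit
   chunks otherwise, e.g. one register for V4SImode but two general
   registers for TImode.  STACK_REG and NO_REGS are special-cased below.  */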
4893static unsigned char
ef4bddc2 4894aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192
IB
4895{
4896 switch (regclass)
4897 {
fee9ba42 4898 case CALLER_SAVE_REGS:
43e9d192
IB
4899 case POINTER_REGS:
4900 case GENERAL_REGS:
4901 case ALL_REGS:
4902 case FP_REGS:
4903 case FP_LO_REGS:
4904 return
4905 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
aef66c94 4906 (GET_MODE_SIZE (mode) + 7) / 8;
43e9d192
IB
4907 case STACK_REG:
4908 return 1;
4909
4910 case NO_REGS:
4911 return 0;
4912
4913 default:
4914 break;
4915 }
4916 gcc_unreachable ();
4917}
4918
4919static reg_class_t
78d8b9f0 4920aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 4921{
51bb310d 4922 if (regclass == POINTER_REGS)
78d8b9f0
IB
4923 return GENERAL_REGS;
4924
51bb310d
MS
4925 if (regclass == STACK_REG)
4926 {
4927      if (REG_P (x)
4928 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4929 return regclass;
4930
4931 return NO_REGS;
4932 }
4933
78d8b9f0
IB
4934 /* If it's an integer immediate that MOVI can't handle, then
4935 FP_REGS is not an option, so we return NO_REGS instead. */
4936 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4937 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4938 return NO_REGS;
4939
27bd251b
IB
4940  /* Register elimination can result in a request for
4941     SP+constant->FP_REGS.  We cannot support such operations, which
4942     use SP as the source and an FP_REG as the destination, so reject
4943     them outright now. */
4944 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4945 {
4946 rtx lhs = XEXP (x, 0);
4947
4948 /* Look through a possible SUBREG introduced by ILP32. */
4949 if (GET_CODE (lhs) == SUBREG)
4950 lhs = SUBREG_REG (lhs);
4951
4952 gcc_assert (REG_P (lhs));
4953 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4954 POINTER_REGS));
4955 return NO_REGS;
4956 }
4957
78d8b9f0 4958 return regclass;
43e9d192
IB
4959}
4960
4961void
4962aarch64_asm_output_labelref (FILE* f, const char *name)
4963{
4964 asm_fprintf (f, "%U%s", name);
4965}
4966
4967static void
4968aarch64_elf_asm_constructor (rtx symbol, int priority)
4969{
4970 if (priority == DEFAULT_INIT_PRIORITY)
4971 default_ctor_section_asm_out_constructor (symbol, priority);
4972 else
4973 {
4974 section *s;
4975 char buf[18];
4976 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4977 s = get_section (buf, SECTION_WRITE, NULL);
4978 switch_to_section (s);
4979 assemble_align (POINTER_SIZE);
28514dda 4980 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4981 }
4982}
4983
4984static void
4985aarch64_elf_asm_destructor (rtx symbol, int priority)
4986{
4987 if (priority == DEFAULT_INIT_PRIORITY)
4988 default_dtor_section_asm_out_destructor (symbol, priority);
4989 else
4990 {
4991 section *s;
4992 char buf[18];
4993 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4994 s = get_section (buf, SECTION_WRITE, NULL);
4995 switch_to_section (s);
4996 assemble_align (POINTER_SIZE);
28514dda 4997 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4998 }
4999}
5000
5001const char*
5002aarch64_output_casesi (rtx *operands)
5003{
5004 char buf[100];
5005 char label[100];
b32d5189 5006 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
5007 int index;
5008 static const char *const patterns[4][2] =
5009 {
5010 {
5011 "ldrb\t%w3, [%0,%w1,uxtw]",
5012 "add\t%3, %4, %w3, sxtb #2"
5013 },
5014 {
5015 "ldrh\t%w3, [%0,%w1,uxtw #1]",
5016 "add\t%3, %4, %w3, sxth #2"
5017 },
5018 {
5019 "ldr\t%w3, [%0,%w1,uxtw #2]",
5020 "add\t%3, %4, %w3, sxtw #2"
5021 },
5022 /* We assume that DImode is only generated when not optimizing and
5023 that we don't really need 64-bit address offsets. That would
5024 imply an object file with 8GB of code in a single function! */
5025 {
5026 "ldr\t%w3, [%0,%w1,uxtw #2]",
5027 "add\t%3, %4, %w3, sxtw #2"
5028 }
5029 };
5030
5031 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
5032
5033 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
5034
5035 gcc_assert (index >= 0 && index <= 3);
5036
5037  /* Need to implement table size reduction by changing the code below. */
5038 output_asm_insn (patterns[index][0], operands);
5039 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
5040 snprintf (buf, sizeof (buf),
5041 "adr\t%%4, %s", targetm.strip_name_encoding (label));
5042 output_asm_insn (buf, operands);
5043 output_asm_insn (patterns[index][1], operands);
5044 output_asm_insn ("br\t%3", operands);
5045 assemble_label (asm_out_file, label);
5046 return "";
5047}
5048
5049
5050/* Return size in bits of an arithmetic operand which is shifted/scaled and
5051 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
5052 operator. */
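/* For example, aarch64_uxt_size (1, 0x1fe) is 8 (0xff shifted left by one)
   and aarch64_uxt_size (0, 0xffffffff) is 32, while a shift/mask pair that
   does not correspond to a UXTB, UXTH or UXTW mask yields 0.  */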
5053
5054int
5055aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
5056{
5057 if (shift >= 0 && shift <= 3)
5058 {
5059 int size;
5060 for (size = 8; size <= 32; size *= 2)
5061 {
5062 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
5063 if (mask == bits << shift)
5064 return size;
5065 }
5066 }
5067 return 0;
5068}
5069
5070static bool
ef4bddc2 5071aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
5072 const_rtx x ATTRIBUTE_UNUSED)
5073{
5074 /* We can't use blocks for constants when we're using a per-function
5075 constant pool. */
5076 return false;
5077}
5078
5079static section *
ef4bddc2 5080aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
5081 rtx x ATTRIBUTE_UNUSED,
5082 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
5083{
5084 /* Force all constant pool entries into the current function section. */
5085 return function_section (current_function_decl);
5086}
5087
5088
5089/* Costs. */
5090
5091/* Helper function for rtx cost calculation. Strip a shift expression
5092 from X. Returns the inner operand if successful, or the original
5093 expression on failure. */
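/* For example, both (ashift x (const_int 2)) and its multiply form
   (mult x (const_int 4)) strip down to x; anything else is returned
   unchanged.  */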
5094static rtx
5095aarch64_strip_shift (rtx x)
5096{
5097 rtx op = x;
5098
57b77d46
RE
5099 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
5100 we can convert both to ROR during final output. */
43e9d192
IB
5101 if ((GET_CODE (op) == ASHIFT
5102 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
5103 || GET_CODE (op) == LSHIFTRT
5104 || GET_CODE (op) == ROTATERT
5105 || GET_CODE (op) == ROTATE)
43e9d192
IB
5106 && CONST_INT_P (XEXP (op, 1)))
5107 return XEXP (op, 0);
5108
5109 if (GET_CODE (op) == MULT
5110 && CONST_INT_P (XEXP (op, 1))
5111 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
5112 return XEXP (op, 0);
5113
5114 return x;
5115}
5116
4745e701 5117/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
5118 expression from X. Returns the inner operand if successful, or the
5119 original expression on failure. We deal with a number of possible
5120 canonicalization variations here. */
5121static rtx
4745e701 5122aarch64_strip_extend (rtx x)
43e9d192
IB
5123{
5124 rtx op = x;
5125
5126 /* Zero and sign extraction of a widened value. */
5127 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
5128 && XEXP (op, 2) == const0_rtx
4745e701 5129 && GET_CODE (XEXP (op, 0)) == MULT
43e9d192
IB
5130 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
5131 XEXP (op, 1)))
5132 return XEXP (XEXP (op, 0), 0);
5133
5134 /* It can also be represented (for zero-extend) as an AND with an
5135 immediate. */
5136 if (GET_CODE (op) == AND
5137 && GET_CODE (XEXP (op, 0)) == MULT
5138 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
5139 && CONST_INT_P (XEXP (op, 1))
5140 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
5141 INTVAL (XEXP (op, 1))) != 0)
5142 return XEXP (XEXP (op, 0), 0);
5143
5144 /* Now handle extended register, as this may also have an optional
5145 left shift by 1..4. */
5146 if (GET_CODE (op) == ASHIFT
5147 && CONST_INT_P (XEXP (op, 1))
5148 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
5149 op = XEXP (op, 0);
5150
5151 if (GET_CODE (op) == ZERO_EXTEND
5152 || GET_CODE (op) == SIGN_EXTEND)
5153 op = XEXP (op, 0);
5154
5155 if (op != x)
5156 return op;
5157
4745e701
JG
5158 return x;
5159}
5160
0a78ebe4
KT
5161/* Return true iff CODE is a shift supported in combination
5162 with arithmetic instructions. */
4d1919ed 5163
0a78ebe4
KT
5164static bool
5165aarch64_shift_p (enum rtx_code code)
5166{
5167 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
5168}
5169
4745e701 5170/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
5171 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
5172   Return the calculated cost of the expression, recursing manually into
4745e701
JG
5173 operands where needed. */
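/* For example, the MULT inside (plus (mult x (const_int 4)) y) is costed
   as an ADD with a shift-by-immediate operand (alu.arith_shift) rather
   than as a real multiply, since it is expected to be emitted that way.  */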
5174
5175static int
5176aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
5177{
5178 rtx op0, op1;
5179 const struct cpu_cost_table *extra_cost
5180 = aarch64_tune_params->insn_extra_cost;
5181 int cost = 0;
0a78ebe4 5182 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 5183 machine_mode mode = GET_MODE (x);
4745e701
JG
5184
5185 gcc_checking_assert (code == MULT);
5186
5187 op0 = XEXP (x, 0);
5188 op1 = XEXP (x, 1);
5189
5190 if (VECTOR_MODE_P (mode))
5191 mode = GET_MODE_INNER (mode);
5192
5193 /* Integer multiply/fma. */
5194 if (GET_MODE_CLASS (mode) == MODE_INT)
5195 {
5196      /* The multiply will be canonicalized as a shift, so cost it as such. */
0a78ebe4
KT
5197 if (aarch64_shift_p (GET_CODE (x))
5198 || (CONST_INT_P (op1)
5199 && exact_log2 (INTVAL (op1)) > 0))
4745e701 5200 {
0a78ebe4
KT
5201 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
5202 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
5203 if (speed)
5204 {
0a78ebe4
KT
5205 if (compound_p)
5206 {
5207 if (REG_P (op1))
5208 /* ARITH + shift-by-register. */
5209 cost += extra_cost->alu.arith_shift_reg;
5210 else if (is_extend)
5211 /* ARITH + extended register. We don't have a cost field
5212 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
5213 cost += extra_cost->alu.extend_arith;
5214 else
5215 /* ARITH + shift-by-immediate. */
5216 cost += extra_cost->alu.arith_shift;
5217 }
4745e701
JG
5218 else
5219 /* LSL (immediate). */
0a78ebe4
KT
5220 cost += extra_cost->alu.shift;
5221
4745e701 5222 }
0a78ebe4
KT
5223 /* Strip extends as we will have costed them in the case above. */
5224 if (is_extend)
5225 op0 = aarch64_strip_extend (op0);
4745e701
JG
5226
5227 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
5228
5229 return cost;
5230 }
5231
d2ac256b
KT
5232 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
5233 compound and let the below cases handle it. After all, MNEG is a
5234 special-case alias of MSUB. */
5235 if (GET_CODE (op0) == NEG)
5236 {
5237 op0 = XEXP (op0, 0);
5238 compound_p = true;
5239 }
5240
4745e701
JG
5241 /* Integer multiplies or FMAs have zero/sign extending variants. */
5242 if ((GET_CODE (op0) == ZERO_EXTEND
5243 && GET_CODE (op1) == ZERO_EXTEND)
5244 || (GET_CODE (op0) == SIGN_EXTEND
5245 && GET_CODE (op1) == SIGN_EXTEND))
5246 {
5247 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
5248 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
5249
5250 if (speed)
5251 {
0a78ebe4 5252 if (compound_p)
d2ac256b 5253 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
5254 cost += extra_cost->mult[0].extend_add;
5255 else
5256 /* MUL/SMULL/UMULL. */
5257 cost += extra_cost->mult[0].extend;
5258 }
5259
5260 return cost;
5261 }
5262
d2ac256b 5263 /* This is either an integer multiply or a MADD. In both cases
4745e701
JG
5264 we want to recurse and cost the operands. */
5265 cost += rtx_cost (op0, MULT, 0, speed)
5266 + rtx_cost (op1, MULT, 1, speed);
5267
5268 if (speed)
5269 {
0a78ebe4 5270 if (compound_p)
d2ac256b 5271 /* MADD/MSUB. */
4745e701
JG
5272 cost += extra_cost->mult[mode == DImode].add;
5273 else
5274 /* MUL. */
5275 cost += extra_cost->mult[mode == DImode].simple;
5276 }
5277
5278 return cost;
5279 }
5280 else
5281 {
5282 if (speed)
5283 {
3d840f7d 5284 /* Floating-point FMA/FMUL can also support negations of the
4745e701
JG
5285 operands. */
5286 if (GET_CODE (op0) == NEG)
3d840f7d 5287 op0 = XEXP (op0, 0);
4745e701 5288 if (GET_CODE (op1) == NEG)
3d840f7d 5289 op1 = XEXP (op1, 0);
4745e701 5290
0a78ebe4 5291 if (compound_p)
4745e701
JG
5292 /* FMADD/FNMADD/FNMSUB/FMSUB. */
5293 cost += extra_cost->fp[mode == DFmode].fma;
5294 else
3d840f7d 5295 /* FMUL/FNMUL. */
4745e701
JG
5296 cost += extra_cost->fp[mode == DFmode].mult;
5297 }
5298
5299 cost += rtx_cost (op0, MULT, 0, speed)
5300 + rtx_cost (op1, MULT, 1, speed);
5301 return cost;
5302 }
43e9d192
IB
5303}
5304
67747367
JG
5305static int
5306aarch64_address_cost (rtx x,
ef4bddc2 5307 machine_mode mode,
67747367
JG
5308 addr_space_t as ATTRIBUTE_UNUSED,
5309 bool speed)
5310{
5311 enum rtx_code c = GET_CODE (x);
5312 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
5313 struct aarch64_address_info info;
5314 int cost = 0;
5315 info.shift = 0;
5316
5317 if (!aarch64_classify_address (&info, x, mode, c, false))
5318 {
5319 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
5320 {
5321 /* This is a CONST or SYMBOL ref which will be split
5322 in a different way depending on the code model in use.
5323 Cost it through the generic infrastructure. */
5324 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
5325 /* Divide through by the cost of one instruction to
5326 bring it to the same units as the address costs. */
5327 cost_symbol_ref /= COSTS_N_INSNS (1);
5328 /* The cost is then the cost of preparing the address,
5329 followed by an immediate (possibly 0) offset. */
5330 return cost_symbol_ref + addr_cost->imm_offset;
5331 }
5332 else
5333 {
5334 /* This is most likely a jump table from a case
5335 statement. */
5336 return addr_cost->register_offset;
5337 }
5338 }
5339
5340 switch (info.type)
5341 {
5342 case ADDRESS_LO_SUM:
5343 case ADDRESS_SYMBOLIC:
5344 case ADDRESS_REG_IMM:
5345 cost += addr_cost->imm_offset;
5346 break;
5347
5348 case ADDRESS_REG_WB:
5349 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
5350 cost += addr_cost->pre_modify;
5351 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
5352 cost += addr_cost->post_modify;
5353 else
5354 gcc_unreachable ();
5355
5356 break;
5357
5358 case ADDRESS_REG_REG:
5359 cost += addr_cost->register_offset;
5360 break;
5361
5362 case ADDRESS_REG_UXTW:
5363 case ADDRESS_REG_SXTW:
5364 cost += addr_cost->register_extend;
5365 break;
5366
5367 default:
5368 gcc_unreachable ();
5369 }
5370
5371
5372 if (info.shift > 0)
5373 {
5374 /* For the sake of calculating the cost of the shifted register
5375 component, we can treat same sized modes in the same way. */
5376 switch (GET_MODE_BITSIZE (mode))
5377 {
5378 case 16:
5379 cost += addr_cost->addr_scale_costs.hi;
5380 break;
5381
5382 case 32:
5383 cost += addr_cost->addr_scale_costs.si;
5384 break;
5385
5386 case 64:
5387 cost += addr_cost->addr_scale_costs.di;
5388 break;
5389
5390 /* We can't tell, or this is a 128-bit vector. */
5391 default:
5392 cost += addr_cost->addr_scale_costs.ti;
5393 break;
5394 }
5395 }
5396
5397 return cost;
5398}
5399
7cc2145f
JG
5400/* Return true if the RTX X in mode MODE is a zero or sign extract
5401 usable in an ADD or SUB (extended register) instruction. */
5402static bool
ef4bddc2 5403aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
7cc2145f
JG
5404{
5405 /* Catch add with a sign extract.
5406 This is add_<optab><mode>_multp2. */
5407 if (GET_CODE (x) == SIGN_EXTRACT
5408 || GET_CODE (x) == ZERO_EXTRACT)
5409 {
5410 rtx op0 = XEXP (x, 0);
5411 rtx op1 = XEXP (x, 1);
5412 rtx op2 = XEXP (x, 2);
5413
5414 if (GET_CODE (op0) == MULT
5415 && CONST_INT_P (op1)
5416 && op2 == const0_rtx
5417 && CONST_INT_P (XEXP (op0, 1))
5418 && aarch64_is_extend_from_extract (mode,
5419 XEXP (op0, 1),
5420 op1))
5421 {
5422 return true;
5423 }
5424 }
5425
5426 return false;
5427}
5428
61263118
KT
5429static bool
5430aarch64_frint_unspec_p (unsigned int u)
5431{
5432 switch (u)
5433 {
5434 case UNSPEC_FRINTZ:
5435 case UNSPEC_FRINTP:
5436 case UNSPEC_FRINTM:
5437 case UNSPEC_FRINTA:
5438 case UNSPEC_FRINTN:
5439 case UNSPEC_FRINTX:
5440 case UNSPEC_FRINTI:
5441 return true;
5442
5443 default:
5444 return false;
5445 }
5446}
5447
2d5ffe46
AP
5448/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5449 storing it in *COST. Result is true if the total cost of the operation
5450 has now been calculated. */
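/* Roughly speaking, (if_then_else (ne x (const_int 0)) (label_ref L) (pc))
   below is treated as a CBZ/CBNZ-style conditional branch, while a MODE_CC
   comparison selecting between two register operands is costed as a
   CSEL-family instruction.  */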
5451static bool
5452aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
5453{
b9e3afe9
AP
5454 rtx inner;
5455 rtx comparator;
5456 enum rtx_code cmpcode;
5457
5458 if (COMPARISON_P (op0))
5459 {
5460 inner = XEXP (op0, 0);
5461 comparator = XEXP (op0, 1);
5462 cmpcode = GET_CODE (op0);
5463 }
5464 else
5465 {
5466 inner = op0;
5467 comparator = const0_rtx;
5468 cmpcode = NE;
5469 }
5470
2d5ffe46
AP
5471 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5472 {
5473 /* Conditional branch. */
b9e3afe9 5474 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
5475 return true;
5476 else
5477 {
b9e3afe9 5478 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 5479 {
2d5ffe46
AP
5480 if (comparator == const0_rtx)
5481 {
5482 /* TBZ/TBNZ/CBZ/CBNZ. */
5483 if (GET_CODE (inner) == ZERO_EXTRACT)
5484 /* TBZ/TBNZ. */
5485 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
5486 0, speed);
5487 else
5488 /* CBZ/CBNZ. */
b9e3afe9 5489 *cost += rtx_cost (inner, cmpcode, 0, speed);
2d5ffe46
AP
5490
5491 return true;
5492 }
5493 }
b9e3afe9 5494 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 5495 {
2d5ffe46
AP
5496 /* TBZ/TBNZ. */
5497 if (comparator == const0_rtx)
5498 return true;
5499 }
5500 }
5501 }
b9e3afe9 5502 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
5503 {
5504 /* It's a conditional operation based on the status flags,
5505 so it must be some flavor of CSEL. */
5506
5507 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5508 if (GET_CODE (op1) == NEG
5509 || GET_CODE (op1) == NOT
5510 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5511 op1 = XEXP (op1, 0);
5512
5513 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
5514 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
5515 return true;
5516 }
5517
5518 /* We don't know what this is, cost all operands. */
5519 return false;
5520}
5521
43e9d192
IB
5522/* Calculate the cost of calculating X, storing it in *COST. Result
5523 is true if the total cost of the operation has now been calculated. */
5524static bool
5525aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
5526 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5527{
a8eecd00 5528 rtx op0, op1, op2;
73250c4c 5529 const struct cpu_cost_table *extra_cost
43e9d192 5530 = aarch64_tune_params->insn_extra_cost;
ef4bddc2 5531 machine_mode mode = GET_MODE (x);
43e9d192 5532
7fc5ef02
JG
5533 /* By default, assume that everything has equivalent cost to the
5534 cheapest instruction. Any additional costs are applied as a delta
5535 above this default. */
5536 *cost = COSTS_N_INSNS (1);
5537
5538 /* TODO: The cost infrastructure currently does not handle
5539 vector operations. Assume that all vector operations
5540 are equally expensive. */
5541 if (VECTOR_MODE_P (mode))
5542 {
5543 if (speed)
5544 *cost += extra_cost->vect.alu;
5545 return true;
5546 }
5547
43e9d192
IB
5548 switch (code)
5549 {
5550 case SET:
ba123b0d
JG
5551 /* The cost depends entirely on the operands to SET. */
5552 *cost = 0;
43e9d192
IB
5553 op0 = SET_DEST (x);
5554 op1 = SET_SRC (x);
5555
5556 switch (GET_CODE (op0))
5557 {
5558 case MEM:
5559 if (speed)
2961177e
JG
5560 {
5561 rtx address = XEXP (op0, 0);
5562 if (GET_MODE_CLASS (mode) == MODE_INT)
5563 *cost += extra_cost->ldst.store;
5564 else if (mode == SFmode)
5565 *cost += extra_cost->ldst.storef;
5566 else if (mode == DFmode)
5567 *cost += extra_cost->ldst.stored;
5568
5569 *cost +=
5570 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5571 0, speed));
5572 }
43e9d192 5573
ba123b0d 5574 *cost += rtx_cost (op1, SET, 1, speed);
43e9d192
IB
5575 return true;
5576
5577 case SUBREG:
5578 if (! REG_P (SUBREG_REG (op0)))
5579 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
ba123b0d 5580
43e9d192
IB
5581 /* Fall through. */
5582 case REG:
ba123b0d
JG
5583 /* const0_rtx is in general free, but we will use an
5584 instruction to set a register to 0. */
5585 if (REG_P (op1) || op1 == const0_rtx)
5586 {
5587 /* The cost is 1 per register copied. */
5588 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5589 / UNITS_PER_WORD;
5590 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5591 }
5592 else
5593 /* Cost is just the cost of the RHS of the set. */
5594 *cost += rtx_cost (op1, SET, 1, speed);
43e9d192
IB
5595 return true;
5596
ba123b0d 5597 case ZERO_EXTRACT:
43e9d192 5598 case SIGN_EXTRACT:
ba123b0d
JG
5599 /* Bit-field insertion. Strip any redundant widening of
5600 the RHS to meet the width of the target. */
43e9d192
IB
5601 if (GET_CODE (op1) == SUBREG)
5602 op1 = SUBREG_REG (op1);
5603 if ((GET_CODE (op1) == ZERO_EXTEND
5604 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 5605 && CONST_INT_P (XEXP (op0, 1))
43e9d192
IB
5606 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5607 >= INTVAL (XEXP (op0, 1))))
5608 op1 = XEXP (op1, 0);
ba123b0d
JG
5609
5610 if (CONST_INT_P (op1))
5611 {
5612 /* MOV immediate is assumed to always be cheap. */
5613 *cost = COSTS_N_INSNS (1);
5614 }
5615 else
5616 {
5617 /* BFM. */
5618 if (speed)
5619 *cost += extra_cost->alu.bfi;
5620 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5621 }
5622
43e9d192
IB
5623 return true;
5624
5625 default:
ba123b0d
JG
5626 /* We can't make sense of this, assume default cost. */
5627 *cost = COSTS_N_INSNS (1);
61263118 5628 return false;
43e9d192
IB
5629 }
5630 return false;
5631
9dfc162c
JG
5632 case CONST_INT:
5633 /* If an instruction can incorporate a constant within the
5634 instruction, the instruction's expression avoids calling
5635 rtx_cost() on the constant. If rtx_cost() is called on a
5636 constant, then it is usually because the constant must be
5637 moved into a register by one or more instructions.
5638
5639 The exception is constant 0, which can be expressed
5640 as XZR/WZR and is therefore free. The exception to this is
5641 if we have (set (reg) (const0_rtx)) in which case we must cost
5642 the move. However, we can catch that when we cost the SET, so
5643 we don't need to consider that here. */
5644 if (x == const0_rtx)
5645 *cost = 0;
5646 else
5647 {
5648 /* To an approximation, building any other constant is
5649 proportionally expensive to the number of instructions
5650 required to build that constant. This is true whether we
5651 are compiling for SPEED or otherwise. */
82614948
RR
5652 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
5653 (NULL_RTX, x, false, mode));
9dfc162c
JG
5654 }
5655 return true;
5656
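/* As a rough illustration of the proportional costing above: a
   constant such as 0x1234567890abcdef generally needs a MOVZ plus
   three MOVKs, so aarch64_internal_mov_immediate reports four
   instructions and the constant is costed as COSTS_N_INSNS (4),
   whereas something like 0xffff0000 fits a single MOVZ-with-shift
   and is costed as one instruction.  */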
5657 case CONST_DOUBLE:
5658 if (speed)
5659 {
5660 /* mov[df,sf]_aarch64. */
5661 if (aarch64_float_const_representable_p (x))
5662 /* FMOV (scalar immediate). */
5663 *cost += extra_cost->fp[mode == DFmode].fpconst;
5664 else if (!aarch64_float_const_zero_rtx_p (x))
5665 {
5666 /* This will be a load from memory. */
5667 if (mode == DFmode)
5668 *cost += extra_cost->ldst.loadd;
5669 else
5670 *cost += extra_cost->ldst.loadf;
5671 }
5672 else
5673 /* Otherwise this is +0.0. We get this using MOVI d0, #0
 5674 or MOV v0.s[0], wzr - neither of which is modeled by the
5675 cost tables. Just use the default cost. */
5676 {
5677 }
5678 }
5679
5680 return true;
5681
43e9d192
IB
5682 case MEM:
5683 if (speed)
2961177e
JG
5684 {
5685 /* For loads we want the base cost of a load, plus an
5686 approximation for the additional cost of the addressing
5687 mode. */
5688 rtx address = XEXP (x, 0);
5689 if (GET_MODE_CLASS (mode) == MODE_INT)
5690 *cost += extra_cost->ldst.load;
5691 else if (mode == SFmode)
5692 *cost += extra_cost->ldst.loadf;
5693 else if (mode == DFmode)
5694 *cost += extra_cost->ldst.loadd;
5695
5696 *cost +=
5697 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5698 0, speed));
5699 }
43e9d192
IB
5700
5701 return true;
5702
5703 case NEG:
4745e701
JG
5704 op0 = XEXP (x, 0);
5705
5706 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5707 {
5708 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5709 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5710 {
5711 /* CSETM. */
5712 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5713 return true;
5714 }
5715
5716 /* Cost this as SUB wzr, X. */
5717 op0 = CONST0_RTX (GET_MODE (x));
5718 op1 = XEXP (x, 0);
5719 goto cost_minus;
5720 }
5721
5722 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5723 {
5724 /* Support (neg(fma...)) as a single instruction only if
5725 sign of zeros is unimportant. This matches the decision
5726 making in aarch64.md. */
5727 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5728 {
5729 /* FNMADD. */
5730 *cost = rtx_cost (op0, NEG, 0, speed);
5731 return true;
5732 }
5733 if (speed)
5734 /* FNEG. */
5735 *cost += extra_cost->fp[mode == DFmode].neg;
5736 return false;
5737 }
5738
5739 return false;
43e9d192 5740
781aeb73
KT
5741 case CLRSB:
5742 case CLZ:
5743 if (speed)
5744 *cost += extra_cost->alu.clz;
5745
5746 return false;
5747
43e9d192
IB
5748 case COMPARE:
5749 op0 = XEXP (x, 0);
5750 op1 = XEXP (x, 1);
5751
5752 if (op1 == const0_rtx
5753 && GET_CODE (op0) == AND)
5754 {
5755 x = op0;
5756 goto cost_logic;
5757 }
5758
a8eecd00
JG
5759 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5760 {
5761 /* TODO: A write to the CC flags possibly costs extra, this
5762 needs encoding in the cost tables. */
5763
5764 /* CC_ZESWPmode supports zero extend for free. */
5765 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5766 op0 = XEXP (op0, 0);
5767
5768 /* ANDS. */
5769 if (GET_CODE (op0) == AND)
5770 {
5771 x = op0;
5772 goto cost_logic;
5773 }
5774
5775 if (GET_CODE (op0) == PLUS)
5776 {
5777 /* ADDS (and CMN alias). */
5778 x = op0;
5779 goto cost_plus;
5780 }
5781
5782 if (GET_CODE (op0) == MINUS)
5783 {
5784 /* SUBS. */
5785 x = op0;
5786 goto cost_minus;
5787 }
5788
5789 if (GET_CODE (op1) == NEG)
5790 {
5791 /* CMN. */
5792 if (speed)
5793 *cost += extra_cost->alu.arith;
5794
5795 *cost += rtx_cost (op0, COMPARE, 0, speed);
5796 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5797 return true;
5798 }
5799
5800 /* CMP.
5801
5802 Compare can freely swap the order of operands, and
5803 canonicalization puts the more complex operation first.
5804 But the integer MINUS logic expects the shift/extend
5805 operation in op1. */
5806 if (! (REG_P (op0)
5807 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5808 {
5809 op0 = XEXP (x, 1);
5810 op1 = XEXP (x, 0);
5811 }
5812 goto cost_minus;
5813 }
5814
5815 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5816 {
5817 /* FCMP. */
5818 if (speed)
5819 *cost += extra_cost->fp[mode == DFmode].compare;
5820
5821 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5822 {
5823 /* FCMP supports constant 0.0 for no extra cost. */
5824 return true;
5825 }
5826 return false;
5827 }
5828
5829 return false;
43e9d192
IB
5830
5831 case MINUS:
4745e701
JG
5832 {
5833 op0 = XEXP (x, 0);
5834 op1 = XEXP (x, 1);
5835
5836cost_minus:
5837 /* Detect valid immediates. */
5838 if ((GET_MODE_CLASS (mode) == MODE_INT
5839 || (GET_MODE_CLASS (mode) == MODE_CC
5840 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5841 && CONST_INT_P (op1)
5842 && aarch64_uimm12_shift (INTVAL (op1)))
5843 {
5844 *cost += rtx_cost (op0, MINUS, 0, speed);
43e9d192 5845
4745e701
JG
5846 if (speed)
5847 /* SUB(S) (immediate). */
5848 *cost += extra_cost->alu.arith;
5849 return true;
5850
5851 }
5852
7cc2145f
JG
5853 /* Look for SUB (extended register). */
5854 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5855 {
5856 if (speed)
2533c820 5857 *cost += extra_cost->alu.extend_arith;
7cc2145f
JG
5858
5859 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5860 (enum rtx_code) GET_CODE (op1),
5861 0, speed);
5862 return true;
5863 }
5864
4745e701
JG
5865 rtx new_op1 = aarch64_strip_extend (op1);
5866
5867 /* Cost this as an FMA-alike operation. */
5868 if ((GET_CODE (new_op1) == MULT
0a78ebe4 5869 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
5870 && code != COMPARE)
5871 {
5872 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5873 (enum rtx_code) code,
5874 speed);
43e9d192 5875 *cost += rtx_cost (op0, MINUS, 0, speed);
4745e701
JG
5876 return true;
5877 }
43e9d192 5878
4745e701 5879 *cost += rtx_cost (new_op1, MINUS, 1, speed);
43e9d192 5880
4745e701
JG
5881 if (speed)
5882 {
5883 if (GET_MODE_CLASS (mode) == MODE_INT)
5884 /* SUB(S). */
5885 *cost += extra_cost->alu.arith;
5886 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5887 /* FSUB. */
5888 *cost += extra_cost->fp[mode == DFmode].addsub;
5889 }
5890 return true;
5891 }
43e9d192
IB
5892
5893 case PLUS:
4745e701
JG
5894 {
5895 rtx new_op0;
43e9d192 5896
4745e701
JG
5897 op0 = XEXP (x, 0);
5898 op1 = XEXP (x, 1);
43e9d192 5899
a8eecd00 5900cost_plus:
4745e701
JG
5901 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5902 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5903 {
5904 /* CSINC. */
5905 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5906 *cost += rtx_cost (op1, PLUS, 1, speed);
5907 return true;
5908 }
43e9d192 5909
4745e701
JG
5910 if (GET_MODE_CLASS (mode) == MODE_INT
5911 && CONST_INT_P (op1)
5912 && aarch64_uimm12_shift (INTVAL (op1)))
5913 {
5914 *cost += rtx_cost (op0, PLUS, 0, speed);
43e9d192 5915
4745e701
JG
5916 if (speed)
5917 /* ADD (immediate). */
5918 *cost += extra_cost->alu.arith;
5919 return true;
5920 }
5921
7cc2145f
JG
5922 /* Look for ADD (extended register). */
5923 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5924 {
5925 if (speed)
2533c820 5926 *cost += extra_cost->alu.extend_arith;
7cc2145f
JG
5927
5928 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5929 (enum rtx_code) GET_CODE (op0),
5930 0, speed);
5931 return true;
5932 }
5933
4745e701
JG
5934 /* Strip any extend, leave shifts behind as we will
5935 cost them through mult_cost. */
5936 new_op0 = aarch64_strip_extend (op0);
5937
5938 if (GET_CODE (new_op0) == MULT
0a78ebe4 5939 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
5940 {
5941 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5942 speed);
5943 *cost += rtx_cost (op1, PLUS, 1, speed);
5944 return true;
5945 }
5946
5947 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5948 + rtx_cost (op1, PLUS, 1, speed));
5949
5950 if (speed)
5951 {
5952 if (GET_MODE_CLASS (mode) == MODE_INT)
5953 /* ADD. */
5954 *cost += extra_cost->alu.arith;
5955 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5956 /* FADD. */
5957 *cost += extra_cost->fp[mode == DFmode].addsub;
5958 }
5959 return true;
5960 }
43e9d192 5961
18b42b2a
KT
5962 case BSWAP:
5963 *cost = COSTS_N_INSNS (1);
5964
5965 if (speed)
5966 *cost += extra_cost->alu.rev;
5967
5968 return false;
5969
43e9d192 5970 case IOR:
f7d5cf8d
KT
5971 if (aarch_rev16_p (x))
5972 {
5973 *cost = COSTS_N_INSNS (1);
5974
5975 if (speed)
5976 *cost += extra_cost->alu.rev;
5977
5978 return true;
5979 }
5980 /* Fall through. */
43e9d192
IB
5981 case XOR:
5982 case AND:
5983 cost_logic:
5984 op0 = XEXP (x, 0);
5985 op1 = XEXP (x, 1);
5986
268c3b47
JG
5987 if (code == AND
5988 && GET_CODE (op0) == MULT
5989 && CONST_INT_P (XEXP (op0, 1))
5990 && CONST_INT_P (op1)
5991 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5992 INTVAL (op1)) != 0)
5993 {
5994 /* This is a UBFM/SBFM. */
5995 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5996 if (speed)
5997 *cost += extra_cost->alu.bfx;
5998 return true;
5999 }
6000
43e9d192
IB
6001 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6002 {
268c3b47
JG
6003 /* We possibly get the immediate for free, this is not
6004 modelled. */
43e9d192
IB
6005 if (CONST_INT_P (op1)
6006 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
6007 {
268c3b47
JG
6008 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
6009
6010 if (speed)
6011 *cost += extra_cost->alu.logical;
6012
6013 return true;
43e9d192
IB
6014 }
6015 else
6016 {
268c3b47
JG
6017 rtx new_op0 = op0;
6018
6019 /* Handle ORN, EON, or BIC. */
43e9d192
IB
6020 if (GET_CODE (op0) == NOT)
6021 op0 = XEXP (op0, 0);
268c3b47
JG
6022
6023 new_op0 = aarch64_strip_shift (op0);
6024
6025 /* If we had a shift on op0 then this is a logical-shift-
6026 by-register/immediate operation. Otherwise, this is just
6027 a logical operation. */
6028 if (speed)
6029 {
6030 if (new_op0 != op0)
6031 {
6032 /* Shift by immediate. */
6033 if (CONST_INT_P (XEXP (op0, 1)))
6034 *cost += extra_cost->alu.log_shift;
6035 else
6036 *cost += extra_cost->alu.log_shift_reg;
6037 }
6038 else
6039 *cost += extra_cost->alu.logical;
6040 }
6041
6042 /* In both cases we want to cost both operands. */
6043 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
6044 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
6045
6046 return true;
43e9d192 6047 }
43e9d192
IB
6048 }
6049 return false;
6050
268c3b47 6051 case NOT:
6365da9e
KT
6052 x = XEXP (x, 0);
6053 op0 = aarch64_strip_shift (x);
6054
6055 /* MVN-shifted-reg. */
6056 if (op0 != x)
6057 {
6058 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
6059
6060 if (speed)
6061 *cost += extra_cost->alu.log_shift;
6062
6063 return true;
6064 }
6065 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
6066 Handle the second form here taking care that 'a' in the above can
6067 be a shift. */
6068 else if (GET_CODE (op0) == XOR)
6069 {
6070 rtx newop0 = XEXP (op0, 0);
6071 rtx newop1 = XEXP (op0, 1);
6072 rtx op0_stripped = aarch64_strip_shift (newop0);
6073
6074 *cost += rtx_cost (newop1, (enum rtx_code) code, 1, speed)
6075 + rtx_cost (op0_stripped, XOR, 0, speed);
6076
6077 if (speed)
6078 {
6079 if (op0_stripped != newop0)
6080 *cost += extra_cost->alu.log_shift;
6081 else
6082 *cost += extra_cost->alu.logical;
6083 }
6084
6085 return true;
6086 }
268c3b47
JG
6087 /* MVN. */
6088 if (speed)
6089 *cost += extra_cost->alu.logical;
6090
268c3b47
JG
6091 return false;
6092
43e9d192 6093 case ZERO_EXTEND:
b1685e62
JG
6094
6095 op0 = XEXP (x, 0);
6096 /* If a value is written in SI mode, then zero extended to DI
6097 mode, the operation will in general be free as a write to
6098 a 'w' register implicitly zeroes the upper bits of an 'x'
6099 register. However, if this is
6100
6101 (set (reg) (zero_extend (reg)))
6102
6103 we must cost the explicit register move. */
6104 if (mode == DImode
6105 && GET_MODE (op0) == SImode
6106 && outer == SET)
6107 {
6108 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
6109
6110 if (!op_cost && speed)
6111 /* MOV. */
6112 *cost += extra_cost->alu.extend;
6113 else
6114 /* Free, the cost is that of the SI mode operation. */
6115 *cost = op_cost;
6116
6117 return true;
6118 }
6119 else if (MEM_P (XEXP (x, 0)))
43e9d192 6120 {
b1685e62
JG
6121 /* All loads can zero extend to any size for free. */
6122 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
43e9d192
IB
6123 return true;
6124 }
b1685e62
JG
6125
6126 /* UXTB/UXTH. */
6127 if (speed)
6128 *cost += extra_cost->alu.extend;
6129
43e9d192
IB
6130 return false;
6131
6132 case SIGN_EXTEND:
b1685e62 6133 if (MEM_P (XEXP (x, 0)))
43e9d192 6134 {
b1685e62
JG
6135 /* LDRSH. */
6136 if (speed)
6137 {
6138 rtx address = XEXP (XEXP (x, 0), 0);
6139 *cost += extra_cost->ldst.load_sign_extend;
6140
6141 *cost +=
6142 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6143 0, speed));
6144 }
43e9d192
IB
6145 return true;
6146 }
b1685e62
JG
6147
6148 if (speed)
6149 *cost += extra_cost->alu.extend;
43e9d192
IB
6150 return false;
6151
ba0cfa17
JG
6152 case ASHIFT:
6153 op0 = XEXP (x, 0);
6154 op1 = XEXP (x, 1);
6155
6156 if (CONST_INT_P (op1))
6157 {
 6158 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
6159 aliases. */
6160 if (speed)
6161 *cost += extra_cost->alu.shift;
6162
6163 /* We can incorporate zero/sign extend for free. */
6164 if (GET_CODE (op0) == ZERO_EXTEND
6165 || GET_CODE (op0) == SIGN_EXTEND)
6166 op0 = XEXP (op0, 0);
6167
6168 *cost += rtx_cost (op0, ASHIFT, 0, speed);
6169 return true;
6170 }
6171 else
6172 {
6173 /* LSLV. */
6174 if (speed)
6175 *cost += extra_cost->alu.shift_reg;
6176
6177 return false; /* All arguments need to be in registers. */
6178 }
6179
43e9d192 6180 case ROTATE:
43e9d192
IB
6181 case ROTATERT:
6182 case LSHIFTRT:
43e9d192 6183 case ASHIFTRT:
ba0cfa17
JG
6184 op0 = XEXP (x, 0);
6185 op1 = XEXP (x, 1);
43e9d192 6186
ba0cfa17
JG
6187 if (CONST_INT_P (op1))
6188 {
6189 /* ASR (immediate) and friends. */
6190 if (speed)
6191 *cost += extra_cost->alu.shift;
43e9d192 6192
ba0cfa17
JG
6193 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
6194 return true;
6195 }
6196 else
6197 {
6198
6199 /* ASR (register) and friends. */
6200 if (speed)
6201 *cost += extra_cost->alu.shift_reg;
6202
6203 return false; /* All arguments need to be in registers. */
6204 }
43e9d192 6205
909734be
JG
6206 case SYMBOL_REF:
6207
6208 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6209 {
6210 /* LDR. */
6211 if (speed)
6212 *cost += extra_cost->ldst.load;
6213 }
6214 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
6215 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
6216 {
6217 /* ADRP, followed by ADD. */
6218 *cost += COSTS_N_INSNS (1);
6219 if (speed)
6220 *cost += 2 * extra_cost->alu.arith;
6221 }
6222 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
6223 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
6224 {
6225 /* ADR. */
6226 if (speed)
6227 *cost += extra_cost->alu.arith;
6228 }
6229
6230 if (flag_pic)
6231 {
6232 /* One extra load instruction, after accessing the GOT. */
6233 *cost += COSTS_N_INSNS (1);
6234 if (speed)
6235 *cost += extra_cost->ldst.load;
6236 }
43e9d192
IB
6237 return true;
6238
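/* For reference, the address-generation sequences being costed above
   are roughly:

     tiny model:   adr  x0, sym
     small model:  adrp x0, sym
                   add  x0, x0, :lo12:sym
     large model:  the address is placed in the literal pool and
                   fetched with an LDR

   and with -fpic an extra load through the GOT is added on top of
   the base sequence.  */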
909734be 6239 case HIGH:
43e9d192 6240 case LO_SUM:
909734be
JG
6241 /* ADRP/ADD (immediate). */
6242 if (speed)
6243 *cost += extra_cost->alu.arith;
43e9d192
IB
6244 return true;
6245
6246 case ZERO_EXTRACT:
6247 case SIGN_EXTRACT:
7cc2145f
JG
6248 /* UBFX/SBFX. */
6249 if (speed)
6250 *cost += extra_cost->alu.bfx;
6251
6252 /* We can trust that the immediates used will be correct (there
6253 are no by-register forms), so we need only cost op0. */
6254 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
43e9d192
IB
6255 return true;
6256
6257 case MULT:
4745e701
JG
6258 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
6259 /* aarch64_rtx_mult_cost always handles recursion to its
6260 operands. */
6261 return true;
43e9d192
IB
6262
6263 case MOD:
6264 case UMOD:
43e9d192
IB
6265 if (speed)
6266 {
6267 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
73250c4c
KT
6268 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
6269 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
43e9d192 6270 else if (GET_MODE (x) == DFmode)
73250c4c
KT
6271 *cost += (extra_cost->fp[1].mult
6272 + extra_cost->fp[1].div);
43e9d192 6273 else if (GET_MODE (x) == SFmode)
73250c4c
KT
6274 *cost += (extra_cost->fp[0].mult
6275 + extra_cost->fp[0].div);
43e9d192
IB
6276 }
6277 return false; /* All arguments need to be in registers. */
6278
6279 case DIV:
6280 case UDIV:
4105fe38 6281 case SQRT:
43e9d192
IB
6282 if (speed)
6283 {
4105fe38
JG
6284 if (GET_MODE_CLASS (mode) == MODE_INT)
6285 /* There is no integer SQRT, so only DIV and UDIV can get
6286 here. */
6287 *cost += extra_cost->mult[mode == DImode].idiv;
6288 else
6289 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
6290 }
6291 return false; /* All arguments need to be in registers. */
6292
a8eecd00 6293 case IF_THEN_ELSE:
2d5ffe46
AP
6294 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
6295 XEXP (x, 2), cost, speed);
a8eecd00
JG
6296
6297 case EQ:
6298 case NE:
6299 case GT:
6300 case GTU:
6301 case LT:
6302 case LTU:
6303 case GE:
6304 case GEU:
6305 case LE:
6306 case LEU:
6307
6308 return false; /* All arguments must be in registers. */
6309
b292109f
JG
6310 case FMA:
6311 op0 = XEXP (x, 0);
6312 op1 = XEXP (x, 1);
6313 op2 = XEXP (x, 2);
6314
6315 if (speed)
6316 *cost += extra_cost->fp[mode == DFmode].fma;
6317
6318 /* FMSUB, FNMADD, and FNMSUB are free. */
6319 if (GET_CODE (op0) == NEG)
6320 op0 = XEXP (op0, 0);
6321
6322 if (GET_CODE (op2) == NEG)
6323 op2 = XEXP (op2, 0);
6324
6325 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
6326 and the by-element operand as operand 0. */
6327 if (GET_CODE (op1) == NEG)
6328 op1 = XEXP (op1, 0);
6329
6330 /* Catch vector-by-element operations. The by-element operand can
6331 either be (vec_duplicate (vec_select (x))) or just
6332 (vec_select (x)), depending on whether we are multiplying by
6333 a vector or a scalar.
6334
 6335 Canonicalization is not very good in these cases: FMA4 will put the
6336 by-element operand as operand 0, FNMA4 will have it as operand 1. */
6337 if (GET_CODE (op0) == VEC_DUPLICATE)
6338 op0 = XEXP (op0, 0);
6339 else if (GET_CODE (op1) == VEC_DUPLICATE)
6340 op1 = XEXP (op1, 0);
6341
6342 if (GET_CODE (op0) == VEC_SELECT)
6343 op0 = XEXP (op0, 0);
6344 else if (GET_CODE (op1) == VEC_SELECT)
6345 op1 = XEXP (op1, 0);
6346
6347 /* If the remaining parameters are not registers,
6348 get the cost to put them into registers. */
6349 *cost += rtx_cost (op0, FMA, 0, speed);
6350 *cost += rtx_cost (op1, FMA, 1, speed);
6351 *cost += rtx_cost (op2, FMA, 2, speed);
6352 return true;
6353
6354 case FLOAT_EXTEND:
6355 if (speed)
6356 *cost += extra_cost->fp[mode == DFmode].widen;
6357 return false;
6358
6359 case FLOAT_TRUNCATE:
6360 if (speed)
6361 *cost += extra_cost->fp[mode == DFmode].narrow;
6362 return false;
6363
61263118
KT
6364 case FIX:
6365 case UNSIGNED_FIX:
6366 x = XEXP (x, 0);
6367 /* Strip the rounding part. They will all be implemented
6368 by the fcvt* family of instructions anyway. */
6369 if (GET_CODE (x) == UNSPEC)
6370 {
6371 unsigned int uns_code = XINT (x, 1);
6372
6373 if (uns_code == UNSPEC_FRINTA
6374 || uns_code == UNSPEC_FRINTM
6375 || uns_code == UNSPEC_FRINTN
6376 || uns_code == UNSPEC_FRINTP
6377 || uns_code == UNSPEC_FRINTZ)
6378 x = XVECEXP (x, 0, 0);
6379 }
6380
6381 if (speed)
6382 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
6383
6384 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
6385 return true;
6386
b292109f
JG
6387 case ABS:
6388 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6389 {
19261b99
KT
6390 op0 = XEXP (x, 0);
6391
6392 /* FABD, which is analogous to FADD. */
6393 if (GET_CODE (op0) == MINUS)
6394 {
 6395 *cost += rtx_cost (XEXP (op0, 0), MINUS, 0, speed)
 6396 + rtx_cost (XEXP (op0, 1), MINUS, 1, speed);
6397 if (speed)
6398 *cost += extra_cost->fp[mode == DFmode].addsub;
6399
6400 return true;
6401 }
6402 /* Simple FABS is analogous to FNEG. */
b292109f
JG
6403 if (speed)
6404 *cost += extra_cost->fp[mode == DFmode].neg;
6405 }
6406 else
6407 {
6408 /* Integer ABS will either be split to
6409 two arithmetic instructions, or will be an ABS
6410 (scalar), which we don't model. */
6411 *cost = COSTS_N_INSNS (2);
6412 if (speed)
6413 *cost += 2 * extra_cost->alu.arith;
6414 }
6415 return false;
6416
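/* The MINUS case above corresponds to FABD, which computes the
   absolute difference |a - b| of two FP values in one instruction,
   e.g.

     fabd d0, d1, d2

   so (abs (minus a b)) is costed as a single FADD-class operation
   plus its two operands, rather than as an FSUB followed by an
   FABS.  */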
6417 case SMAX:
6418 case SMIN:
6419 if (speed)
6420 {
6421 /* FMAXNM/FMINNM/FMAX/FMIN.
6422 TODO: This may not be accurate for all implementations, but
6423 we do not model this in the cost tables. */
6424 *cost += extra_cost->fp[mode == DFmode].addsub;
6425 }
6426 return false;
6427
61263118
KT
6428 case UNSPEC:
6429 /* The floating point round to integer frint* instructions. */
6430 if (aarch64_frint_unspec_p (XINT (x, 1)))
6431 {
6432 if (speed)
6433 *cost += extra_cost->fp[mode == DFmode].roundint;
6434
6435 return false;
6436 }
781aeb73
KT
6437
6438 if (XINT (x, 1) == UNSPEC_RBIT)
6439 {
6440 if (speed)
6441 *cost += extra_cost->alu.rev;
6442
6443 return false;
6444 }
61263118
KT
6445 break;
6446
fb620c4a
JG
6447 case TRUNCATE:
6448
6449 /* Decompose <su>muldi3_highpart. */
6450 if (/* (truncate:DI */
6451 mode == DImode
6452 /* (lshiftrt:TI */
6453 && GET_MODE (XEXP (x, 0)) == TImode
6454 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6455 /* (mult:TI */
6456 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6457 /* (ANY_EXTEND:TI (reg:DI))
6458 (ANY_EXTEND:TI (reg:DI))) */
6459 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6460 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
6461 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
6462 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
6463 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
6464 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
6465 /* (const_int 64) */
6466 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6467 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
6468 {
6469 /* UMULH/SMULH. */
6470 if (speed)
6471 *cost += extra_cost->mult[mode == DImode].extend;
6472 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
6473 MULT, 0, speed);
6474 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
6475 MULT, 1, speed);
6476 return true;
6477 }
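/* Put together, the RTL matched above has the shape

     (truncate:DI
       (lshiftrt:TI
         (mult:TI (sign_extend:TI (reg:DI a))
                  (sign_extend:TI (reg:DI b)))
         (const_int 64)))

   (or the zero_extend equivalent), which is what <su>muldi3_highpart
   expands to and maps to a single SMULH or UMULH.  */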
6478
6479 /* Fall through. */
43e9d192 6480 default:
61263118 6481 break;
43e9d192 6482 }
61263118
KT
6483
6484 if (dump_file && (dump_flags & TDF_DETAILS))
6485 fprintf (dump_file,
6486 "\nFailed to cost RTX. Assuming default cost.\n");
6487
6488 return true;
43e9d192
IB
6489}
6490
0ee859b5
JG
6491/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
6492 calculated for X. This cost is stored in *COST. Returns true
6493 if the total cost of X was calculated. */
6494static bool
6495aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
6496 int param, int *cost, bool speed)
6497{
6498 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
6499
6500 if (dump_file && (dump_flags & TDF_DETAILS))
6501 {
6502 print_rtl_single (dump_file, x);
6503 fprintf (dump_file, "\n%s cost: %d (%s)\n",
6504 speed ? "Hot" : "Cold",
6505 *cost, result ? "final" : "partial");
6506 }
6507
6508 return result;
6509}
6510
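/* With a -fdump-rtl-<pass>-details dump enabled, the wrapper above
   prints the RTX followed by a line such as

     Hot cost: 8 (final)

   where "Hot"/"Cold" reflects the speed flag and "final"/"partial"
   reflects the return value of aarch64_rtx_costs; the number shown
   here is only an example.  */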
43e9d192 6511static int
ef4bddc2 6512aarch64_register_move_cost (machine_mode mode,
8a3a7e67 6513 reg_class_t from_i, reg_class_t to_i)
43e9d192 6514{
8a3a7e67
RH
6515 enum reg_class from = (enum reg_class) from_i;
6516 enum reg_class to = (enum reg_class) to_i;
43e9d192
IB
6517 const struct cpu_regmove_cost *regmove_cost
6518 = aarch64_tune_params->regmove_cost;
6519
3be07662
WD
6520 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
6521 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
6522 to = GENERAL_REGS;
6523
6524 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
6525 from = GENERAL_REGS;
6526
6ee70f81
AP
6527 /* Moving between GPR and stack cost is the same as GP2GP. */
6528 if ((from == GENERAL_REGS && to == STACK_REG)
6529 || (to == GENERAL_REGS && from == STACK_REG))
6530 return regmove_cost->GP2GP;
6531
6532 /* To/From the stack register, we move via the gprs. */
6533 if (to == STACK_REG || from == STACK_REG)
6534 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
6535 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
6536
8919453c
WD
6537 if (GET_MODE_SIZE (mode) == 16)
6538 {
6539 /* 128-bit operations on general registers require 2 instructions. */
6540 if (from == GENERAL_REGS && to == GENERAL_REGS)
6541 return regmove_cost->GP2GP * 2;
6542 else if (from == GENERAL_REGS)
6543 return regmove_cost->GP2FP * 2;
6544 else if (to == GENERAL_REGS)
6545 return regmove_cost->FP2GP * 2;
6546
6547 /* When AdvSIMD instructions are disabled it is not possible to move
6548 a 128-bit value directly between Q registers. This is handled in
6549 secondary reload. A general register is used as a scratch to move
6550 the upper DI value and the lower DI value is moved directly,
6551 hence the cost is the sum of three moves. */
6552 if (! TARGET_SIMD)
6553 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6554
6555 return regmove_cost->FP2FP;
6556 }
6557
43e9d192
IB
6558 if (from == GENERAL_REGS && to == GENERAL_REGS)
6559 return regmove_cost->GP2GP;
6560 else if (from == GENERAL_REGS)
6561 return regmove_cost->GP2FP;
6562 else if (to == GENERAL_REGS)
6563 return regmove_cost->FP2GP;
6564
43e9d192
IB
6565 return regmove_cost->FP2FP;
6566}
6567
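/* Worked example with made-up table values: if a tuning structure
   had GP2FP = 5, FP2GP = 5 and FP2FP = 2, a 128-bit FP-to-FP move
   with AdvSIMD disabled would be costed 5 + 5 + 2 = 12, matching the
   scratch-GPR sequence described above, while the same move with
   AdvSIMD enabled would simply return FP2FP = 2.  */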
6568static int
ef4bddc2 6569aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
6570 reg_class_t rclass ATTRIBUTE_UNUSED,
6571 bool in ATTRIBUTE_UNUSED)
6572{
6573 return aarch64_tune_params->memmov_cost;
6574}
6575
d126a4ae
AP
6576/* Return the number of instructions that can be issued per cycle. */
6577static int
6578aarch64_sched_issue_rate (void)
6579{
6580 return aarch64_tune_params->issue_rate;
6581}
6582
d03f7e44
MK
6583static int
6584aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
6585{
6586 int issue_rate = aarch64_sched_issue_rate ();
6587
6588 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
6589}
6590
8990e73a
TB
6591/* Vectorizer cost model target hooks. */
6592
6593/* Implement targetm.vectorize.builtin_vectorization_cost. */
6594static int
6595aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6596 tree vectype,
6597 int misalign ATTRIBUTE_UNUSED)
6598{
6599 unsigned elements;
6600
6601 switch (type_of_cost)
6602 {
6603 case scalar_stmt:
6604 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
6605
6606 case scalar_load:
6607 return aarch64_tune_params->vec_costs->scalar_load_cost;
6608
6609 case scalar_store:
6610 return aarch64_tune_params->vec_costs->scalar_store_cost;
6611
6612 case vector_stmt:
6613 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6614
6615 case vector_load:
6616 return aarch64_tune_params->vec_costs->vec_align_load_cost;
6617
6618 case vector_store:
6619 return aarch64_tune_params->vec_costs->vec_store_cost;
6620
6621 case vec_to_scalar:
6622 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6623
6624 case scalar_to_vec:
6625 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6626
6627 case unaligned_load:
6628 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6629
6630 case unaligned_store:
6631 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6632
6633 case cond_branch_taken:
6634 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6635
6636 case cond_branch_not_taken:
6637 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6638
6639 case vec_perm:
6640 case vec_promote_demote:
6641 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6642
6643 case vec_construct:
6644 elements = TYPE_VECTOR_SUBPARTS (vectype);
6645 return elements / 2 + 1;
6646
6647 default:
6648 gcc_unreachable ();
6649 }
6650}
6651
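/* Note on the vec_construct case above: TYPE_VECTOR_SUBPARTS gives
   the element count, so building e.g. a V4SI vector from scalars is
   costed as 4 / 2 + 1 = 3 units, growing linearly with the vector
   width.  */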
6652/* Implement targetm.vectorize.add_stmt_cost. */
6653static unsigned
6654aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6655 struct _stmt_vec_info *stmt_info, int misalign,
6656 enum vect_cost_model_location where)
6657{
6658 unsigned *cost = (unsigned *) data;
6659 unsigned retval = 0;
6660
6661 if (flag_vect_cost_model)
6662 {
6663 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6664 int stmt_cost =
6665 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6666
6667 /* Statements in an inner loop relative to the loop being
6668 vectorized are weighted more heavily. The value here is
6669 a function (linear for now) of the loop nest level. */
6670 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6671 {
6672 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6673 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
6674 unsigned nest_level = loop_depth (loop);
6675
6676 count *= nest_level;
6677 }
6678
6679 retval = (unsigned) (count * stmt_cost);
6680 cost[where] += retval;
6681 }
6682
6683 return retval;
6684}
6685
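/* Example of the inner-loop weighting above: the count is multiplied
   by the loop_depth of the statement's containing loop, so a vector
   statement in a loop at depth 2 has its count doubled before being
   scaled by the per-statement cost and accumulated into the
   vect_body bucket.  */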
43e9d192
IB
6686static void initialize_aarch64_code_model (void);
6687
6688/* Parse the architecture extension string. */
6689
6690static void
6691aarch64_parse_extension (char *str)
6692{
6693 /* The extension string is parsed left to right. */
6694 const struct aarch64_option_extension *opt = NULL;
6695
6696 /* Flag to say whether we are adding or removing an extension. */
6697 int adding_ext = -1;
6698
6699 while (str != NULL && *str != 0)
6700 {
6701 char *ext;
6702 size_t len;
6703
6704 str++;
6705 ext = strchr (str, '+');
6706
6707 if (ext != NULL)
6708 len = ext - str;
6709 else
6710 len = strlen (str);
6711
6712 if (len >= 2 && strncmp (str, "no", 2) == 0)
6713 {
6714 adding_ext = 0;
6715 len -= 2;
6716 str += 2;
6717 }
6718 else if (len > 0)
6719 adding_ext = 1;
6720
6721 if (len == 0)
6722 {
217d0904
KT
6723 error ("missing feature modifier after %qs", adding_ext ? "+"
6724 : "+no");
43e9d192
IB
6725 return;
6726 }
6727
6728 /* Scan over the extensions table trying to find an exact match. */
6729 for (opt = all_extensions; opt->name != NULL; opt++)
6730 {
6731 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6732 {
6733 /* Add or remove the extension. */
6734 if (adding_ext)
6735 aarch64_isa_flags |= opt->flags_on;
6736 else
6737 aarch64_isa_flags &= ~(opt->flags_off);
6738 break;
6739 }
6740 }
6741
6742 if (opt->name == NULL)
6743 {
6744 /* Extension not found in list. */
6745 error ("unknown feature modifier %qs", str);
6746 return;
6747 }
6748
6749 str = ext;
6750 };
6751
6752 return;
6753}
6754
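/* Example of the parsing above: for a string such as "+crypto+nofp"
   (everything after the cpu/arch name), the first iteration sees
   "crypto" and ORs in that entry's flags_on, while the second sees
   the "no" prefix and instead clears the bits in the "fp" entry's
   flags_off.  A dangling "+" with nothing after it triggers the
   "missing feature modifier" error.  */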
6755/* Parse the ARCH string. */
6756
6757static void
6758aarch64_parse_arch (void)
6759{
6760 char *ext;
6761 const struct processor *arch;
6762 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6763 size_t len;
6764
6765 strcpy (str, aarch64_arch_string);
6766
6767 ext = strchr (str, '+');
6768
6769 if (ext != NULL)
6770 len = ext - str;
6771 else
6772 len = strlen (str);
6773
6774 if (len == 0)
6775 {
6776 error ("missing arch name in -march=%qs", str);
6777 return;
6778 }
6779
6780 /* Loop through the list of supported ARCHs to find a match. */
6781 for (arch = all_architectures; arch->name != NULL; arch++)
6782 {
6783 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6784 {
6785 selected_arch = arch;
6786 aarch64_isa_flags = selected_arch->flags;
ffee7aa9
JG
6787
6788 if (!selected_cpu)
6789 selected_cpu = &all_cores[selected_arch->core];
43e9d192
IB
6790
6791 if (ext != NULL)
6792 {
6793 /* ARCH string contains at least one extension. */
6794 aarch64_parse_extension (ext);
6795 }
6796
ffee7aa9
JG
6797 if (strcmp (selected_arch->arch, selected_cpu->arch))
6798 {
6799 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6800 selected_cpu->name, selected_arch->name);
6801 }
6802
43e9d192
IB
6803 return;
6804 }
6805 }
6806
6807 /* ARCH name not found in list. */
6808 error ("unknown value %qs for -march", str);
6809 return;
6810}
6811
6812/* Parse the CPU string. */
6813
6814static void
6815aarch64_parse_cpu (void)
6816{
6817 char *ext;
6818 const struct processor *cpu;
6819 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6820 size_t len;
6821
6822 strcpy (str, aarch64_cpu_string);
6823
6824 ext = strchr (str, '+');
6825
6826 if (ext != NULL)
6827 len = ext - str;
6828 else
6829 len = strlen (str);
6830
6831 if (len == 0)
6832 {
6833 error ("missing cpu name in -mcpu=%qs", str);
6834 return;
6835 }
6836
6837 /* Loop through the list of supported CPUs to find a match. */
6838 for (cpu = all_cores; cpu->name != NULL; cpu++)
6839 {
6840 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6841 {
6842 selected_cpu = cpu;
6843 aarch64_isa_flags = selected_cpu->flags;
6844
6845 if (ext != NULL)
6846 {
6847 /* CPU string contains at least one extension. */
6848 aarch64_parse_extension (ext);
6849 }
6850
6851 return;
6852 }
6853 }
6854
6855 /* CPU name not found in list. */
6856 error ("unknown value %qs for -mcpu", str);
6857 return;
6858}
6859
6860/* Parse the TUNE string. */
6861
6862static void
6863aarch64_parse_tune (void)
6864{
6865 const struct processor *cpu;
6866 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6867 strcpy (str, aarch64_tune_string);
6868
6869 /* Loop through the list of supported CPUs to find a match. */
6870 for (cpu = all_cores; cpu->name != NULL; cpu++)
6871 {
6872 if (strcmp (cpu->name, str) == 0)
6873 {
6874 selected_tune = cpu;
6875 return;
6876 }
6877 }
6878
6879 /* CPU name not found in list. */
6880 error ("unknown value %qs for -mtune", str);
6881 return;
6882}
6883
6884
6885/* Implement TARGET_OPTION_OVERRIDE. */
6886
6887static void
6888aarch64_override_options (void)
6889{
ffee7aa9
JG
6890 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6891 If either of -march or -mtune is given, they override their
6892 respective component of -mcpu.
43e9d192 6893
ffee7aa9
JG
 6894 So, first parse AARCH64_CPU_STRING, then the others. Be careful
 6895 with -march: if -mcpu is not present on the command line, -march
 6896 must set a sensible default CPU. */
6897 if (aarch64_cpu_string)
43e9d192 6898 {
ffee7aa9 6899 aarch64_parse_cpu ();
43e9d192
IB
6900 }
6901
ffee7aa9 6902 if (aarch64_arch_string)
43e9d192 6903 {
ffee7aa9 6904 aarch64_parse_arch ();
43e9d192
IB
6905 }
6906
6907 if (aarch64_tune_string)
6908 {
6909 aarch64_parse_tune ();
6910 }
6911
63892fa2
KV
6912#ifndef HAVE_AS_MABI_OPTION
6913 /* The compiler may have been configured with 2.23.* binutils, which does
6914 not have support for ILP32. */
6915 if (TARGET_ILP32)
6916 error ("Assembler does not support -mabi=ilp32");
6917#endif
6918
43e9d192
IB
6919 initialize_aarch64_code_model ();
6920
6921 aarch64_build_bitmask_table ();
6922
6923 /* This target defaults to strict volatile bitfields. */
6924 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6925 flag_strict_volatile_bitfields = 1;
6926
6927 /* If the user did not specify a processor, choose the default
6928 one for them. This will be the CPU set during configuration using
a3cd0246 6929 --with-cpu, otherwise it is "generic". */
43e9d192
IB
6930 if (!selected_cpu)
6931 {
6932 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6933 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6934 }
6935
6936 gcc_assert (selected_cpu);
6937
43e9d192 6938 if (!selected_tune)
3edaf26d 6939 selected_tune = selected_cpu;
43e9d192
IB
6940
6941 aarch64_tune_flags = selected_tune->flags;
6942 aarch64_tune = selected_tune->core;
6943 aarch64_tune_params = selected_tune->tune;
0c6caaf8 6944 aarch64_architecture_version = selected_cpu->architecture_version;
43e9d192 6945
5e396da6
KT
6946 if (aarch64_fix_a53_err835769 == 2)
6947 {
6948#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
6949 aarch64_fix_a53_err835769 = 1;
6950#else
6951 aarch64_fix_a53_err835769 = 0;
6952#endif
6953 }
6954
b4917c98
AP
 6955 /* If not optimizing for size, set the default
 6956 alignment to what the target wants. */
6957 if (!optimize_size)
6958 {
6959 if (align_loops <= 0)
0b82a5a2 6960 align_loops = aarch64_tune_params->loop_align;
b4917c98 6961 if (align_jumps <= 0)
0b82a5a2 6962 align_jumps = aarch64_tune_params->jump_align;
b4917c98 6963 if (align_functions <= 0)
0b82a5a2 6964 align_functions = aarch64_tune_params->function_align;
b4917c98
AP
6965 }
6966
fde9b31b
TP
6967 if (AARCH64_TUNE_FMA_STEERING)
6968 aarch64_register_fma_steering ();
6969
43e9d192
IB
6970 aarch64_override_options_after_change ();
6971}
6972
6973/* Implement targetm.override_options_after_change. */
6974
6975static void
6976aarch64_override_options_after_change (void)
6977{
0b7f8166
MS
6978 if (flag_omit_frame_pointer)
6979 flag_omit_leaf_frame_pointer = false;
6980 else if (flag_omit_leaf_frame_pointer)
6981 flag_omit_frame_pointer = true;
43e9d192
IB
6982}
6983
6984static struct machine_function *
6985aarch64_init_machine_status (void)
6986{
6987 struct machine_function *machine;
766090c2 6988 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
6989 return machine;
6990}
6991
6992void
6993aarch64_init_expanders (void)
6994{
6995 init_machine_status = aarch64_init_machine_status;
6996}
6997
6998/* A checking mechanism for the implementation of the various code models. */
6999static void
7000initialize_aarch64_code_model (void)
7001{
7002 if (flag_pic)
7003 {
7004 switch (aarch64_cmodel_var)
7005 {
7006 case AARCH64_CMODEL_TINY:
7007 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
7008 break;
7009 case AARCH64_CMODEL_SMALL:
7010 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
7011 break;
7012 case AARCH64_CMODEL_LARGE:
7013 sorry ("code model %qs with -f%s", "large",
7014 flag_pic > 1 ? "PIC" : "pic");
7015 default:
7016 gcc_unreachable ();
7017 }
7018 }
7019 else
7020 aarch64_cmodel = aarch64_cmodel_var;
7021}
7022
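/* Summary of the mapping above: with -fpic/-fPIC the tiny and small
   models become their _PIC variants (e.g. the default -mcmodel=small
   becomes AARCH64_CMODEL_SMALL_PIC), the large model is rejected
   with a "sorry", and without PIC the user's -mcmodel choice is used
   unchanged.  */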
7023/* Return true if SYMBOL_REF X binds locally. */
7024
7025static bool
7026aarch64_symbol_binds_local_p (const_rtx x)
7027{
7028 return (SYMBOL_REF_DECL (x)
7029 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
7030 : SYMBOL_REF_LOCAL_P (x));
7031}
7032
 7033 /* Return true if SYMBOL_REF X is thread-local. */
7034static bool
7035aarch64_tls_symbol_p (rtx x)
7036{
7037 if (! TARGET_HAVE_TLS)
7038 return false;
7039
7040 if (GET_CODE (x) != SYMBOL_REF)
7041 return false;
7042
7043 return SYMBOL_REF_TLS_MODEL (x) != 0;
7044}
7045
7046/* Classify a TLS symbol into one of the TLS kinds. */
7047enum aarch64_symbol_type
7048aarch64_classify_tls_symbol (rtx x)
7049{
7050 enum tls_model tls_kind = tls_symbolic_operand_type (x);
7051
7052 switch (tls_kind)
7053 {
7054 case TLS_MODEL_GLOBAL_DYNAMIC:
7055 case TLS_MODEL_LOCAL_DYNAMIC:
7056 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
7057
7058 case TLS_MODEL_INITIAL_EXEC:
7059 return SYMBOL_SMALL_GOTTPREL;
7060
7061 case TLS_MODEL_LOCAL_EXEC:
7062 return SYMBOL_SMALL_TPREL;
7063
7064 case TLS_MODEL_EMULATED:
7065 case TLS_MODEL_NONE:
7066 return SYMBOL_FORCE_TO_MEM;
7067
7068 default:
7069 gcc_unreachable ();
7070 }
7071}
7072
7073/* Return the method that should be used to access SYMBOL_REF or
7074 LABEL_REF X in context CONTEXT. */
17f4d4bf 7075
43e9d192 7076enum aarch64_symbol_type
f8b756b7 7077aarch64_classify_symbol (rtx x, rtx offset,
43e9d192
IB
7078 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
7079{
7080 if (GET_CODE (x) == LABEL_REF)
7081 {
7082 switch (aarch64_cmodel)
7083 {
7084 case AARCH64_CMODEL_LARGE:
7085 return SYMBOL_FORCE_TO_MEM;
7086
7087 case AARCH64_CMODEL_TINY_PIC:
7088 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
7089 return SYMBOL_TINY_ABSOLUTE;
7090
43e9d192
IB
7091 case AARCH64_CMODEL_SMALL_PIC:
7092 case AARCH64_CMODEL_SMALL:
7093 return SYMBOL_SMALL_ABSOLUTE;
7094
7095 default:
7096 gcc_unreachable ();
7097 }
7098 }
7099
17f4d4bf 7100 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 7101 {
4a985a37
MS
7102 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
7103 return SYMBOL_FORCE_TO_MEM;
43e9d192
IB
7104
7105 if (aarch64_tls_symbol_p (x))
7106 return aarch64_classify_tls_symbol (x);
7107
17f4d4bf
CSS
7108 switch (aarch64_cmodel)
7109 {
7110 case AARCH64_CMODEL_TINY:
f8b756b7
TB
 7111 /* When we retrieve a symbol + offset address, we have to make sure
7112 the offset does not cause overflow of the final address. But
 7113 we have no way of knowing the address of the symbol at compile time
7114 so we can't accurately say if the distance between the PC and
 7115 symbol + offset is outside the addressable range of +/-1M in the
7116 TINY code model. So we rely on images not being greater than
 7117 1M and cap the offset at 1M; anything beyond 1M will have to
7118 be loaded using an alternative mechanism. */
7119 if (SYMBOL_REF_WEAK (x)
7120 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
a5350ddc
CSS
7121 return SYMBOL_FORCE_TO_MEM;
7122 return SYMBOL_TINY_ABSOLUTE;
7123
17f4d4bf 7124 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
7125 /* Same reasoning as the tiny code model, but the offset cap here is
7126 4G. */
7127 if (SYMBOL_REF_WEAK (x)
3ff5d1f0
TB
7128 || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
7129 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
7130 return SYMBOL_FORCE_TO_MEM;
7131 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 7132
17f4d4bf 7133 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 7134 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 7135 return SYMBOL_TINY_GOT;
38e6c9a6
MS
7136 return SYMBOL_TINY_ABSOLUTE;
7137
17f4d4bf
CSS
7138 case AARCH64_CMODEL_SMALL_PIC:
7139 if (!aarch64_symbol_binds_local_p (x))
7140 return SYMBOL_SMALL_GOT;
7141 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 7142
17f4d4bf
CSS
7143 default:
7144 gcc_unreachable ();
7145 }
43e9d192 7146 }
17f4d4bf 7147
43e9d192
IB
7148 /* By default push everything into the constant pool. */
7149 return SYMBOL_FORCE_TO_MEM;
7150}
7151
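/* Example of the offset capping above: under -mcmodel=tiny a
   reference like &sym + 4096 stays SYMBOL_TINY_ABSOLUTE (well within
   the +/-1M ADR range), whereas &sym + 0x200000 exceeds the cap and
   is classified SYMBOL_FORCE_TO_MEM so the address comes from the
   constant pool.  The small model applies the same logic with a cap
   of roughly +/-4G.  */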
43e9d192
IB
7152bool
7153aarch64_constant_address_p (rtx x)
7154{
7155 return (CONSTANT_P (x) && memory_address_p (DImode, x));
7156}
7157
7158bool
7159aarch64_legitimate_pic_operand_p (rtx x)
7160{
7161 if (GET_CODE (x) == SYMBOL_REF
7162 || (GET_CODE (x) == CONST
7163 && GET_CODE (XEXP (x, 0)) == PLUS
7164 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7165 return false;
7166
7167 return true;
7168}
7169
3520f7cc
JG
7170/* Return true if X holds either a quarter-precision or
7171 floating-point +0.0 constant. */
7172static bool
ef4bddc2 7173aarch64_valid_floating_const (machine_mode mode, rtx x)
3520f7cc
JG
7174{
7175 if (!CONST_DOUBLE_P (x))
7176 return false;
7177
7178 /* TODO: We could handle moving 0.0 to a TFmode register,
7179 but first we would like to refactor the movtf_aarch64
 7180 to be more amenable to split moves properly and
 7181 correctly gate on TARGET_SIMD. For now, reject all
7182 constants which are not to SFmode or DFmode registers. */
7183 if (!(mode == SFmode || mode == DFmode))
7184 return false;
7185
7186 if (aarch64_float_const_zero_rtx_p (x))
7187 return true;
7188 return aarch64_float_const_representable_p (x);
7189}
7190
43e9d192 7191static bool
ef4bddc2 7192aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192
IB
7193{
7194 /* Do not allow vector struct mode constants. We could support
7195 0 and -1 easily, but they need support in aarch64-simd.md. */
7196 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
7197 return false;
7198
7199 /* This could probably go away because
7200 we now decompose CONST_INTs according to expand_mov_immediate. */
7201 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 7202 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
7203 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
7204 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
7205
7206 if (GET_CODE (x) == HIGH
7207 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7208 return true;
7209
7210 return aarch64_constant_address_p (x);
7211}
7212
a5bc806c 7213rtx
43e9d192
IB
7214aarch64_load_tp (rtx target)
7215{
7216 if (!target
7217 || GET_MODE (target) != Pmode
7218 || !register_operand (target, Pmode))
7219 target = gen_reg_rtx (Pmode);
7220
7221 /* Can return in any reg. */
7222 emit_insn (gen_aarch64_load_tp_hard (target));
7223 return target;
7224}
7225
43e9d192
IB
7226/* On AAPCS systems, this is the "struct __va_list". */
7227static GTY(()) tree va_list_type;
7228
7229/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
7230 Return the type to use as __builtin_va_list.
7231
7232 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
7233
7234 struct __va_list
7235 {
7236 void *__stack;
7237 void *__gr_top;
7238 void *__vr_top;
7239 int __gr_offs;
7240 int __vr_offs;
7241 }; */
7242
7243static tree
7244aarch64_build_builtin_va_list (void)
7245{
7246 tree va_list_name;
7247 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7248
7249 /* Create the type. */
7250 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
7251 /* Give it the required name. */
7252 va_list_name = build_decl (BUILTINS_LOCATION,
7253 TYPE_DECL,
7254 get_identifier ("__va_list"),
7255 va_list_type);
7256 DECL_ARTIFICIAL (va_list_name) = 1;
7257 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 7258 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
7259
7260 /* Create the fields. */
7261 f_stack = build_decl (BUILTINS_LOCATION,
7262 FIELD_DECL, get_identifier ("__stack"),
7263 ptr_type_node);
7264 f_grtop = build_decl (BUILTINS_LOCATION,
7265 FIELD_DECL, get_identifier ("__gr_top"),
7266 ptr_type_node);
7267 f_vrtop = build_decl (BUILTINS_LOCATION,
7268 FIELD_DECL, get_identifier ("__vr_top"),
7269 ptr_type_node);
7270 f_groff = build_decl (BUILTINS_LOCATION,
7271 FIELD_DECL, get_identifier ("__gr_offs"),
7272 integer_type_node);
7273 f_vroff = build_decl (BUILTINS_LOCATION,
7274 FIELD_DECL, get_identifier ("__vr_offs"),
7275 integer_type_node);
7276
7277 DECL_ARTIFICIAL (f_stack) = 1;
7278 DECL_ARTIFICIAL (f_grtop) = 1;
7279 DECL_ARTIFICIAL (f_vrtop) = 1;
7280 DECL_ARTIFICIAL (f_groff) = 1;
7281 DECL_ARTIFICIAL (f_vroff) = 1;
7282
7283 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
7284 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
7285 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
7286 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
7287 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
7288
7289 TYPE_FIELDS (va_list_type) = f_stack;
7290 DECL_CHAIN (f_stack) = f_grtop;
7291 DECL_CHAIN (f_grtop) = f_vrtop;
7292 DECL_CHAIN (f_vrtop) = f_groff;
7293 DECL_CHAIN (f_groff) = f_vroff;
7294
7295 /* Compute its layout. */
7296 layout_type (va_list_type);
7297
7298 return va_list_type;
7299}
7300
7301/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
7302static void
7303aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
7304{
7305 const CUMULATIVE_ARGS *cum;
7306 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7307 tree stack, grtop, vrtop, groff, vroff;
7308 tree t;
7309 int gr_save_area_size;
7310 int vr_save_area_size;
7311 int vr_offset;
7312
7313 cum = &crtl->args.info;
7314 gr_save_area_size
7315 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
7316 vr_save_area_size
7317 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
7318
7319 if (TARGET_GENERAL_REGS_ONLY)
7320 {
7321 if (cum->aapcs_nvrn > 0)
7322 sorry ("%qs and floating point or vector arguments",
7323 "-mgeneral-regs-only");
7324 vr_save_area_size = 0;
7325 }
7326
7327 f_stack = TYPE_FIELDS (va_list_type_node);
7328 f_grtop = DECL_CHAIN (f_stack);
7329 f_vrtop = DECL_CHAIN (f_grtop);
7330 f_groff = DECL_CHAIN (f_vrtop);
7331 f_vroff = DECL_CHAIN (f_groff);
7332
7333 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
7334 NULL_TREE);
7335 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
7336 NULL_TREE);
7337 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
7338 NULL_TREE);
7339 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
7340 NULL_TREE);
7341 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
7342 NULL_TREE);
7343
7344 /* Emit code to initialize STACK, which points to the next varargs stack
7345 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
7346 by named arguments. STACK is 8-byte aligned. */
7347 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
7348 if (cum->aapcs_stack_size > 0)
7349 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
7350 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
7351 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7352
7353 /* Emit code to initialize GRTOP, the top of the GR save area.
7354 virtual_incoming_args_rtx should have been 16 byte aligned. */
7355 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
7356 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
7357 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7358
7359 /* Emit code to initialize VRTOP, the top of the VR save area.
7360 This address is gr_save_area_bytes below GRTOP, rounded
7361 down to the next 16-byte boundary. */
7362 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
7363 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
7364 STACK_BOUNDARY / BITS_PER_UNIT);
7365
7366 if (vr_offset)
7367 t = fold_build_pointer_plus_hwi (t, -vr_offset);
7368 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
7369 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7370
7371 /* Emit code to initialize GROFF, the offset from GRTOP of the
7372 next GPR argument. */
7373 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
7374 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
7375 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7376
7377 /* Likewise emit code to initialize VROFF, the offset from FTOP
7378 of the next VR argument. */
7379 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
7380 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
7381 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7382}
7383
7384/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
7385
7386static tree
7387aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7388 gimple_seq *post_p ATTRIBUTE_UNUSED)
7389{
7390 tree addr;
7391 bool indirect_p;
7392 bool is_ha; /* is HFA or HVA. */
7393 bool dw_align; /* double-word align. */
ef4bddc2 7394 machine_mode ag_mode = VOIDmode;
43e9d192 7395 int nregs;
ef4bddc2 7396 machine_mode mode;
43e9d192
IB
7397
7398 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7399 tree stack, f_top, f_off, off, arg, roundup, on_stack;
7400 HOST_WIDE_INT size, rsize, adjust, align;
7401 tree t, u, cond1, cond2;
7402
7403 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7404 if (indirect_p)
7405 type = build_pointer_type (type);
7406
7407 mode = TYPE_MODE (type);
7408
7409 f_stack = TYPE_FIELDS (va_list_type_node);
7410 f_grtop = DECL_CHAIN (f_stack);
7411 f_vrtop = DECL_CHAIN (f_grtop);
7412 f_groff = DECL_CHAIN (f_vrtop);
7413 f_vroff = DECL_CHAIN (f_groff);
7414
7415 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
7416 f_stack, NULL_TREE);
7417 size = int_size_in_bytes (type);
7418 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
7419
7420 dw_align = false;
7421 adjust = 0;
7422 if (aarch64_vfp_is_call_or_return_candidate (mode,
7423 type,
7424 &ag_mode,
7425 &nregs,
7426 &is_ha))
7427 {
7428 /* TYPE passed in fp/simd registers. */
7429 if (TARGET_GENERAL_REGS_ONLY)
7430 sorry ("%qs and floating point or vector arguments",
7431 "-mgeneral-regs-only");
7432
7433 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
7434 unshare_expr (valist), f_vrtop, NULL_TREE);
7435 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
7436 unshare_expr (valist), f_vroff, NULL_TREE);
7437
7438 rsize = nregs * UNITS_PER_VREG;
7439
7440 if (is_ha)
7441 {
7442 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
7443 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
7444 }
7445 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
7446 && size < UNITS_PER_VREG)
7447 {
7448 adjust = UNITS_PER_VREG - size;
7449 }
7450 }
7451 else
7452 {
7453 /* TYPE passed in general registers. */
7454 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
7455 unshare_expr (valist), f_grtop, NULL_TREE);
7456 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
7457 unshare_expr (valist), f_groff, NULL_TREE);
7458 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7459 nregs = rsize / UNITS_PER_WORD;
7460
7461 if (align > 8)
7462 dw_align = true;
7463
7464 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7465 && size < UNITS_PER_WORD)
7466 {
7467 adjust = UNITS_PER_WORD - size;
7468 }
7469 }
7470
7471 /* Get a local temporary for the field value. */
7472 off = get_initialized_tmp_var (f_off, pre_p, NULL);
7473
7474 /* Emit code to branch if off >= 0. */
7475 t = build2 (GE_EXPR, boolean_type_node, off,
7476 build_int_cst (TREE_TYPE (off), 0));
7477 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
7478
7479 if (dw_align)
7480 {
7481 /* Emit: offs = (offs + 15) & -16. */
7482 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7483 build_int_cst (TREE_TYPE (off), 15));
7484 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
7485 build_int_cst (TREE_TYPE (off), -16));
7486 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
7487 }
7488 else
7489 roundup = NULL;
7490
7491 /* Update ap.__[g|v]r_offs */
7492 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7493 build_int_cst (TREE_TYPE (off), rsize));
7494 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
7495
7496 /* String up. */
7497 if (roundup)
7498 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7499
7500 /* [cond2] if (ap.__[g|v]r_offs > 0) */
7501 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
7502 build_int_cst (TREE_TYPE (f_off), 0));
7503 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
7504
7505 /* String up: make sure the assignment happens before the use. */
7506 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
7507 COND_EXPR_ELSE (cond1) = t;
7508
7509 /* Prepare the trees handling the argument that is passed on the stack;
7510 the top level node will store in ON_STACK. */
7511 arg = get_initialized_tmp_var (stack, pre_p, NULL);
7512 if (align > 8)
7513 {
7514 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
7515 t = fold_convert (intDI_type_node, arg);
7516 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7517 build_int_cst (TREE_TYPE (t), 15));
7518 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7519 build_int_cst (TREE_TYPE (t), -16));
7520 t = fold_convert (TREE_TYPE (arg), t);
7521 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
7522 }
7523 else
7524 roundup = NULL;
7525 /* Advance ap.__stack */
7526 t = fold_convert (intDI_type_node, arg);
7527 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7528 build_int_cst (TREE_TYPE (t), size + 7));
7529 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7530 build_int_cst (TREE_TYPE (t), -8));
7531 t = fold_convert (TREE_TYPE (arg), t);
7532 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
7533 /* String up roundup and advance. */
7534 if (roundup)
7535 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7536 /* String up with arg */
7537 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
7538 /* Big-endianness related address adjustment. */
7539 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7540 && size < UNITS_PER_WORD)
7541 {
7542 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
7543 size_int (UNITS_PER_WORD - size));
7544 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
7545 }
7546
7547 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
7548 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
7549
7550 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
7551 t = off;
7552 if (adjust)
7553 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
7554 build_int_cst (TREE_TYPE (off), adjust));
7555
7556 t = fold_convert (sizetype, t);
7557 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
7558
7559 if (is_ha)
7560 {
7561 /* type ha; // treat as "struct {ftype field[n];}"
7562 ... [computing offs]
 7563 for (i = 0; i < nregs; ++i, offs += 16)
7564 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
7565 return ha; */
7566 int i;
7567 tree tmp_ha, field_t, field_ptr_t;
7568
7569 /* Declare a local variable. */
7570 tmp_ha = create_tmp_var_raw (type, "ha");
7571 gimple_add_tmp_var (tmp_ha);
7572
7573 /* Establish the base type. */
7574 switch (ag_mode)
7575 {
7576 case SFmode:
7577 field_t = float_type_node;
7578 field_ptr_t = float_ptr_type_node;
7579 break;
7580 case DFmode:
7581 field_t = double_type_node;
7582 field_ptr_t = double_ptr_type_node;
7583 break;
7584 case TFmode:
7585 field_t = long_double_type_node;
7586 field_ptr_t = long_double_ptr_type_node;
7587 break;
7588/* Half-precision and quad-precision floating point are not fully supported
 7589 yet. Enable the following code once that support is complete; the correct
 7590 type node for __fp16 * still needs to be found. */
7591#if 0
7592 case HFmode:
7593 field_t = float_type_node;
7594 field_ptr_t = float_ptr_type_node;
7595 break;
7596#endif
7597 case V2SImode:
7598 case V4SImode:
7599 {
7600 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
7601 field_t = build_vector_type_for_mode (innertype, ag_mode);
7602 field_ptr_t = build_pointer_type (field_t);
7603 }
7604 break;
7605 default:
7606 gcc_assert (0);
7607 }
7608
 7609 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
7610 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
7611 addr = t;
7612 t = fold_convert (field_ptr_t, addr);
7613 t = build2 (MODIFY_EXPR, field_t,
7614 build1 (INDIRECT_REF, field_t, tmp_ha),
7615 build1 (INDIRECT_REF, field_t, t));
7616
7617 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
7618 for (i = 1; i < nregs; ++i)
7619 {
7620 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
7621 u = fold_convert (field_ptr_t, addr);
7622 u = build2 (MODIFY_EXPR, field_t,
7623 build2 (MEM_REF, field_t, tmp_ha,
7624 build_int_cst (field_ptr_t,
7625 (i *
7626 int_size_in_bytes (field_t)))),
7627 build1 (INDIRECT_REF, field_t, u));
7628 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
7629 }
7630
7631 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
7632 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
7633 }
7634
7635 COND_EXPR_ELSE (cond2) = t;
7636 addr = fold_convert (build_pointer_type (type), cond1);
7637 addr = build_va_arg_indirect_ref (addr);
7638
7639 if (indirect_p)
7640 addr = build_va_arg_indirect_ref (addr);
7641
7642 return addr;
7643}
7644
7645/* Implement TARGET_SETUP_INCOMING_VARARGS. */
7646
7647static void
ef4bddc2 7648aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
7649 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7650 int no_rtl)
7651{
7652 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7653 CUMULATIVE_ARGS local_cum;
7654 int gr_saved, vr_saved;
7655
7656 /* The caller has advanced CUM up to, but not beyond, the last named
7657 argument. Advance a local copy of CUM past the last "real" named
7658 argument, to find out how many registers are left over. */
7659 local_cum = *cum;
7660 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
7661
 7662 /* Find out how many registers we need to save. */
7663 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7664 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7665
7666 if (TARGET_GENERAL_REGS_ONLY)
7667 {
7668 if (local_cum.aapcs_nvrn > 0)
7669 sorry ("%qs and floating point or vector arguments",
7670 "-mgeneral-regs-only");
7671 vr_saved = 0;
7672 }
7673
7674 if (!no_rtl)
7675 {
7676 if (gr_saved > 0)
7677 {
7678 rtx ptr, mem;
7679
7680 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7681 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7682 - gr_saved * UNITS_PER_WORD);
7683 mem = gen_frame_mem (BLKmode, ptr);
7684 set_mem_alias_set (mem, get_varargs_alias_set ());
7685
7686 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7687 mem, gr_saved);
7688 }
7689 if (vr_saved > 0)
7690 {
7691 /* We can't use move_block_from_reg, because it will use
7692 the wrong mode, storing D regs only. */
ef4bddc2 7693 machine_mode mode = TImode;
43e9d192
IB
7694 int off, i;
7695
7696 /* Set OFF to the offset from virtual_incoming_args_rtx of
7697 the first vector register. The VR save area lies below
7698 the GR one, and is aligned to 16 bytes. */
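 /* As an illustration (assuming the usual AArch64 values UNITS_PER_WORD == 8,
 UNITS_PER_VREG == 16 and STACK_BOUNDARY / BITS_PER_UNIT == 16): with
 gr_saved == 3 and vr_saved == 2, OFF starts at
 -ROUND_UP (3 * 8, 16) - 2 * 16 == -64, so the two vector registers are
 stored at offsets -64 and -48. */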
7699 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7700 STACK_BOUNDARY / BITS_PER_UNIT);
7701 off -= vr_saved * UNITS_PER_VREG;
7702
7703 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7704 {
7705 rtx ptr, mem;
7706
7707 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7708 mem = gen_frame_mem (mode, ptr);
7709 set_mem_alias_set (mem, get_varargs_alias_set ());
7710 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7711 off += UNITS_PER_VREG;
7712 }
7713 }
7714 }
7715
7716 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7717 any complication of having crtl->args.pretend_args_size changed. */
8799637a 7718 cfun->machine->frame.saved_varargs_size
43e9d192
IB
7719 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7720 STACK_BOUNDARY / BITS_PER_UNIT)
7721 + vr_saved * UNITS_PER_VREG);
7722}
7723
7724static void
7725aarch64_conditional_register_usage (void)
7726{
7727 int i;
7728 if (!TARGET_FLOAT)
7729 {
7730 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7731 {
7732 fixed_regs[i] = 1;
7733 call_used_regs[i] = 1;
7734 }
7735 }
7736}
7737
7738/* Walk down the type tree of TYPE counting consecutive base elements.
7739 If *MODEP is VOIDmode, then set it to the first valid floating point
7740 type. If a non-floating point type is found, or if a floating point
7741 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7742 otherwise return the count in the sub-tree. */
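/* For example, "struct { double x, y; }" sets *MODEP to DFmode and returns 2,
   whereas "struct { double d; int i; }" returns -1 because the int field is
   neither a floating-point nor a vector type. */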
7743static int
ef4bddc2 7744aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 7745{
ef4bddc2 7746 machine_mode mode;
43e9d192
IB
7747 HOST_WIDE_INT size;
7748
7749 switch (TREE_CODE (type))
7750 {
7751 case REAL_TYPE:
7752 mode = TYPE_MODE (type);
7753 if (mode != DFmode && mode != SFmode && mode != TFmode)
7754 return -1;
7755
7756 if (*modep == VOIDmode)
7757 *modep = mode;
7758
7759 if (*modep == mode)
7760 return 1;
7761
7762 break;
7763
7764 case COMPLEX_TYPE:
7765 mode = TYPE_MODE (TREE_TYPE (type));
7766 if (mode != DFmode && mode != SFmode && mode != TFmode)
7767 return -1;
7768
7769 if (*modep == VOIDmode)
7770 *modep = mode;
7771
7772 if (*modep == mode)
7773 return 2;
7774
7775 break;
7776
7777 case VECTOR_TYPE:
7778 /* Use V2SImode and V4SImode as representatives of all 64-bit
7779 and 128-bit vector types. */
7780 size = int_size_in_bytes (type);
7781 switch (size)
7782 {
7783 case 8:
7784 mode = V2SImode;
7785 break;
7786 case 16:
7787 mode = V4SImode;
7788 break;
7789 default:
7790 return -1;
7791 }
7792
7793 if (*modep == VOIDmode)
7794 *modep = mode;
7795
7796 /* Vector modes are considered to be opaque: two vectors are
7797 equivalent for the purposes of being homogeneous aggregates
7798 if they are the same size. */
7799 if (*modep == mode)
7800 return 1;
7801
7802 break;
7803
7804 case ARRAY_TYPE:
7805 {
7806 int count;
7807 tree index = TYPE_DOMAIN (type);
7808
807e902e
KZ
7809 /* Can't handle incomplete types nor sizes that are not
7810 fixed. */
7811 if (!COMPLETE_TYPE_P (type)
7812 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7813 return -1;
7814
7815 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7816 if (count == -1
7817 || !index
7818 || !TYPE_MAX_VALUE (index)
cc269bb6 7819 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 7820 || !TYPE_MIN_VALUE (index)
cc269bb6 7821 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
7822 || count < 0)
7823 return -1;
7824
ae7e9ddd
RS
7825 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7826 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
7827
7828 /* There must be no padding. */
807e902e 7829 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7830 return -1;
7831
7832 return count;
7833 }
7834
7835 case RECORD_TYPE:
7836 {
7837 int count = 0;
7838 int sub_count;
7839 tree field;
7840
807e902e
KZ
7841 /* Can't handle incomplete types nor sizes that are not
7842 fixed. */
7843 if (!COMPLETE_TYPE_P (type)
7844 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7845 return -1;
7846
7847 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7848 {
7849 if (TREE_CODE (field) != FIELD_DECL)
7850 continue;
7851
7852 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7853 if (sub_count < 0)
7854 return -1;
7855 count += sub_count;
7856 }
7857
7858 /* There must be no padding. */
807e902e 7859 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7860 return -1;
7861
7862 return count;
7863 }
7864
7865 case UNION_TYPE:
7866 case QUAL_UNION_TYPE:
7867 {
7868 /* These aren't very interesting except in a degenerate case. */
7869 int count = 0;
7870 int sub_count;
7871 tree field;
7872
807e902e
KZ
7873 /* Can't handle incomplete types nor sizes that are not
7874 fixed. */
7875 if (!COMPLETE_TYPE_P (type)
7876 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7877 return -1;
7878
7879 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7880 {
7881 if (TREE_CODE (field) != FIELD_DECL)
7882 continue;
7883
7884 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7885 if (sub_count < 0)
7886 return -1;
7887 count = count > sub_count ? count : sub_count;
7888 }
7889
7890 /* There must be no padding. */
807e902e 7891 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7892 return -1;
7893
7894 return count;
7895 }
7896
7897 default:
7898 break;
7899 }
7900
7901 return -1;
7902}
7903
7904/* Return TRUE if the type, as described by TYPE and MODE, is a composite
7905 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7906 array types. The C99 floating-point complex types are also considered
7907 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7908 types, which are GCC extensions and out of the scope of AAPCS64, are
7909 treated as composite types here as well.
7910
7911 Note that MODE itself is not sufficient in determining whether a type
7912 is such a composite type or not. This is because
7913 stor-layout.c:compute_record_mode may have already changed the MODE
7914 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7915 structure with only one field may have its MODE set to the mode of the
7916 field. Also an integer mode whose size matches the size of the
7917 RECORD_TYPE type may be used to substitute the original mode
7918 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7919 solely relied on. */
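/* For instance, "struct { float f; }" is treated as composite even though
   compute_record_mode may have given it SFmode, whereas a plain "float" is
   not composite. */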
7920
7921static bool
7922aarch64_composite_type_p (const_tree type,
ef4bddc2 7923 machine_mode mode)
43e9d192
IB
7924{
7925 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7926 return true;
7927
7928 if (mode == BLKmode
7929 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7930 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7931 return true;
7932
7933 return false;
7934}
7935
7936/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7937 type as described in AAPCS64 \S 4.1.2.
7938
7939 See the comment above aarch64_composite_type_p for the notes on MODE. */
7940
7941static bool
7942aarch64_short_vector_p (const_tree type,
ef4bddc2 7943 machine_mode mode)
43e9d192
IB
7944{
7945 HOST_WIDE_INT size = -1;
7946
7947 if (type && TREE_CODE (type) == VECTOR_TYPE)
7948 size = int_size_in_bytes (type);
7949 else if (!aarch64_composite_type_p (type, mode)
7950 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7951 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7952 size = GET_MODE_SIZE (mode);
7953
7954 return (size == 8 || size == 16) ? true : false;
7955}
7956
7957/* Return TRUE if an argument, whose type is described by TYPE and MODE,
7958 shall be passed or returned in simd/fp register(s) (providing these
7959 parameter passing registers are available).
7960
7961 Upon successful return, *COUNT returns the number of needed registers,
 7962 *BASE_MODE returns the mode of the individual register and, when IS_HA
 7963 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7964 floating-point aggregate or a homogeneous short-vector aggregate. */
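/* For example, a _Complex double argument yields *COUNT == 2,
   *BASE_MODE == DFmode and, when requested, *IS_HA == true. */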
7965
7966static bool
ef4bddc2 7967aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 7968 const_tree type,
ef4bddc2 7969 machine_mode *base_mode,
43e9d192
IB
7970 int *count,
7971 bool *is_ha)
7972{
ef4bddc2 7973 machine_mode new_mode = VOIDmode;
43e9d192
IB
7974 bool composite_p = aarch64_composite_type_p (type, mode);
7975
7976 if (is_ha != NULL) *is_ha = false;
7977
7978 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7979 || aarch64_short_vector_p (type, mode))
7980 {
7981 *count = 1;
7982 new_mode = mode;
7983 }
7984 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7985 {
7986 if (is_ha != NULL) *is_ha = true;
7987 *count = 2;
7988 new_mode = GET_MODE_INNER (mode);
7989 }
7990 else if (type && composite_p)
7991 {
7992 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7993
7994 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7995 {
7996 if (is_ha != NULL) *is_ha = true;
7997 *count = ag_count;
7998 }
7999 else
8000 return false;
8001 }
8002 else
8003 return false;
8004
8005 *base_mode = new_mode;
8006 return true;
8007}
8008
8009/* Implement TARGET_STRUCT_VALUE_RTX. */
8010
8011static rtx
8012aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
8013 int incoming ATTRIBUTE_UNUSED)
8014{
8015 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
8016}
8017
8018/* Implements target hook vector_mode_supported_p. */
8019static bool
ef4bddc2 8020aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192
IB
8021{
8022 if (TARGET_SIMD
8023 && (mode == V4SImode || mode == V8HImode
8024 || mode == V16QImode || mode == V2DImode
8025 || mode == V2SImode || mode == V4HImode
8026 || mode == V8QImode || mode == V2SFmode
ad7d90cc
AL
8027 || mode == V4SFmode || mode == V2DFmode
8028 || mode == V1DFmode))
43e9d192
IB
8029 return true;
8030
8031 return false;
8032}
8033
b7342d25
IB
8034/* Return appropriate SIMD container
8035 for MODE within a vector of WIDTH bits. */
ef4bddc2
RS
8036static machine_mode
8037aarch64_simd_container_mode (machine_mode mode, unsigned width)
43e9d192 8038{
b7342d25 8039 gcc_assert (width == 64 || width == 128);
43e9d192 8040 if (TARGET_SIMD)
b7342d25
IB
8041 {
8042 if (width == 128)
8043 switch (mode)
8044 {
8045 case DFmode:
8046 return V2DFmode;
8047 case SFmode:
8048 return V4SFmode;
8049 case SImode:
8050 return V4SImode;
8051 case HImode:
8052 return V8HImode;
8053 case QImode:
8054 return V16QImode;
8055 case DImode:
8056 return V2DImode;
8057 default:
8058 break;
8059 }
8060 else
8061 switch (mode)
8062 {
8063 case SFmode:
8064 return V2SFmode;
8065 case SImode:
8066 return V2SImode;
8067 case HImode:
8068 return V4HImode;
8069 case QImode:
8070 return V8QImode;
8071 default:
8072 break;
8073 }
8074 }
43e9d192
IB
8075 return word_mode;
8076}
8077
b7342d25 8078/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2
RS
8079static machine_mode
8080aarch64_preferred_simd_mode (machine_mode mode)
b7342d25
IB
8081{
8082 return aarch64_simd_container_mode (mode, 128);
8083}
8084
3b357264
JG
8085/* Return the bitmask of possible vector sizes for the vectorizer
8086 to iterate over. */
8087static unsigned int
8088aarch64_autovectorize_vector_sizes (void)
8089{
8090 return (16 | 8);
8091}
8092
ac2b960f
YZ
8093/* Implement TARGET_MANGLE_TYPE. */
8094
6f549691 8095static const char *
ac2b960f
YZ
8096aarch64_mangle_type (const_tree type)
8097{
8098 /* The AArch64 ABI documents say that "__va_list" has to be
 8099 mangled as if it is in the "std" namespace. */
8100 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
8101 return "St9__va_list";
8102
f9d53c27
TB
8103 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
8104 builtin types. */
8105 if (TYPE_NAME (type) != NULL)
8106 return aarch64_mangle_builtin_type (type);
c6fc9e43 8107
ac2b960f
YZ
8108 /* Use the default mangling. */
8109 return NULL;
8110}
8111
8baff86e
KT
8112
8113/* Return true if the rtx_insn contains a MEM RTX somewhere
8114 in it. */
75cf1494
KT
8115
8116static bool
8baff86e 8117has_memory_op (rtx_insn *mem_insn)
75cf1494 8118{
8baff86e
KT
8119 subrtx_iterator::array_type array;
8120 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
8121 if (MEM_P (*iter))
8122 return true;
8123
8124 return false;
75cf1494
KT
8125}
8126
8127/* Find the first rtx_insn before insn that will generate an assembly
8128 instruction. */
8129
8130static rtx_insn *
8131aarch64_prev_real_insn (rtx_insn *insn)
8132{
8133 if (!insn)
8134 return NULL;
8135
8136 do
8137 {
8138 insn = prev_real_insn (insn);
8139 }
8140 while (insn && recog_memoized (insn) < 0);
8141
8142 return insn;
8143}
8144
8145static bool
8146is_madd_op (enum attr_type t1)
8147{
8148 unsigned int i;
8149 /* A number of these may be AArch32 only. */
8150 enum attr_type mlatypes[] = {
8151 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
8152 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
 8153 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
8154 };
8155
8156 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
8157 {
8158 if (t1 == mlatypes[i])
8159 return true;
8160 }
8161
8162 return false;
8163}
8164
8165/* Check if there is a register dependency between a load and the insn
8166 for which we hold recog_data. */
8167
8168static bool
8169dep_between_memop_and_curr (rtx memop)
8170{
8171 rtx load_reg;
8172 int opno;
8173
8baff86e 8174 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
8175
8176 if (!REG_P (SET_DEST (memop)))
8177 return false;
8178
8179 load_reg = SET_DEST (memop);
8baff86e 8180 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
8181 {
8182 rtx operand = recog_data.operand[opno];
8183 if (REG_P (operand)
8184 && reg_overlap_mentioned_p (load_reg, operand))
8185 return true;
8186
8187 }
8188 return false;
8189}
8190
8baff86e
KT
8191
8192/* When working around the Cortex-A53 erratum 835769,
8193 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
8194 instruction and has a preceding memory instruction such that a NOP
8195 should be inserted between them. */
8196
75cf1494
KT
8197bool
8198aarch64_madd_needs_nop (rtx_insn* insn)
8199{
8200 enum attr_type attr_type;
8201 rtx_insn *prev;
8202 rtx body;
8203
8204 if (!aarch64_fix_a53_err835769)
8205 return false;
8206
8207 if (recog_memoized (insn) < 0)
8208 return false;
8209
8210 attr_type = get_attr_type (insn);
8211 if (!is_madd_op (attr_type))
8212 return false;
8213
8214 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
8215 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
8216 Restore recog state to INSN to avoid state corruption. */
8217 extract_constrain_insn_cached (insn);
8218
8baff86e 8219 if (!prev || !has_memory_op (prev))
75cf1494
KT
8220 return false;
8221
8222 body = single_set (prev);
8223
8224 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
8225 it and the DImode madd, emit a NOP between them. If body is NULL then we
8226 have a complex memory operation, probably a load/store pair.
8227 Be conservative for now and emit a NOP. */
8228 if (GET_MODE (recog_data.operand[0]) == DImode
8229 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
8230 return true;
8231
8232 return false;
8233
8234}
8235
8baff86e
KT
8236
8237/* Implement FINAL_PRESCAN_INSN. */
8238
75cf1494
KT
8239void
8240aarch64_final_prescan_insn (rtx_insn *insn)
8241{
8242 if (aarch64_madd_needs_nop (insn))
8243 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
8244}
8245
8246
43e9d192 8247/* Return the equivalent letter for size. */
81c2dfb9 8248static char
43e9d192
IB
8249sizetochar (int size)
8250{
8251 switch (size)
8252 {
8253 case 64: return 'd';
8254 case 32: return 's';
8255 case 16: return 'h';
8256 case 8 : return 'b';
8257 default: gcc_unreachable ();
8258 }
8259}
8260
3520f7cc
JG
8261/* Return true iff x is a uniform vector of floating-point
8262 constants, and the constant can be represented in
 8263 quarter-precision form. Note that, as aarch64_float_const_representable_p
 8264 rejects both +0.0 and -0.0, this function rejects them as well. */
8265static bool
8266aarch64_vect_float_const_representable_p (rtx x)
8267{
8268 int i = 0;
8269 REAL_VALUE_TYPE r0, ri;
8270 rtx x0, xi;
8271
8272 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
8273 return false;
8274
8275 x0 = CONST_VECTOR_ELT (x, 0);
8276 if (!CONST_DOUBLE_P (x0))
8277 return false;
8278
8279 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
8280
8281 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
8282 {
8283 xi = CONST_VECTOR_ELT (x, i);
8284 if (!CONST_DOUBLE_P (xi))
8285 return false;
8286
8287 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
8288 if (!REAL_VALUES_EQUAL (r0, ri))
8289 return false;
8290 }
8291
8292 return aarch64_float_const_representable_p (x0);
8293}
8294
d8edd899 8295/* Return true for valid and false for invalid. */
3ea63f60 8296bool
ef4bddc2 8297aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
48063b9d 8298 struct simd_immediate_info *info)
43e9d192
IB
8299{
8300#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
8301 matches = 1; \
8302 for (i = 0; i < idx; i += (STRIDE)) \
8303 if (!(TEST)) \
8304 matches = 0; \
8305 if (matches) \
8306 { \
8307 immtype = (CLASS); \
8308 elsize = (ELSIZE); \
43e9d192
IB
8309 eshift = (SHIFT); \
8310 emvn = (NEG); \
8311 break; \
8312 }
8313
8314 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8315 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8316 unsigned char bytes[16];
43e9d192
IB
8317 int immtype = -1, matches;
8318 unsigned int invmask = inverse ? 0xff : 0;
8319 int eshift, emvn;
8320
43e9d192 8321 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 8322 {
81c2dfb9
IB
8323 if (! (aarch64_simd_imm_zero_p (op, mode)
8324 || aarch64_vect_float_const_representable_p (op)))
d8edd899 8325 return false;
3520f7cc 8326
48063b9d
IB
8327 if (info)
8328 {
8329 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 8330 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
8331 info->mvn = false;
8332 info->shift = 0;
8333 }
3520f7cc 8334
d8edd899 8335 return true;
3520f7cc 8336 }
43e9d192
IB
8337
8338 /* Splat vector constant out into a byte vector. */
8339 for (i = 0; i < n_elts; i++)
8340 {
4b1e108c
AL
8341 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
8342 it must be laid out in the vector register in reverse order. */
8343 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192
IB
8344 unsigned HOST_WIDE_INT elpart;
8345 unsigned int part, parts;
8346
4aa81c2e 8347 if (CONST_INT_P (el))
43e9d192
IB
8348 {
8349 elpart = INTVAL (el);
8350 parts = 1;
8351 }
8352 else if (GET_CODE (el) == CONST_DOUBLE)
8353 {
8354 elpart = CONST_DOUBLE_LOW (el);
8355 parts = 2;
8356 }
8357 else
8358 gcc_unreachable ();
8359
8360 for (part = 0; part < parts; part++)
8361 {
8362 unsigned int byte;
8363 for (byte = 0; byte < innersize; byte++)
8364 {
8365 bytes[idx++] = (elpart & 0xff) ^ invmask;
8366 elpart >>= BITS_PER_UNIT;
8367 }
8368 if (GET_CODE (el) == CONST_DOUBLE)
8369 elpart = CONST_DOUBLE_HIGH (el);
8370 }
8371 }
8372
8373 /* Sanity check. */
8374 gcc_assert (idx == GET_MODE_SIZE (mode));
8375
8376 do
8377 {
8378 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8379 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
8380
8381 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8382 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
8383
8384 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8385 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
8386
8387 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8388 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
8389
8390 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
8391
8392 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
8393
8394 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8395 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
8396
8397 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8398 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
8399
8400 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8401 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
8402
8403 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8404 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
8405
8406 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
8407
8408 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
8409
8410 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 8411 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
8412
8413 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 8414 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
8415
8416 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 8417 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
8418
8419 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 8420 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
8421
8422 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
8423
8424 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8425 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
8426 }
8427 while (0);
8428
e4f0f84d 8429 if (immtype == -1)
d8edd899 8430 return false;
43e9d192 8431
48063b9d 8432 if (info)
43e9d192 8433 {
48063b9d 8434 info->element_width = elsize;
48063b9d
IB
8435 info->mvn = emvn != 0;
8436 info->shift = eshift;
8437
43e9d192
IB
8438 unsigned HOST_WIDE_INT imm = 0;
8439
e4f0f84d
TB
8440 if (immtype >= 12 && immtype <= 15)
8441 info->msl = true;
8442
43e9d192
IB
8443 /* Un-invert bytes of recognized vector, if necessary. */
8444 if (invmask != 0)
8445 for (i = 0; i < idx; i++)
8446 bytes[i] ^= invmask;
8447
8448 if (immtype == 17)
8449 {
8450 /* FIXME: Broken on 32-bit H_W_I hosts. */
8451 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8452
8453 for (i = 0; i < 8; i++)
8454 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8455 << (i * BITS_PER_UNIT);
8456
43e9d192 8457
48063b9d
IB
8458 info->value = GEN_INT (imm);
8459 }
8460 else
8461 {
8462 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8463 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
8464
8465 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
8466 generic constants. */
8467 if (info->mvn)
43e9d192 8468 imm = ~imm;
48063b9d
IB
8469 imm = (imm >> info->shift) & 0xff;
8470 info->value = GEN_INT (imm);
8471 }
43e9d192
IB
8472 }
8473
48063b9d 8474 return true;
43e9d192
IB
8475#undef CHECK
8476}
8477
43e9d192
IB
8478/* Check whether immediate shift constants are within range. */
8479bool
ef4bddc2 8480aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
8481{
8482 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
8483 if (left)
ddeabd3e 8484 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 8485 else
ddeabd3e 8486 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
8487}
8488
3520f7cc
JG
8489/* Return true if X is a uniform vector where all elements
8490 are either the floating-point constant 0.0 or the
8491 integer constant 0. */
43e9d192 8492bool
ef4bddc2 8493aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
43e9d192 8494{
3520f7cc 8495 return x == CONST0_RTX (mode);
43e9d192
IB
8496}
8497
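/* Return true if X is a 64-bit immediate in which each of the eight bytes is
   either 0x00 or 0xff, i.e. the byte-mask form of immediate accepted by MOVI
   with a 64-bit element. */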
8498bool
ef4bddc2 8499aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
43e9d192
IB
8500{
8501 HOST_WIDE_INT imm = INTVAL (x);
8502 int i;
8503
8504 for (i = 0; i < 8; i++)
8505 {
8506 unsigned int byte = imm & 0xff;
8507 if (byte != 0xff && byte != 0)
8508 return false;
8509 imm >>= 8;
8510 }
8511
8512 return true;
8513}
8514
83f8c414
CSS
8515bool
8516aarch64_mov_operand_p (rtx x,
a5350ddc 8517 enum aarch64_symbol_context context,
ef4bddc2 8518 machine_mode mode)
83f8c414 8519{
83f8c414
CSS
8520 if (GET_CODE (x) == HIGH
8521 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
8522 return true;
8523
82614948 8524 if (CONST_INT_P (x))
83f8c414
CSS
8525 return true;
8526
8527 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
8528 return true;
8529
a5350ddc
CSS
8530 return aarch64_classify_symbolic_expression (x, context)
8531 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
8532}
8533
43e9d192
IB
8534/* Return a const_int vector of VAL. */
8535rtx
ef4bddc2 8536aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
43e9d192
IB
8537{
8538 int nunits = GET_MODE_NUNITS (mode);
8539 rtvec v = rtvec_alloc (nunits);
8540 int i;
8541
8542 for (i=0; i < nunits; i++)
8543 RTVEC_ELT (v, i) = GEN_INT (val);
8544
8545 return gen_rtx_CONST_VECTOR (mode, v);
8546}
8547
051d0e2f
SN
8548/* Check OP is a legal scalar immediate for the MOVI instruction. */
8549
8550bool
ef4bddc2 8551aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
051d0e2f 8552{
ef4bddc2 8553 machine_mode vmode;
051d0e2f
SN
8554
8555 gcc_assert (!VECTOR_MODE_P (mode));
8556 vmode = aarch64_preferred_simd_mode (mode);
8557 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 8558 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
8559}
8560
988fa693
JG
8561/* Construct and return a PARALLEL RTX vector with elements numbering the
8562 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
8563 the vector - from the perspective of the architecture. This does not
8564 line up with GCC's perspective on lane numbers, so we end up with
8565 different masks depending on our target endian-ness. The diagram
8566 below may help. We must draw the distinction when building masks
8567 which select one half of the vector. An instruction selecting
8568 architectural low-lanes for a big-endian target, must be described using
8569 a mask selecting GCC high-lanes.
8570
8571 Big-Endian Little-Endian
8572
8573GCC 0 1 2 3 3 2 1 0
8574 | x | x | x | x | | x | x | x | x |
8575Architecture 3 2 1 0 3 2 1 0
8576
8577Low Mask: { 2, 3 } { 0, 1 }
8578High Mask: { 0, 1 } { 2, 3 }
8579*/
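/* For example, aarch64_simd_vect_par_cnst_half (V4SImode, false) selects the
   architectural low half: it yields (parallel [0 1]) for little-endian and
   (parallel [2 3]) for big-endian, matching the table above. */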
8580
43e9d192 8581rtx
ef4bddc2 8582aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
43e9d192
IB
8583{
8584 int nunits = GET_MODE_NUNITS (mode);
8585 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
8586 int high_base = nunits / 2;
8587 int low_base = 0;
8588 int base;
43e9d192
IB
8589 rtx t1;
8590 int i;
8591
988fa693
JG
8592 if (BYTES_BIG_ENDIAN)
8593 base = high ? low_base : high_base;
8594 else
8595 base = high ? high_base : low_base;
8596
8597 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
8598 RTVEC_ELT (v, i) = GEN_INT (base + i);
8599
8600 t1 = gen_rtx_PARALLEL (mode, v);
8601 return t1;
8602}
8603
988fa693
JG
8604/* Check OP for validity as a PARALLEL RTX vector with elements
8605 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
8606 from the perspective of the architecture. See the diagram above
8607 aarch64_simd_vect_par_cnst_half for more details. */
8608
8609bool
ef4bddc2 8610aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
8611 bool high)
8612{
8613 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
8614 HOST_WIDE_INT count_op = XVECLEN (op, 0);
8615 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
8616 int i = 0;
8617
8618 if (!VECTOR_MODE_P (mode))
8619 return false;
8620
8621 if (count_op != count_ideal)
8622 return false;
8623
8624 for (i = 0; i < count_ideal; i++)
8625 {
8626 rtx elt_op = XVECEXP (op, 0, i);
8627 rtx elt_ideal = XVECEXP (ideal, 0, i);
8628
4aa81c2e 8629 if (!CONST_INT_P (elt_op)
988fa693
JG
8630 || INTVAL (elt_ideal) != INTVAL (elt_op))
8631 return false;
8632 }
8633 return true;
8634}
8635
43e9d192
IB
8636/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
8637 HIGH (exclusive). */
8638void
46ed6024
CB
8639aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8640 const_tree exp)
43e9d192
IB
8641{
8642 HOST_WIDE_INT lane;
4aa81c2e 8643 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
8644 lane = INTVAL (operand);
8645
8646 if (lane < low || lane >= high)
46ed6024
CB
8647 {
8648 if (exp)
8649 error ("%Klane %ld out of range %ld - %ld", exp, lane, low, high - 1);
8650 else
8651 error ("lane %ld out of range %ld - %ld", lane, low, high - 1);
8652 }
43e9d192
IB
8653}
8654
43e9d192
IB
8655/* Return TRUE if OP is a valid vector addressing mode. */
8656bool
8657aarch64_simd_mem_operand_p (rtx op)
8658{
8659 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 8660 || REG_P (XEXP (op, 0)));
43e9d192
IB
8661}
8662
2d8c6dc1
AH
8663/* Emit a register copy from operand to operand, taking care not to
8664 early-clobber source registers in the process.
43e9d192 8665
2d8c6dc1
AH
8666 COUNT is the number of components into which the copy needs to be
8667 decomposed. */
43e9d192 8668void
2d8c6dc1
AH
8669aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
8670 unsigned int count)
43e9d192
IB
8671{
8672 unsigned int i;
2d8c6dc1
AH
8673 int rdest = REGNO (operands[0]);
8674 int rsrc = REGNO (operands[1]);
43e9d192
IB
8675
8676 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
8677 || rdest < rsrc)
8678 for (i = 0; i < count; i++)
8679 emit_move_insn (gen_rtx_REG (mode, rdest + i),
8680 gen_rtx_REG (mode, rsrc + i));
43e9d192 8681 else
2d8c6dc1
AH
8682 for (i = 0; i < count; i++)
8683 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
8684 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
8685}
8686
8687/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8688 one of VSTRUCT modes: OI, CI or XI. */
8689int
647d790d 8690aarch64_simd_attr_length_move (rtx_insn *insn)
43e9d192 8691{
ef4bddc2 8692 machine_mode mode;
43e9d192
IB
8693
8694 extract_insn_cached (insn);
8695
8696 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
8697 {
8698 mode = GET_MODE (recog_data.operand[0]);
8699 switch (mode)
8700 {
8701 case OImode:
8702 return 8;
8703 case CImode:
8704 return 12;
8705 case XImode:
8706 return 16;
8707 default:
8708 gcc_unreachable ();
8709 }
8710 }
8711 return 4;
8712}
8713
668046d1
DS
8714/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
8715 one of VSTRUCT modes: OI, CI, EI, or XI. */
8716int
8717aarch64_simd_attr_length_rglist (enum machine_mode mode)
8718{
8719 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
8720}
8721
db0253a4
TB
8722/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8723 alignment of a vector to 128 bits. */
8724static HOST_WIDE_INT
8725aarch64_simd_vector_alignment (const_tree type)
8726{
9439e9a1 8727 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
8728 return MIN (align, 128);
8729}
8730
8731/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8732static bool
8733aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8734{
8735 if (is_packed)
8736 return false;
8737
8738 /* We guarantee alignment for vectors up to 128-bits. */
8739 if (tree_int_cst_compare (TYPE_SIZE (type),
8740 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8741 return false;
8742
8743 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8744 return true;
8745}
8746
4369c11e
TB
8747/* If VALS is a vector constant that can be loaded into a register
8748 using DUP, generate instructions to do so and return an RTX to
8749 assign to the register. Otherwise return NULL_RTX. */
8750static rtx
8751aarch64_simd_dup_constant (rtx vals)
8752{
ef4bddc2
RS
8753 machine_mode mode = GET_MODE (vals);
8754 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e
TB
8755 int n_elts = GET_MODE_NUNITS (mode);
8756 bool all_same = true;
8757 rtx x;
8758 int i;
8759
8760 if (GET_CODE (vals) != CONST_VECTOR)
8761 return NULL_RTX;
8762
8763 for (i = 1; i < n_elts; ++i)
8764 {
8765 x = CONST_VECTOR_ELT (vals, i);
8766 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8767 all_same = false;
8768 }
8769
8770 if (!all_same)
8771 return NULL_RTX;
8772
8773 /* We can load this constant by using DUP and a constant in a
8774 single ARM register. This will be cheaper than a vector
8775 load. */
8776 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8777 return gen_rtx_VEC_DUPLICATE (mode, x);
8778}
8779
8780
8781/* Generate code to load VALS, which is a PARALLEL containing only
8782 constants (for vec_init) or CONST_VECTOR, efficiently into a
8783 register. Returns an RTX to copy into the register, or NULL_RTX
8784 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 8785static rtx
4369c11e
TB
8786aarch64_simd_make_constant (rtx vals)
8787{
ef4bddc2 8788 machine_mode mode = GET_MODE (vals);
4369c11e
TB
8789 rtx const_dup;
8790 rtx const_vec = NULL_RTX;
8791 int n_elts = GET_MODE_NUNITS (mode);
8792 int n_const = 0;
8793 int i;
8794
8795 if (GET_CODE (vals) == CONST_VECTOR)
8796 const_vec = vals;
8797 else if (GET_CODE (vals) == PARALLEL)
8798 {
8799 /* A CONST_VECTOR must contain only CONST_INTs and
8800 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8801 Only store valid constants in a CONST_VECTOR. */
8802 for (i = 0; i < n_elts; ++i)
8803 {
8804 rtx x = XVECEXP (vals, 0, i);
8805 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8806 n_const++;
8807 }
8808 if (n_const == n_elts)
8809 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8810 }
8811 else
8812 gcc_unreachable ();
8813
8814 if (const_vec != NULL_RTX
48063b9d 8815 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
8816 /* Load using MOVI/MVNI. */
8817 return const_vec;
8818 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8819 /* Loaded using DUP. */
8820 return const_dup;
8821 else if (const_vec != NULL_RTX)
8822 /* Load from constant pool. We can not take advantage of single-cycle
8823 LD1 because we need a PC-relative addressing mode. */
8824 return const_vec;
8825 else
8826 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8827 We can not construct an initializer. */
8828 return NULL_RTX;
8829}
8830
8831void
8832aarch64_expand_vector_init (rtx target, rtx vals)
8833{
ef4bddc2
RS
8834 machine_mode mode = GET_MODE (target);
8835 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 8836 int n_elts = GET_MODE_NUNITS (mode);
8b66a2d4
AL
8837 int n_var = 0;
8838 rtx any_const = NULL_RTX;
4369c11e 8839 bool all_same = true;
4369c11e 8840
8b66a2d4 8841 for (int i = 0; i < n_elts; ++i)
4369c11e 8842 {
8b66a2d4 8843 rtx x = XVECEXP (vals, 0, i);
4369c11e 8844 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8b66a2d4
AL
8845 ++n_var;
8846 else
8847 any_const = x;
4369c11e 8848
8b66a2d4 8849 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
4369c11e
TB
8850 all_same = false;
8851 }
8852
8853 if (n_var == 0)
8854 {
8855 rtx constant = aarch64_simd_make_constant (vals);
8856 if (constant != NULL_RTX)
8857 {
8858 emit_move_insn (target, constant);
8859 return;
8860 }
8861 }
8862
8863 /* Splat a single non-constant element if we can. */
8864 if (all_same)
8865 {
8b66a2d4 8866 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
4369c11e
TB
8867 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8868 return;
8869 }
8870
8b66a2d4
AL
8871 /* Half the fields (or less) are non-constant. Load constant then overwrite
8872 varying fields. Hope that this is more efficient than using the stack. */
8873 if (n_var <= n_elts/2)
4369c11e
TB
8874 {
8875 rtx copy = copy_rtx (vals);
4369c11e 8876
8b66a2d4
AL
8877 /* Load constant part of vector. We really don't care what goes into the
8878 parts we will overwrite, but we're more likely to be able to load the
8879 constant efficiently if it has fewer, larger, repeating parts
8880 (see aarch64_simd_valid_immediate). */
8881 for (int i = 0; i < n_elts; i++)
8882 {
8883 rtx x = XVECEXP (vals, 0, i);
8884 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8885 continue;
8886 rtx subst = any_const;
8887 for (int bit = n_elts / 2; bit > 0; bit /= 2)
8888 {
8889 /* Look in the copied vector, as more elements are const. */
8890 rtx test = XVECEXP (copy, 0, i ^ bit);
8891 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
8892 {
8893 subst = test;
8894 break;
8895 }
8896 }
8897 XVECEXP (copy, 0, i) = subst;
8898 }
4369c11e
TB
8899 aarch64_expand_vector_init (target, copy);
8900
8b66a2d4
AL
8901 /* Insert variables. */
8902 enum insn_code icode = optab_handler (vec_set_optab, mode);
4369c11e 8903 gcc_assert (icode != CODE_FOR_nothing);
8b66a2d4
AL
8904
8905 for (int i = 0; i < n_elts; i++)
8906 {
8907 rtx x = XVECEXP (vals, 0, i);
8908 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8909 continue;
8910 x = copy_to_mode_reg (inner_mode, x);
8911 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
8912 }
4369c11e
TB
8913 return;
8914 }
8915
8916 /* Construct the vector in memory one field at a time
8917 and load the whole vector. */
8b66a2d4
AL
8918 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8919 for (int i = 0; i < n_elts; i++)
4369c11e
TB
8920 emit_move_insn (adjust_address_nv (mem, inner_mode,
8921 i * GET_MODE_SIZE (inner_mode)),
8922 XVECEXP (vals, 0, i));
8923 emit_move_insn (target, mem);
8924
8925}
8926
43e9d192 8927static unsigned HOST_WIDE_INT
ef4bddc2 8928aarch64_shift_truncation_mask (machine_mode mode)
43e9d192
IB
8929{
8930 return
8931 (aarch64_vector_mode_supported_p (mode)
8932 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8933}
8934
8935#ifndef TLS_SECTION_ASM_FLAG
8936#define TLS_SECTION_ASM_FLAG 'T'
8937#endif
8938
8939void
8940aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8941 tree decl ATTRIBUTE_UNUSED)
8942{
8943 char flagchars[10], *f = flagchars;
8944
8945 /* If we have already declared this section, we can use an
8946 abbreviated form to switch back to it -- unless this section is
 8947 part of a COMDAT group, in which case GAS requires the full
8948 declaration every time. */
8949 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8950 && (flags & SECTION_DECLARED))
8951 {
8952 fprintf (asm_out_file, "\t.section\t%s\n", name);
8953 return;
8954 }
8955
8956 if (!(flags & SECTION_DEBUG))
8957 *f++ = 'a';
8958 if (flags & SECTION_WRITE)
8959 *f++ = 'w';
8960 if (flags & SECTION_CODE)
8961 *f++ = 'x';
8962 if (flags & SECTION_SMALL)
8963 *f++ = 's';
8964 if (flags & SECTION_MERGE)
8965 *f++ = 'M';
8966 if (flags & SECTION_STRINGS)
8967 *f++ = 'S';
8968 if (flags & SECTION_TLS)
8969 *f++ = TLS_SECTION_ASM_FLAG;
8970 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8971 *f++ = 'G';
8972 *f = '\0';
8973
8974 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8975
8976 if (!(flags & SECTION_NOTYPE))
8977 {
8978 const char *type;
8979 const char *format;
8980
8981 if (flags & SECTION_BSS)
8982 type = "nobits";
8983 else
8984 type = "progbits";
8985
8986#ifdef TYPE_OPERAND_FMT
8987 format = "," TYPE_OPERAND_FMT;
8988#else
8989 format = ",@%s";
8990#endif
8991
8992 fprintf (asm_out_file, format, type);
8993
8994 if (flags & SECTION_ENTSIZE)
8995 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8996 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8997 {
8998 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8999 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
9000 else
9001 fprintf (asm_out_file, ",%s,comdat",
9002 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
9003 }
9004 }
9005
9006 putc ('\n', asm_out_file);
9007}
9008
9009/* Select a format to encode pointers in exception handling data. */
9010int
9011aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
9012{
9013 int type;
9014 switch (aarch64_cmodel)
9015 {
9016 case AARCH64_CMODEL_TINY:
9017 case AARCH64_CMODEL_TINY_PIC:
9018 case AARCH64_CMODEL_SMALL:
9019 case AARCH64_CMODEL_SMALL_PIC:
9020 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
9021 for everything. */
9022 type = DW_EH_PE_sdata4;
9023 break;
9024 default:
9025 /* No assumptions here. 8-byte relocs required. */
9026 type = DW_EH_PE_sdata8;
9027 break;
9028 }
9029 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
9030}
9031
0462169c
SN
9032/* Emit load exclusive. */
9033
9034static void
ef4bddc2 9035aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
9036 rtx mem, rtx model_rtx)
9037{
9038 rtx (*gen) (rtx, rtx, rtx);
9039
9040 switch (mode)
9041 {
9042 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
9043 case HImode: gen = gen_aarch64_load_exclusivehi; break;
9044 case SImode: gen = gen_aarch64_load_exclusivesi; break;
9045 case DImode: gen = gen_aarch64_load_exclusivedi; break;
9046 default:
9047 gcc_unreachable ();
9048 }
9049
9050 emit_insn (gen (rval, mem, model_rtx));
9051}
9052
9053/* Emit store exclusive. */
9054
9055static void
ef4bddc2 9056aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
9057 rtx rval, rtx mem, rtx model_rtx)
9058{
9059 rtx (*gen) (rtx, rtx, rtx, rtx);
9060
9061 switch (mode)
9062 {
9063 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
9064 case HImode: gen = gen_aarch64_store_exclusivehi; break;
9065 case SImode: gen = gen_aarch64_store_exclusivesi; break;
9066 case DImode: gen = gen_aarch64_store_exclusivedi; break;
9067 default:
9068 gcc_unreachable ();
9069 }
9070
9071 emit_insn (gen (bval, rval, mem, model_rtx));
9072}
9073
9074/* Mark the previous jump instruction as unlikely. */
9075
9076static void
9077aarch64_emit_unlikely_jump (rtx insn)
9078{
e5af9ddd 9079 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c
SN
9080
9081 insn = emit_jump_insn (insn);
e5af9ddd 9082 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
0462169c
SN
9083}
9084
9085/* Expand a compare and swap pattern. */
9086
9087void
9088aarch64_expand_compare_and_swap (rtx operands[])
9089{
9090 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
ef4bddc2 9091 machine_mode mode, cmp_mode;
0462169c
SN
9092 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
9093
9094 bval = operands[0];
9095 rval = operands[1];
9096 mem = operands[2];
9097 oldval = operands[3];
9098 newval = operands[4];
9099 is_weak = operands[5];
9100 mod_s = operands[6];
9101 mod_f = operands[7];
9102 mode = GET_MODE (mem);
9103 cmp_mode = mode;
9104
9105 /* Normally the succ memory model must be stronger than fail, but in the
9106 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
9107 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
9108
9109 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
9110 && INTVAL (mod_s) == MEMMODEL_RELEASE)
9111 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
9112
9113 switch (mode)
9114 {
9115 case QImode:
9116 case HImode:
9117 /* For short modes, we're going to perform the comparison in SImode,
9118 so do the zero-extension now. */
9119 cmp_mode = SImode;
9120 rval = gen_reg_rtx (SImode);
9121 oldval = convert_modes (SImode, mode, oldval, true);
9122 /* Fall through. */
9123
9124 case SImode:
9125 case DImode:
9126 /* Force the value into a register if needed. */
9127 if (!aarch64_plus_operand (oldval, mode))
9128 oldval = force_reg (cmp_mode, oldval);
9129 break;
9130
9131 default:
9132 gcc_unreachable ();
9133 }
9134
9135 switch (mode)
9136 {
9137 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
9138 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
9139 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
9140 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
9141 default:
9142 gcc_unreachable ();
9143 }
9144
9145 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
9146
9147 if (mode == QImode || mode == HImode)
9148 emit_move_insn (operands[1], gen_lowpart (mode, rval));
9149
9150 x = gen_rtx_REG (CCmode, CC_REGNUM);
9151 x = gen_rtx_EQ (SImode, x, const0_rtx);
9152 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
9153}
9154
9155/* Split a compare and swap pattern. */
9156
9157void
9158aarch64_split_compare_and_swap (rtx operands[])
9159{
9160 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 9161 machine_mode mode;
0462169c 9162 bool is_weak;
5d8a22a5
DM
9163 rtx_code_label *label1, *label2;
9164 rtx x, cond;
0462169c
SN
9165
9166 rval = operands[0];
9167 mem = operands[1];
9168 oldval = operands[2];
9169 newval = operands[3];
9170 is_weak = (operands[4] != const0_rtx);
0462169c
SN
9171 scratch = operands[7];
9172 mode = GET_MODE (mem);
9173
5d8a22a5 9174 label1 = NULL;
0462169c
SN
9175 if (!is_weak)
9176 {
9177 label1 = gen_label_rtx ();
9178 emit_label (label1);
9179 }
9180 label2 = gen_label_rtx ();
9181
9182 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
9183
9184 cond = aarch64_gen_compare_reg (NE, rval, oldval);
9185 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9186 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9187 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
9188 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9189
9190 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
9191
9192 if (!is_weak)
9193 {
9194 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
9195 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9196 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
9197 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9198 }
9199 else
9200 {
9201 cond = gen_rtx_REG (CCmode, CC_REGNUM);
9202 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
9203 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
9204 }
9205
9206 emit_label (label2);
9207}
9208
9209/* Split an atomic operation. */
9210
9211void
9212aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9213 rtx value, rtx model_rtx, rtx cond)
9214{
ef4bddc2
RS
9215 machine_mode mode = GET_MODE (mem);
9216 machine_mode wmode = (mode == DImode ? DImode : SImode);
5d8a22a5
DM
9217 rtx_code_label *label;
9218 rtx x;
0462169c
SN
9219
9220 label = gen_label_rtx ();
9221 emit_label (label);
9222
9223 if (new_out)
9224 new_out = gen_lowpart (wmode, new_out);
9225 if (old_out)
9226 old_out = gen_lowpart (wmode, old_out);
9227 else
9228 old_out = new_out;
9229 value = simplify_gen_subreg (wmode, value, mode, 0);
9230
9231 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
9232
9233 switch (code)
9234 {
9235 case SET:
9236 new_out = value;
9237 break;
9238
9239 case NOT:
9240 x = gen_rtx_AND (wmode, old_out, value);
9241 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9242 x = gen_rtx_NOT (wmode, new_out);
9243 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9244 break;
9245
9246 case MINUS:
9247 if (CONST_INT_P (value))
9248 {
9249 value = GEN_INT (-INTVAL (value));
9250 code = PLUS;
9251 }
9252 /* Fall through. */
9253
9254 default:
9255 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
9256 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9257 break;
9258 }
9259
9260 aarch64_emit_store_exclusive (mode, cond, mem,
9261 gen_lowpart (mode, new_out), model_rtx);
9262
9263 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9264 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9265 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
9266 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9267}
9268
95ca411e
YZ
9269static void
9270aarch64_print_extension (void)
9271{
9272 const struct aarch64_option_extension *opt = NULL;
9273
9274 for (opt = all_extensions; opt->name != NULL; opt++)
9275 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
9276 asm_fprintf (asm_out_file, "+%s", opt->name);
9277
9278 asm_fprintf (asm_out_file, "\n");
9279}
9280
43e9d192
IB
9281static void
9282aarch64_start_file (void)
9283{
9284 if (selected_arch)
95ca411e
YZ
9285 {
9286 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
9287 aarch64_print_extension ();
9288 }
43e9d192 9289 else if (selected_cpu)
95ca411e 9290 {
682287fb
JG
9291 const char *truncated_name
9292 = aarch64_rewrite_selected_cpu (selected_cpu->name);
9293 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
95ca411e
YZ
9294 aarch64_print_extension ();
9295 }
43e9d192
IB
9296 default_file_start();
9297}
9298
9299/* Target hook for c_mode_for_suffix. */
ef4bddc2 9300static machine_mode
43e9d192
IB
9301aarch64_c_mode_for_suffix (char suffix)
9302{
9303 if (suffix == 'q')
9304 return TFmode;
9305
9306 return VOIDmode;
9307}
9308
3520f7cc
JG
9309/* We can only represent floating point constants which will fit in
9310 "quarter-precision" values. These values are characterised by
9311 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
9312 by:
9313
9314 (-1)^s * (n/16) * 2^r
9315
9316 Where:
9317 's' is the sign bit.
9318 'n' is an integer in the range 16 <= n <= 31.
9319 'r' is an integer in the range -3 <= r <= 4. */
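/* For example, 0.5 = (16/16) * 2^-1 and 31.0 = (31/16) * 2^4 fit this form,
   whereas 0.0625 would require r == -4 and cannot be encoded; 0.0 is
   rejected separately below. */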
9320
9321/* Return true iff X can be represented by a quarter-precision
9322 floating point immediate operand X. Note, we cannot represent 0.0. */
9323bool
9324aarch64_float_const_representable_p (rtx x)
9325{
9326 /* This represents our current view of how many bits
9327 make up the mantissa. */
9328 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 9329 int exponent;
3520f7cc 9330 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 9331 REAL_VALUE_TYPE r, m;
807e902e 9332 bool fail;
3520f7cc
JG
9333
9334 if (!CONST_DOUBLE_P (x))
9335 return false;
9336
94bfa2da
TV
9337 if (GET_MODE (x) == VOIDmode)
9338 return false;
9339
3520f7cc
JG
9340 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9341
9342 /* We cannot represent infinities, NaNs or +/-zero. We won't
9343 know if we have +zero until we analyse the mantissa, but we
9344 can reject the other invalid values. */
9345 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
9346 || REAL_VALUE_MINUS_ZERO (r))
9347 return false;
9348
ba96cdfb 9349 /* Extract exponent. */
3520f7cc
JG
9350 r = real_value_abs (&r);
9351 exponent = REAL_EXP (&r);
9352
9353 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9354 highest (sign) bit, with a fixed binary point at bit point_pos.
9355 m1 holds the low part of the mantissa, m2 the high part.
9356 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
9357 bits for the mantissa, this can fail (low bits will be lost). */
9358 real_ldexp (&m, &r, point_pos - exponent);
807e902e 9359 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
9360
9361 /* If the low part of the mantissa has bits set we cannot represent
9362 the value. */
807e902e 9363 if (w.elt (0) != 0)
3520f7cc
JG
9364 return false;
9365 /* We have rejected the lower HOST_WIDE_INT, so update our
9366 understanding of how many bits lie in the mantissa and
9367 look only at the high HOST_WIDE_INT. */
807e902e 9368 mantissa = w.elt (1);
3520f7cc
JG
9369 point_pos -= HOST_BITS_PER_WIDE_INT;
9370
9371 /* We can only represent values with a mantissa of the form 1.xxxx. */
9372 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
9373 if ((mantissa & mask) != 0)
9374 return false;
9375
9376 /* Having filtered unrepresentable values, we may now remove all
9377 but the highest 5 bits. */
9378 mantissa >>= point_pos - 5;
9379
9380 /* We cannot represent the value 0.0, so reject it. This is handled
9381 elsewhere. */
9382 if (mantissa == 0)
9383 return false;
9384
9385 /* Then, as bit 4 is always set, we can mask it off, leaving
9386 the mantissa in the range [0, 15]. */
9387 mantissa &= ~(1 << 4);
9388 gcc_assert (mantissa <= 15);
9389
9390 /* GCC internally does not use IEEE754-like encoding (where normalized
9391 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
9392 Our mantissa values are shifted 4 places to the left relative to
9393 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
9394 by 5 places to correct for GCC's representation. */
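  /* For example, under this scheme 1.0 is held as 0.5 * 2^1, so REAL_EXP
     yields 1 and we compute 5 - 1 = 4, which is accepted below; 32.0 is
     held as 0.5 * 2^6, giving 5 - 6 = -1, which is rejected, matching
     the fact that the largest representable value is 31.0.  */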
9395 exponent = 5 - exponent;
9396
9397 return (exponent >= 0 && exponent <= 7);
9398}
9399
9400char*
81c2dfb9 9401aarch64_output_simd_mov_immediate (rtx const_vector,
ef4bddc2 9402 machine_mode mode,
3520f7cc
JG
9403 unsigned width)
9404{
3ea63f60 9405 bool is_valid;
3520f7cc 9406 static char templ[40];
3520f7cc 9407 const char *mnemonic;
e4f0f84d 9408 const char *shift_op;
3520f7cc 9409 unsigned int lane_count = 0;
81c2dfb9 9410 char element_char;
3520f7cc 9411
e4f0f84d 9412 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
9413
9414 /* This will return true to show const_vector is legal for use as either
9415 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
9416 also update INFO to show how the immediate should be generated. */
81c2dfb9 9417 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
9418 gcc_assert (is_valid);
9419
81c2dfb9 9420 element_char = sizetochar (info.element_width);
48063b9d
IB
9421 lane_count = width / info.element_width;
9422
3520f7cc
JG
9423 mode = GET_MODE_INNER (mode);
9424 if (mode == SFmode || mode == DFmode)
9425 {
48063b9d
IB
9426 gcc_assert (info.shift == 0 && ! info.mvn);
9427 if (aarch64_float_const_zero_rtx_p (info.value))
9428 info.value = GEN_INT (0);
9429 else
9430 {
9431#define buf_size 20
9432 REAL_VALUE_TYPE r;
9433 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
9434 char float_buf[buf_size] = {'\0'};
9435 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
9436#undef buf_size
9437
9438 if (lane_count == 1)
9439 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
9440 else
9441 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 9442 lane_count, element_char, float_buf);
48063b9d
IB
9443 return templ;
9444 }
3520f7cc 9445 }
3520f7cc 9446
48063b9d 9447 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 9448 shift_op = info.msl ? "msl" : "lsl";
3520f7cc
JG
9449
9450 if (lane_count == 1)
48063b9d
IB
9451 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
9452 mnemonic, UINTVAL (info.value));
9453 else if (info.shift)
9454 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
9455 ", %s %d", mnemonic, lane_count, element_char,
9456 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 9457 else
48063b9d 9458 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 9459 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
9460 return templ;
9461}
9462
b7342d25
IB
9463char*
9464aarch64_output_scalar_simd_mov_immediate (rtx immediate,
ef4bddc2 9465 machine_mode mode)
b7342d25 9466{
ef4bddc2 9467 machine_mode vmode;
b7342d25
IB
9468
9469 gcc_assert (!VECTOR_MODE_P (mode));
9470 vmode = aarch64_simd_container_mode (mode, 64);
9471 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
9472 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
9473}
9474
88b08073
JG
9475/* Split operands into moves from op[1] + op[2] into op[0]. */
9476
9477void
9478aarch64_split_combinev16qi (rtx operands[3])
9479{
9480 unsigned int dest = REGNO (operands[0]);
9481 unsigned int src1 = REGNO (operands[1]);
9482 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 9483 machine_mode halfmode = GET_MODE (operands[1]);
88b08073
JG
9484 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
9485 rtx destlo, desthi;
9486
9487 gcc_assert (halfmode == V16QImode);
9488
9489 if (src1 == dest && src2 == dest + halfregs)
9490 {
9491 /* No-op move. Can't split to nothing; emit something. */
9492 emit_note (NOTE_INSN_DELETED);
9493 return;
9494 }
9495
9496 /* Preserve register attributes for variable tracking. */
9497 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
9498 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
9499 GET_MODE_SIZE (halfmode));
9500
9501 /* Special case of reversed high/low parts. */
9502 if (reg_overlap_mentioned_p (operands[2], destlo)
9503 && reg_overlap_mentioned_p (operands[1], desthi))
9504 {
9505 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9506 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
9507 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9508 }
9509 else if (!reg_overlap_mentioned_p (operands[2], destlo))
9510 {
9511 /* Try to avoid unnecessary moves if part of the result
9512 is in the right place already. */
9513 if (src1 != dest)
9514 emit_move_insn (destlo, operands[1]);
9515 if (src2 != dest + halfregs)
9516 emit_move_insn (desthi, operands[2]);
9517 }
9518 else
9519 {
9520 if (src2 != dest + halfregs)
9521 emit_move_insn (desthi, operands[2]);
9522 if (src1 != dest)
9523 emit_move_insn (destlo, operands[1]);
9524 }
9525}
9526
9527/* vec_perm support. */
9528
9529#define MAX_VECT_LEN 16
9530
9531struct expand_vec_perm_d
9532{
9533 rtx target, op0, op1;
9534 unsigned char perm[MAX_VECT_LEN];
ef4bddc2 9535 machine_mode vmode;
88b08073
JG
9536 unsigned char nelt;
9537 bool one_vector_p;
9538 bool testing_p;
9539};
9540
9541/* Generate a variable permutation. */
9542
9543static void
9544aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
9545{
ef4bddc2 9546 machine_mode vmode = GET_MODE (target);
88b08073
JG
9547 bool one_vector_p = rtx_equal_p (op0, op1);
9548
9549 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
9550 gcc_checking_assert (GET_MODE (op0) == vmode);
9551 gcc_checking_assert (GET_MODE (op1) == vmode);
9552 gcc_checking_assert (GET_MODE (sel) == vmode);
9553 gcc_checking_assert (TARGET_SIMD);
9554
9555 if (one_vector_p)
9556 {
9557 if (vmode == V8QImode)
9558 {
9559 /* Expand the argument to a V16QI mode by duplicating it. */
9560 rtx pair = gen_reg_rtx (V16QImode);
9561 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
9562 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9563 }
9564 else
9565 {
9566 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
9567 }
9568 }
9569 else
9570 {
9571 rtx pair;
9572
9573 if (vmode == V8QImode)
9574 {
9575 pair = gen_reg_rtx (V16QImode);
9576 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
9577 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9578 }
9579 else
9580 {
9581 pair = gen_reg_rtx (OImode);
9582 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
9583 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
9584 }
9585 }
9586}
9587
9588void
9589aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
9590{
ef4bddc2 9591 machine_mode vmode = GET_MODE (target);
c9d1a16a 9592 unsigned int nelt = GET_MODE_NUNITS (vmode);
88b08073 9593 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 9594 rtx mask;
88b08073
JG
9595
9596 /* The TBL instruction does not use a modulo index, so we must take care
9597 of that ourselves. */
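  /* For instance, with two distinct V16QI inputs the mask below is 31, so
     a selector element of 33 is reduced to 1; without this AND, TBL would
     write zero for any index outside the valid byte range.  */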
f7c4e5b8
AL
9598 mask = aarch64_simd_gen_const_vector_dup (vmode,
9599 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
9600 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
9601
f7c4e5b8
AL
9602 /* For big-endian, we also need to reverse the index within the vector
9603 (but not which vector). */
9604 if (BYTES_BIG_ENDIAN)
9605 {
9606 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
9607 if (!one_vector_p)
9608 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
9609 sel = expand_simple_binop (vmode, XOR, sel, mask,
9610 NULL, 0, OPTAB_LIB_WIDEN);
9611 }
88b08073
JG
9612 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
9613}
9614
cc4d934f
JG
9615/* Recognize patterns suitable for the TRN instructions. */
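/* As an example, the two-operand V4SI selector { 0, 4, 2, 6 } passes
   the checks below with odd == 0 and is emitted as a single TRN1. */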
9616static bool
9617aarch64_evpc_trn (struct expand_vec_perm_d *d)
9618{
9619 unsigned int i, odd, mask, nelt = d->nelt;
9620 rtx out, in0, in1, x;
9621 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 9622 machine_mode vmode = d->vmode;
cc4d934f
JG
9623
9624 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9625 return false;
9626
9627 /* Note that these are little-endian tests.
9628 We correct for big-endian later. */
9629 if (d->perm[0] == 0)
9630 odd = 0;
9631 else if (d->perm[0] == 1)
9632 odd = 1;
9633 else
9634 return false;
9635 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9636
9637 for (i = 0; i < nelt; i += 2)
9638 {
9639 if (d->perm[i] != i + odd)
9640 return false;
9641 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
9642 return false;
9643 }
9644
9645 /* Success! */
9646 if (d->testing_p)
9647 return true;
9648
9649 in0 = d->op0;
9650 in1 = d->op1;
9651 if (BYTES_BIG_ENDIAN)
9652 {
9653 x = in0, in0 = in1, in1 = x;
9654 odd = !odd;
9655 }
9656 out = d->target;
9657
9658 if (odd)
9659 {
9660 switch (vmode)
9661 {
9662 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
9663 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
9664 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
9665 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
9666 case V4SImode: gen = gen_aarch64_trn2v4si; break;
9667 case V2SImode: gen = gen_aarch64_trn2v2si; break;
9668 case V2DImode: gen = gen_aarch64_trn2v2di; break;
9669 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
9670 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
9671 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
9672 default:
9673 return false;
9674 }
9675 }
9676 else
9677 {
9678 switch (vmode)
9679 {
9680 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
9681 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
9682 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
9683 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
9684 case V4SImode: gen = gen_aarch64_trn1v4si; break;
9685 case V2SImode: gen = gen_aarch64_trn1v2si; break;
9686 case V2DImode: gen = gen_aarch64_trn1v2di; break;
9687 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
9688 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
9689 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
9690 default:
9691 return false;
9692 }
9693 }
9694
9695 emit_insn (gen (out, in0, in1));
9696 return true;
9697}
9698
9699/* Recognize patterns suitable for the UZP instructions. */
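/* As an example, the two-operand V4SI selector { 0, 2, 4, 6 } passes
   the checks below with odd == 0 and is emitted as a single UZP1. */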
9700static bool
9701aarch64_evpc_uzp (struct expand_vec_perm_d *d)
9702{
9703 unsigned int i, odd, mask, nelt = d->nelt;
9704 rtx out, in0, in1, x;
9705 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 9706 machine_mode vmode = d->vmode;
cc4d934f
JG
9707
9708 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9709 return false;
9710
9711 /* Note that these are little-endian tests.
9712 We correct for big-endian later. */
9713 if (d->perm[0] == 0)
9714 odd = 0;
9715 else if (d->perm[0] == 1)
9716 odd = 1;
9717 else
9718 return false;
9719 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9720
9721 for (i = 0; i < nelt; i++)
9722 {
9723 unsigned elt = (i * 2 + odd) & mask;
9724 if (d->perm[i] != elt)
9725 return false;
9726 }
9727
9728 /* Success! */
9729 if (d->testing_p)
9730 return true;
9731
9732 in0 = d->op0;
9733 in1 = d->op1;
9734 if (BYTES_BIG_ENDIAN)
9735 {
9736 x = in0, in0 = in1, in1 = x;
9737 odd = !odd;
9738 }
9739 out = d->target;
9740
9741 if (odd)
9742 {
9743 switch (vmode)
9744 {
9745 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9746 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9747 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9748 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9749 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9750 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9751 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9752 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9753 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9754 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9755 default:
9756 return false;
9757 }
9758 }
9759 else
9760 {
9761 switch (vmode)
9762 {
9763 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9764 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9765 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9766 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9767 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9768 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9769 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9770 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9771 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9772 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9773 default:
9774 return false;
9775 }
9776 }
9777
9778 emit_insn (gen (out, in0, in1));
9779 return true;
9780}
9781
9782/* Recognize patterns suitable for the ZIP instructions. */
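/* As an example, the two-operand V4SI selector { 0, 4, 1, 5 } passes
   the checks below with high == 0 and is emitted as a single ZIP1. */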
9783static bool
9784aarch64_evpc_zip (struct expand_vec_perm_d *d)
9785{
9786 unsigned int i, high, mask, nelt = d->nelt;
9787 rtx out, in0, in1, x;
9788 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 9789 machine_mode vmode = d->vmode;
cc4d934f
JG
9790
9791 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9792 return false;
9793
9794 /* Note that these are little-endian tests.
9795 We correct for big-endian later. */
9796 high = nelt / 2;
9797 if (d->perm[0] == high)
9798 /* Do Nothing. */
9799 ;
9800 else if (d->perm[0] == 0)
9801 high = 0;
9802 else
9803 return false;
9804 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9805
9806 for (i = 0; i < nelt / 2; i++)
9807 {
9808 unsigned elt = (i + high) & mask;
9809 if (d->perm[i * 2] != elt)
9810 return false;
9811 elt = (elt + nelt) & mask;
9812 if (d->perm[i * 2 + 1] != elt)
9813 return false;
9814 }
9815
9816 /* Success! */
9817 if (d->testing_p)
9818 return true;
9819
9820 in0 = d->op0;
9821 in1 = d->op1;
9822 if (BYTES_BIG_ENDIAN)
9823 {
9824 x = in0, in0 = in1, in1 = x;
9825 high = !high;
9826 }
9827 out = d->target;
9828
9829 if (high)
9830 {
9831 switch (vmode)
9832 {
9833 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9834 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9835 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9836 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9837 case V4SImode: gen = gen_aarch64_zip2v4si; break;
9838 case V2SImode: gen = gen_aarch64_zip2v2si; break;
9839 case V2DImode: gen = gen_aarch64_zip2v2di; break;
9840 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9841 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9842 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9843 default:
9844 return false;
9845 }
9846 }
9847 else
9848 {
9849 switch (vmode)
9850 {
9851 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9852 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9853 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9854 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9855 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9856 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9857 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9858 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9859 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9860 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9861 default:
9862 return false;
9863 }
9864 }
9865
9866 emit_insn (gen (out, in0, in1));
9867 return true;
9868}
9869
ae0533da
AL
9870/* Recognize patterns for the EXT insn. */
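/* As an example, the two-operand V4SI selector { 1, 2, 3, 4 } has
   location == 1, its indices increase by one, and the permutation is
   emitted as a single EXT of the two inputs. */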
9871
9872static bool
9873aarch64_evpc_ext (struct expand_vec_perm_d *d)
9874{
9875 unsigned int i, nelt = d->nelt;
9876 rtx (*gen) (rtx, rtx, rtx, rtx);
9877 rtx offset;
9878
9879 unsigned int location = d->perm[0]; /* Always < nelt. */
9880
9881 /* Check if the extracted indices are increasing by one. */
9882 for (i = 1; i < nelt; i++)
9883 {
9884 unsigned int required = location + i;
9885 if (d->one_vector_p)
9886 {
9887 /* We'll pass the same vector in twice, so allow indices to wrap. */
9888 required &= (nelt - 1);
9889 }
9890 if (d->perm[i] != required)
9891 return false;
9892 }
9893
ae0533da
AL
9894 switch (d->vmode)
9895 {
9896 case V16QImode: gen = gen_aarch64_extv16qi; break;
9897 case V8QImode: gen = gen_aarch64_extv8qi; break;
9898 case V4HImode: gen = gen_aarch64_extv4hi; break;
9899 case V8HImode: gen = gen_aarch64_extv8hi; break;
9900 case V2SImode: gen = gen_aarch64_extv2si; break;
9901 case V4SImode: gen = gen_aarch64_extv4si; break;
9902 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9903 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9904 case V2DImode: gen = gen_aarch64_extv2di; break;
9905 case V2DFmode: gen = gen_aarch64_extv2df; break;
9906 default:
9907 return false;
9908 }
9909
9910 /* Success! */
9911 if (d->testing_p)
9912 return true;
9913
b31e65bb
AL
9914 /* The case where (location == 0) is a no-op for both big- and little-endian,
9915 and is removed by the mid-end at optimization levels -O1 and higher. */
9916
9917 if (BYTES_BIG_ENDIAN && (location != 0))
ae0533da
AL
9918 {
9919 /* After setup, we want the high elements of the first vector (stored
9920 at the LSB end of the register), and the low elements of the second
9921 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 9922 std::swap (d->op0, d->op1);
ae0533da
AL
9923 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9924 location = nelt - location;
9925 }
9926
9927 offset = GEN_INT (location);
9928 emit_insn (gen (d->target, d->op0, d->op1, offset));
9929 return true;
9930}
9931
923fcec3
AL
9932/* Recognize patterns for the REV insns. */
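/* As an example, the single-operand V8QI selector
   { 7, 6, 5, 4, 3, 2, 1, 0 } gives diff == 7 below and is emitted as a
   REV64 of the byte lanes. */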
9933
9934static bool
9935aarch64_evpc_rev (struct expand_vec_perm_d *d)
9936{
9937 unsigned int i, j, diff, nelt = d->nelt;
9938 rtx (*gen) (rtx, rtx);
9939
9940 if (!d->one_vector_p)
9941 return false;
9942
9943 diff = d->perm[0];
9944 switch (diff)
9945 {
9946 case 7:
9947 switch (d->vmode)
9948 {
9949 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9950 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
9951 default:
9952 return false;
9953 }
9954 break;
9955 case 3:
9956 switch (d->vmode)
9957 {
9958 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9959 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
9960 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
9961 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
9962 default:
9963 return false;
9964 }
9965 break;
9966 case 1:
9967 switch (d->vmode)
9968 {
9969 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9970 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
9971 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
9972 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
9973 case V4SImode: gen = gen_aarch64_rev64v4si; break;
9974 case V2SImode: gen = gen_aarch64_rev64v2si; break;
9975 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
9976 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
9977 default:
9978 return false;
9979 }
9980 break;
9981 default:
9982 return false;
9983 }
9984
9985 for (i = 0; i < nelt ; i += diff + 1)
9986 for (j = 0; j <= diff; j += 1)
9987 {
9988 /* This is guaranteed to be true as the value of diff
9989 is 7, 3 or 1 and we should have enough elements in the
9990 queue to generate this. Getting a vector mask with a
9991 value of diff other than these values implies that
9992 something is wrong by the time we get here. */
9993 gcc_assert (i + j < nelt);
9994 if (d->perm[i + j] != i + diff - j)
9995 return false;
9996 }
9997
9998 /* Success! */
9999 if (d->testing_p)
10000 return true;
10001
10002 emit_insn (gen (d->target, d->op0));
10003 return true;
10004}
10005
91bd4114
JG
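/* Recognize patterns suitable for the DUP instructions: permutations
   that replicate a single element, e.g. { 2, 2, 2, 2 } for V4SI. */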
10006static bool
10007aarch64_evpc_dup (struct expand_vec_perm_d *d)
10008{
10009 rtx (*gen) (rtx, rtx, rtx);
10010 rtx out = d->target;
10011 rtx in0;
ef4bddc2 10012 machine_mode vmode = d->vmode;
91bd4114
JG
10013 unsigned int i, elt, nelt = d->nelt;
10014 rtx lane;
10015
91bd4114
JG
10016 elt = d->perm[0];
10017 for (i = 1; i < nelt; i++)
10018 {
10019 if (elt != d->perm[i])
10020 return false;
10021 }
10022
10023 /* The generic preparation in aarch64_expand_vec_perm_const_1
10024 swaps the operand order and the permute indices if it finds
10025 d->perm[0] to be in the second operand. Thus, we can always
10026 use d->op0 and need not do any extra arithmetic to get the
10027 correct lane number. */
10028 in0 = d->op0;
f901401e 10029 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114
JG
10030
10031 switch (vmode)
10032 {
10033 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
10034 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
10035 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
10036 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
10037 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
10038 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
10039 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
10040 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
10041 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
10042 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
10043 default:
10044 return false;
10045 }
10046
10047 emit_insn (gen (out, in0, lane));
10048 return true;
10049}
10050
88b08073
JG
10051static bool
10052aarch64_evpc_tbl (struct expand_vec_perm_d *d)
10053{
10054 rtx rperm[MAX_VECT_LEN], sel;
ef4bddc2 10055 machine_mode vmode = d->vmode;
88b08073
JG
10056 unsigned int i, nelt = d->nelt;
10057
88b08073
JG
10058 if (d->testing_p)
10059 return true;
10060
 10061 /* Generic code will try constant permutation twice: once with the
10062 original mode and again with the elements lowered to QImode.
10063 So wait and don't do the selector expansion ourselves. */
10064 if (vmode != V8QImode && vmode != V16QImode)
10065 return false;
10066
10067 for (i = 0; i < nelt; ++i)
bbcc9c00
TB
10068 {
10069 int nunits = GET_MODE_NUNITS (vmode);
10070
10071 /* If big-endian and two vectors we end up with a weird mixed-endian
10072 mode on NEON. Reverse the index within each word but not the word
10073 itself. */
10074 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
10075 : d->perm[i]);
10076 }
88b08073
JG
10077 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
10078 sel = force_reg (vmode, sel);
10079
10080 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
10081 return true;
10082}
10083
10084static bool
10085aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
10086{
10087 /* The pattern matching functions above are written to look for a small
10088 number to begin the sequence (0, 1, N/2). If we begin with an index
10089 from the second operand, we can swap the operands. */
10090 if (d->perm[0] >= d->nelt)
10091 {
10092 unsigned i, nelt = d->nelt;
88b08073 10093
0696116a 10094 gcc_assert (nelt == (nelt & -nelt));
88b08073 10095 for (i = 0; i < nelt; ++i)
0696116a 10096 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
88b08073 10097
cb5c6c29 10098 std::swap (d->op0, d->op1);
88b08073
JG
10099 }
10100
10101 if (TARGET_SIMD)
cc4d934f 10102 {
923fcec3
AL
10103 if (aarch64_evpc_rev (d))
10104 return true;
10105 else if (aarch64_evpc_ext (d))
ae0533da 10106 return true;
f901401e
AL
10107 else if (aarch64_evpc_dup (d))
10108 return true;
ae0533da 10109 else if (aarch64_evpc_zip (d))
cc4d934f
JG
10110 return true;
10111 else if (aarch64_evpc_uzp (d))
10112 return true;
10113 else if (aarch64_evpc_trn (d))
10114 return true;
10115 return aarch64_evpc_tbl (d);
10116 }
88b08073
JG
10117 return false;
10118}
10119
10120/* Expand a vec_perm_const pattern. */
10121
10122bool
10123aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
10124{
10125 struct expand_vec_perm_d d;
10126 int i, nelt, which;
10127
10128 d.target = target;
10129 d.op0 = op0;
10130 d.op1 = op1;
10131
10132 d.vmode = GET_MODE (target);
10133 gcc_assert (VECTOR_MODE_P (d.vmode));
10134 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
10135 d.testing_p = false;
10136
10137 for (i = which = 0; i < nelt; ++i)
10138 {
10139 rtx e = XVECEXP (sel, 0, i);
10140 int ei = INTVAL (e) & (2 * nelt - 1);
10141 which |= (ei < nelt ? 1 : 2);
10142 d.perm[i] = ei;
10143 }
10144
10145 switch (which)
10146 {
10147 default:
10148 gcc_unreachable ();
10149
10150 case 3:
10151 d.one_vector_p = false;
10152 if (!rtx_equal_p (op0, op1))
10153 break;
10154
10155 /* The elements of PERM do not suggest that only the first operand
10156 is used, but both operands are identical. Allow easier matching
10157 of the permutation by folding the permutation into the single
10158 input vector. */
10159 /* Fall Through. */
10160 case 2:
10161 for (i = 0; i < nelt; ++i)
10162 d.perm[i] &= nelt - 1;
10163 d.op0 = op1;
10164 d.one_vector_p = true;
10165 break;
10166
10167 case 1:
10168 d.op1 = op0;
10169 d.one_vector_p = true;
10170 break;
10171 }
10172
10173 return aarch64_expand_vec_perm_const_1 (&d);
10174}
10175
10176static bool
ef4bddc2 10177aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
88b08073
JG
10178 const unsigned char *sel)
10179{
10180 struct expand_vec_perm_d d;
10181 unsigned int i, nelt, which;
10182 bool ret;
10183
10184 d.vmode = vmode;
10185 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
10186 d.testing_p = true;
10187 memcpy (d.perm, sel, nelt);
10188
10189 /* Calculate whether all elements are in one vector. */
10190 for (i = which = 0; i < nelt; ++i)
10191 {
10192 unsigned char e = d.perm[i];
10193 gcc_assert (e < 2 * nelt);
10194 which |= (e < nelt ? 1 : 2);
10195 }
10196
10197 /* If all elements are from the second vector, reindex as if from the
10198 first vector. */
10199 if (which == 2)
10200 for (i = 0; i < nelt; ++i)
10201 d.perm[i] -= nelt;
10202
10203 /* Check whether the mask can be applied to a single vector. */
10204 d.one_vector_p = (which != 3);
10205
10206 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
10207 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
10208 if (!d.one_vector_p)
10209 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
10210
10211 start_sequence ();
10212 ret = aarch64_expand_vec_perm_const_1 (&d);
10213 end_sequence ();
10214
10215 return ret;
10216}
10217
668046d1
DS
10218rtx
10219aarch64_reverse_mask (enum machine_mode mode)
10220{
 10221 /* We have to reverse each vector because we don't have
10222 a permuted load that can reverse-load according to ABI rules. */
10223 rtx mask;
10224 rtvec v = rtvec_alloc (16);
10225 int i, j;
10226 int nunits = GET_MODE_NUNITS (mode);
10227 int usize = GET_MODE_UNIT_SIZE (mode);
10228
10229 gcc_assert (BYTES_BIG_ENDIAN);
10230 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
10231
10232 for (i = 0; i < nunits; i++)
10233 for (j = 0; j < usize; j++)
10234 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
10235 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
10236 return force_reg (V16QImode, mask);
10237}
10238
97e1ad78
JG
10239/* Implement MODES_TIEABLE_P. */
10240
10241bool
ef4bddc2 10242aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
10243{
10244 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
10245 return true;
10246
10247 /* We specifically want to allow elements of "structure" modes to
10248 be tieable to the structure. This more general condition allows
10249 other rarer situations too. */
10250 if (TARGET_SIMD
10251 && aarch64_vector_mode_p (mode1)
10252 && aarch64_vector_mode_p (mode2))
10253 return true;
10254
10255 return false;
10256}
10257
e2c75eea
JG
10258/* Return a new RTX holding the result of moving POINTER forward by
10259 AMOUNT bytes. */
10260
10261static rtx
10262aarch64_move_pointer (rtx pointer, int amount)
10263{
10264 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
10265
10266 return adjust_automodify_address (pointer, GET_MODE (pointer),
10267 next, amount);
10268}
10269
10270/* Return a new RTX holding the result of moving POINTER forward by the
10271 size of the mode it points to. */
10272
10273static rtx
10274aarch64_progress_pointer (rtx pointer)
10275{
10276 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
10277
10278 return aarch64_move_pointer (pointer, amount);
10279}
10280
10281/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
10282 MODE bytes. */
10283
10284static void
10285aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 10286 machine_mode mode)
e2c75eea
JG
10287{
10288 rtx reg = gen_reg_rtx (mode);
10289
10290 /* "Cast" the pointers to the correct mode. */
10291 *src = adjust_address (*src, mode, 0);
10292 *dst = adjust_address (*dst, mode, 0);
10293 /* Emit the memcpy. */
10294 emit_move_insn (reg, *src);
10295 emit_move_insn (*dst, reg);
10296 /* Move the pointers forward. */
10297 *src = aarch64_progress_pointer (*src);
10298 *dst = aarch64_progress_pointer (*dst);
10299}
10300
10301/* Expand movmem, as if from a __builtin_memcpy. Return true if
10302 we succeed, otherwise return false. */
10303
10304bool
10305aarch64_expand_movmem (rtx *operands)
10306{
10307 unsigned int n;
10308 rtx dst = operands[0];
10309 rtx src = operands[1];
10310 rtx base;
10311 bool speed_p = !optimize_function_for_size_p (cfun);
10312
10313 /* When optimizing for size, give a better estimate of the length of a
10314 memcpy call, but use the default otherwise. */
10315 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
10316
10317 /* We can't do anything smart if the amount to copy is not constant. */
10318 if (!CONST_INT_P (operands[2]))
10319 return false;
10320
10321 n = UINTVAL (operands[2]);
10322
10323 /* Try to keep the number of instructions low. For cases below 16 bytes we
10324 need to make at most two moves. For cases above 16 bytes it will be one
10325 move for each 16 byte chunk, then at most two additional moves. */
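  /* For example, tracing the code below with n == 35 and speed_p set:
     max_instructions is 7 and (35 / 16) + 2 == 4, so the copy is
     expanded inline as two 16-byte (TImode) chunks followed by a
     4-byte chunk that overlaps the previous chunk by one byte.  */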
10326 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
10327 return false;
10328
10329 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10330 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
10331
10332 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
10333 src = adjust_automodify_address (src, VOIDmode, base, 0);
10334
 10335 /* Simple cases. Copy 0-3 bytes: (if applicable) a 2-byte chunk, then a
10336 1-byte chunk. */
10337 if (n < 4)
10338 {
10339 if (n >= 2)
10340 {
10341 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10342 n -= 2;
10343 }
10344
10345 if (n == 1)
10346 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10347
10348 return true;
10349 }
10350
10351 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
10352 4-byte chunk, partially overlapping with the previously copied chunk. */
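  /* E.g. for n == 6 this copies bytes 0-3, then steps both pointers back
     to byte 2 and copies bytes 2-5, overlapping the first chunk by two
     bytes instead of issuing narrower moves.  */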
10353 if (n < 8)
10354 {
10355 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10356 n -= 4;
10357 if (n > 0)
10358 {
10359 int move = n - 4;
10360
10361 src = aarch64_move_pointer (src, move);
10362 dst = aarch64_move_pointer (dst, move);
10363 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10364 }
10365 return true;
10366 }
10367
10368 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
10369 them, then (if applicable) an 8-byte chunk. */
10370 while (n >= 8)
10371 {
10372 if (n / 16)
10373 {
10374 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
10375 n -= 16;
10376 }
10377 else
10378 {
10379 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10380 n -= 8;
10381 }
10382 }
10383
10384 /* Finish the final bytes of the copy. We can always do this in one
10385 instruction. We either copy the exact amount we need, or partially
 10386 overlap with the previous chunk we copied and copy 8 bytes. */
10387 if (n == 0)
10388 return true;
10389 else if (n == 1)
10390 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10391 else if (n == 2)
10392 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10393 else if (n == 4)
10394 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10395 else
10396 {
10397 if (n == 3)
10398 {
10399 src = aarch64_move_pointer (src, -1);
10400 dst = aarch64_move_pointer (dst, -1);
10401 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10402 }
10403 else
10404 {
10405 int move = n - 8;
10406
10407 src = aarch64_move_pointer (src, move);
10408 dst = aarch64_move_pointer (dst, move);
10409 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10410 }
10411 }
10412
10413 return true;
10414}
10415
a3125fc2
CL
10416/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
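/* Under ASan's usual 1/8 shadow mapping (shadow scale 3), the shadow
   byte for an address A is assumed to live at (A >> 3) plus the offset
   returned here. */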
10417
10418static unsigned HOST_WIDE_INT
10419aarch64_asan_shadow_offset (void)
10420{
10421 return (HOST_WIDE_INT_1 << 36);
10422}
10423
d3006da6 10424static bool
445d7826 10425aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
d3006da6
JG
10426 unsigned int align,
10427 enum by_pieces_operation op,
10428 bool speed_p)
10429{
10430 /* STORE_BY_PIECES can be used when copying a constant string, but
10431 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
10432 For now we always fail this and let the move_by_pieces code copy
10433 the string from read-only memory. */
10434 if (op == STORE_BY_PIECES)
10435 return false;
10436
10437 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
10438}
10439
5f3bc026
ZC
10440static enum machine_mode
10441aarch64_code_to_ccmode (enum rtx_code code)
10442{
10443 switch (code)
10444 {
10445 case NE:
10446 return CC_DNEmode;
10447
10448 case EQ:
10449 return CC_DEQmode;
10450
10451 case LE:
10452 return CC_DLEmode;
10453
10454 case LT:
10455 return CC_DLTmode;
10456
10457 case GE:
10458 return CC_DGEmode;
10459
10460 case GT:
10461 return CC_DGTmode;
10462
10463 case LEU:
10464 return CC_DLEUmode;
10465
10466 case LTU:
10467 return CC_DLTUmode;
10468
10469 case GEU:
10470 return CC_DGEUmode;
10471
10472 case GTU:
10473 return CC_DGTUmode;
10474
10475 default:
10476 return CCmode;
10477 }
10478}
10479
10480static rtx
10481aarch64_gen_ccmp_first (rtx *prep_seq, rtx *gen_seq,
10482 int code, tree treeop0, tree treeop1)
10483{
10484 enum machine_mode op_mode, cmp_mode, cc_mode;
10485 rtx op0, op1, cmp, target;
10486 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
10487 enum insn_code icode;
10488 struct expand_operand ops[4];
10489
10490 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) code);
10491 if (cc_mode == CCmode)
10492 return NULL_RTX;
10493
10494 start_sequence ();
10495 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
10496
10497 op_mode = GET_MODE (op0);
10498 if (op_mode == VOIDmode)
10499 op_mode = GET_MODE (op1);
10500
10501 switch (op_mode)
10502 {
10503 case QImode:
10504 case HImode:
10505 case SImode:
10506 cmp_mode = SImode;
10507 icode = CODE_FOR_cmpsi;
10508 break;
10509
10510 case DImode:
10511 cmp_mode = DImode;
10512 icode = CODE_FOR_cmpdi;
10513 break;
10514
10515 default:
10516 end_sequence ();
10517 return NULL_RTX;
10518 }
10519
10520 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
10521 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
10522 if (!op0 || !op1)
10523 {
10524 end_sequence ();
10525 return NULL_RTX;
10526 }
10527 *prep_seq = get_insns ();
10528 end_sequence ();
10529
10530 cmp = gen_rtx_fmt_ee ((enum rtx_code) code, cmp_mode, op0, op1);
10531 target = gen_rtx_REG (CCmode, CC_REGNUM);
10532
10533 create_output_operand (&ops[0], target, CCmode);
10534 create_fixed_operand (&ops[1], cmp);
10535 create_fixed_operand (&ops[2], op0);
10536 create_fixed_operand (&ops[3], op1);
10537
10538 start_sequence ();
10539 if (!maybe_expand_insn (icode, 4, ops))
10540 {
10541 end_sequence ();
10542 return NULL_RTX;
10543 }
10544 *gen_seq = get_insns ();
10545 end_sequence ();
10546
10547 return gen_rtx_REG (cc_mode, CC_REGNUM);
10548}
10549
10550static rtx
10551aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code,
10552 tree treeop0, tree treeop1, int bit_code)
10553{
10554 rtx op0, op1, cmp0, cmp1, target;
10555 enum machine_mode op_mode, cmp_mode, cc_mode;
10556 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
10557 enum insn_code icode = CODE_FOR_ccmp_andsi;
10558 struct expand_operand ops[6];
10559
10560 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) cmp_code);
10561 if (cc_mode == CCmode)
10562 return NULL_RTX;
10563
10564 push_to_sequence ((rtx_insn*) *prep_seq);
10565 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
10566
10567 op_mode = GET_MODE (op0);
10568 if (op_mode == VOIDmode)
10569 op_mode = GET_MODE (op1);
10570
10571 switch (op_mode)
10572 {
10573 case QImode:
10574 case HImode:
10575 case SImode:
10576 cmp_mode = SImode;
10577 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_andsi
10578 : CODE_FOR_ccmp_iorsi;
10579 break;
10580
10581 case DImode:
10582 cmp_mode = DImode;
10583 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_anddi
10584 : CODE_FOR_ccmp_iordi;
10585 break;
10586
10587 default:
10588 end_sequence ();
10589 return NULL_RTX;
10590 }
10591
10592 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
10593 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
10594 if (!op0 || !op1)
10595 {
10596 end_sequence ();
10597 return NULL_RTX;
10598 }
10599 *prep_seq = get_insns ();
10600 end_sequence ();
10601
10602 target = gen_rtx_REG (cc_mode, CC_REGNUM);
10603 cmp1 = gen_rtx_fmt_ee ((enum rtx_code) cmp_code, cmp_mode, op0, op1);
10604 cmp0 = gen_rtx_fmt_ee (NE, cmp_mode, prev, const0_rtx);
10605
10606 create_fixed_operand (&ops[0], prev);
10607 create_fixed_operand (&ops[1], target);
10608 create_fixed_operand (&ops[2], op0);
10609 create_fixed_operand (&ops[3], op1);
10610 create_fixed_operand (&ops[4], cmp0);
10611 create_fixed_operand (&ops[5], cmp1);
10612
10613 push_to_sequence ((rtx_insn*) *gen_seq);
10614 if (!maybe_expand_insn (icode, 6, ops))
10615 {
10616 end_sequence ();
10617 return NULL_RTX;
10618 }
10619
10620 *gen_seq = get_insns ();
10621 end_sequence ();
10622
10623 return target;
10624}
10625
10626#undef TARGET_GEN_CCMP_FIRST
10627#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
10628
10629#undef TARGET_GEN_CCMP_NEXT
10630#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
10631
6a569cdd
KT
10632/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
10633 instruction fusion of some sort. */
10634
10635static bool
10636aarch64_macro_fusion_p (void)
10637{
10638 return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
10639}
10640
10641
10642/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
10643 should be kept together during scheduling. */
10644
10645static bool
10646aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
10647{
10648 rtx set_dest;
10649 rtx prev_set = single_set (prev);
10650 rtx curr_set = single_set (curr);
10651 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
10652 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
10653
10654 if (!aarch64_macro_fusion_p ())
10655 return false;
10656
10657 if (simple_sets_p
10658 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
10659 {
10660 /* We are trying to match:
10661 prev (mov) == (set (reg r0) (const_int imm16))
10662 curr (movk) == (set (zero_extract (reg r0)
10663 (const_int 16)
10664 (const_int 16))
10665 (const_int imm16_1)) */
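      /* In assembly terms this corresponds to a pair along the lines of:
	   mov  x0, 0x1234
	   movk x0, 0x5678, lsl 16
	 which cores with AARCH64_FUSE_MOV_MOVK can treat as one unit.  */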
10666
10667 set_dest = SET_DEST (curr_set);
10668
10669 if (GET_CODE (set_dest) == ZERO_EXTRACT
10670 && CONST_INT_P (SET_SRC (curr_set))
10671 && CONST_INT_P (SET_SRC (prev_set))
10672 && CONST_INT_P (XEXP (set_dest, 2))
10673 && INTVAL (XEXP (set_dest, 2)) == 16
10674 && REG_P (XEXP (set_dest, 0))
10675 && REG_P (SET_DEST (prev_set))
10676 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
10677 {
10678 return true;
10679 }
10680 }
10681
9bbe08fe
KT
10682 if (simple_sets_p
10683 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
10684 {
10685
10686 /* We're trying to match:
10687 prev (adrp) == (set (reg r1)
10688 (high (symbol_ref ("SYM"))))
10689 curr (add) == (set (reg r0)
10690 (lo_sum (reg r1)
10691 (symbol_ref ("SYM"))))
10692 Note that r0 need not necessarily be the same as r1, especially
10693 during pre-regalloc scheduling. */
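      /* In assembly terms this is the usual small-code-model address
	 materialisation, along the lines of:
	   adrp x1, SYM
	   add  x0, x1, :lo12:SYM  */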
10694
10695 if (satisfies_constraint_Ush (SET_SRC (prev_set))
10696 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10697 {
10698 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
10699 && REG_P (XEXP (SET_SRC (curr_set), 0))
10700 && REGNO (XEXP (SET_SRC (curr_set), 0))
10701 == REGNO (SET_DEST (prev_set))
10702 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
10703 XEXP (SET_SRC (curr_set), 1)))
10704 return true;
10705 }
10706 }
10707
cd0cb232
KT
10708 if (simple_sets_p
10709 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK))
10710 {
10711
10712 /* We're trying to match:
10713 prev (movk) == (set (zero_extract (reg r0)
10714 (const_int 16)
10715 (const_int 32))
10716 (const_int imm16_1))
10717 curr (movk) == (set (zero_extract (reg r0)
10718 (const_int 16)
10719 (const_int 48))
10720 (const_int imm16_2)) */
10721
10722 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
10723 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
10724 && REG_P (XEXP (SET_DEST (prev_set), 0))
10725 && REG_P (XEXP (SET_DEST (curr_set), 0))
10726 && REGNO (XEXP (SET_DEST (prev_set), 0))
10727 == REGNO (XEXP (SET_DEST (curr_set), 0))
10728 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
10729 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
10730 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
10731 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
10732 && CONST_INT_P (SET_SRC (prev_set))
10733 && CONST_INT_P (SET_SRC (curr_set)))
10734 return true;
10735
10736 }
d8354ad7
KT
10737 if (simple_sets_p
10738 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_LDR))
10739 {
10740 /* We're trying to match:
10741 prev (adrp) == (set (reg r0)
10742 (high (symbol_ref ("SYM"))))
10743 curr (ldr) == (set (reg r1)
10744 (mem (lo_sum (reg r0)
10745 (symbol_ref ("SYM")))))
10746 or
10747 curr (ldr) == (set (reg r1)
10748 (zero_extend (mem
10749 (lo_sum (reg r0)
10750 (symbol_ref ("SYM")))))) */
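      /* In assembly terms this is a global load along the lines of:
	   adrp x0, SYM
	   ldr  x1, [x0, #:lo12:SYM]  */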
10751 if (satisfies_constraint_Ush (SET_SRC (prev_set))
10752 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10753 {
10754 rtx curr_src = SET_SRC (curr_set);
10755
10756 if (GET_CODE (curr_src) == ZERO_EXTEND)
10757 curr_src = XEXP (curr_src, 0);
10758
10759 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
10760 && REG_P (XEXP (XEXP (curr_src, 0), 0))
10761 && REGNO (XEXP (XEXP (curr_src, 0), 0))
10762 == REGNO (SET_DEST (prev_set))
10763 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
10764 XEXP (SET_SRC (prev_set), 0)))
10765 return true;
10766 }
10767 }
cd0cb232 10768
3759108f
AP
10769 if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
10770 && any_condjump_p (curr))
10771 {
10772 enum attr_type prev_type = get_attr_type (prev);
10773
 10774 /* FIXME: this misses some instructions that are considered simple
 10775 arithmetic for ThunderX. Simple shifts are missed here. */
10776 if (prev_type == TYPE_ALUS_SREG
10777 || prev_type == TYPE_ALUS_IMM
10778 || prev_type == TYPE_LOGICS_REG
10779 || prev_type == TYPE_LOGICS_IMM)
10780 return true;
10781 }
10782
6a569cdd
KT
10783 return false;
10784}
10785
350013bc
BC
10786/* If MEM is in the form of [base+offset], extract the two parts
 10787 of the address into BASE and OFFSET; otherwise return false
10788 after clearing BASE and OFFSET. */
10789
10790bool
10791extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
10792{
10793 rtx addr;
10794
10795 gcc_assert (MEM_P (mem));
10796
10797 addr = XEXP (mem, 0);
10798
10799 if (REG_P (addr))
10800 {
10801 *base = addr;
10802 *offset = const0_rtx;
10803 return true;
10804 }
10805
10806 if (GET_CODE (addr) == PLUS
10807 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
10808 {
10809 *base = XEXP (addr, 0);
10810 *offset = XEXP (addr, 1);
10811 return true;
10812 }
10813
10814 *base = NULL_RTX;
10815 *offset = NULL_RTX;
10816
10817 return false;
10818}
10819
10820/* Types for scheduling fusion. */
10821enum sched_fusion_type
10822{
10823 SCHED_FUSION_NONE = 0,
10824 SCHED_FUSION_LD_SIGN_EXTEND,
10825 SCHED_FUSION_LD_ZERO_EXTEND,
10826 SCHED_FUSION_LD,
10827 SCHED_FUSION_ST,
10828 SCHED_FUSION_NUM
10829};
10830
10831/* If INSN is a load or store of an address in the form of [base+offset],
 10832 extract the two parts into BASE and OFFSET. Return the scheduling
 10833 fusion type of this INSN. */
10834
10835static enum sched_fusion_type
10836fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
10837{
10838 rtx x, dest, src;
10839 enum sched_fusion_type fusion = SCHED_FUSION_LD;
10840
10841 gcc_assert (INSN_P (insn));
10842 x = PATTERN (insn);
10843 if (GET_CODE (x) != SET)
10844 return SCHED_FUSION_NONE;
10845
10846 src = SET_SRC (x);
10847 dest = SET_DEST (x);
10848
1f46bd52
AP
10849 if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
10850 && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
350013bc
BC
10851 return SCHED_FUSION_NONE;
10852
10853 if (GET_CODE (src) == SIGN_EXTEND)
10854 {
10855 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
10856 src = XEXP (src, 0);
10857 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10858 return SCHED_FUSION_NONE;
10859 }
10860 else if (GET_CODE (src) == ZERO_EXTEND)
10861 {
10862 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
10863 src = XEXP (src, 0);
10864 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10865 return SCHED_FUSION_NONE;
10866 }
10867
10868 if (GET_CODE (src) == MEM && REG_P (dest))
10869 extract_base_offset_in_addr (src, base, offset);
10870 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
10871 {
10872 fusion = SCHED_FUSION_ST;
10873 extract_base_offset_in_addr (dest, base, offset);
10874 }
10875 else
10876 return SCHED_FUSION_NONE;
10877
10878 if (*base == NULL_RTX || *offset == NULL_RTX)
10879 fusion = SCHED_FUSION_NONE;
10880
10881 return fusion;
10882}
10883
10884/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
10885
 10886 Currently we only support fusing ldr or str instructions, so FUSION_PRI
 10887 and PRI are only calculated for these instructions. For other instructions,
 10888 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
 10889 types of instruction fusion can be added by returning different priorities.
10890
10891 It's important that irrelevant instructions get the largest FUSION_PRI. */
10892
10893static void
10894aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
10895 int *fusion_pri, int *pri)
10896{
10897 int tmp, off_val;
10898 rtx base, offset;
10899 enum sched_fusion_type fusion;
10900
10901 gcc_assert (INSN_P (insn));
10902
10903 tmp = max_pri - 1;
10904 fusion = fusion_load_store (insn, &base, &offset);
10905 if (fusion == SCHED_FUSION_NONE)
10906 {
10907 *pri = tmp;
10908 *fusion_pri = tmp;
10909 return;
10910 }
10911
10912 /* Set FUSION_PRI according to fusion type and base register. */
10913 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
10914
10915 /* Calculate PRI. */
10916 tmp /= 2;
10917
10918 /* INSN with smaller offset goes first. */
10919 off_val = (int)(INTVAL (offset));
10920 if (off_val >= 0)
10921 tmp -= (off_val & 0xfffff);
10922 else
10923 tmp += ((- off_val) & 0xfffff);
10924
10925 *pri = tmp;
10926 return;
10927}
10928
10929/* Given OPERANDS of consecutive load/store, check if we can merge
10930 them into ldp/stp. LOAD is true if they are load instructions.
10931 MODE is the mode of memory operands. */
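/* For example, the pair
     ldr w0, [x2]
     ldr w1, [x2, 4]
   satisfies these checks and can be merged into  ldp w0, w1, [x2]. */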
10932
10933bool
10934aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
10935 enum machine_mode mode)
10936{
10937 HOST_WIDE_INT offval_1, offval_2, msize;
10938 enum reg_class rclass_1, rclass_2;
10939 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
10940
10941 if (load)
10942 {
10943 mem_1 = operands[1];
10944 mem_2 = operands[3];
10945 reg_1 = operands[0];
10946 reg_2 = operands[2];
10947 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
10948 if (REGNO (reg_1) == REGNO (reg_2))
10949 return false;
10950 }
10951 else
10952 {
10953 mem_1 = operands[0];
10954 mem_2 = operands[2];
10955 reg_1 = operands[1];
10956 reg_2 = operands[3];
10957 }
10958
bf84ac44
AP
10959 /* The mems cannot be volatile. */
10960 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
10961 return false;
10962
350013bc
BC
10963 /* Check if the addresses are in the form of [base+offset]. */
10964 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
10965 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
10966 return false;
10967 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
10968 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
10969 return false;
10970
 10971 /* Check if the bases are the same. */
10972 if (!rtx_equal_p (base_1, base_2))
10973 return false;
10974
10975 offval_1 = INTVAL (offset_1);
10976 offval_2 = INTVAL (offset_2);
10977 msize = GET_MODE_SIZE (mode);
10978 /* Check if the offsets are consecutive. */
10979 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
10980 return false;
10981
10982 /* Check if the addresses are clobbered by load. */
10983 if (load)
10984 {
10985 if (reg_mentioned_p (reg_1, mem_1))
10986 return false;
10987
10988 /* In increasing order, the last load can clobber the address. */
10989 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
10990 return false;
10991 }
10992
10993 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
10994 rclass_1 = FP_REGS;
10995 else
10996 rclass_1 = GENERAL_REGS;
10997
10998 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
10999 rclass_2 = FP_REGS;
11000 else
11001 rclass_2 = GENERAL_REGS;
11002
 11003 /* Check if the registers are of the same class. */
11004 if (rclass_1 != rclass_2)
11005 return false;
11006
11007 return true;
11008}
11009
11010/* Given OPERANDS of consecutive load/store, check if we can merge
11011 them into ldp/stp by adjusting the offset. LOAD is true if they
11012 are load instructions. MODE is the mode of memory operands.
11013
 11014 Given the following consecutive stores:
11015
11016 str w1, [xb, 0x100]
11017 str w1, [xb, 0x104]
11018 str w1, [xb, 0x108]
11019 str w1, [xb, 0x10c]
11020
11021 Though the offsets are out of the range supported by stp, we can
11022 still pair them after adjusting the offset, like:
11023
11024 add scratch, xb, 0x100
11025 stp w1, w1, [scratch]
11026 stp w1, w1, [scratch, 0x8]
11027
11028 The peephole patterns detecting this opportunity should guarantee
 11029 the scratch register is available. */
11030
11031bool
11032aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
11033 enum machine_mode mode)
11034{
11035 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
11036 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
11037 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
11038 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
11039
11040 if (load)
11041 {
11042 reg_1 = operands[0];
11043 mem_1 = operands[1];
11044 reg_2 = operands[2];
11045 mem_2 = operands[3];
11046 reg_3 = operands[4];
11047 mem_3 = operands[5];
11048 reg_4 = operands[6];
11049 mem_4 = operands[7];
11050 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
11051 && REG_P (reg_3) && REG_P (reg_4));
11052 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
11053 return false;
11054 }
11055 else
11056 {
11057 mem_1 = operands[0];
11058 reg_1 = operands[1];
11059 mem_2 = operands[2];
11060 reg_2 = operands[3];
11061 mem_3 = operands[4];
11062 reg_3 = operands[5];
11063 mem_4 = operands[6];
11064 reg_4 = operands[7];
11065 }
 11066 /* Skip if the memory operand is by itself valid for ldp/stp. */
11067 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
11068 return false;
11069
bf84ac44
AP
11070 /* The mems cannot be volatile. */
11071 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
11072 || MEM_VOLATILE_P (mem_3) ||MEM_VOLATILE_P (mem_4))
11073 return false;
11074
350013bc
BC
11075 /* Check if the addresses are in the form of [base+offset]. */
11076 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
11077 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
11078 return false;
11079 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
11080 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
11081 return false;
11082 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
11083 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
11084 return false;
11085 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
11086 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
11087 return false;
11088
 11089 /* Check if the bases are the same. */
11090 if (!rtx_equal_p (base_1, base_2)
11091 || !rtx_equal_p (base_2, base_3)
11092 || !rtx_equal_p (base_3, base_4))
11093 return false;
11094
11095 offval_1 = INTVAL (offset_1);
11096 offval_2 = INTVAL (offset_2);
11097 offval_3 = INTVAL (offset_3);
11098 offval_4 = INTVAL (offset_4);
11099 msize = GET_MODE_SIZE (mode);
11100 /* Check if the offsets are consecutive. */
11101 if ((offval_1 != (offval_2 + msize)
11102 || offval_1 != (offval_3 + msize * 2)
11103 || offval_1 != (offval_4 + msize * 3))
11104 && (offval_4 != (offval_3 + msize)
11105 || offval_4 != (offval_2 + msize * 2)
11106 || offval_4 != (offval_1 + msize * 3)))
11107 return false;
11108
11109 /* Check if the addresses are clobbered by load. */
11110 if (load)
11111 {
11112 if (reg_mentioned_p (reg_1, mem_1)
11113 || reg_mentioned_p (reg_2, mem_2)
11114 || reg_mentioned_p (reg_3, mem_3))
11115 return false;
11116
11117 /* In increasing order, the last load can clobber the address. */
11118 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
11119 return false;
11120 }
11121
11122 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
11123 rclass_1 = FP_REGS;
11124 else
11125 rclass_1 = GENERAL_REGS;
11126
11127 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
11128 rclass_2 = FP_REGS;
11129 else
11130 rclass_2 = GENERAL_REGS;
11131
11132 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
11133 rclass_3 = FP_REGS;
11134 else
11135 rclass_3 = GENERAL_REGS;
11136
11137 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
11138 rclass_4 = FP_REGS;
11139 else
11140 rclass_4 = GENERAL_REGS;
11141
 11142 /* Check if the registers are of the same class. */
11143 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
11144 return false;
11145
11146 return true;
11147}
11148
11149/* Given OPERANDS of consecutive load/store, this function pairs them
11150 into ldp/stp after adjusting the offset. It depends on the fact
11151 that addresses of load/store instructions are in increasing order.
11152 MODE is the mode of memory operands. CODE is the rtl operator
11153 which should be applied to all memory operands, it's SIGN_EXTEND,
11154 ZERO_EXTEND or UNKNOWN. */
11155
11156bool
11157aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
11158 enum machine_mode mode, RTX_CODE code)
11159{
11160 rtx base, offset, t1, t2;
11161 rtx mem_1, mem_2, mem_3, mem_4;
11162 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
11163
11164 if (load)
11165 {
11166 mem_1 = operands[1];
11167 mem_2 = operands[3];
11168 mem_3 = operands[5];
11169 mem_4 = operands[7];
11170 }
11171 else
11172 {
11173 mem_1 = operands[0];
11174 mem_2 = operands[2];
11175 mem_3 = operands[4];
11176 mem_4 = operands[6];
11177 gcc_assert (code == UNKNOWN);
11178 }
11179
11180 extract_base_offset_in_addr (mem_1, &base, &offset);
11181 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
11182
 11183 /* Adjust the offset so that it can fit in an ldp/stp instruction. */
11184 msize = GET_MODE_SIZE (mode);
11185 stp_off_limit = msize * 0x40;
11186 off_val = INTVAL (offset);
11187 abs_off = (off_val < 0) ? -off_val : off_val;
11188 new_off = abs_off % stp_off_limit;
11189 adj_off = abs_off - new_off;
11190
11191 /* Further adjust to make sure all offsets are OK. */
11192 if ((new_off + msize * 2) >= stp_off_limit)
11193 {
11194 adj_off += stp_off_limit;
11195 new_off -= stp_off_limit;
11196 }
11197
11198 /* Make sure the adjustment can be done with ADD/SUB instructions. */
11199 if (adj_off >= 0x1000)
11200 return false;
11201
11202 if (off_val < 0)
11203 {
11204 adj_off = -adj_off;
11205 new_off = -new_off;
11206 }
11207
11208 /* Create new memory references. */
11209 mem_1 = change_address (mem_1, VOIDmode,
11210 plus_constant (DImode, operands[8], new_off));
11211
11212 /* Check if the adjusted address is OK for ldp/stp. */
11213 if (!aarch64_mem_pair_operand (mem_1, mode))
11214 return false;
11215
11216 msize = GET_MODE_SIZE (mode);
11217 mem_2 = change_address (mem_2, VOIDmode,
11218 plus_constant (DImode,
11219 operands[8],
11220 new_off + msize));
11221 mem_3 = change_address (mem_3, VOIDmode,
11222 plus_constant (DImode,
11223 operands[8],
11224 new_off + msize * 2));
11225 mem_4 = change_address (mem_4, VOIDmode,
11226 plus_constant (DImode,
11227 operands[8],
11228 new_off + msize * 3));
11229
11230 if (code == ZERO_EXTEND)
11231 {
11232 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
11233 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
11234 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
11235 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
11236 }
11237 else if (code == SIGN_EXTEND)
11238 {
11239 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
11240 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
11241 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
11242 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
11243 }
11244
11245 if (load)
11246 {
11247 operands[1] = mem_1;
11248 operands[3] = mem_2;
11249 operands[5] = mem_3;
11250 operands[7] = mem_4;
11251 }
11252 else
11253 {
11254 operands[0] = mem_1;
11255 operands[2] = mem_2;
11256 operands[4] = mem_3;
11257 operands[6] = mem_4;
11258 }
11259
11260  /* Emit the instruction that sets the new base (operands[8]) to base + adj_off.  */
11261 emit_insn (gen_rtx_SET (VOIDmode, operands[8],
11262 plus_constant (DImode, base, adj_off)));
11263  /* Emit the two ldp/stp instructions as PARALLELs of paired SETs.  */
11264 t1 = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
11265 t2 = gen_rtx_SET (VOIDmode, operands[2], operands[3]);
11266 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
11267 t1 = gen_rtx_SET (VOIDmode, operands[4], operands[5]);
11268 t2 = gen_rtx_SET (VOIDmode, operands[6], operands[7]);
11269 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
11270 return true;
11271}
11272
11273#undef TARGET_ADDRESS_COST
11274#define TARGET_ADDRESS_COST aarch64_address_cost
11275
11276/* This hook determines whether unnamed bitfields affect the alignment
11277   of the containing structure.  It returns true if the structure
11278   should inherit the alignment requirements of an unnamed bitfield's
11279   type.  */
11280#undef TARGET_ALIGN_ANON_BITFIELD
11281#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
11282
11283#undef TARGET_ASM_ALIGNED_DI_OP
11284#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
11285
11286#undef TARGET_ASM_ALIGNED_HI_OP
11287#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
11288
11289#undef TARGET_ASM_ALIGNED_SI_OP
11290#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
11291
11292#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
11293#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
11294 hook_bool_const_tree_hwi_hwi_const_tree_true
11295
11296#undef TARGET_ASM_FILE_START
11297#define TARGET_ASM_FILE_START aarch64_start_file
11298
11299#undef TARGET_ASM_OUTPUT_MI_THUNK
11300#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
11301
11302#undef TARGET_ASM_SELECT_RTX_SECTION
11303#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
11304
11305#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
11306#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
11307
11308#undef TARGET_BUILD_BUILTIN_VA_LIST
11309#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
11310
11311#undef TARGET_CALLEE_COPIES
11312#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
11313
11314#undef TARGET_CAN_ELIMINATE
11315#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
11316
11317#undef TARGET_CANNOT_FORCE_CONST_MEM
11318#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
11319
11320#undef TARGET_CONDITIONAL_REGISTER_USAGE
11321#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
11322
11323/* Only the least significant bit is used for initialization guard
11324 variables. */
11325#undef TARGET_CXX_GUARD_MASK_BIT
11326#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
11327
11328#undef TARGET_C_MODE_FOR_SUFFIX
11329#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
11330
11331#ifdef TARGET_BIG_ENDIAN_DEFAULT
11332#undef TARGET_DEFAULT_TARGET_FLAGS
11333#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
11334#endif
11335
11336#undef TARGET_CLASS_MAX_NREGS
11337#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
11338
11339#undef TARGET_BUILTIN_DECL
11340#define TARGET_BUILTIN_DECL aarch64_builtin_decl
11341
11342#undef TARGET_EXPAND_BUILTIN
11343#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
11344
11345#undef TARGET_EXPAND_BUILTIN_VA_START
11346#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
11347
11348#undef TARGET_FOLD_BUILTIN
11349#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
11350
11351#undef TARGET_FUNCTION_ARG
11352#define TARGET_FUNCTION_ARG aarch64_function_arg
11353
11354#undef TARGET_FUNCTION_ARG_ADVANCE
11355#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
11356
11357#undef TARGET_FUNCTION_ARG_BOUNDARY
11358#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
11359
11360#undef TARGET_FUNCTION_OK_FOR_SIBCALL
11361#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
11362
11363#undef TARGET_FUNCTION_VALUE
11364#define TARGET_FUNCTION_VALUE aarch64_function_value
11365
11366#undef TARGET_FUNCTION_VALUE_REGNO_P
11367#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
11368
11369#undef TARGET_FRAME_POINTER_REQUIRED
11370#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
11371
11372#undef TARGET_GIMPLE_FOLD_BUILTIN
11373#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
11374
11375#undef TARGET_GIMPLIFY_VA_ARG_EXPR
11376#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
11377
11378#undef TARGET_INIT_BUILTINS
11379#define TARGET_INIT_BUILTINS aarch64_init_builtins
11380
11381#undef TARGET_LEGITIMATE_ADDRESS_P
11382#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
11383
11384#undef TARGET_LEGITIMATE_CONSTANT_P
11385#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
11386
11387#undef TARGET_LIBGCC_CMP_RETURN_MODE
11388#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
11389
11390#undef TARGET_LRA_P
11391#define TARGET_LRA_P hook_bool_void_true
11392
11393#undef TARGET_MANGLE_TYPE
11394#define TARGET_MANGLE_TYPE aarch64_mangle_type
11395
11396#undef TARGET_MEMORY_MOVE_COST
11397#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
11398
11399#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
11400#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
11401
11402#undef TARGET_MUST_PASS_IN_STACK
11403#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
11404
11405/* This target hook should return true if accesses to volatile bitfields
11406 should use the narrowest mode possible. It should return false if these
11407 accesses should use the bitfield container type. */
11408#undef TARGET_NARROW_VOLATILE_BITFIELD
11409#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
11410
11411#undef TARGET_OPTION_OVERRIDE
11412#define TARGET_OPTION_OVERRIDE aarch64_override_options
11413
11414#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
11415#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
11416 aarch64_override_options_after_change
11417
11418#undef TARGET_PASS_BY_REFERENCE
11419#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
11420
11421#undef TARGET_PREFERRED_RELOAD_CLASS
11422#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
11423
11424#undef TARGET_SCHED_REASSOCIATION_WIDTH
11425#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
11426
11427#undef TARGET_SECONDARY_RELOAD
11428#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
11429
11430#undef TARGET_SHIFT_TRUNCATION_MASK
11431#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
11432
11433#undef TARGET_SETUP_INCOMING_VARARGS
11434#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
11435
11436#undef TARGET_STRUCT_VALUE_RTX
11437#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
11438
11439#undef TARGET_REGISTER_MOVE_COST
11440#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
11441
11442#undef TARGET_RETURN_IN_MEMORY
11443#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
11444
11445#undef TARGET_RETURN_IN_MSB
11446#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
11447
11448#undef TARGET_RTX_COSTS
11449#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
11450
11451#undef TARGET_SCHED_ISSUE_RATE
11452#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
11453
11454#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
11455#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
11456 aarch64_sched_first_cycle_multipass_dfa_lookahead
11457
11458#undef TARGET_TRAMPOLINE_INIT
11459#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
11460
11461#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
11462#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
11463
11464#undef TARGET_VECTOR_MODE_SUPPORTED_P
11465#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
11466
11467#undef TARGET_ARRAY_MODE_SUPPORTED_P
11468#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
11469
11470#undef TARGET_VECTORIZE_ADD_STMT_COST
11471#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
11472
11473#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
11474#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
11475 aarch64_builtin_vectorization_cost
11476
11477#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
11478#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
11479
11480#undef TARGET_VECTORIZE_BUILTINS
11481#define TARGET_VECTORIZE_BUILTINS
11482
11483#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
11484#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
11485 aarch64_builtin_vectorized_function
11486
11487#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
11488#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
11489 aarch64_autovectorize_vector_sizes
11490
11491#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
11492#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
11493 aarch64_atomic_assign_expand_fenv
11494
11495/* Section anchor support. */
11496
11497#undef TARGET_MIN_ANCHOR_OFFSET
11498#define TARGET_MIN_ANCHOR_OFFSET -256
11499
11500/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
11501 byte offset; we can do much more for larger data types, but have no way
11502 to determine the size of the access. We assume accesses are aligned. */
11503#undef TARGET_MAX_ANCHOR_OFFSET
11504#define TARGET_MAX_ANCHOR_OFFSET 4095
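/* Rough illustration of these limits (not from the original comment): with
   a section anchor A, a byte access can be addressed as [A, #imm] using the
   unscaled signed offset range down to -256 (ldur/stur) and the unsigned
   scaled offset range up to 4095 for byte-sized accesses, which is what the
   [-256, 4095] window above reflects.  */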
11505
11506#undef TARGET_VECTOR_ALIGNMENT
11507#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
11508
11509#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
11510#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
11511 aarch64_simd_vector_alignment_reachable
11512
11513/* vec_perm support. */
11514
11515#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
11516#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
11517 aarch64_vectorize_vec_perm_const_ok
11518
11519
11520#undef TARGET_FIXED_CONDITION_CODE_REGS
11521#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
11522
11523#undef TARGET_FLAGS_REGNUM
11524#define TARGET_FLAGS_REGNUM CC_REGNUM
11525
11526#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
11527#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
11528
11529#undef TARGET_ASAN_SHADOW_OFFSET
11530#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
11531
11532#undef TARGET_LEGITIMIZE_ADDRESS
11533#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
11534
11535#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
11536#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
11537 aarch64_use_by_pieces_infrastructure_p
11538
11539#undef TARGET_CAN_USE_DOLOOP_P
11540#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
11541
11542#undef TARGET_SCHED_MACRO_FUSION_P
11543#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
11544
11545#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
11546#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
11547
11548#undef TARGET_SCHED_FUSION_PRIORITY
11549#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
11550
11551struct gcc_target targetm = TARGET_INITIALIZER;
11552
11553#include "gt-aarch64.h"