bdb7bf8a 1/* Machine description for AArch64 architecture.
5624e564 2 Copyright (C) 2009-2015 Free Software Foundation, Inc.
43e9d192
IB
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "insn-codes.h"
26#include "rtl.h"
27#include "insn-attr.h"
40e23961
MC
28#include "hash-set.h"
29#include "machmode.h"
30#include "vec.h"
31#include "double-int.h"
32#include "input.h"
33#include "alias.h"
34#include "symtab.h"
35#include "wide-int.h"
36#include "inchash.h"
43e9d192 37#include "tree.h"
40e23961 38#include "fold-const.h"
d8a2d370
DN
39#include "stringpool.h"
40#include "stor-layout.h"
41#include "calls.h"
42#include "varasm.h"
43e9d192 43#include "regs.h"
60393bbc
AM
44#include "dominance.h"
45#include "cfg.h"
46#include "cfgrtl.h"
47#include "cfganal.h"
48#include "lcm.h"
49#include "cfgbuild.h"
50#include "cfgcleanup.h"
51#include "predict.h"
52#include "basic-block.h"
43e9d192
IB
53#include "df.h"
54#include "hard-reg-set.h"
55#include "output.h"
36566b39
PK
56#include "hashtab.h"
57#include "function.h"
58#include "flags.h"
59#include "statistics.h"
60#include "real.h"
61#include "fixed-value.h"
62#include "insn-config.h"
63#include "expmed.h"
64#include "dojump.h"
65#include "explow.h"
66#include "emit-rtl.h"
67#include "stmt.h"
43e9d192
IB
68#include "expr.h"
69#include "reload.h"
70#include "toplev.h"
71#include "target.h"
72#include "target-def.h"
73#include "targhooks.h"
74#include "ggc.h"
43e9d192
IB
75#include "tm_p.h"
76#include "recog.h"
77#include "langhooks.h"
78#include "diagnostic-core.h"
2fb9a547 79#include "hash-table.h"
2fb9a547
AM
80#include "tree-ssa-alias.h"
81#include "internal-fn.h"
82#include "gimple-fold.h"
83#include "tree-eh.h"
84#include "gimple-expr.h"
85#include "is-a.h"
18f429e2 86#include "gimple.h"
45b0be94 87#include "gimplify.h"
43e9d192
IB
88#include "optabs.h"
89#include "dwarf2.h"
8990e73a
TB
90#include "cfgloop.h"
91#include "tree-vectorizer.h"
d1bcc29f 92#include "aarch64-cost-tables.h"
0ee859b5 93#include "dumpfile.h"
9b2b7279 94#include "builtins.h"
8baff86e 95#include "rtl-iter.h"
9bbe08fe 96#include "tm-constrs.h"
d03f7e44 97#include "sched-int.h"
43e9d192 98
28514dda
YZ
99/* Defined for convenience. */
100#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
101
43e9d192
IB
102/* Classifies an address.
103
104 ADDRESS_REG_IMM
105 A simple base register plus immediate offset.
106
107 ADDRESS_REG_WB
108 A base register indexed by immediate offset with writeback.
109
110 ADDRESS_REG_REG
111 A base register indexed by (optionally scaled) register.
112
113 ADDRESS_REG_UXTW
114 A base register indexed by (optionally scaled) zero-extended register.
115
116 ADDRESS_REG_SXTW
117 A base register indexed by (optionally scaled) sign-extended register.
118
119 ADDRESS_LO_SUM
120 A LO_SUM rtx with a base register and "LO12" symbol relocation.
121
122 ADDRESS_SYMBOLIC:
123 A constant symbolic address, in pc-relative literal pool. */
124
125enum aarch64_address_type {
126 ADDRESS_REG_IMM,
127 ADDRESS_REG_WB,
128 ADDRESS_REG_REG,
129 ADDRESS_REG_UXTW,
130 ADDRESS_REG_SXTW,
131 ADDRESS_LO_SUM,
132 ADDRESS_SYMBOLIC
133};
134
135struct aarch64_address_info {
136 enum aarch64_address_type type;
137 rtx base;
138 rtx offset;
139 int shift;
140 enum aarch64_symbol_type symbol_type;
141};
142
48063b9d
IB
143struct simd_immediate_info
144{
145 rtx value;
146 int shift;
147 int element_width;
48063b9d 148 bool mvn;
e4f0f84d 149 bool msl;
48063b9d
IB
150};
151
43e9d192
IB
152/* The current code model. */
153enum aarch64_code_model aarch64_cmodel;
154
155#ifdef HAVE_AS_TLS
156#undef TARGET_HAVE_TLS
157#define TARGET_HAVE_TLS 1
158#endif
159
ef4bddc2
RS
160static bool aarch64_composite_type_p (const_tree, machine_mode);
161static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
43e9d192 162 const_tree,
ef4bddc2 163 machine_mode *, int *,
43e9d192
IB
164 bool *);
165static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
166static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
43e9d192 167static void aarch64_override_options_after_change (void);
ef4bddc2 168static bool aarch64_vector_mode_supported_p (machine_mode);
43e9d192 169static unsigned bit_count (unsigned HOST_WIDE_INT);
ef4bddc2 170static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
88b08073 171 const unsigned char *sel);
ef4bddc2 172static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
88b08073 173
0c6caaf8
RL
174/* Major revision number of the ARM Architecture implemented by the target. */
175unsigned aarch64_architecture_version;
176
43e9d192 177/* The processor for which instructions should be scheduled. */
02fdbd5b 178enum aarch64_processor aarch64_tune = cortexa53;
43e9d192
IB
179
180/* The current tuning set. */
181const struct tune_params *aarch64_tune_params;
182
183/* Mask to specify which instructions we are allowed to generate. */
184unsigned long aarch64_isa_flags = 0;
185
186/* Mask to specify which instruction scheduling options should be used. */
187unsigned long aarch64_tune_flags = 0;
188
189/* Tuning parameters. */
190
43e9d192
IB
191static const struct cpu_addrcost_table generic_addrcost_table =
192{
67747367 193 {
bd95e655
JG
194 0, /* hi */
195 0, /* si */
196 0, /* di */
197 0, /* ti */
67747367 198 },
bd95e655
JG
199 0, /* pre_modify */
200 0, /* post_modify */
201 0, /* register_offset */
202 0, /* register_extend */
203 0 /* imm_offset */
43e9d192
IB
204};
205
60bff090
JG
206static const struct cpu_addrcost_table cortexa57_addrcost_table =
207{
60bff090 208 {
bd95e655
JG
209 1, /* hi */
210 0, /* si */
211 0, /* di */
212 1, /* ti */
60bff090 213 },
bd95e655
JG
214 0, /* pre_modify */
215 0, /* post_modify */
216 0, /* register_offset */
217 0, /* register_extend */
218 0, /* imm_offset */
60bff090
JG
219};
220
381e27aa
PT
221static const struct cpu_addrcost_table xgene1_addrcost_table =
222{
381e27aa 223 {
bd95e655
JG
224 1, /* hi */
225 0, /* si */
226 0, /* di */
227 1, /* ti */
381e27aa 228 },
bd95e655
JG
229 1, /* pre_modify */
230 0, /* post_modify */
231 0, /* register_offset */
232 1, /* register_extend */
233 0, /* imm_offset */
381e27aa
PT
234};
235
43e9d192
IB
236static const struct cpu_regmove_cost generic_regmove_cost =
237{
bd95e655 238 1, /* GP2GP */
3969c510
WD
239 /* Avoid the use of slow int<->fp moves for spilling by setting
240 their cost higher than memmov_cost. */
bd95e655
JG
241 5, /* GP2FP */
242 5, /* FP2GP */
243 2 /* FP2FP */
43e9d192
IB
244};
245
e4a9c55a
WD
246static const struct cpu_regmove_cost cortexa57_regmove_cost =
247{
bd95e655 248 1, /* GP2GP */
e4a9c55a
WD
249 /* Avoid the use of slow int<->fp moves for spilling by setting
250 their cost higher than memmov_cost. */
bd95e655
JG
251 5, /* GP2FP */
252 5, /* FP2GP */
253 2 /* FP2FP */
e4a9c55a
WD
254};
255
256static const struct cpu_regmove_cost cortexa53_regmove_cost =
257{
bd95e655 258 1, /* GP2GP */
e4a9c55a
WD
259 /* Avoid the use of slow int<->fp moves for spilling by setting
260 their cost higher than memmov_cost. */
bd95e655
JG
261 5, /* GP2FP */
262 5, /* FP2GP */
263 2 /* FP2FP */
e4a9c55a
WD
264};
265
d1bcc29f
AP
266static const struct cpu_regmove_cost thunderx_regmove_cost =
267{
bd95e655
JG
268 2, /* GP2GP */
269 2, /* GP2FP */
270 6, /* FP2GP */
271 4 /* FP2FP */
d1bcc29f
AP
272};
273
381e27aa
PT
274static const struct cpu_regmove_cost xgene1_regmove_cost =
275{
bd95e655 276 1, /* GP2GP */
381e27aa
PT
277 /* Avoid the use of slow int<->fp moves for spilling by setting
278 their cost higher than memmov_cost. */
bd95e655
JG
279 8, /* GP2FP */
280 8, /* FP2GP */
281 2 /* FP2FP */
381e27aa
PT
282};
283
8990e73a 284/* Generic costs for vector insn classes. */
8990e73a
TB
285static const struct cpu_vector_cost generic_vector_cost =
286{
bd95e655
JG
287 1, /* scalar_stmt_cost */
288 1, /* scalar_load_cost */
289 1, /* scalar_store_cost */
290 1, /* vec_stmt_cost */
291 1, /* vec_to_scalar_cost */
292 1, /* scalar_to_vec_cost */
293 1, /* vec_align_load_cost */
294 1, /* vec_unalign_load_cost */
295 1, /* vec_unalign_store_cost */
296 1, /* vec_store_cost */
297 3, /* cond_taken_branch_cost */
298 1 /* cond_not_taken_branch_cost */
8990e73a
TB
299};
300
60bff090 301/* Generic costs for vector insn classes. */
60bff090
JG
302static const struct cpu_vector_cost cortexa57_vector_cost =
303{
bd95e655
JG
304 1, /* scalar_stmt_cost */
305 4, /* scalar_load_cost */
306 1, /* scalar_store_cost */
307 3, /* vec_stmt_cost */
308 8, /* vec_to_scalar_cost */
309 8, /* scalar_to_vec_cost */
310 5, /* vec_align_load_cost */
311 5, /* vec_unalign_load_cost */
312 1, /* vec_unalign_store_cost */
313 1, /* vec_store_cost */
314 1, /* cond_taken_branch_cost */
315 1 /* cond_not_taken_branch_cost */
60bff090
JG
316};
317
381e27aa 318/* Generic costs for vector insn classes. */
381e27aa
PT
319static const struct cpu_vector_cost xgene1_vector_cost =
320{
bd95e655
JG
321 1, /* scalar_stmt_cost */
322 5, /* scalar_load_cost */
323 1, /* scalar_store_cost */
324 2, /* vec_stmt_cost */
325 4, /* vec_to_scalar_cost */
326 4, /* scalar_to_vec_cost */
327 10, /* vec_align_load_cost */
328 10, /* vec_unalign_load_cost */
329 2, /* vec_unalign_store_cost */
330 2, /* vec_store_cost */
331 2, /* cond_taken_branch_cost */
332 1 /* cond_not_taken_branch_cost */
381e27aa
PT
333};
334
6a569cdd
KT
335#define AARCH64_FUSE_NOTHING (0)
336#define AARCH64_FUSE_MOV_MOVK (1 << 0)
9bbe08fe 337#define AARCH64_FUSE_ADRP_ADD (1 << 1)
cd0cb232 338#define AARCH64_FUSE_MOVK_MOVK (1 << 2)
d8354ad7 339#define AARCH64_FUSE_ADRP_LDR (1 << 3)
3759108f 340#define AARCH64_FUSE_CMP_BRANCH (1 << 4)
6a569cdd 341
43e9d192
IB
342static const struct tune_params generic_tunings =
343{
4e2cd668 344 &cortexa57_extra_costs,
43e9d192
IB
345 &generic_addrcost_table,
346 &generic_regmove_cost,
8990e73a 347 &generic_vector_cost,
bd95e655
JG
348 4, /* memmov_cost */
349 2, /* issue_rate */
350 AARCH64_FUSE_NOTHING, /* fuseable_ops */
0b82a5a2
WD
351 8, /* function_align. */
352 8, /* jump_align. */
353 4, /* loop_align. */
cee66c68
WD
354 2, /* int_reassoc_width. */
355 4, /* fp_reassoc_width. */
356 1 /* vec_reassoc_width. */
43e9d192
IB
357};
358
984239ad
KT
359static const struct tune_params cortexa53_tunings =
360{
361 &cortexa53_extra_costs,
362 &generic_addrcost_table,
e4a9c55a 363 &cortexa53_regmove_cost,
984239ad 364 &generic_vector_cost,
bd95e655
JG
365 4, /* memmov_cost */
366 2, /* issue_rate */
367 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
368 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fuseable_ops */
0b82a5a2
WD
369 8, /* function_align. */
370 8, /* jump_align. */
371 4, /* loop_align. */
cee66c68
WD
372 2, /* int_reassoc_width. */
373 4, /* fp_reassoc_width. */
374 1 /* vec_reassoc_width. */
984239ad
KT
375};
376
4fd92af6
KT
377static const struct tune_params cortexa57_tunings =
378{
379 &cortexa57_extra_costs,
60bff090 380 &cortexa57_addrcost_table,
e4a9c55a 381 &cortexa57_regmove_cost,
60bff090 382 &cortexa57_vector_cost,
bd95e655
JG
383 4, /* memmov_cost */
384 3, /* issue_rate */
385 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
386 | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops */
0b82a5a2
WD
387 16, /* function_align. */
388 8, /* jump_align. */
389 4, /* loop_align. */
cee66c68
WD
390 2, /* int_reassoc_width. */
391 4, /* fp_reassoc_width. */
392 1 /* vec_reassoc_width. */
4fd92af6
KT
393};
394
d1bcc29f
AP
395static const struct tune_params thunderx_tunings =
396{
397 &thunderx_extra_costs,
398 &generic_addrcost_table,
399 &thunderx_regmove_cost,
400 &generic_vector_cost,
bd95e655
JG
401 6, /* memmov_cost */
402 2, /* issue_rate */
403 AARCH64_FUSE_CMP_BRANCH, /* fuseable_ops */
0b82a5a2
WD
404 8, /* function_align. */
405 8, /* jump_align. */
406 8, /* loop_align. */
cee66c68
WD
407 2, /* int_reassoc_width. */
408 4, /* fp_reassoc_width. */
409 1 /* vec_reassoc_width. */
d1bcc29f
AP
410};
411
381e27aa
PT
412static const struct tune_params xgene1_tunings =
413{
414 &xgene1_extra_costs,
415 &xgene1_addrcost_table,
416 &xgene1_regmove_cost,
417 &xgene1_vector_cost,
bd95e655
JG
418 6, /* memmov_cost */
419 4, /* issue_rate */
420 AARCH64_FUSE_NOTHING, /* fuseable_ops */
381e27aa
PT
421 16, /* function_align. */
422 8, /* jump_align. */
423 16, /* loop_align. */
424 2, /* int_reassoc_width. */
425 4, /* fp_reassoc_width. */
426 1 /* vec_reassoc_width. */
427};
428
43e9d192
IB
429/* A processor implementing AArch64. */
430struct processor
431{
432 const char *const name;
433 enum aarch64_processor core;
434 const char *arch;
0c6caaf8 435 unsigned architecture_version;
43e9d192
IB
436 const unsigned long flags;
437 const struct tune_params *const tune;
438};
439
440/* Processor cores implementing AArch64. */
441static const struct processor all_cores[] =
442{
d86cb6d5 443#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS) \
faa54226 444 {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
43e9d192
IB
445#include "aarch64-cores.def"
446#undef AARCH64_CORE
faa54226 447 {"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings},
0c6caaf8 448 {NULL, aarch64_none, NULL, 0, 0, NULL}
43e9d192
IB
449};
450
451/* Architectures implementing AArch64. */
452static const struct processor all_architectures[] =
453{
454#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
0c6caaf8 455 {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
43e9d192
IB
456#include "aarch64-arches.def"
457#undef AARCH64_ARCH
0c6caaf8 458 {NULL, aarch64_none, NULL, 0, 0, NULL}
43e9d192
IB
459};
460
 461/* Target specification.  These are populated as command-line arguments
462 are processed, or NULL if not specified. */
463static const struct processor *selected_arch;
464static const struct processor *selected_cpu;
465static const struct processor *selected_tune;
466
467#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
468
469/* An ISA extension in the co-processor and main instruction set space. */
470struct aarch64_option_extension
471{
472 const char *const name;
473 const unsigned long flags_on;
474 const unsigned long flags_off;
475};
476
477/* ISA extensions in AArch64. */
478static const struct aarch64_option_extension all_extensions[] =
479{
480#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
481 {NAME, FLAGS_ON, FLAGS_OFF},
482#include "aarch64-option-extensions.def"
483#undef AARCH64_OPT_EXTENSION
484 {NULL, 0, 0}
485};
486
487/* Used to track the size of an address when generating a pre/post
488 increment address. */
ef4bddc2 489static machine_mode aarch64_memory_reference_mode;
43e9d192 490
43e9d192
IB
491/* A table of valid AArch64 "bitmask immediate" values for
492 logical instructions. */
493
494#define AARCH64_NUM_BITMASKS 5334
495static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
496
43e9d192
IB
497typedef enum aarch64_cond_code
498{
499 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
500 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
501 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
502}
503aarch64_cc;
504
505#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
506
507/* The condition codes of the processor, and the inverse function. */
508static const char * const aarch64_condition_codes[] =
509{
510 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
511 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
512};
513
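/* Illustrative note (not part of the original source): the codes above are
   listed so that each condition sits next to its inverse, which is why
   AARCH64_INVERSE_CONDITION_CODE only needs to flip the low bit.  For example:
     AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ) == AARCH64_NE
     AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) == AARCH64_LT  */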
26e0ff94
WD
514static unsigned int
515aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
516{
517 return 2;
518}
519
cee66c68
WD
520static int
521aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
522 enum machine_mode mode)
523{
524 if (VECTOR_MODE_P (mode))
525 return aarch64_tune_params->vec_reassoc_width;
526 if (INTEGRAL_MODE_P (mode))
527 return aarch64_tune_params->int_reassoc_width;
528 if (FLOAT_MODE_P (mode))
529 return aarch64_tune_params->fp_reassoc_width;
530 return 1;
531}
532
43e9d192
IB
533/* Provide a mapping from gcc register numbers to dwarf register numbers. */
534unsigned
535aarch64_dbx_register_number (unsigned regno)
536{
537 if (GP_REGNUM_P (regno))
538 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
539 else if (regno == SP_REGNUM)
540 return AARCH64_DWARF_SP;
541 else if (FP_REGNUM_P (regno))
542 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
543
544 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
545 equivalent DWARF register. */
546 return DWARF_FRAME_REGISTERS;
547}
548
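/* Worked example for aarch64_dbx_register_number (illustrative, assuming the
   usual values AARCH64_DWARF_R0 == 0 and AARCH64_DWARF_V0 == 64 defined
   elsewhere): x5 maps to DWARF register 5, v3 maps to DWARF register 67 and
   sp maps to AARCH64_DWARF_SP.  Anything else, such as the CC flags register,
   yields DWARF_FRAME_REGISTERS, i.e. "no DWARF equivalent".  */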
 549/* Return TRUE if MODE is any of the large integer modes used for SIMD structs. */
550static bool
ef4bddc2 551aarch64_vect_struct_mode_p (machine_mode mode)
43e9d192
IB
552{
553 return mode == OImode || mode == CImode || mode == XImode;
554}
555
556/* Return TRUE if MODE is any of the vector modes. */
557static bool
ef4bddc2 558aarch64_vector_mode_p (machine_mode mode)
43e9d192
IB
559{
560 return aarch64_vector_mode_supported_p (mode)
561 || aarch64_vect_struct_mode_p (mode);
562}
563
564/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
565static bool
ef4bddc2 566aarch64_array_mode_supported_p (machine_mode mode,
43e9d192
IB
567 unsigned HOST_WIDE_INT nelems)
568{
569 if (TARGET_SIMD
570 && AARCH64_VALID_SIMD_QREG_MODE (mode)
571 && (nelems >= 2 && nelems <= 4))
572 return true;
573
574 return false;
575}
576
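/* For example (illustrative only): with TARGET_SIMD enabled, an array of
   three V4SI vectors -- the layout behind the int32x4x3_t intrinsic type --
   passes this check (nelems == 3), whereas an array of five such vectors, or
   an array of DImode scalars, does not and falls back to generic handling.  */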
577/* Implement HARD_REGNO_NREGS. */
578
579int
ef4bddc2 580aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
43e9d192
IB
581{
582 switch (aarch64_regno_regclass (regno))
583 {
584 case FP_REGS:
585 case FP_LO_REGS:
586 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
587 default:
588 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
589 }
590 gcc_unreachable ();
591}
592
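/* Worked example (a sketch assuming UNITS_PER_WORD == 8 and UNITS_PER_VREG
   == 16, which are defined elsewhere): a 16-byte TImode value occupies two
   general registers but only one FP/SIMD register, while a 32-byte OImode
   SIMD structure occupies two FP/SIMD registers.  */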
593/* Implement HARD_REGNO_MODE_OK. */
594
595int
ef4bddc2 596aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
43e9d192
IB
597{
598 if (GET_MODE_CLASS (mode) == MODE_CC)
599 return regno == CC_REGNUM;
600
9259db42
YZ
601 if (regno == SP_REGNUM)
602 /* The purpose of comparing with ptr_mode is to support the
603 global register variable associated with the stack pointer
604 register via the syntax of asm ("wsp") in ILP32. */
605 return mode == Pmode || mode == ptr_mode;
606
607 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
43e9d192
IB
608 return mode == Pmode;
609
610 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
611 return 1;
612
613 if (FP_REGNUM_P (regno))
614 {
615 if (aarch64_vect_struct_mode_p (mode))
616 return
617 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
618 else
619 return 1;
620 }
621
622 return 0;
623}
624
73d9ac6a 625/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
ef4bddc2 626machine_mode
73d9ac6a 627aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
ef4bddc2 628 machine_mode mode)
73d9ac6a
IB
629{
630 /* Handle modes that fit within single registers. */
631 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
632 {
633 if (GET_MODE_SIZE (mode) >= 4)
634 return mode;
635 else
636 return SImode;
637 }
638 /* Fall back to generic for multi-reg and very large modes. */
639 else
640 return choose_hard_reg_mode (regno, nregs, false);
641}
642
43e9d192
IB
643/* Return true if calls to DECL should be treated as
 644 long-calls (i.e. called via a register). */
645static bool
646aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
647{
648 return false;
649}
650
651/* Return true if calls to symbol-ref SYM should be treated as
 652 long-calls (i.e. called via a register). */
653bool
654aarch64_is_long_call_p (rtx sym)
655{
656 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
657}
658
659/* Return true if the offsets to a zero/sign-extract operation
660 represent an expression that matches an extend operation. The
 661 operands represent the parameters from
662
4745e701 663 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
43e9d192 664bool
ef4bddc2 665aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
43e9d192
IB
666 rtx extract_imm)
667{
668 HOST_WIDE_INT mult_val, extract_val;
669
670 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
671 return false;
672
673 mult_val = INTVAL (mult_imm);
674 extract_val = INTVAL (extract_imm);
675
676 if (extract_val > 8
677 && extract_val < GET_MODE_BITSIZE (mode)
678 && exact_log2 (extract_val & ~7) > 0
679 && (extract_val & 7) <= 4
680 && mult_val == (1 << (extract_val & 7)))
681 return true;
682
683 return false;
684}
685
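/* Illustrative example (not in the original): for DImode, mult_imm == 4 and
   extract_imm == 34 pass all of the checks above (34 & ~7 == 32 is a power
   of two, 34 & 7 == 2, and 4 == 1 << 2).  That corresponds to extracting the
   low 34 bits of (reg * 4), i.e. an extend of a 32-bit value followed by a
   left shift of 2.  */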
686/* Emit an insn that's a simple single-set. Both the operands must be
687 known to be valid. */
688inline static rtx
689emit_set_insn (rtx x, rtx y)
690{
691 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
692}
693
694/* X and Y are two things to compare using CODE. Emit the compare insn and
695 return the rtx for register 0 in the proper mode. */
696rtx
697aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
698{
ef4bddc2 699 machine_mode mode = SELECT_CC_MODE (code, x, y);
43e9d192
IB
700 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
701
702 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
703 return cc_reg;
704}
705
706/* Build the SYMBOL_REF for __tls_get_addr. */
707
708static GTY(()) rtx tls_get_addr_libfunc;
709
710rtx
711aarch64_tls_get_addr (void)
712{
713 if (!tls_get_addr_libfunc)
714 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
715 return tls_get_addr_libfunc;
716}
717
718/* Return the TLS model to use for ADDR. */
719
720static enum tls_model
721tls_symbolic_operand_type (rtx addr)
722{
723 enum tls_model tls_kind = TLS_MODEL_NONE;
724 rtx sym, addend;
725
726 if (GET_CODE (addr) == CONST)
727 {
728 split_const (addr, &sym, &addend);
729 if (GET_CODE (sym) == SYMBOL_REF)
730 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
731 }
732 else if (GET_CODE (addr) == SYMBOL_REF)
733 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
734
735 return tls_kind;
736}
737
 738/* We allow lo_sum expressions in our legitimate addresses, so that
 739 combine can take care of merging addresses where necessary; but for
 740 generation purposes, we generate the address as:
 741
742 RTL Absolute
743 tmp = hi (symbol_ref); adrp x1, foo
744 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
745 nop
746
747 PIC TLS
748 adrp x1, :got:foo adrp tmp, :tlsgd:foo
749 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
750 bl __tls_get_addr
751 nop
752
753 Load TLS symbol, depending on TLS mechanism and TLS access model.
754
755 Global Dynamic - Traditional TLS:
756 adrp tmp, :tlsgd:imm
757 add dest, tmp, #:tlsgd_lo12:imm
758 bl __tls_get_addr
759
760 Global Dynamic - TLS Descriptors:
761 adrp dest, :tlsdesc:imm
762 ldr tmp, [dest, #:tlsdesc_lo12:imm]
763 add dest, dest, #:tlsdesc_lo12:imm
764 blr tmp
765 mrs tp, tpidr_el0
766 add dest, dest, tp
767
768 Initial Exec:
769 mrs tp, tpidr_el0
770 adrp tmp, :gottprel:imm
771 ldr dest, [tmp, #:gottprel_lo12:imm]
772 add dest, dest, tp
773
774 Local Exec:
775 mrs tp, tpidr_el0
0699caae
RL
776 add t0, tp, #:tprel_hi12:imm, lsl #12
777 add t0, t0, #:tprel_lo12_nc:imm
43e9d192
IB
778*/
779
780static void
781aarch64_load_symref_appropriately (rtx dest, rtx imm,
782 enum aarch64_symbol_type type)
783{
784 switch (type)
785 {
786 case SYMBOL_SMALL_ABSOLUTE:
787 {
28514dda 788 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 789 rtx tmp_reg = dest;
ef4bddc2 790 machine_mode mode = GET_MODE (dest);
28514dda
YZ
791
792 gcc_assert (mode == Pmode || mode == ptr_mode);
793
43e9d192 794 if (can_create_pseudo_p ())
28514dda 795 tmp_reg = gen_reg_rtx (mode);
43e9d192 796
28514dda 797 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
43e9d192
IB
798 emit_insn (gen_add_losym (dest, tmp_reg, imm));
799 return;
800 }
801
a5350ddc
CSS
802 case SYMBOL_TINY_ABSOLUTE:
803 emit_insn (gen_rtx_SET (Pmode, dest, imm));
804 return;
805
43e9d192
IB
806 case SYMBOL_SMALL_GOT:
807 {
28514dda
YZ
808 /* In ILP32, the mode of dest can be either SImode or DImode,
809 while the got entry is always of SImode size. The mode of
810 dest depends on how dest is used: if dest is assigned to a
 811 pointer (e.g. stored in memory), it has SImode; it may have
 812 DImode if dest is dereferenced to access the memory.
813 This is why we have to handle three different ldr_got_small
814 patterns here (two patterns for ILP32). */
43e9d192 815 rtx tmp_reg = dest;
ef4bddc2 816 machine_mode mode = GET_MODE (dest);
28514dda 817
43e9d192 818 if (can_create_pseudo_p ())
28514dda
YZ
819 tmp_reg = gen_reg_rtx (mode);
820
821 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
822 if (mode == ptr_mode)
823 {
824 if (mode == DImode)
825 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
826 else
827 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
828 }
829 else
830 {
831 gcc_assert (mode == Pmode);
832 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
833 }
834
43e9d192
IB
835 return;
836 }
837
838 case SYMBOL_SMALL_TLSGD:
839 {
5d8a22a5 840 rtx_insn *insns;
43e9d192
IB
841 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
842
843 start_sequence ();
78607708 844 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
43e9d192
IB
845 insns = get_insns ();
846 end_sequence ();
847
848 RTL_CONST_CALL_P (insns) = 1;
849 emit_libcall_block (insns, dest, result, imm);
850 return;
851 }
852
853 case SYMBOL_SMALL_TLSDESC:
854 {
ef4bddc2 855 machine_mode mode = GET_MODE (dest);
621ad2de 856 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
857 rtx tp;
858
621ad2de
AP
859 gcc_assert (mode == Pmode || mode == ptr_mode);
860
861 /* In ILP32, the got entry is always of SImode size. Unlike
862 small GOT, the dest is fixed at reg 0. */
863 if (TARGET_ILP32)
864 emit_insn (gen_tlsdesc_small_si (imm));
865 else
866 emit_insn (gen_tlsdesc_small_di (imm));
43e9d192 867 tp = aarch64_load_tp (NULL);
621ad2de
AP
868
869 if (mode != Pmode)
870 tp = gen_lowpart (mode, tp);
871
872 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
43e9d192
IB
873 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
874 return;
875 }
876
877 case SYMBOL_SMALL_GOTTPREL:
878 {
621ad2de
AP
879 /* In ILP32, the mode of dest can be either SImode or DImode,
880 while the got entry is always of SImode size. The mode of
881 dest depends on how dest is used: if dest is assigned to a
 882 pointer (e.g. stored in memory), it has SImode; it may have
 883 DImode if dest is dereferenced to access the memory.
884 This is why we have to handle three different tlsie_small
885 patterns here (two patterns for ILP32). */
ef4bddc2 886 machine_mode mode = GET_MODE (dest);
621ad2de 887 rtx tmp_reg = gen_reg_rtx (mode);
43e9d192 888 rtx tp = aarch64_load_tp (NULL);
621ad2de
AP
889
890 if (mode == ptr_mode)
891 {
892 if (mode == DImode)
893 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
894 else
895 {
896 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
897 tp = gen_lowpart (mode, tp);
898 }
899 }
900 else
901 {
902 gcc_assert (mode == Pmode);
903 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
904 }
905
906 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
43e9d192
IB
907 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
908 return;
909 }
910
911 case SYMBOL_SMALL_TPREL:
912 {
913 rtx tp = aarch64_load_tp (NULL);
914 emit_insn (gen_tlsle_small (dest, tp, imm));
915 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
916 return;
917 }
918
87dd8ab0
MS
919 case SYMBOL_TINY_GOT:
920 emit_insn (gen_ldr_got_tiny (dest, imm));
921 return;
922
43e9d192
IB
923 default:
924 gcc_unreachable ();
925 }
926}
927
928/* Emit a move from SRC to DEST. Assume that the move expanders can
929 handle all moves if !can_create_pseudo_p (). The distinction is
930 important because, unlike emit_move_insn, the move expanders know
931 how to force Pmode objects into the constant pool even when the
932 constant pool address is not itself legitimate. */
933static rtx
934aarch64_emit_move (rtx dest, rtx src)
935{
936 return (can_create_pseudo_p ()
937 ? emit_move_insn (dest, src)
938 : emit_move_insn_1 (dest, src));
939}
940
030d03b8
RE
941/* Split a 128-bit move operation into two 64-bit move operations,
942 taking care to handle partial overlap of register to register
943 copies. Special cases are needed when moving between GP regs and
944 FP regs. SRC can be a register, constant or memory; DST a register
945 or memory. If either operand is memory it must not have any side
946 effects. */
43e9d192
IB
947void
948aarch64_split_128bit_move (rtx dst, rtx src)
949{
030d03b8
RE
950 rtx dst_lo, dst_hi;
951 rtx src_lo, src_hi;
43e9d192 952
ef4bddc2 953 machine_mode mode = GET_MODE (dst);
12dc6974 954
030d03b8
RE
955 gcc_assert (mode == TImode || mode == TFmode);
956 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
957 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
43e9d192
IB
958
959 if (REG_P (dst) && REG_P (src))
960 {
030d03b8
RE
961 int src_regno = REGNO (src);
962 int dst_regno = REGNO (dst);
43e9d192 963
030d03b8 964 /* Handle FP <-> GP regs. */
43e9d192
IB
965 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
966 {
030d03b8
RE
967 src_lo = gen_lowpart (word_mode, src);
968 src_hi = gen_highpart (word_mode, src);
969
970 if (mode == TImode)
971 {
972 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
973 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
974 }
975 else
976 {
977 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
978 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
979 }
980 return;
43e9d192
IB
981 }
982 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
983 {
030d03b8
RE
984 dst_lo = gen_lowpart (word_mode, dst);
985 dst_hi = gen_highpart (word_mode, dst);
986
987 if (mode == TImode)
988 {
989 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
990 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
991 }
992 else
993 {
994 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
995 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
996 }
997 return;
43e9d192 998 }
43e9d192
IB
999 }
1000
030d03b8
RE
1001 dst_lo = gen_lowpart (word_mode, dst);
1002 dst_hi = gen_highpart (word_mode, dst);
1003 src_lo = gen_lowpart (word_mode, src);
1004 src_hi = gen_highpart_mode (word_mode, mode, src);
1005
1006 /* At most one pairing may overlap. */
1007 if (reg_overlap_mentioned_p (dst_lo, src_hi))
1008 {
1009 aarch64_emit_move (dst_hi, src_hi);
1010 aarch64_emit_move (dst_lo, src_lo);
1011 }
1012 else
1013 {
1014 aarch64_emit_move (dst_lo, src_lo);
1015 aarch64_emit_move (dst_hi, src_hi);
1016 }
43e9d192
IB
1017}
1018
1019bool
1020aarch64_split_128bit_move_p (rtx dst, rtx src)
1021{
1022 return (! REG_P (src)
1023 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
1024}
1025
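/* Example of the overlap handling above (illustrative only): when splitting
   a TImode copy whose destination pair starts at x1 and whose source pair
   starts at x0, dst_lo (x1) overlaps src_hi (x1), so the high DImode half is
   moved first and the low half second; with no overlap the low half is moved
   first.  */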
8b033a8a
SN
1026/* Split a complex SIMD combine. */
1027
1028void
1029aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
1030{
ef4bddc2
RS
1031 machine_mode src_mode = GET_MODE (src1);
1032 machine_mode dst_mode = GET_MODE (dst);
8b033a8a
SN
1033
1034 gcc_assert (VECTOR_MODE_P (dst_mode));
1035
1036 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
1037 {
1038 rtx (*gen) (rtx, rtx, rtx);
1039
1040 switch (src_mode)
1041 {
1042 case V8QImode:
1043 gen = gen_aarch64_simd_combinev8qi;
1044 break;
1045 case V4HImode:
1046 gen = gen_aarch64_simd_combinev4hi;
1047 break;
1048 case V2SImode:
1049 gen = gen_aarch64_simd_combinev2si;
1050 break;
1051 case V2SFmode:
1052 gen = gen_aarch64_simd_combinev2sf;
1053 break;
1054 case DImode:
1055 gen = gen_aarch64_simd_combinedi;
1056 break;
1057 case DFmode:
1058 gen = gen_aarch64_simd_combinedf;
1059 break;
1060 default:
1061 gcc_unreachable ();
1062 }
1063
1064 emit_insn (gen (dst, src1, src2));
1065 return;
1066 }
1067}
1068
fd4842cd
SN
1069/* Split a complex SIMD move. */
1070
1071void
1072aarch64_split_simd_move (rtx dst, rtx src)
1073{
ef4bddc2
RS
1074 machine_mode src_mode = GET_MODE (src);
1075 machine_mode dst_mode = GET_MODE (dst);
fd4842cd
SN
1076
1077 gcc_assert (VECTOR_MODE_P (dst_mode));
1078
1079 if (REG_P (dst) && REG_P (src))
1080 {
c59b7e28
SN
1081 rtx (*gen) (rtx, rtx);
1082
fd4842cd
SN
1083 gcc_assert (VECTOR_MODE_P (src_mode));
1084
1085 switch (src_mode)
1086 {
1087 case V16QImode:
c59b7e28 1088 gen = gen_aarch64_split_simd_movv16qi;
fd4842cd
SN
1089 break;
1090 case V8HImode:
c59b7e28 1091 gen = gen_aarch64_split_simd_movv8hi;
fd4842cd
SN
1092 break;
1093 case V4SImode:
c59b7e28 1094 gen = gen_aarch64_split_simd_movv4si;
fd4842cd
SN
1095 break;
1096 case V2DImode:
c59b7e28 1097 gen = gen_aarch64_split_simd_movv2di;
fd4842cd
SN
1098 break;
1099 case V4SFmode:
c59b7e28 1100 gen = gen_aarch64_split_simd_movv4sf;
fd4842cd
SN
1101 break;
1102 case V2DFmode:
c59b7e28 1103 gen = gen_aarch64_split_simd_movv2df;
fd4842cd
SN
1104 break;
1105 default:
1106 gcc_unreachable ();
1107 }
c59b7e28
SN
1108
1109 emit_insn (gen (dst, src));
fd4842cd
SN
1110 return;
1111 }
1112}
1113
43e9d192 1114static rtx
ef4bddc2 1115aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
43e9d192
IB
1116{
1117 if (can_create_pseudo_p ())
e18b4a81 1118 return force_reg (mode, value);
43e9d192
IB
1119 else
1120 {
1121 x = aarch64_emit_move (x, value);
1122 return x;
1123 }
1124}
1125
1126
1127static rtx
ef4bddc2 1128aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
43e9d192 1129{
9c023bf0 1130 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
43e9d192
IB
1131 {
1132 rtx high;
1133 /* Load the full offset into a register. This
1134 might be improvable in the future. */
1135 high = GEN_INT (offset);
1136 offset = 0;
e18b4a81
YZ
1137 high = aarch64_force_temporary (mode, temp, high);
1138 reg = aarch64_force_temporary (mode, temp,
1139 gen_rtx_PLUS (mode, high, reg));
43e9d192
IB
1140 }
1141 return plus_constant (mode, reg, offset);
1142}
1143
82614948
RR
1144static int
1145aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
1146 machine_mode mode)
43e9d192 1147{
43e9d192
IB
1148 unsigned HOST_WIDE_INT mask;
1149 int i;
1150 bool first;
1151 unsigned HOST_WIDE_INT val;
1152 bool subtargets;
1153 rtx subtarget;
c747993a 1154 int one_match, zero_match, first_not_ffff_match;
82614948 1155 int num_insns = 0;
43e9d192
IB
1156
1157 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1158 {
82614948 1159 if (generate)
43e9d192 1160 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
82614948
RR
1161 num_insns++;
1162 return num_insns;
43e9d192
IB
1163 }
1164
1165 if (mode == SImode)
1166 {
1167 /* We know we can't do this in 1 insn, and we must be able to do it
1168 in two; so don't mess around looking for sequences that don't buy
1169 us anything. */
82614948
RR
1170 if (generate)
1171 {
1172 emit_insn (gen_rtx_SET (VOIDmode, dest,
1173 GEN_INT (INTVAL (imm) & 0xffff)));
1174 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1175 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1176 }
1177 num_insns += 2;
1178 return num_insns;
43e9d192
IB
1179 }
1180
1181 /* Remaining cases are all for DImode. */
1182
1183 val = INTVAL (imm);
1184 subtargets = optimize && can_create_pseudo_p ();
1185
1186 one_match = 0;
1187 zero_match = 0;
1188 mask = 0xffff;
c747993a 1189 first_not_ffff_match = -1;
43e9d192
IB
1190
1191 for (i = 0; i < 64; i += 16, mask <<= 16)
1192 {
c747993a 1193 if ((val & mask) == mask)
43e9d192 1194 one_match++;
c747993a
IB
1195 else
1196 {
1197 if (first_not_ffff_match < 0)
1198 first_not_ffff_match = i;
1199 if ((val & mask) == 0)
1200 zero_match++;
1201 }
43e9d192
IB
1202 }
1203
1204 if (one_match == 2)
1205 {
c747993a
IB
1206 /* Set one of the quarters and then insert back into result. */
1207 mask = 0xffffll << first_not_ffff_match;
82614948
RR
1208 if (generate)
1209 {
1210 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1211 emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
1212 GEN_INT ((val >> first_not_ffff_match)
1213 & 0xffff)));
1214 }
1215 num_insns += 2;
1216 return num_insns;
c747993a
IB
1217 }
1218
43e9d192
IB
1219 if (zero_match == 2)
1220 goto simple_sequence;
1221
1222 mask = 0x0ffff0000UL;
1223 for (i = 16; i < 64; i += 16, mask <<= 16)
1224 {
1225 HOST_WIDE_INT comp = mask & ~(mask - 1);
1226
1227 if (aarch64_uimm12_shift (val - (val & mask)))
1228 {
82614948
RR
1229 if (generate)
1230 {
1231 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1232 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1233 GEN_INT (val & mask)));
1234 emit_insn (gen_adddi3 (dest, subtarget,
1235 GEN_INT (val - (val & mask))));
1236 }
1237 num_insns += 2;
1238 return num_insns;
43e9d192
IB
1239 }
1240 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1241 {
82614948
RR
1242 if (generate)
1243 {
1244 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1245 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1246 GEN_INT ((val + comp) & mask)));
1247 emit_insn (gen_adddi3 (dest, subtarget,
1248 GEN_INT (val - ((val + comp) & mask))));
1249 }
1250 num_insns += 2;
1251 return num_insns;
43e9d192
IB
1252 }
1253 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1254 {
82614948
RR
1255 if (generate)
1256 {
1257 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1258 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1259 GEN_INT ((val - comp) | ~mask)));
1260 emit_insn (gen_adddi3 (dest, subtarget,
1261 GEN_INT (val - ((val - comp) | ~mask))));
1262 }
1263 num_insns += 2;
1264 return num_insns;
43e9d192
IB
1265 }
1266 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1267 {
82614948
RR
1268 if (generate)
1269 {
1270 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1271 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1272 GEN_INT (val | ~mask)));
1273 emit_insn (gen_adddi3 (dest, subtarget,
1274 GEN_INT (val - (val | ~mask))));
1275 }
1276 num_insns += 2;
1277 return num_insns;
43e9d192
IB
1278 }
1279 }
1280
1281 /* See if we can do it by arithmetically combining two
1282 immediates. */
1283 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1284 {
1285 int j;
1286 mask = 0xffff;
1287
1288 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1289 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1290 {
82614948
RR
1291 if (generate)
1292 {
1293 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1294 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1295 GEN_INT (aarch64_bitmasks[i])));
1296 emit_insn (gen_adddi3 (dest, subtarget,
1297 GEN_INT (val - aarch64_bitmasks[i])));
1298 }
1299 num_insns += 2;
1300 return num_insns;
43e9d192
IB
1301 }
1302
1303 for (j = 0; j < 64; j += 16, mask <<= 16)
1304 {
1305 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1306 {
82614948
RR
1307 if (generate)
1308 {
1309 emit_insn (gen_rtx_SET (VOIDmode, dest,
1310 GEN_INT (aarch64_bitmasks[i])));
1311 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1312 GEN_INT ((val >> j) & 0xffff)));
1313 }
1314 num_insns += 2;
1315 return num_insns;
43e9d192
IB
1316 }
1317 }
1318 }
1319
1320 /* See if we can do it by logically combining two immediates. */
1321 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1322 {
1323 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1324 {
1325 int j;
1326
1327 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1328 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1329 {
82614948
RR
1330 if (generate)
1331 {
1332 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1333 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1334 GEN_INT (aarch64_bitmasks[i])));
1335 emit_insn (gen_iordi3 (dest, subtarget,
1336 GEN_INT (aarch64_bitmasks[j])));
1337 }
1338 num_insns += 2;
1339 return num_insns;
43e9d192
IB
1340 }
1341 }
1342 else if ((val & aarch64_bitmasks[i]) == val)
1343 {
1344 int j;
1345
1346 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1347 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1348 {
82614948
RR
1349 if (generate)
1350 {
1351 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1352 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1353 GEN_INT (aarch64_bitmasks[j])));
1354 emit_insn (gen_anddi3 (dest, subtarget,
1355 GEN_INT (aarch64_bitmasks[i])));
1356 }
1357 num_insns += 2;
1358 return num_insns;
43e9d192
IB
1359 }
1360 }
1361 }
1362
2c274197
KT
1363 if (one_match > zero_match)
1364 {
1365 /* Set either first three quarters or all but the third. */
1366 mask = 0xffffll << (16 - first_not_ffff_match);
82614948
RR
1367 if (generate)
1368 emit_insn (gen_rtx_SET (VOIDmode, dest,
1369 GEN_INT (val | mask | 0xffffffff00000000ull)));
1370 num_insns ++;
2c274197
KT
1371
1372 /* Now insert other two quarters. */
1373 for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
1374 i < 64; i += 16, mask <<= 16)
1375 {
1376 if ((val & mask) != mask)
82614948
RR
1377 {
1378 if (generate)
1379 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1380 GEN_INT ((val >> i) & 0xffff)));
1381 num_insns ++;
1382 }
2c274197 1383 }
82614948 1384 return num_insns;
2c274197
KT
1385 }
1386
43e9d192
IB
1387 simple_sequence:
1388 first = true;
1389 mask = 0xffff;
1390 for (i = 0; i < 64; i += 16, mask <<= 16)
1391 {
1392 if ((val & mask) != 0)
1393 {
1394 if (first)
1395 {
82614948
RR
1396 if (generate)
1397 emit_insn (gen_rtx_SET (VOIDmode, dest,
1398 GEN_INT (val & mask)));
1399 num_insns ++;
43e9d192
IB
1400 first = false;
1401 }
1402 else
82614948
RR
1403 {
1404 if (generate)
1405 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1406 GEN_INT ((val >> i) & 0xffff)));
1407 num_insns ++;
1408 }
1409 }
1410 }
1411
1412 return num_insns;
1413}
1414
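/* Worked example (illustrative, not part of the original): the SImode path
   above always costs two instructions -- a move of the low 16 bits followed
   by an insertion of the high 16 bits -- so 0x12345678 is built roughly as
     mov  w0, #0x5678
     movk w0, #0x1234, lsl #16
   given that no single MOV or logical-immediate form applies (that case is
   caught earlier by aarch64_move_imm).  */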
1415
1416void
1417aarch64_expand_mov_immediate (rtx dest, rtx imm)
1418{
1419 machine_mode mode = GET_MODE (dest);
1420
1421 gcc_assert (mode == SImode || mode == DImode);
1422
1423 /* Check on what type of symbol it is. */
1424 if (GET_CODE (imm) == SYMBOL_REF
1425 || GET_CODE (imm) == LABEL_REF
1426 || GET_CODE (imm) == CONST)
1427 {
1428 rtx mem, base, offset;
1429 enum aarch64_symbol_type sty;
1430
1431 /* If we have (const (plus symbol offset)), separate out the offset
1432 before we start classifying the symbol. */
1433 split_const (imm, &base, &offset);
1434
f8b756b7 1435 sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
82614948
RR
1436 switch (sty)
1437 {
1438 case SYMBOL_FORCE_TO_MEM:
1439 if (offset != const0_rtx
1440 && targetm.cannot_force_const_mem (mode, imm))
1441 {
1442 gcc_assert (can_create_pseudo_p ());
1443 base = aarch64_force_temporary (mode, dest, base);
1444 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1445 aarch64_emit_move (dest, base);
1446 return;
1447 }
1448 mem = force_const_mem (ptr_mode, imm);
1449 gcc_assert (mem);
1450 if (mode != ptr_mode)
1451 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1452 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1453 return;
1454
1455 case SYMBOL_SMALL_TLSGD:
1456 case SYMBOL_SMALL_TLSDESC:
1457 case SYMBOL_SMALL_GOTTPREL:
1458 case SYMBOL_SMALL_GOT:
1459 case SYMBOL_TINY_GOT:
1460 if (offset != const0_rtx)
1461 {
1462 gcc_assert(can_create_pseudo_p ());
1463 base = aarch64_force_temporary (mode, dest, base);
1464 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1465 aarch64_emit_move (dest, base);
1466 return;
1467 }
1468 /* FALLTHRU */
1469
1470 case SYMBOL_SMALL_TPREL:
1471 case SYMBOL_SMALL_ABSOLUTE:
1472 case SYMBOL_TINY_ABSOLUTE:
1473 aarch64_load_symref_appropriately (dest, imm, sty);
1474 return;
1475
1476 default:
1477 gcc_unreachable ();
1478 }
1479 }
1480
1481 if (!CONST_INT_P (imm))
1482 {
1483 if (GET_CODE (imm) == HIGH)
1484 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1485 else
1486 {
1487 rtx mem = force_const_mem (mode, imm);
1488 gcc_assert (mem);
1489 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
43e9d192 1490 }
82614948
RR
1491
1492 return;
43e9d192 1493 }
82614948
RR
1494
1495 aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
43e9d192
IB
1496}
1497
1498static bool
fee9ba42
JW
1499aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1500 tree exp ATTRIBUTE_UNUSED)
43e9d192 1501{
fee9ba42 1502 /* Currently, always true. */
43e9d192
IB
1503 return true;
1504}
1505
1506/* Implement TARGET_PASS_BY_REFERENCE. */
1507
1508static bool
1509aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 1510 machine_mode mode,
43e9d192
IB
1511 const_tree type,
1512 bool named ATTRIBUTE_UNUSED)
1513{
1514 HOST_WIDE_INT size;
ef4bddc2 1515 machine_mode dummymode;
43e9d192
IB
1516 int nregs;
1517
1518 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1519 size = (mode == BLKmode && type)
1520 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1521
aadc1c43
MHD
1522 /* Aggregates are passed by reference based on their size. */
1523 if (type && AGGREGATE_TYPE_P (type))
43e9d192 1524 {
aadc1c43 1525 size = int_size_in_bytes (type);
43e9d192
IB
1526 }
1527
1528 /* Variable sized arguments are always returned by reference. */
1529 if (size < 0)
1530 return true;
1531
1532 /* Can this be a candidate to be passed in fp/simd register(s)? */
1533 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1534 &dummymode, &nregs,
1535 NULL))
1536 return false;
1537
1538 /* Arguments which are variable sized or larger than 2 registers are
 1539 passed by reference unless they are a homogeneous floating point
1540 aggregate. */
1541 return size > 2 * UNITS_PER_WORD;
1542}
1543
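/* Examples of the rules above (illustrative only): a 24-byte struct of three
   pointers exceeds 2 * UNITS_PER_WORD and is passed by reference; a 16-byte
   struct is passed by value; and a 32-byte struct of four doubles is also
   passed by value, because it is an HFA and therefore an fp/simd
   candidate.  */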
1544/* Return TRUE if VALTYPE is padded to its least significant bits. */
1545static bool
1546aarch64_return_in_msb (const_tree valtype)
1547{
ef4bddc2 1548 machine_mode dummy_mode;
43e9d192
IB
1549 int dummy_int;
1550
1551 /* Never happens in little-endian mode. */
1552 if (!BYTES_BIG_ENDIAN)
1553 return false;
1554
1555 /* Only composite types smaller than or equal to 16 bytes can
1556 be potentially returned in registers. */
1557 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1558 || int_size_in_bytes (valtype) <= 0
1559 || int_size_in_bytes (valtype) > 16)
1560 return false;
1561
1562 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1563 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1564 is always passed/returned in the least significant bits of fp/simd
1565 register(s). */
1566 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1567 &dummy_mode, &dummy_int, NULL))
1568 return false;
1569
1570 return true;
1571}
1572
1573/* Implement TARGET_FUNCTION_VALUE.
1574 Define how to find the value returned by a function. */
1575
1576static rtx
1577aarch64_function_value (const_tree type, const_tree func,
1578 bool outgoing ATTRIBUTE_UNUSED)
1579{
ef4bddc2 1580 machine_mode mode;
43e9d192
IB
1581 int unsignedp;
1582 int count;
ef4bddc2 1583 machine_mode ag_mode;
43e9d192
IB
1584
1585 mode = TYPE_MODE (type);
1586 if (INTEGRAL_TYPE_P (type))
1587 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1588
1589 if (aarch64_return_in_msb (type))
1590 {
1591 HOST_WIDE_INT size = int_size_in_bytes (type);
1592
1593 if (size % UNITS_PER_WORD != 0)
1594 {
1595 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1596 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1597 }
1598 }
1599
1600 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1601 &ag_mode, &count, NULL))
1602 {
1603 if (!aarch64_composite_type_p (type, mode))
1604 {
1605 gcc_assert (count == 1 && mode == ag_mode);
1606 return gen_rtx_REG (mode, V0_REGNUM);
1607 }
1608 else
1609 {
1610 int i;
1611 rtx par;
1612
1613 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1614 for (i = 0; i < count; i++)
1615 {
1616 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1617 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1618 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1619 XVECEXP (par, 0, i) = tmp;
1620 }
1621 return par;
1622 }
1623 }
1624 else
1625 return gen_rtx_REG (mode, R0_REGNUM);
1626}
1627
1628/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1629 Return true if REGNO is the number of a hard register in which the values
1630 of called function may come back. */
1631
1632static bool
1633aarch64_function_value_regno_p (const unsigned int regno)
1634{
1635 /* Maximum of 16 bytes can be returned in the general registers. Examples
1636 of 16-byte return values are: 128-bit integers and 16-byte small
1637 structures (excluding homogeneous floating-point aggregates). */
1638 if (regno == R0_REGNUM || regno == R1_REGNUM)
1639 return true;
1640
1641 /* Up to four fp/simd registers can return a function value, e.g. a
1642 homogeneous floating-point aggregate having four members. */
1643 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1644 return !TARGET_GENERAL_REGS_ONLY;
1645
1646 return false;
1647}
1648
1649/* Implement TARGET_RETURN_IN_MEMORY.
1650
1651 If the type T of the result of a function is such that
1652 void func (T arg)
1653 would require that arg be passed as a value in a register (or set of
1654 registers) according to the parameter passing rules, then the result
1655 is returned in the same registers as would be used for such an
1656 argument. */
1657
1658static bool
1659aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1660{
1661 HOST_WIDE_INT size;
ef4bddc2 1662 machine_mode ag_mode;
43e9d192
IB
1663 int count;
1664
1665 if (!AGGREGATE_TYPE_P (type)
1666 && TREE_CODE (type) != COMPLEX_TYPE
1667 && TREE_CODE (type) != VECTOR_TYPE)
 1668 /* Simple scalar types are always returned in registers. */
1669 return false;
1670
1671 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1672 type,
1673 &ag_mode,
1674 &count,
1675 NULL))
1676 return false;
1677
 1678 /* Types larger than 2 registers are returned in memory. */
1679 size = int_size_in_bytes (type);
1680 return (size < 0 || size > 2 * UNITS_PER_WORD);
1681}
1682
1683static bool
ef4bddc2 1684aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
1685 const_tree type, int *nregs)
1686{
1687 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1688 return aarch64_vfp_is_call_or_return_candidate (mode,
1689 type,
1690 &pcum->aapcs_vfp_rmode,
1691 nregs,
1692 NULL);
1693}
1694
1695/* Given MODE and TYPE of a function argument, return the alignment in
1696 bits. The idea is to suppress any stronger alignment requested by
1697 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1698 This is a helper function for local use only. */
1699
1700static unsigned int
ef4bddc2 1701aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192
IB
1702{
1703 unsigned int alignment;
1704
1705 if (type)
1706 {
1707 if (!integer_zerop (TYPE_SIZE (type)))
1708 {
1709 if (TYPE_MODE (type) == mode)
1710 alignment = TYPE_ALIGN (type);
1711 else
1712 alignment = GET_MODE_ALIGNMENT (mode);
1713 }
1714 else
1715 alignment = 0;
1716 }
1717 else
1718 alignment = GET_MODE_ALIGNMENT (mode);
1719
1720 return alignment;
1721}
1722
1723/* Layout a function argument according to the AAPCS64 rules. The rule
1724 numbers refer to the rule numbers in the AAPCS64. */
1725
1726static void
ef4bddc2 1727aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
1728 const_tree type,
1729 bool named ATTRIBUTE_UNUSED)
1730{
1731 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1732 int ncrn, nvrn, nregs;
1733 bool allocate_ncrn, allocate_nvrn;
3abf17cf 1734 HOST_WIDE_INT size;
43e9d192
IB
1735
1736 /* We need to do this once per argument. */
1737 if (pcum->aapcs_arg_processed)
1738 return;
1739
1740 pcum->aapcs_arg_processed = true;
1741
3abf17cf
YZ
1742 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1743 size
1744 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1745 UNITS_PER_WORD);
1746
43e9d192
IB
1747 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1748 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1749 mode,
1750 type,
1751 &nregs);
1752
1753 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1754 The following code thus handles passing by SIMD/FP registers first. */
1755
1756 nvrn = pcum->aapcs_nvrn;
1757
 1758 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
 1759 and homogeneous short-vector aggregates (HVA). */
1760 if (allocate_nvrn)
1761 {
1762 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1763 {
1764 pcum->aapcs_nextnvrn = nvrn + nregs;
1765 if (!aarch64_composite_type_p (type, mode))
1766 {
1767 gcc_assert (nregs == 1);
1768 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1769 }
1770 else
1771 {
1772 rtx par;
1773 int i;
1774 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1775 for (i = 0; i < nregs; i++)
1776 {
1777 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1778 V0_REGNUM + nvrn + i);
1779 tmp = gen_rtx_EXPR_LIST
1780 (VOIDmode, tmp,
1781 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1782 XVECEXP (par, 0, i) = tmp;
1783 }
1784 pcum->aapcs_reg = par;
1785 }
1786 return;
1787 }
1788 else
1789 {
1790 /* C.3 NSRN is set to 8. */
1791 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1792 goto on_stack;
1793 }
1794 }
1795
1796 ncrn = pcum->aapcs_ncrn;
3abf17cf 1797 nregs = size / UNITS_PER_WORD;
43e9d192
IB
1798
 1799 /* C6 - C9, though the sign and zero extension semantics are
 1800 handled elsewhere. This is the case where the argument fits
 1801 entirely in general registers. */
1802 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1803 {
1804 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1805
1806 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1807
1808 /* C.8 if the argument has an alignment of 16 then the NGRN is
1809 rounded up to the next even number. */
1810 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1811 {
1812 ++ncrn;
1813 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1814 }
1815 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1816 A reg is still generated for it, but the caller should be smart
1817 enough not to use it. */
1818 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1819 {
1820 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1821 }
1822 else
1823 {
1824 rtx par;
1825 int i;
1826
1827 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1828 for (i = 0; i < nregs; i++)
1829 {
1830 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1831 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1832 GEN_INT (i * UNITS_PER_WORD));
1833 XVECEXP (par, 0, i) = tmp;
1834 }
1835 pcum->aapcs_reg = par;
1836 }
1837
1838 pcum->aapcs_nextncrn = ncrn + nregs;
1839 return;
1840 }
1841
1842 /* C.11 */
1843 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1844
1845 /* The argument is passed on stack; record the needed number of words for
3abf17cf 1846 this argument and align the total size if necessary. */
43e9d192 1847on_stack:
3abf17cf 1848 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
43e9d192
IB
1849 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1850 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
3abf17cf 1851 16 / UNITS_PER_WORD);
43e9d192
IB
1852 return;
1853}
1854
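/* Worked example of rule C.8 above (illustrative):  for
     void f (int a, __int128 b);
   `a' is passed in w0, so ncrn is 1 when `b' is laid out.  `b' needs two
   registers and has 16-byte alignment, so ncrn is rounded up to 2 and `b'
   is passed in the x2/x3 pair, leaving x1 unused.  */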
1855/* Implement TARGET_FUNCTION_ARG. */
1856
1857static rtx
ef4bddc2 1858aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
1859 const_tree type, bool named)
1860{
1861 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1862 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1863
1864 if (mode == VOIDmode)
1865 return NULL_RTX;
1866
1867 aarch64_layout_arg (pcum_v, mode, type, named);
1868 return pcum->aapcs_reg;
1869}
1870
1871void
1872aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1873 const_tree fntype ATTRIBUTE_UNUSED,
1874 rtx libname ATTRIBUTE_UNUSED,
1875 const_tree fndecl ATTRIBUTE_UNUSED,
1876 unsigned n_named ATTRIBUTE_UNUSED)
1877{
1878 pcum->aapcs_ncrn = 0;
1879 pcum->aapcs_nvrn = 0;
1880 pcum->aapcs_nextncrn = 0;
1881 pcum->aapcs_nextnvrn = 0;
1882 pcum->pcs_variant = ARM_PCS_AAPCS64;
1883 pcum->aapcs_reg = NULL_RTX;
1884 pcum->aapcs_arg_processed = false;
1885 pcum->aapcs_stack_words = 0;
1886 pcum->aapcs_stack_size = 0;
1887
1888 return;
1889}
1890
1891static void
1892aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 1893 machine_mode mode,
43e9d192
IB
1894 const_tree type,
1895 bool named)
1896{
1897 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1898 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1899 {
1900 aarch64_layout_arg (pcum_v, mode, type, named);
1901 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1902 != (pcum->aapcs_stack_words != 0));
1903 pcum->aapcs_arg_processed = false;
1904 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1905 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1906 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1907 pcum->aapcs_stack_words = 0;
1908 pcum->aapcs_reg = NULL_RTX;
1909 }
1910}
1911
1912bool
1913aarch64_function_arg_regno_p (unsigned regno)
1914{
1915 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1916 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1917}
1918
1919/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1920 PARM_BOUNDARY bits of alignment, but will be given anything up
1921 to STACK_BOUNDARY bits if the type requires it. This makes sure
1922 that both before and after the layout of each argument, the Next
1923 Stacked Argument Address (NSAA) will have a minimum alignment of
1924 8 bytes. */
1925
1926static unsigned int
ef4bddc2 1927aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192
IB
1928{
1929 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1930
1931 if (alignment < PARM_BOUNDARY)
1932 alignment = PARM_BOUNDARY;
1933 if (alignment > STACK_BOUNDARY)
1934 alignment = STACK_BOUNDARY;
1935 return alignment;
1936}
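An illustrative note, not part of the original source; it assumes the usual AArch64 settings PARM_BOUNDARY == 64 and STACK_BOUNDARY == 128:

/* Illustrative example: a char argument (8-bit natural alignment) is
   promoted to the 64-bit PARM_BOUNDARY, while a type declared with
   __attribute__ ((aligned (32))) (256-bit alignment) is capped at the
   128-bit STACK_BOUNDARY, so the NSAA never needs more than 16-byte
   alignment.  */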
1937
1938/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1939
1940 Return true if an argument passed on the stack should be padded upwards,
1941 i.e. if the least-significant byte of the stack slot has useful data.
1942
1943 Small aggregate types are placed in the lowest memory address.
1944
1945 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1946
1947bool
ef4bddc2 1948aarch64_pad_arg_upward (machine_mode mode, const_tree type)
43e9d192
IB
1949{
1950 /* On little-endian targets, the least significant byte of every stack
1951 argument is passed at the lowest byte address of the stack slot. */
1952 if (!BYTES_BIG_ENDIAN)
1953 return true;
1954
00edcfbe 1955 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
1956 the least significant byte of a stack argument is passed at the highest
1957 byte address of the stack slot. */
1958 if (type
00edcfbe
YZ
1959 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1960 || POINTER_TYPE_P (type))
43e9d192
IB
1961 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1962 return false;
1963
1964 /* Everything else is padded upward, i.e. data in the first byte of the stack slot. */
1965 return true;
1966}
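A short illustrative summary of the effect of this predicate (it restates the code above, nothing more):

/* Illustrative example: on a big-endian target an int passed on the stack
   is padded downward -- its bytes occupy the high-addressed end of the
   8-byte slot -- whereas a 3-byte struct is padded upward, with its bytes
   at the low-addressed end.  On little-endian targets everything is
   padded upward.  */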
1967
1968/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1969
1970 It specifies padding for the last (possibly the only)
1971 element of a block move between registers and memory. Assuming
1972 the block is in memory, padding upward means that the last
1973 element is padded after its most significant byte, while with
1974 downward padding the last element is padded at its least
1975 significant byte side.
1976
1977 Small aggregates and small complex types are always padded
1978 upwards.
1979
1980 We don't need to worry about homogeneous floating-point or
1981 short-vector aggregates; their move is not affected by the
1982 padding direction determined here. Regardless of endianness,
1983 each element of such an aggregate is put in the least
1984 significant bits of a fp/simd register.
1985
1986 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1987 register has useful data, and return the opposite if the most
1988 significant byte does. */
1989
1990bool
ef4bddc2 1991aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
1992 bool first ATTRIBUTE_UNUSED)
1993{
1994
1995 /* Small composite types are always padded upward. */
1996 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1997 {
1998 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1999 : GET_MODE_SIZE (mode));
2000 if (size < 2 * UNITS_PER_WORD)
2001 return true;
2002 }
2003
2004 /* Otherwise, use the default padding. */
2005 return !BYTES_BIG_ENDIAN;
2006}
2007
ef4bddc2 2008static machine_mode
43e9d192
IB
2009aarch64_libgcc_cmp_return_mode (void)
2010{
2011 return SImode;
2012}
2013
2014static bool
2015aarch64_frame_pointer_required (void)
2016{
0b7f8166
MS
2017 /* In aarch64_override_options_after_change
2018 flag_omit_leaf_frame_pointer turns off the frame pointer by
2019 default. Turn it back on now if this is not a leaf
2020 function. */
2021 if (flag_omit_leaf_frame_pointer
2022 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2023 return true;
43e9d192 2024
0b7f8166 2025 return false;
43e9d192
IB
2026}
2027
2028/* Mark the registers that need to be saved by the callee and calculate
2029 the size of the callee-saved registers area and frame record (both FP
2030 and LR may be omitted). */
2031static void
2032aarch64_layout_frame (void)
2033{
2034 HOST_WIDE_INT offset = 0;
2035 int regno;
2036
2037 if (reload_completed && cfun->machine->frame.laid_out)
2038 return;
2039
97826595
MS
2040#define SLOT_NOT_REQUIRED (-2)
2041#define SLOT_REQUIRED (-1)
2042
363ffa50
JW
2043 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
2044 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
2045
43e9d192
IB
2046 /* First mark all the registers that really need to be saved... */
2047 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2048 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2049
2050 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2051 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2052
2053 /* ... that includes the eh data registers (if needed)... */
2054 if (crtl->calls_eh_return)
2055 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
2056 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2057 = SLOT_REQUIRED;
43e9d192
IB
2058
2059 /* ... and any callee saved register that dataflow says is live. */
2060 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2061 if (df_regs_ever_live_p (regno)
1c923b60
JW
2062 && (regno == R30_REGNUM
2063 || !call_used_regs[regno]))
97826595 2064 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2065
2066 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2067 if (df_regs_ever_live_p (regno)
2068 && !call_used_regs[regno])
97826595 2069 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2070
2071 if (frame_pointer_needed)
2072 {
2e1cdae5 2073 /* FP and LR are placed in the linkage record. */
43e9d192 2074 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 2075 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 2076 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 2077 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
43e9d192 2078 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
2e1cdae5 2079 offset += 2 * UNITS_PER_WORD;
43e9d192
IB
2080 }
2081
2082 /* Now assign stack slots for them. */
2e1cdae5 2083 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2084 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2085 {
2086 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
2087 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2088 cfun->machine->frame.wb_candidate1 = regno;
2089 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
2090 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2091 offset += UNITS_PER_WORD;
2092 }
2093
2094 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2095 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2096 {
2097 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
2098 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2099 cfun->machine->frame.wb_candidate1 = regno;
2100 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
2101 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2102 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2103 offset += UNITS_PER_WORD;
2104 }
2105
43e9d192
IB
2106 cfun->machine->frame.padding0 =
2107 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
2108 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2109
2110 cfun->machine->frame.saved_regs_size = offset;
1c960e02
MS
2111
2112 cfun->machine->frame.hard_fp_offset
2113 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
2114 + get_frame_size ()
2115 + cfun->machine->frame.saved_regs_size,
2116 STACK_BOUNDARY / BITS_PER_UNIT);
2117
2118 cfun->machine->frame.frame_size
2119 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
2120 + crtl->outgoing_args_size,
2121 STACK_BOUNDARY / BITS_PER_UNIT);
2122
43e9d192
IB
2123 cfun->machine->frame.laid_out = true;
2124}
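To make the bookkeeping above concrete, here is a small worked example; the numbers are illustrative only and assume a function with the frame pointer in use, 16 bytes of local variables, no varargs save area, no outgoing stack arguments, and x19 callee-saved in addition to the frame record:

/* Illustrative numbers for that case:
     reg_offset[R29] = 0, reg_offset[R30] = 8, reg_offset[x19] = 16
     padding0 = 8, saved_regs_size = 32
     hard_fp_offset = ROUND_UP (0 + 16 + 32, 16) = 48
     frame_size = ROUND_UP (48 + 0, 16) = 48.  */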
2125
43e9d192
IB
2126static bool
2127aarch64_register_saved_on_entry (int regno)
2128{
97826595 2129 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
2130}
2131
64dedd72
JW
2132static unsigned
2133aarch64_next_callee_save (unsigned regno, unsigned limit)
2134{
2135 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2136 regno ++;
2137 return regno;
2138}
43e9d192 2139
c5e1f66e 2140static void
ef4bddc2 2141aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
2142 HOST_WIDE_INT adjustment)
2143 {
2144 rtx base_rtx = stack_pointer_rtx;
2145 rtx insn, reg, mem;
2146
2147 reg = gen_rtx_REG (mode, regno);
2148 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2149 plus_constant (Pmode, base_rtx, -adjustment));
2150 mem = gen_rtx_MEM (mode, mem);
2151
2152 insn = emit_move_insn (mem, reg);
2153 RTX_FRAME_RELATED_P (insn) = 1;
2154}
2155
80c11907 2156static rtx
ef4bddc2 2157aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
2158 HOST_WIDE_INT adjustment)
2159{
2160 switch (mode)
2161 {
2162 case DImode:
2163 return gen_storewb_pairdi_di (base, base, reg, reg2,
2164 GEN_INT (-adjustment),
2165 GEN_INT (UNITS_PER_WORD - adjustment));
2166 case DFmode:
2167 return gen_storewb_pairdf_di (base, base, reg, reg2,
2168 GEN_INT (-adjustment),
2169 GEN_INT (UNITS_PER_WORD - adjustment));
2170 default:
2171 gcc_unreachable ();
2172 }
2173}
2174
2175static void
ef4bddc2 2176aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
80c11907
JW
2177 unsigned regno2, HOST_WIDE_INT adjustment)
2178{
5d8a22a5 2179 rtx_insn *insn;
80c11907
JW
2180 rtx reg1 = gen_rtx_REG (mode, regno1);
2181 rtx reg2 = gen_rtx_REG (mode, regno2);
2182
2183 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2184 reg2, adjustment));
2185 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
2186 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2187 RTX_FRAME_RELATED_P (insn) = 1;
2188}
2189
159313d9 2190static rtx
ef4bddc2 2191aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
2192 HOST_WIDE_INT adjustment)
2193{
2194 switch (mode)
2195 {
2196 case DImode:
2197 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2198 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2199 case DFmode:
2200 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2201 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2202 default:
2203 gcc_unreachable ();
2204 }
2205}
2206
72df5c1f 2207static rtx
ef4bddc2 2208aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
2209 rtx reg2)
2210{
2211 switch (mode)
2212 {
2213 case DImode:
2214 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2215
2216 case DFmode:
2217 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2218
2219 default:
2220 gcc_unreachable ();
2221 }
2222}
2223
2224static rtx
ef4bddc2 2225aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
2226 rtx mem2)
2227{
2228 switch (mode)
2229 {
2230 case DImode:
2231 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2232
2233 case DFmode:
2234 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2235
2236 default:
2237 gcc_unreachable ();
2238 }
2239}
2240
43e9d192 2241
43e9d192 2242static void
ef4bddc2 2243aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
ae13fce3 2244 unsigned start, unsigned limit, bool skip_wb)
43e9d192 2245{
5d8a22a5 2246 rtx_insn *insn;
ef4bddc2 2247 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
a007a21c 2248 ? gen_frame_mem : gen_rtx_MEM);
43e9d192
IB
2249 unsigned regno;
2250 unsigned regno2;
2251
0ec74a1e 2252 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
2253 regno <= limit;
2254 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 2255 {
ae13fce3
JW
2256 rtx reg, mem;
2257 HOST_WIDE_INT offset;
64dedd72 2258
ae13fce3
JW
2259 if (skip_wb
2260 && (regno == cfun->machine->frame.wb_candidate1
2261 || regno == cfun->machine->frame.wb_candidate2))
2262 continue;
2263
2264 reg = gen_rtx_REG (mode, regno);
2265 offset = start_offset + cfun->machine->frame.reg_offset[regno];
0ec74a1e
JW
2266 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2267 offset));
64dedd72
JW
2268
2269 regno2 = aarch64_next_callee_save (regno + 1, limit);
2270
2271 if (regno2 <= limit
2272 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2273 == cfun->machine->frame.reg_offset[regno2]))
2274
43e9d192 2275 {
0ec74a1e 2276 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
2277 rtx mem2;
2278
2279 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
8ed2fc62
JW
2280 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2281 offset));
2282 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2283 reg2));
0b4a9743 2284
64dedd72
JW
2285 /* The first part of a frame-related parallel insn is
2286 always assumed to be relevant to the frame
2287 calculations; subsequent parts are only
2288 frame-related if explicitly marked. */
2289 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2290 regno = regno2;
2291 }
2292 else
8ed2fc62
JW
2293 insn = emit_move_insn (mem, reg);
2294
2295 RTX_FRAME_RELATED_P (insn) = 1;
2296 }
2297}
2298
2299static void
ef4bddc2 2300aarch64_restore_callee_saves (machine_mode mode,
8ed2fc62 2301 HOST_WIDE_INT start_offset, unsigned start,
dd991abb 2302 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 2303{
8ed2fc62 2304 rtx base_rtx = stack_pointer_rtx;
ef4bddc2 2305 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
8ed2fc62
JW
2306 ? gen_frame_mem : gen_rtx_MEM);
2307 unsigned regno;
2308 unsigned regno2;
2309 HOST_WIDE_INT offset;
2310
2311 for (regno = aarch64_next_callee_save (start, limit);
2312 regno <= limit;
2313 regno = aarch64_next_callee_save (regno + 1, limit))
2314 {
ae13fce3 2315 rtx reg, mem;
8ed2fc62 2316
ae13fce3
JW
2317 if (skip_wb
2318 && (regno == cfun->machine->frame.wb_candidate1
2319 || regno == cfun->machine->frame.wb_candidate2))
2320 continue;
2321
2322 reg = gen_rtx_REG (mode, regno);
8ed2fc62
JW
2323 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2324 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2325
2326 regno2 = aarch64_next_callee_save (regno + 1, limit);
2327
2328 if (regno2 <= limit
2329 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2330 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 2331 {
8ed2fc62
JW
2332 rtx reg2 = gen_rtx_REG (mode, regno2);
2333 rtx mem2;
2334
2335 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2336 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 2337 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 2338
dd991abb 2339 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 2340 regno = regno2;
43e9d192 2341 }
8ed2fc62 2342 else
dd991abb
RH
2343 emit_move_insn (reg, mem);
2344 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 2345 }
43e9d192
IB
2346}
2347
2348/* AArch64 stack frames generated by this compiler look like:
2349
2350 +-------------------------------+
2351 | |
2352 | incoming stack arguments |
2353 | |
34834420
MS
2354 +-------------------------------+
2355 | | <-- incoming stack pointer (aligned)
43e9d192
IB
2356 | callee-allocated save area |
2357 | for register varargs |
2358 | |
34834420
MS
2359 +-------------------------------+
2360 | local variables | <-- frame_pointer_rtx
43e9d192
IB
2361 | |
2362 +-------------------------------+
454fdba9
RL
2363 | padding0 | \
2364 +-------------------------------+ |
454fdba9 2365 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
2366 +-------------------------------+ |
2367 | LR' | |
2368 +-------------------------------+ |
34834420
MS
2369 | FP' | / <- hard_frame_pointer_rtx (aligned)
2370 +-------------------------------+
43e9d192
IB
2371 | dynamic allocation |
2372 +-------------------------------+
34834420
MS
2373 | padding |
2374 +-------------------------------+
2375 | outgoing stack arguments | <-- arg_pointer
2376 | |
2377 +-------------------------------+
2378 | | <-- stack_pointer_rtx (aligned)
43e9d192 2379
34834420
MS
2380 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2381 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2382 unchanged. */
43e9d192
IB
2383
2384/* Generate the prologue instructions for entry into a function.
2385 Establish the stack frame by decreasing the stack pointer with a
2386 properly calculated size and, if necessary, create a frame record
2387 filled with the values of LR and previous frame pointer. The
6991c977 2388 current FP is also set up if it is in use. */
43e9d192
IB
2389
2390void
2391aarch64_expand_prologue (void)
2392{
2393 /* sub sp, sp, #<frame_size>
2394 stp {fp, lr}, [sp, #<frame_size> - 16]
2395 add fp, sp, #<frame_size> - hardfp_offset
2396 stp {cs_reg}, [fp, #-16] etc.
2397
2398 sub sp, sp, <final_adjustment_if_any>
2399 */
43e9d192 2400 HOST_WIDE_INT frame_size, offset;
1c960e02 2401 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
dd991abb 2402 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2403 rtx_insn *insn;
43e9d192
IB
2404
2405 aarch64_layout_frame ();
43e9d192 2406
dd991abb
RH
2407 offset = frame_size = cfun->machine->frame.frame_size;
2408 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2409 fp_offset = frame_size - hard_fp_offset;
43e9d192 2410
dd991abb
RH
2411 if (flag_stack_usage_info)
2412 current_function_static_stack_size = frame_size;
43e9d192 2413
44c0e7b9 2414 /* Store pairs and load pairs have an offset range of only -512 to 504. */
43e9d192
IB
2415 if (offset >= 512)
2416 {
2417 /* When the frame has a large size, an initial decrease is done on
2418 the stack pointer to jump over the callee-allocated save area for
2419 register varargs, the local variable area and/or the callee-saved
2420 register area. This will allow the pre-index write-back
2421 store pair instructions to be used for setting up the stack frame
2422 efficiently. */
dd991abb 2423 offset = hard_fp_offset;
43e9d192
IB
2424 if (offset >= 512)
2425 offset = cfun->machine->frame.saved_regs_size;
2426
2427 frame_size -= (offset + crtl->outgoing_args_size);
2428 fp_offset = 0;
2429
2430 if (frame_size >= 0x1000000)
2431 {
2432 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2433 emit_move_insn (op0, GEN_INT (-frame_size));
dd991abb
RH
2434 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2435
2436 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2437 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2438 plus_constant (Pmode, stack_pointer_rtx,
2439 -frame_size)));
2440 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
2441 }
2442 else if (frame_size > 0)
2443 {
dd991abb
RH
2444 int hi_ofs = frame_size & 0xfff000;
2445 int lo_ofs = frame_size & 0x000fff;
2446
2447 if (hi_ofs)
43e9d192
IB
2448 {
2449 insn = emit_insn (gen_add2_insn
dd991abb 2450 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
43e9d192
IB
2451 RTX_FRAME_RELATED_P (insn) = 1;
2452 }
dd991abb 2453 if (lo_ofs)
43e9d192
IB
2454 {
2455 insn = emit_insn (gen_add2_insn
dd991abb 2456 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
43e9d192
IB
2457 RTX_FRAME_RELATED_P (insn) = 1;
2458 }
2459 }
2460 }
2461 else
2462 frame_size = -1;
2463
2464 if (offset > 0)
2465 {
ae13fce3
JW
2466 bool skip_wb = false;
2467
43e9d192
IB
2468 if (frame_pointer_needed)
2469 {
c5e1f66e
JW
2470 skip_wb = true;
2471
43e9d192
IB
2472 if (fp_offset)
2473 {
2474 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2475 GEN_INT (-offset)));
2476 RTX_FRAME_RELATED_P (insn) = 1;
80c11907
JW
2477
2478 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
c5e1f66e 2479 R30_REGNUM, false);
43e9d192
IB
2480 }
2481 else
80c11907 2482 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
43e9d192
IB
2483
2484 /* Set up frame pointer to point to the location of the
2485 previous frame pointer on the stack. */
2486 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2487 stack_pointer_rtx,
2488 GEN_INT (fp_offset)));
43e9d192 2489 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 2490 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192
IB
2491 }
2492 else
2493 {
c5e1f66e
JW
2494 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2495 unsigned reg2 = cfun->machine->frame.wb_candidate2;
80c11907 2496
c5e1f66e
JW
2497 if (fp_offset
2498 || reg1 == FIRST_PSEUDO_REGISTER
2499 || (reg2 == FIRST_PSEUDO_REGISTER
2500 && offset >= 256))
2501 {
2502 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2503 GEN_INT (-offset)));
2504 RTX_FRAME_RELATED_P (insn) = 1;
2505 }
2506 else
2507 {
ef4bddc2 2508 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
c5e1f66e
JW
2509
2510 skip_wb = true;
2511
2512 if (reg2 == FIRST_PSEUDO_REGISTER)
2513 aarch64_pushwb_single_reg (mode1, reg1, offset);
2514 else
2515 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2516 }
43e9d192
IB
2517 }
2518
c5e1f66e
JW
2519 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2520 skip_wb);
ae13fce3
JW
2521 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2522 skip_wb);
43e9d192
IB
2523 }
2524
2525 /* when offset >= 512,
2526 sub sp, sp, #<outgoing_args_size> */
2527 if (frame_size > -1)
2528 {
2529 if (crtl->outgoing_args_size > 0)
2530 {
2531 insn = emit_insn (gen_add2_insn
2532 (stack_pointer_rtx,
2533 GEN_INT (- crtl->outgoing_args_size)));
2534 RTX_FRAME_RELATED_P (insn) = 1;
2535 }
2536 }
2537}
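Continuing the illustrative 48-byte frame from the layout example (frame pointer in use, x19 saved, no outgoing arguments), the prologue would expand to roughly the following; this is a sketch, not output from an actual compilation:

/* Roughly:
	stp	x29, x30, [sp, #-48]!	(pre-index push of the frame record)
	add	x29, sp, #0		(set up the frame pointer)
	str	x19, [sp, #16]		(remaining callee-saved register)
   Frames whose offset is 512 or more instead start with an explicit
   sub sp, sp, #... before the frame record is stored.  */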
2538
4f942779
RL
2539/* Return TRUE if we can use a simple_return insn.
2540
2541 This function checks whether the callee-saved stack is empty, which
2542 means no restore actions are needed. The pro_and_epilogue pass uses
2543 this to check whether the shrink-wrapping optimization is feasible. */
2544
2545bool
2546aarch64_use_return_insn_p (void)
2547{
2548 if (!reload_completed)
2549 return false;
2550
2551 if (crtl->profile)
2552 return false;
2553
2554 aarch64_layout_frame ();
2555
2556 return cfun->machine->frame.frame_size == 0;
2557}
2558
43e9d192
IB
2559/* Generate the epilogue instructions for returning from a function. */
2560void
2561aarch64_expand_epilogue (bool for_sibcall)
2562{
1c960e02 2563 HOST_WIDE_INT frame_size, offset;
43e9d192 2564 HOST_WIDE_INT fp_offset;
dd991abb 2565 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2566 rtx_insn *insn;
7e8c2bd5
JW
2567 /* We need to add a memory barrier to prevent reads from the deallocated stack. */
2568 bool need_barrier_p = (get_frame_size () != 0
2569 || cfun->machine->frame.saved_varargs_size);
43e9d192
IB
2570
2571 aarch64_layout_frame ();
43e9d192 2572
1c960e02 2573 offset = frame_size = cfun->machine->frame.frame_size;
dd991abb
RH
2574 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2575 fp_offset = frame_size - hard_fp_offset;
44c0e7b9
YZ
2576
2577 /* Store pairs and load pairs have an offset range of only -512 to 504. */
43e9d192
IB
2578 if (offset >= 512)
2579 {
dd991abb 2580 offset = hard_fp_offset;
43e9d192
IB
2581 if (offset >= 512)
2582 offset = cfun->machine->frame.saved_regs_size;
2583
2584 frame_size -= (offset + crtl->outgoing_args_size);
2585 fp_offset = 0;
2586 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2587 {
2588 insn = emit_insn (gen_add2_insn
2589 (stack_pointer_rtx,
2590 GEN_INT (crtl->outgoing_args_size)));
2591 RTX_FRAME_RELATED_P (insn) = 1;
2592 }
2593 }
2594 else
2595 frame_size = -1;
2596
2597 /* If there were outgoing arguments or we've done dynamic stack
2598 allocation, then restore the stack pointer from the frame
2599 pointer. This is at most one insn and more efficient than using
2600 GCC's internal mechanism. */
2601 if (frame_pointer_needed
2602 && (crtl->outgoing_args_size || cfun->calls_alloca))
2603 {
7e8c2bd5
JW
2604 if (cfun->calls_alloca)
2605 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2606
43e9d192
IB
2607 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2608 hard_frame_pointer_rtx,
8f454e9f
JW
2609 GEN_INT (0)));
2610 offset = offset - fp_offset;
43e9d192
IB
2611 }
2612
43e9d192
IB
2613 if (offset > 0)
2614 {
4b92caa1
JW
2615 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2616 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2617 bool skip_wb = true;
dd991abb 2618 rtx cfi_ops = NULL;
4b92caa1 2619
43e9d192 2620 if (frame_pointer_needed)
4b92caa1
JW
2621 fp_offset = 0;
2622 else if (fp_offset
2623 || reg1 == FIRST_PSEUDO_REGISTER
2624 || (reg2 == FIRST_PSEUDO_REGISTER
2625 && offset >= 256))
2626 skip_wb = false;
2627
2628 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
dd991abb 2629 skip_wb, &cfi_ops);
4b92caa1 2630 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
dd991abb 2631 skip_wb, &cfi_ops);
4b92caa1 2632
7e8c2bd5
JW
2633 if (need_barrier_p)
2634 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2635
4b92caa1 2636 if (skip_wb)
43e9d192 2637 {
ef4bddc2 2638 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
dd991abb 2639 rtx rreg1 = gen_rtx_REG (mode1, reg1);
4b92caa1 2640
dd991abb 2641 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
4b92caa1 2642 if (reg2 == FIRST_PSEUDO_REGISTER)
dd991abb
RH
2643 {
2644 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2645 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2646 mem = gen_rtx_MEM (mode1, mem);
2647 insn = emit_move_insn (rreg1, mem);
2648 }
4b92caa1
JW
2649 else
2650 {
dd991abb 2651 rtx rreg2 = gen_rtx_REG (mode1, reg2);
4b92caa1 2652
dd991abb
RH
2653 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2654 insn = emit_insn (aarch64_gen_loadwb_pair
2655 (mode1, stack_pointer_rtx, rreg1,
2656 rreg2, offset));
4b92caa1 2657 }
43e9d192 2658 }
43e9d192
IB
2659 else
2660 {
2661 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2662 GEN_INT (offset)));
43e9d192 2663 }
43e9d192 2664
dd991abb
RH
2665 /* Reset the CFA to be SP + FRAME_SIZE. */
2666 rtx new_cfa = stack_pointer_rtx;
2667 if (frame_size > 0)
2668 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2669 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2670 REG_NOTES (insn) = cfi_ops;
43e9d192 2671 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
2672 }
2673
dd991abb 2674 if (frame_size > 0)
43e9d192 2675 {
7e8c2bd5
JW
2676 if (need_barrier_p)
2677 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2678
43e9d192
IB
2679 if (frame_size >= 0x1000000)
2680 {
2681 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2682 emit_move_insn (op0, GEN_INT (frame_size));
dd991abb 2683 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
43e9d192 2684 }
dd991abb 2685 else
43e9d192 2686 {
dd991abb
RH
2687 int hi_ofs = frame_size & 0xfff000;
2688 int lo_ofs = frame_size & 0x000fff;
2689
2690 if (hi_ofs && lo_ofs)
43e9d192
IB
2691 {
2692 insn = emit_insn (gen_add2_insn
dd991abb 2693 (stack_pointer_rtx, GEN_INT (hi_ofs)));
43e9d192 2694 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 2695 frame_size = lo_ofs;
43e9d192 2696 }
dd991abb
RH
2697 insn = emit_insn (gen_add2_insn
2698 (stack_pointer_rtx, GEN_INT (frame_size)));
43e9d192
IB
2699 }
2700
dd991abb
RH
2701 /* Reset the CFA to be SP + 0. */
2702 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2703 RTX_FRAME_RELATED_P (insn) = 1;
2704 }
2705
2706 /* Stack adjustment for exception handler. */
2707 if (crtl->calls_eh_return)
2708 {
2709 /* We need to unwind the stack by the offset computed by
2710 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2711 to be SP; letting the CFA move during this adjustment
2712 is just as correct as retaining the CFA from the body
2713 of the function. Therefore, do nothing special. */
2714 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
2715 }
2716
2717 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2718 if (!for_sibcall)
2719 emit_jump_insn (ret_rtx);
2720}
2721
2722/* Return the place to copy the exception unwinding return address to.
2723 This will probably be a stack slot, but could (in theory) be the
2724 return register. */
2725rtx
2726aarch64_final_eh_return_addr (void)
2727{
1c960e02
MS
2728 HOST_WIDE_INT fp_offset;
2729
43e9d192 2730 aarch64_layout_frame ();
1c960e02
MS
2731
2732 fp_offset = cfun->machine->frame.frame_size
2733 - cfun->machine->frame.hard_fp_offset;
43e9d192
IB
2734
2735 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2736 return gen_rtx_REG (DImode, LR_REGNUM);
2737
2738 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2739 result in a store to save LR introduced by builtin_eh_return () being
2740 incorrectly deleted because the alias is not detected.
2741 So in the calculation of the address to copy the exception unwinding
2742 return address to, we note 2 cases.
2743 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2744 we return a SP-relative location since all the addresses are SP-relative
2745 in this case. This prevents the store from being optimized away.
2746 If the fp_offset is not 0, then the addresses will be FP-relative and
2747 therefore we return a FP-relative location. */
2748
2749 if (frame_pointer_needed)
2750 {
2751 if (fp_offset)
2752 return gen_frame_mem (DImode,
2753 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2754 else
2755 return gen_frame_mem (DImode,
2756 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2757 }
2758
2759 /* If FP is not needed, we calculate the location of LR, which would be
2760 at the top of the saved registers block. */
2761
2762 return gen_frame_mem (DImode,
2763 plus_constant (Pmode,
2764 stack_pointer_rtx,
2765 fp_offset
2766 + cfun->machine->frame.saved_regs_size
2767 - 2 * UNITS_PER_WORD));
2768}
2769
9dfc162c
JG
2770/* Possibly output code to build up a constant in a register. For
2771 the benefit of the costs infrastructure, returns the number of
2772 instructions which would be emitted. GENERATE inhibits or
2773 enables code generation. */
2774
2775static int
2776aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
43e9d192 2777{
9dfc162c
JG
2778 int insns = 0;
2779
43e9d192 2780 if (aarch64_bitmask_imm (val, DImode))
9dfc162c
JG
2781 {
2782 if (generate)
2783 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2784 insns = 1;
2785 }
43e9d192
IB
2786 else
2787 {
2788 int i;
2789 int ncount = 0;
2790 int zcount = 0;
2791 HOST_WIDE_INT valp = val >> 16;
2792 HOST_WIDE_INT valm;
2793 HOST_WIDE_INT tval;
2794
2795 for (i = 16; i < 64; i += 16)
2796 {
2797 valm = (valp & 0xffff);
2798
2799 if (valm != 0)
2800 ++ zcount;
2801
2802 if (valm != 0xffff)
2803 ++ ncount;
2804
2805 valp >>= 16;
2806 }
2807
2808 /* zcount contains the number of additional MOVK instructions
2809 required if the constant is built up with an initial MOVZ instruction,
2810 while ncount is the number of MOVK instructions required if starting
2811 with a MOVN instruction. Choose the sequence that yields the fewest
2812 instructions, preferring MOVZ instructions when the two counts are
2813 the same. */
2814 if (ncount < zcount)
2815 {
9dfc162c
JG
2816 if (generate)
2817 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2818 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
43e9d192 2819 tval = 0xffff;
9dfc162c 2820 insns++;
43e9d192
IB
2821 }
2822 else
2823 {
9dfc162c
JG
2824 if (generate)
2825 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2826 GEN_INT (val & 0xffff));
43e9d192 2827 tval = 0;
9dfc162c 2828 insns++;
43e9d192
IB
2829 }
2830
2831 val >>= 16;
2832
2833 for (i = 16; i < 64; i += 16)
2834 {
2835 if ((val & 0xffff) != tval)
9dfc162c
JG
2836 {
2837 if (generate)
2838 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2839 GEN_INT (i),
2840 GEN_INT (val & 0xffff)));
2841 insns++;
2842 }
43e9d192
IB
2843 val >>= 16;
2844 }
2845 }
9dfc162c 2846 return insns;
43e9d192
IB
2847}
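A worked example may help; the constant below is arbitrary and xN stands for whatever destination register REGNUM names (illustrative only, not taken from real output):

/* For val = 0x123456789abc the upper three 16-bit chunks are 0x5678,
   0x1234 and 0x0000, giving zcount = 2 and ncount = 3, so the MOVZ
   sequence is chosen and three instructions are counted/emitted:
	mov	xN, #0x9abc
	movk	xN, #0x5678, lsl #16
	movk	xN, #0x1234, lsl #32  */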
2848
2849static void
d9600ae5 2850aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
43e9d192
IB
2851{
2852 HOST_WIDE_INT mdelta = delta;
d9600ae5
SN
2853 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2854 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
43e9d192
IB
2855
2856 if (mdelta < 0)
2857 mdelta = -mdelta;
2858
2859 if (mdelta >= 4096 * 4096)
2860 {
9dfc162c 2861 (void) aarch64_build_constant (scratchreg, delta, true);
d9600ae5 2862 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
43e9d192
IB
2863 }
2864 else if (mdelta > 0)
2865 {
43e9d192 2866 if (mdelta >= 4096)
d9600ae5
SN
2867 {
2868 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2869 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2870 if (delta < 0)
2871 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2872 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2873 else
2874 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2875 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2876 }
43e9d192 2877 if (mdelta % 4096 != 0)
d9600ae5
SN
2878 {
2879 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2880 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2881 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2882 }
43e9d192
IB
2883 }
2884}
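As an illustration of the split performed here (the delta is chosen arbitrarily, not taken from the source):

/* Illustrative example: for delta = 10000, mdelta is below 4096 * 4096,
   so the adjustment is made in two steps as 10000 = 2 * 4096 + 1808 --
   the scratch register is loaded with 2 and added in shifted left by 12
   (+8192), then the remaining 1808 is added directly.  */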
2885
2886/* Output code to add DELTA to the first argument, and then jump
2887 to FUNCTION. Used for C++ multiple inheritance. */
2888static void
2889aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2890 HOST_WIDE_INT delta,
2891 HOST_WIDE_INT vcall_offset,
2892 tree function)
2893{
2894 /* The this pointer is always in x0. Note that this differs from
2895 Arm where the this pointer may be bumped to r1 if r0 is required
2896 to return a pointer to an aggregate. On AArch64 a result value
2897 pointer will be in x8. */
2898 int this_regno = R0_REGNUM;
5d8a22a5
DM
2899 rtx this_rtx, temp0, temp1, addr, funexp;
2900 rtx_insn *insn;
43e9d192 2901
75f1d6fc
SN
2902 reload_completed = 1;
2903 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
2904
2905 if (vcall_offset == 0)
d9600ae5 2906 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2907 else
2908 {
28514dda 2909 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 2910
75f1d6fc
SN
2911 this_rtx = gen_rtx_REG (Pmode, this_regno);
2912 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2913 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 2914
75f1d6fc
SN
2915 addr = this_rtx;
2916 if (delta != 0)
2917 {
2918 if (delta >= -256 && delta < 256)
2919 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2920 plus_constant (Pmode, this_rtx, delta));
2921 else
d9600ae5 2922 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2923 }
2924
28514dda
YZ
2925 if (Pmode == ptr_mode)
2926 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2927 else
2928 aarch64_emit_move (temp0,
2929 gen_rtx_ZERO_EXTEND (Pmode,
2930 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 2931
28514dda 2932 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 2933 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
2934 else
2935 {
9dfc162c 2936 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
75f1d6fc 2937 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
2938 }
2939
28514dda
YZ
2940 if (Pmode == ptr_mode)
2941 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2942 else
2943 aarch64_emit_move (temp1,
2944 gen_rtx_SIGN_EXTEND (Pmode,
2945 gen_rtx_MEM (ptr_mode, addr)));
2946
75f1d6fc 2947 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
2948 }
2949
75f1d6fc
SN
2950 /* Generate a tail call to the target function. */
2951 if (!TREE_USED (function))
2952 {
2953 assemble_external (function);
2954 TREE_USED (function) = 1;
2955 }
2956 funexp = XEXP (DECL_RTL (function), 0);
2957 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2958 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2959 SIBLING_CALL_P (insn) = 1;
2960
2961 insn = get_insns ();
2962 shorten_branches (insn);
2963 final_start_function (insn, file, 1);
2964 final (insn, file, 1);
43e9d192 2965 final_end_function ();
75f1d6fc
SN
2966
2967 /* Stop pretending to be a post-reload pass. */
2968 reload_completed = 0;
43e9d192
IB
2969}
2970
43e9d192
IB
2971static bool
2972aarch64_tls_referenced_p (rtx x)
2973{
2974 if (!TARGET_HAVE_TLS)
2975 return false;
e7de8563
RS
2976 subrtx_iterator::array_type array;
2977 FOR_EACH_SUBRTX (iter, array, x, ALL)
2978 {
2979 const_rtx x = *iter;
2980 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
2981 return true;
2982 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2983 TLS offsets, not real symbol references. */
2984 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
2985 iter.skip_subrtxes ();
2986 }
2987 return false;
43e9d192
IB
2988}
2989
2990
2991static int
2992aarch64_bitmasks_cmp (const void *i1, const void *i2)
2993{
2994 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2995 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2996
2997 if (*imm1 < *imm2)
2998 return -1;
2999 if (*imm1 > *imm2)
3000 return +1;
3001 return 0;
3002}
3003
3004
3005static void
3006aarch64_build_bitmask_table (void)
3007{
3008 unsigned HOST_WIDE_INT mask, imm;
3009 unsigned int log_e, e, s, r;
3010 unsigned int nimms = 0;
3011
3012 for (log_e = 1; log_e <= 6; log_e++)
3013 {
3014 e = 1 << log_e;
3015 if (e == 64)
3016 mask = ~(HOST_WIDE_INT) 0;
3017 else
3018 mask = ((HOST_WIDE_INT) 1 << e) - 1;
3019 for (s = 1; s < e; s++)
3020 {
3021 for (r = 0; r < e; r++)
3022 {
3023 /* set s consecutive bits to 1 (s < 64) */
3024 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
3025 /* rotate right by r */
3026 if (r != 0)
3027 imm = ((imm >> r) | (imm << (e - r))) & mask;
3028 /* replicate the constant depending on SIMD size */
3029 switch (log_e) {
3030 case 1: imm |= (imm << 2);
3031 case 2: imm |= (imm << 4);
3032 case 3: imm |= (imm << 8);
3033 case 4: imm |= (imm << 16);
3034 case 5: imm |= (imm << 32);
3035 case 6:
3036 break;
3037 default:
3038 gcc_unreachable ();
3039 }
3040 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
3041 aarch64_bitmasks[nimms++] = imm;
3042 }
3043 }
3044 }
3045
3046 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
3047 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
3048 aarch64_bitmasks_cmp);
3049}
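For experimenting outside the compiler, a minimal standalone sketch of how one table entry is formed, using the same element size e, run length s and rotation r as the loops above (hypothetical helper name, plain C, not part of this file):

#include <stdint.h>

static uint64_t
bitmask_entry_example (unsigned e, unsigned s, unsigned r)
{
  uint64_t mask = (e == 64) ? ~(uint64_t) 0 : (((uint64_t) 1 << e) - 1);
  uint64_t imm = ((uint64_t) 1 << s) - 1;	/* s consecutive set bits */
  if (r != 0)
    imm = ((imm >> r) | (imm << (e - r))) & mask;	/* rotate within e bits */
  for (unsigned width = e; width < 64; width *= 2)	/* replicate to 64 bits */
    imm |= imm << width;
  return imm;
}

/* Illustrative check: bitmask_entry_example (8, 3, 1) yields
   0x8383838383838383, a valid bitmask immediate.  */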
3050
3051
3052/* Return true if val can be encoded as a 12-bit unsigned immediate with
3053 a left shift of 0 or 12 bits. */
3054bool
3055aarch64_uimm12_shift (HOST_WIDE_INT val)
3056{
3057 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3058 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3059 );
3060}
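For trying candidate values by hand, a minimal standalone sketch of the same test (hypothetical helper name, plain C, not part of this file):

#include <stdbool.h>
#include <stdint.h>

static bool
uimm12_shift_example (int64_t val)
{
  /* Accept a 12-bit field either unshifted or shifted left by 12.  */
  return (val & 0xfffll) == val || (val & (0xfffll << 12)) == val;
}

/* Illustrative results: 0xfff and 0x7ff000 are accepted; 0x1001 is
   rejected because it needs nonzero bits in both fields.  */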
3061
3062
3063/* Return true if val is an immediate that can be loaded into a
3064 register by a MOVZ instruction. */
3065static bool
ef4bddc2 3066aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3067{
3068 if (GET_MODE_SIZE (mode) > 4)
3069 {
3070 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3071 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3072 return 1;
3073 }
3074 else
3075 {
3076 /* Ignore sign extension. */
3077 val &= (HOST_WIDE_INT) 0xffffffff;
3078 }
3079 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3080 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3081}
3082
3083
3084/* Return true if val is a valid bitmask immediate. */
3085bool
ef4bddc2 3086aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3087{
3088 if (GET_MODE_SIZE (mode) < 8)
3089 {
3090 /* Replicate bit pattern. */
3091 val &= (HOST_WIDE_INT) 0xffffffff;
3092 val |= val << 32;
3093 }
3094 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
3095 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
3096}
3097
3098
3099/* Return true if val is an immediate that can be loaded into a
3100 register in a single instruction. */
3101bool
ef4bddc2 3102aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3103{
3104 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
3105 return 1;
3106 return aarch64_bitmask_imm (val, mode);
3107}
3108
3109static bool
ef4bddc2 3110aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
3111{
3112 rtx base, offset;
7eda14e1 3113
43e9d192
IB
3114 if (GET_CODE (x) == HIGH)
3115 return true;
3116
3117 split_const (x, &base, &offset);
3118 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 3119 {
f8b756b7 3120 if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
28514dda
YZ
3121 != SYMBOL_FORCE_TO_MEM)
3122 return true;
3123 else
3124 /* Avoid generating a 64-bit relocation in ILP32; leave
3125 to aarch64_expand_mov_immediate to handle it properly. */
3126 return mode != ptr_mode;
3127 }
43e9d192
IB
3128
3129 return aarch64_tls_referenced_p (x);
3130}
3131
3132/* Return true if register REGNO is a valid index register.
3133 STRICT_P is true if REG_OK_STRICT is in effect. */
3134
3135bool
3136aarch64_regno_ok_for_index_p (int regno, bool strict_p)
3137{
3138 if (!HARD_REGISTER_NUM_P (regno))
3139 {
3140 if (!strict_p)
3141 return true;
3142
3143 if (!reg_renumber)
3144 return false;
3145
3146 regno = reg_renumber[regno];
3147 }
3148 return GP_REGNUM_P (regno);
3149}
3150
3151/* Return true if register REGNO is a valid base register for mode MODE.
3152 STRICT_P is true if REG_OK_STRICT is in effect. */
3153
3154bool
3155aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3156{
3157 if (!HARD_REGISTER_NUM_P (regno))
3158 {
3159 if (!strict_p)
3160 return true;
3161
3162 if (!reg_renumber)
3163 return false;
3164
3165 regno = reg_renumber[regno];
3166 }
3167
3168 /* The fake registers will be eliminated to either the stack or
3169 hard frame pointer, both of which are usually valid base registers.
3170 Reload deals with the cases where the eliminated form isn't valid. */
3171 return (GP_REGNUM_P (regno)
3172 || regno == SP_REGNUM
3173 || regno == FRAME_POINTER_REGNUM
3174 || regno == ARG_POINTER_REGNUM);
3175}
3176
3177/* Return true if X is a valid base register for mode MODE.
3178 STRICT_P is true if REG_OK_STRICT is in effect. */
3179
3180static bool
3181aarch64_base_register_rtx_p (rtx x, bool strict_p)
3182{
3183 if (!strict_p && GET_CODE (x) == SUBREG)
3184 x = SUBREG_REG (x);
3185
3186 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3187}
3188
3189/* Return true if address offset is a valid index. If it is, fill in INFO
3190 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3191
3192static bool
3193aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 3194 machine_mode mode, bool strict_p)
43e9d192
IB
3195{
3196 enum aarch64_address_type type;
3197 rtx index;
3198 int shift;
3199
3200 /* (reg:P) */
3201 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3202 && GET_MODE (x) == Pmode)
3203 {
3204 type = ADDRESS_REG_REG;
3205 index = x;
3206 shift = 0;
3207 }
3208 /* (sign_extend:DI (reg:SI)) */
3209 else if ((GET_CODE (x) == SIGN_EXTEND
3210 || GET_CODE (x) == ZERO_EXTEND)
3211 && GET_MODE (x) == DImode
3212 && GET_MODE (XEXP (x, 0)) == SImode)
3213 {
3214 type = (GET_CODE (x) == SIGN_EXTEND)
3215 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3216 index = XEXP (x, 0);
3217 shift = 0;
3218 }
3219 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3220 else if (GET_CODE (x) == MULT
3221 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3222 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3223 && GET_MODE (XEXP (x, 0)) == DImode
3224 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3225 && CONST_INT_P (XEXP (x, 1)))
3226 {
3227 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3228 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3229 index = XEXP (XEXP (x, 0), 0);
3230 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3231 }
3232 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3233 else if (GET_CODE (x) == ASHIFT
3234 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3235 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3236 && GET_MODE (XEXP (x, 0)) == DImode
3237 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3238 && CONST_INT_P (XEXP (x, 1)))
3239 {
3240 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3241 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3242 index = XEXP (XEXP (x, 0), 0);
3243 shift = INTVAL (XEXP (x, 1));
3244 }
3245 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3246 else if ((GET_CODE (x) == SIGN_EXTRACT
3247 || GET_CODE (x) == ZERO_EXTRACT)
3248 && GET_MODE (x) == DImode
3249 && GET_CODE (XEXP (x, 0)) == MULT
3250 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3251 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3252 {
3253 type = (GET_CODE (x) == SIGN_EXTRACT)
3254 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3255 index = XEXP (XEXP (x, 0), 0);
3256 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3257 if (INTVAL (XEXP (x, 1)) != 32 + shift
3258 || INTVAL (XEXP (x, 2)) != 0)
3259 shift = -1;
3260 }
3261 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3262 (const_int 0xffffffff<<shift)) */
3263 else if (GET_CODE (x) == AND
3264 && GET_MODE (x) == DImode
3265 && GET_CODE (XEXP (x, 0)) == MULT
3266 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3267 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3268 && CONST_INT_P (XEXP (x, 1)))
3269 {
3270 type = ADDRESS_REG_UXTW;
3271 index = XEXP (XEXP (x, 0), 0);
3272 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3273 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3274 shift = -1;
3275 }
3276 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3277 else if ((GET_CODE (x) == SIGN_EXTRACT
3278 || GET_CODE (x) == ZERO_EXTRACT)
3279 && GET_MODE (x) == DImode
3280 && GET_CODE (XEXP (x, 0)) == ASHIFT
3281 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3282 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3283 {
3284 type = (GET_CODE (x) == SIGN_EXTRACT)
3285 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3286 index = XEXP (XEXP (x, 0), 0);
3287 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3288 if (INTVAL (XEXP (x, 1)) != 32 + shift
3289 || INTVAL (XEXP (x, 2)) != 0)
3290 shift = -1;
3291 }
3292 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3293 (const_int 0xffffffff<<shift)) */
3294 else if (GET_CODE (x) == AND
3295 && GET_MODE (x) == DImode
3296 && GET_CODE (XEXP (x, 0)) == ASHIFT
3297 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3298 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3299 && CONST_INT_P (XEXP (x, 1)))
3300 {
3301 type = ADDRESS_REG_UXTW;
3302 index = XEXP (XEXP (x, 0), 0);
3303 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3304 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3305 shift = -1;
3306 }
3307 /* (mult:P (reg:P) (const_int scale)) */
3308 else if (GET_CODE (x) == MULT
3309 && GET_MODE (x) == Pmode
3310 && GET_MODE (XEXP (x, 0)) == Pmode
3311 && CONST_INT_P (XEXP (x, 1)))
3312 {
3313 type = ADDRESS_REG_REG;
3314 index = XEXP (x, 0);
3315 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3316 }
3317 /* (ashift:P (reg:P) (const_int shift)) */
3318 else if (GET_CODE (x) == ASHIFT
3319 && GET_MODE (x) == Pmode
3320 && GET_MODE (XEXP (x, 0)) == Pmode
3321 && CONST_INT_P (XEXP (x, 1)))
3322 {
3323 type = ADDRESS_REG_REG;
3324 index = XEXP (x, 0);
3325 shift = INTVAL (XEXP (x, 1));
3326 }
3327 else
3328 return false;
3329
3330 if (GET_CODE (index) == SUBREG)
3331 index = SUBREG_REG (index);
3332
3333 if ((shift == 0 ||
3334 (shift > 0 && shift <= 3
3335 && (1 << shift) == GET_MODE_SIZE (mode)))
3336 && REG_P (index)
3337 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3338 {
3339 info->type = type;
3340 info->offset = index;
3341 info->shift = shift;
3342 return true;
3343 }
3344
3345 return false;
3346}
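An illustrative summary of what the shift check at the end accepts (example registers are arbitrary):

/* Illustrative example: for a 4-byte (SImode) access, index forms such as
   x1, "x1, lsl #2" and "w1, sxtw #2" classify successfully, because the
   shift is either 0 or satisfies (1 << shift) == GET_MODE_SIZE (mode);
   "x1, lsl #3" would be rejected for SImode.  */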
3347
44707478 3348bool
ef4bddc2 3349aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3350{
3351 return (offset >= -64 * GET_MODE_SIZE (mode)
3352 && offset < 64 * GET_MODE_SIZE (mode)
3353 && offset % GET_MODE_SIZE (mode) == 0);
3354}
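A concrete reading of this predicate (illustrative only):

/* Illustrative example: for DImode (8-byte) accesses this accepts offsets
   that are multiples of 8 in the range [-512, 504], matching the LDP/STP
   offset range mentioned earlier.  */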
3355
3356static inline bool
ef4bddc2 3357offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
3358 HOST_WIDE_INT offset)
3359{
3360 return offset >= -256 && offset < 256;
3361}
3362
3363static inline bool
ef4bddc2 3364offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3365{
3366 return (offset >= 0
3367 && offset < 4096 * GET_MODE_SIZE (mode)
3368 && offset % GET_MODE_SIZE (mode) == 0);
3369}
3370
3371/* Return true if X is a valid address for machine mode MODE. If it is,
3372 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3373 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3374
3375static bool
3376aarch64_classify_address (struct aarch64_address_info *info,
ef4bddc2 3377 rtx x, machine_mode mode,
43e9d192
IB
3378 RTX_CODE outer_code, bool strict_p)
3379{
3380 enum rtx_code code = GET_CODE (x);
3381 rtx op0, op1;
2d8c6dc1
AH
3382
3383 /* On BE, we use load/store pair for all large int mode load/stores. */
3384 bool load_store_pair_p = (outer_code == PARALLEL
3385 || (BYTES_BIG_ENDIAN
3386 && aarch64_vect_struct_mode_p (mode)));
3387
43e9d192 3388 bool allow_reg_index_p =
2d8c6dc1
AH
3389 !load_store_pair_p
3390 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
3391 && !aarch64_vect_struct_mode_p (mode);
3392
3393 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
3394 REG addressing. */
3395 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
43e9d192
IB
3396 && (code != POST_INC && code != REG))
3397 return false;
3398
3399 switch (code)
3400 {
3401 case REG:
3402 case SUBREG:
3403 info->type = ADDRESS_REG_IMM;
3404 info->base = x;
3405 info->offset = const0_rtx;
3406 return aarch64_base_register_rtx_p (x, strict_p);
3407
3408 case PLUS:
3409 op0 = XEXP (x, 0);
3410 op1 = XEXP (x, 1);
15c0c5c9
JW
3411
3412 if (! strict_p
4aa81c2e 3413 && REG_P (op0)
15c0c5c9
JW
3414 && (op0 == virtual_stack_vars_rtx
3415 || op0 == frame_pointer_rtx
3416 || op0 == arg_pointer_rtx)
4aa81c2e 3417 && CONST_INT_P (op1))
15c0c5c9
JW
3418 {
3419 info->type = ADDRESS_REG_IMM;
3420 info->base = op0;
3421 info->offset = op1;
3422
3423 return true;
3424 }
3425
43e9d192
IB
3426 if (GET_MODE_SIZE (mode) != 0
3427 && CONST_INT_P (op1)
3428 && aarch64_base_register_rtx_p (op0, strict_p))
3429 {
3430 HOST_WIDE_INT offset = INTVAL (op1);
3431
3432 info->type = ADDRESS_REG_IMM;
3433 info->base = op0;
3434 info->offset = op1;
3435
3436 /* TImode and TFmode values are allowed in both pairs of X
3437 registers and individual Q registers. The available
3438 address modes are:
3439 X,X: 7-bit signed scaled offset
3440 Q: 9-bit signed offset
3441 We conservatively require an offset representable in either mode.
3442 */
3443 if (mode == TImode || mode == TFmode)
44707478 3444 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3445 && offset_9bit_signed_unscaled_p (mode, offset));
3446
2d8c6dc1
AH
3447 /* A 7-bit offset check because OImode will emit an ldp/stp
3448 instruction (only big endian will get here).
3449 For ldp/stp instructions, the offset is scaled for the size of a
3450 single element of the pair. */
3451 if (mode == OImode)
3452 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
3453
3454 /* Three 9/12-bit offset checks because CImode will emit three
3455 ldr/str instructions (only big endian will get here). */
3456 if (mode == CImode)
3457 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3458 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
3459 || offset_12bit_unsigned_scaled_p (V16QImode,
3460 offset + 32)));
3461
3462 /* Two 7-bit offset checks because XImode will emit two ldp/stp
3463 instructions (only big endian will get here). */
3464 if (mode == XImode)
3465 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3466 && aarch64_offset_7bit_signed_scaled_p (TImode,
3467 offset + 32));
3468
3469 if (load_store_pair_p)
43e9d192 3470 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3471 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3472 else
3473 return (offset_9bit_signed_unscaled_p (mode, offset)
3474 || offset_12bit_unsigned_scaled_p (mode, offset));
3475 }
3476
3477 if (allow_reg_index_p)
3478 {
3479 /* Look for base + (scaled/extended) index register. */
3480 if (aarch64_base_register_rtx_p (op0, strict_p)
3481 && aarch64_classify_index (info, op1, mode, strict_p))
3482 {
3483 info->base = op0;
3484 return true;
3485 }
3486 if (aarch64_base_register_rtx_p (op1, strict_p)
3487 && aarch64_classify_index (info, op0, mode, strict_p))
3488 {
3489 info->base = op1;
3490 return true;
3491 }
3492 }
3493
3494 return false;
3495
3496 case POST_INC:
3497 case POST_DEC:
3498 case PRE_INC:
3499 case PRE_DEC:
3500 info->type = ADDRESS_REG_WB;
3501 info->base = XEXP (x, 0);
3502 info->offset = NULL_RTX;
3503 return aarch64_base_register_rtx_p (info->base, strict_p);
3504
3505 case POST_MODIFY:
3506 case PRE_MODIFY:
3507 info->type = ADDRESS_REG_WB;
3508 info->base = XEXP (x, 0);
3509 if (GET_CODE (XEXP (x, 1)) == PLUS
3510 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3511 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3512 && aarch64_base_register_rtx_p (info->base, strict_p))
3513 {
3514 HOST_WIDE_INT offset;
3515 info->offset = XEXP (XEXP (x, 1), 1);
3516 offset = INTVAL (info->offset);
3517
3518 /* TImode and TFmode values are allowed in both pairs of X
3519 registers and individual Q registers. The available
3520 address modes are:
3521 X,X: 7-bit signed scaled offset
3522 Q: 9-bit signed offset
3523 We conservatively require an offset representable in either mode.
3524 */
3525 if (mode == TImode || mode == TFmode)
44707478 3526 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3527 && offset_9bit_signed_unscaled_p (mode, offset));
3528
2d8c6dc1 3529 if (load_store_pair_p)
43e9d192 3530 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3531 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3532 else
3533 return offset_9bit_signed_unscaled_p (mode, offset);
3534 }
3535 return false;
3536
3537 case CONST:
3538 case SYMBOL_REF:
3539 case LABEL_REF:
79517551
SN
3540 /* load literal: pc-relative constant pool entry. Only supported
3541 for SI mode or larger. */
43e9d192 3542 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1
AH
3543
3544 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
3545 {
3546 rtx sym, addend;
3547
3548 split_const (x, &sym, &addend);
3549 return (GET_CODE (sym) == LABEL_REF
3550 || (GET_CODE (sym) == SYMBOL_REF
3551 && CONSTANT_POOL_ADDRESS_P (sym)));
3552 }
3553 return false;
3554
3555 case LO_SUM:
3556 info->type = ADDRESS_LO_SUM;
3557 info->base = XEXP (x, 0);
3558 info->offset = XEXP (x, 1);
3559 if (allow_reg_index_p
3560 && aarch64_base_register_rtx_p (info->base, strict_p))
3561 {
3562 rtx sym, offs;
3563 split_const (info->offset, &sym, &offs);
3564 if (GET_CODE (sym) == SYMBOL_REF
f8b756b7 3565 && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
43e9d192
IB
3566 == SYMBOL_SMALL_ABSOLUTE))
3567 {
3568 /* The symbol and offset must be aligned to the access size. */
3569 unsigned int align;
3570 unsigned int ref_size;
3571
3572 if (CONSTANT_POOL_ADDRESS_P (sym))
3573 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3574 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3575 {
3576 tree exp = SYMBOL_REF_DECL (sym);
3577 align = TYPE_ALIGN (TREE_TYPE (exp));
3578 align = CONSTANT_ALIGNMENT (exp, align);
3579 }
3580 else if (SYMBOL_REF_DECL (sym))
3581 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
3582 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3583 && SYMBOL_REF_BLOCK (sym) != NULL)
3584 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
3585 else
3586 align = BITS_PER_UNIT;
3587
3588 ref_size = GET_MODE_SIZE (mode);
3589 if (ref_size == 0)
3590 ref_size = GET_MODE_SIZE (DImode);
3591
3592 return ((INTVAL (offs) & (ref_size - 1)) == 0
3593 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3594 }
3595 }
3596 return false;
3597
3598 default:
3599 return false;
3600 }
3601}
3602
3603bool
3604aarch64_symbolic_address_p (rtx x)
3605{
3606 rtx offset;
3607
3608 split_const (x, &x, &offset);
3609 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3610}
3611
3612/* Classify the base of symbolic expression X, given that X appears in
3613 context CONTEXT. */
3614
3615enum aarch64_symbol_type
3616aarch64_classify_symbolic_expression (rtx x,
3617 enum aarch64_symbol_context context)
3618{
3619 rtx offset;
da4f13a4 3620
43e9d192 3621 split_const (x, &x, &offset);
f8b756b7 3622 return aarch64_classify_symbol (x, offset, context);
3623}
3624
3625
3626/* Return TRUE if X is a legitimate address for accessing memory in
3627 mode MODE. */
3628static bool
ef4bddc2 3629aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
3630{
3631 struct aarch64_address_info addr;
3632
3633 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3634}
3635
3636/* Return TRUE if X is a legitimate address for accessing memory in
3637 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3638 pair operation. */
3639bool
ef4bddc2 3640aarch64_legitimate_address_p (machine_mode mode, rtx x,
aef66c94 3641 RTX_CODE outer_code, bool strict_p)
3642{
3643 struct aarch64_address_info addr;
3644
3645 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3646}
3647
3648/* Return TRUE if rtx X is immediate constant 0.0 */
3649bool
3520f7cc 3650aarch64_float_const_zero_rtx_p (rtx x)
3651{
3652 REAL_VALUE_TYPE r;
3653
3654 if (GET_MODE (x) == VOIDmode)
3655 return false;
3656
3657 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3658 if (REAL_VALUE_MINUS_ZERO (r))
3659 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3660 return REAL_VALUES_EQUAL (r, dconst0);
3661}
3662
3663/* Return the fixed registers used for condition codes. */
3664
3665static bool
3666aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3667{
3668 *p1 = CC_REGNUM;
3669 *p2 = INVALID_REGNUM;
3670 return true;
3671}
3672
3673/* Emit call insn with PAT and do aarch64-specific handling. */
3674
d07a3fed 3675void
3676aarch64_emit_call_insn (rtx pat)
3677{
3678 rtx insn = emit_call_insn (pat);
3679
3680 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3681 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3682 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3683}
3684
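/* Return the CC mode that should be used to compare X with Y using
   comparison operator CODE.  */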
ef4bddc2 3685machine_mode
3686aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3687{
3688 /* All floating point compares return CCFP if it is an equality
3689 comparison, and CCFPE otherwise. */
3690 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3691 {
3692 switch (code)
3693 {
3694 case EQ:
3695 case NE:
3696 case UNORDERED:
3697 case ORDERED:
3698 case UNLT:
3699 case UNLE:
3700 case UNGT:
3701 case UNGE:
3702 case UNEQ:
3703 case LTGT:
3704 return CCFPmode;
3705
3706 case LT:
3707 case LE:
3708 case GT:
3709 case GE:
3710 return CCFPEmode;
3711
3712 default:
3713 gcc_unreachable ();
3714 }
3715 }
3716
3717 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3718 && y == const0_rtx
3719 && (code == EQ || code == NE || code == LT || code == GE)
3720 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3721 || GET_CODE (x) == NEG))
3722 return CC_NZmode;
3723
1c992d1e 3724 /* A compare with a shifted operand. Because of canonicalization,
3725 the comparison will have to be swapped when we emit the assembly
3726 code. */
3727 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 3728 && (REG_P (y) || GET_CODE (y) == SUBREG)
3729 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3730 || GET_CODE (x) == LSHIFTRT
1c992d1e 3731 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3732 return CC_SWPmode;
3733
3734 /* Similarly for a negated operand, but we can only do this for
3735 equalities. */
3736 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 3737 && (REG_P (y) || GET_CODE (y) == SUBREG)
3738 && (code == EQ || code == NE)
3739 && GET_CODE (x) == NEG)
3740 return CC_Zmode;
3741
3742 /* A compare of a mode narrower than SI mode against zero can be done
3743 by extending the value in the comparison. */
3744 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3745 && y == const0_rtx)
3746 /* Only use sign-extension if we really need it. */
3747 return ((code == GT || code == GE || code == LE || code == LT)
3748 ? CC_SESWPmode : CC_ZESWPmode);
3749
3750 /* For everything else, return CCmode. */
3751 return CCmode;
3752}
3753
3754static int
3755aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
3756
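/* Return the AARCH64_* condition code for comparison rtx X, or -1 if it
   cannot be represented as a single AArch64 condition.  */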
cd5660ab 3757int
3758aarch64_get_condition_code (rtx x)
3759{
ef4bddc2 3760 machine_mode mode = GET_MODE (XEXP (x, 0));
3761 enum rtx_code comp_code = GET_CODE (x);
3762
3763 if (GET_MODE_CLASS (mode) != MODE_CC)
3764 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3765 return aarch64_get_condition_code_1 (mode, comp_code);
3766}
43e9d192 3767
3768static int
3769aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
3770{
3771 int ne = -1, eq = -1;
3772 switch (mode)
3773 {
3774 case CCFPmode:
3775 case CCFPEmode:
3776 switch (comp_code)
3777 {
3778 case GE: return AARCH64_GE;
3779 case GT: return AARCH64_GT;
3780 case LE: return AARCH64_LS;
3781 case LT: return AARCH64_MI;
3782 case NE: return AARCH64_NE;
3783 case EQ: return AARCH64_EQ;
3784 case ORDERED: return AARCH64_VC;
3785 case UNORDERED: return AARCH64_VS;
3786 case UNLT: return AARCH64_LT;
3787 case UNLE: return AARCH64_LE;
3788 case UNGT: return AARCH64_HI;
3789 case UNGE: return AARCH64_PL;
cd5660ab 3790 default: return -1;
3791 }
3792 break;
3793
3794 case CC_DNEmode:
3795 ne = AARCH64_NE;
3796 eq = AARCH64_EQ;
3797 break;
3798
3799 case CC_DEQmode:
3800 ne = AARCH64_EQ;
3801 eq = AARCH64_NE;
3802 break;
3803
3804 case CC_DGEmode:
3805 ne = AARCH64_GE;
3806 eq = AARCH64_LT;
3807 break;
3808
3809 case CC_DLTmode:
3810 ne = AARCH64_LT;
3811 eq = AARCH64_GE;
3812 break;
3813
3814 case CC_DGTmode:
3815 ne = AARCH64_GT;
3816 eq = AARCH64_LE;
3817 break;
3818
3819 case CC_DLEmode:
3820 ne = AARCH64_LE;
3821 eq = AARCH64_GT;
3822 break;
3823
3824 case CC_DGEUmode:
3825 ne = AARCH64_CS;
3826 eq = AARCH64_CC;
3827 break;
3828
3829 case CC_DLTUmode:
3830 ne = AARCH64_CC;
3831 eq = AARCH64_CS;
3832 break;
3833
3834 case CC_DGTUmode:
3835 ne = AARCH64_HI;
3836 eq = AARCH64_LS;
3837 break;
3838
3839 case CC_DLEUmode:
3840 ne = AARCH64_LS;
3841 eq = AARCH64_HI;
3842 break;
3843
3844 case CCmode:
3845 switch (comp_code)
3846 {
3847 case NE: return AARCH64_NE;
3848 case EQ: return AARCH64_EQ;
3849 case GE: return AARCH64_GE;
3850 case GT: return AARCH64_GT;
3851 case LE: return AARCH64_LE;
3852 case LT: return AARCH64_LT;
3853 case GEU: return AARCH64_CS;
3854 case GTU: return AARCH64_HI;
3855 case LEU: return AARCH64_LS;
3856 case LTU: return AARCH64_CC;
cd5660ab 3857 default: return -1;
3858 }
3859 break;
3860
3861 case CC_SWPmode:
3862 case CC_ZESWPmode:
3863 case CC_SESWPmode:
3864 switch (comp_code)
3865 {
3866 case NE: return AARCH64_NE;
3867 case EQ: return AARCH64_EQ;
3868 case GE: return AARCH64_LE;
3869 case GT: return AARCH64_LT;
3870 case LE: return AARCH64_GE;
3871 case LT: return AARCH64_GT;
3872 case GEU: return AARCH64_LS;
3873 case GTU: return AARCH64_CC;
3874 case LEU: return AARCH64_CS;
3875 case LTU: return AARCH64_HI;
cd5660ab 3876 default: return -1;
3877 }
3878 break;
3879
3880 case CC_NZmode:
3881 switch (comp_code)
3882 {
3883 case NE: return AARCH64_NE;
3884 case EQ: return AARCH64_EQ;
3885 case GE: return AARCH64_PL;
3886 case LT: return AARCH64_MI;
cd5660ab 3887 default: return -1;
3888 }
3889 break;
3890
3891 case CC_Zmode:
3892 switch (comp_code)
3893 {
3894 case NE: return AARCH64_NE;
3895 case EQ: return AARCH64_EQ;
cd5660ab 3896 default: return -1;
3897 }
3898 break;
3899
43e9d192 3900 default:
cd5660ab 3901 return -1;
3902 break;
3903 }
3904
3905 if (comp_code == NE)
3906 return ne;
3907
3908 if (comp_code == EQ)
3909 return eq;
3910
3911 return -1;
3912}
3913
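/* Return true if X is a CONST_VECTOR of integers whose elements are all
   equal and lie within [MINVAL, MAXVAL].  */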
3914bool
3915aarch64_const_vec_all_same_in_range_p (rtx x,
3916 HOST_WIDE_INT minval,
3917 HOST_WIDE_INT maxval)
3918{
3919 HOST_WIDE_INT firstval;
3920 int count, i;
3921
3922 if (GET_CODE (x) != CONST_VECTOR
3923 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
3924 return false;
3925
3926 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
3927 if (firstval < minval || firstval > maxval)
3928 return false;
3929
3930 count = CONST_VECTOR_NUNITS (x);
3931 for (i = 1; i < count; i++)
3932 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
3933 return false;
3934
3935 return true;
3936}
3937
3938bool
3939aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
3940{
3941 return aarch64_const_vec_all_same_in_range_p (x, val, val);
3942}
3943
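/* Return the number of bits set in VALUE.  */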
3944static unsigned
3945bit_count (unsigned HOST_WIDE_INT value)
3946{
3947 unsigned count = 0;
3948
3949 while (value)
3950 {
3951 count++;
3952 value &= value - 1;
3953 }
3954
3955 return count;
3956}
3957
3958/* N Z C V. */
3959#define AARCH64_CC_V 1
3960#define AARCH64_CC_C (1 << 1)
3961#define AARCH64_CC_Z (1 << 2)
3962#define AARCH64_CC_N (1 << 3)
3963
3964/* N Z C V flags for ccmp. The first code is for AND op and the other
3965 is for IOR op. Indexed by AARCH64_COND_CODE. */
3966static const int aarch64_nzcv_codes[][2] =
3967{
3968 {AARCH64_CC_Z, 0}, /* EQ, Z == 1. */
3969 {0, AARCH64_CC_Z}, /* NE, Z == 0. */
3970 {AARCH64_CC_C, 0}, /* CS, C == 1. */
3971 {0, AARCH64_CC_C}, /* CC, C == 0. */
3972 {AARCH64_CC_N, 0}, /* MI, N == 1. */
3973 {0, AARCH64_CC_N}, /* PL, N == 0. */
3974 {AARCH64_CC_V, 0}, /* VS, V == 1. */
3975 {0, AARCH64_CC_V}, /* VC, V == 0. */
 3976 {AARCH64_CC_C, 0}, /* HI, C == 1 && Z == 0. */
3977 {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0). */
3978 {0, AARCH64_CC_V}, /* GE, N == V. */
3979 {AARCH64_CC_V, 0}, /* LT, N != V. */
3980 {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V. */
3981 {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V). */
3982 {0, 0}, /* AL, Any. */
3983 {0, 0}, /* NV, Any. */
3984};
3985
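/* Map a CC_D* mode used for conditional compares back to the rtx
   comparison code it encodes.  */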
3986int
3987aarch64_ccmp_mode_to_code (enum machine_mode mode)
3988{
3989 switch (mode)
3990 {
3991 case CC_DNEmode:
3992 return NE;
3993
3994 case CC_DEQmode:
3995 return EQ;
3996
3997 case CC_DLEmode:
3998 return LE;
3999
4000 case CC_DGTmode:
4001 return GT;
4002
4003 case CC_DLTmode:
4004 return LT;
4005
4006 case CC_DGEmode:
4007 return GE;
4008
4009 case CC_DLEUmode:
4010 return LEU;
4011
4012 case CC_DGTUmode:
4013 return GTU;
4014
4015 case CC_DLTUmode:
4016 return LTU;
4017
4018 case CC_DGEUmode:
4019 return GEU;
4020
4021 default:
4022 gcc_unreachable ();
4023 }
4024}
4025
4026
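/* Print operand X to file F, under the control of the output modifier
   CODE used in the instruction templates.  */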
4027void
4028aarch64_print_operand (FILE *f, rtx x, char code)
4029{
4030 switch (code)
4031 {
4032 /* An integer or symbol address without a preceding # sign. */
4033 case 'c':
4034 switch (GET_CODE (x))
4035 {
4036 case CONST_INT:
4037 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4038 break;
4039
4040 case SYMBOL_REF:
4041 output_addr_const (f, x);
4042 break;
4043
4044 case CONST:
4045 if (GET_CODE (XEXP (x, 0)) == PLUS
4046 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4047 {
4048 output_addr_const (f, x);
4049 break;
4050 }
4051 /* Fall through. */
4052
4053 default:
4054 output_operand_lossage ("Unsupported operand for code '%c'", code);
4055 }
4056 break;
4057
4058 case 'e':
4059 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
4060 {
4061 int n;
4062
4aa81c2e 4063 if (!CONST_INT_P (x)
4064 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
4065 {
4066 output_operand_lossage ("invalid operand for '%%%c'", code);
4067 return;
4068 }
4069
4070 switch (n)
4071 {
4072 case 3:
4073 fputc ('b', f);
4074 break;
4075 case 4:
4076 fputc ('h', f);
4077 break;
4078 case 5:
4079 fputc ('w', f);
4080 break;
4081 default:
4082 output_operand_lossage ("invalid operand for '%%%c'", code);
4083 return;
4084 }
4085 }
4086 break;
4087
4088 case 'p':
4089 {
4090 int n;
4091
4092 /* Print N such that 2^N == X. */
4aa81c2e 4093 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
4094 {
4095 output_operand_lossage ("invalid operand for '%%%c'", code);
4096 return;
4097 }
4098
4099 asm_fprintf (f, "%d", n);
4100 }
4101 break;
4102
4103 case 'P':
4104 /* Print the number of non-zero bits in X (a const_int). */
4aa81c2e 4105 if (!CONST_INT_P (x))
4106 {
4107 output_operand_lossage ("invalid operand for '%%%c'", code);
4108 return;
4109 }
4110
4111 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
4112 break;
4113
4114 case 'H':
4115 /* Print the higher numbered register of a pair (TImode) of regs. */
4aa81c2e 4116 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
4117 {
4118 output_operand_lossage ("invalid operand for '%%%c'", code);
4119 return;
4120 }
4121
01a3a324 4122 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
4123 break;
4124
43e9d192 4125 case 'm':
4126 {
4127 int cond_code;
4128 /* Print a condition (eq, ne, etc). */
43e9d192 4129
4130 /* CONST_TRUE_RTX means always -- that's the default. */
4131 if (x == const_true_rtx)
43e9d192 4132 return;
43e9d192 4133
4134 if (!COMPARISON_P (x))
4135 {
4136 output_operand_lossage ("invalid operand for '%%%c'", code);
4137 return;
4138 }
4139
4140 cond_code = aarch64_get_condition_code (x);
4141 gcc_assert (cond_code >= 0);
4142 fputs (aarch64_condition_codes[cond_code], f);
4143 }
4144 break;
4145
4146 case 'M':
4147 {
4148 int cond_code;
4149 /* Print the inverse of a condition (eq <-> ne, etc). */
43e9d192 4150
4151 /* CONST_TRUE_RTX means never -- that's the default. */
4152 if (x == const_true_rtx)
4153 {
4154 fputs ("nv", f);
4155 return;
4156 }
43e9d192 4157
4158 if (!COMPARISON_P (x))
4159 {
4160 output_operand_lossage ("invalid operand for '%%%c'", code);
4161 return;
4162 }
4163 cond_code = aarch64_get_condition_code (x);
4164 gcc_assert (cond_code >= 0);
4165 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
4166 (cond_code)], f);
4167 }
4168 break;
4169
4170 case 'b':
4171 case 'h':
4172 case 's':
4173 case 'd':
4174 case 'q':
4175 /* Print a scalar FP/SIMD register name. */
4176 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4177 {
4178 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4179 return;
4180 }
50ce6f88 4181 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
4182 break;
4183
4184 case 'S':
4185 case 'T':
4186 case 'U':
4187 case 'V':
4188 /* Print the first FP/SIMD register name in a list. */
4189 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4190 {
4191 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4192 return;
4193 }
50ce6f88 4194 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
4195 break;
4196
4197 case 'R':
4198 /* Print a scalar FP/SIMD register name + 1. */
4199 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4200 {
4201 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4202 return;
4203 }
4204 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
4205 break;
4206
a05c0ddf 4207 case 'X':
50d38551 4208 /* Print bottom 16 bits of integer constant in hex. */
4aa81c2e 4209 if (!CONST_INT_P (x))
4210 {
4211 output_operand_lossage ("invalid operand for '%%%c'", code);
4212 return;
4213 }
50d38551 4214 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
4215 break;
4216
4217 case 'w':
4218 case 'x':
4219 /* Print a general register name or the zero register (32-bit or
4220 64-bit). */
4221 if (x == const0_rtx
4222 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 4223 {
50ce6f88 4224 asm_fprintf (f, "%czr", code);
4225 break;
4226 }
4227
4228 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
4229 {
50ce6f88 4230 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
4231 break;
4232 }
4233
4234 if (REG_P (x) && REGNO (x) == SP_REGNUM)
4235 {
50ce6f88 4236 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
4237 break;
4238 }
4239
4240 /* Fall through */
4241
4242 case 0:
4243 /* Print a normal operand, if it's a general register, then we
4244 assume DImode. */
4245 if (x == NULL)
4246 {
4247 output_operand_lossage ("missing operand");
4248 return;
4249 }
4250
4251 switch (GET_CODE (x))
4252 {
4253 case REG:
01a3a324 4254 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
4255 break;
4256
4257 case MEM:
4258 aarch64_memory_reference_mode = GET_MODE (x);
4259 output_address (XEXP (x, 0));
4260 break;
4261
4262 case LABEL_REF:
4263 case SYMBOL_REF:
4264 output_addr_const (asm_out_file, x);
4265 break;
4266
4267 case CONST_INT:
4268 asm_fprintf (f, "%wd", INTVAL (x));
4269 break;
4270
4271 case CONST_VECTOR:
4272 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
4273 {
4274 gcc_assert (
4275 aarch64_const_vec_all_same_in_range_p (x,
4276 HOST_WIDE_INT_MIN,
4277 HOST_WIDE_INT_MAX));
4278 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
4279 }
4280 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
4281 {
4282 fputc ('0', f);
4283 }
4284 else
4285 gcc_unreachable ();
4286 break;
4287
4288 case CONST_DOUBLE:
4289 /* CONST_DOUBLE can represent a double-width integer.
4290 In this case, the mode of x is VOIDmode. */
4291 if (GET_MODE (x) == VOIDmode)
4292 ; /* Do Nothing. */
4293 else if (aarch64_float_const_zero_rtx_p (x))
4294 {
4295 fputc ('0', f);
4296 break;
4297 }
4298 else if (aarch64_float_const_representable_p (x))
4299 {
4300#define buf_size 20
4301 char float_buf[buf_size] = {'\0'};
4302 REAL_VALUE_TYPE r;
4303 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4304 real_to_decimal_for_mode (float_buf, &r,
4305 buf_size, buf_size,
4306 1, GET_MODE (x));
4307 asm_fprintf (asm_out_file, "%s", float_buf);
4308 break;
4309#undef buf_size
4310 }
4311 output_operand_lossage ("invalid constant");
4312 return;
4313 default:
4314 output_operand_lossage ("invalid operand");
4315 return;
4316 }
4317 break;
4318
4319 case 'A':
4320 if (GET_CODE (x) == HIGH)
4321 x = XEXP (x, 0);
4322
4323 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4324 {
4325 case SYMBOL_SMALL_GOT:
4326 asm_fprintf (asm_out_file, ":got:");
4327 break;
4328
4329 case SYMBOL_SMALL_TLSGD:
4330 asm_fprintf (asm_out_file, ":tlsgd:");
4331 break;
4332
4333 case SYMBOL_SMALL_TLSDESC:
4334 asm_fprintf (asm_out_file, ":tlsdesc:");
4335 break;
4336
4337 case SYMBOL_SMALL_GOTTPREL:
4338 asm_fprintf (asm_out_file, ":gottprel:");
4339 break;
4340
4341 case SYMBOL_SMALL_TPREL:
4342 asm_fprintf (asm_out_file, ":tprel:");
4343 break;
4344
4345 case SYMBOL_TINY_GOT:
4346 gcc_unreachable ();
4347 break;
4348
4349 default:
4350 break;
4351 }
4352 output_addr_const (asm_out_file, x);
4353 break;
4354
4355 case 'L':
4356 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4357 {
4358 case SYMBOL_SMALL_GOT:
4359 asm_fprintf (asm_out_file, ":lo12:");
4360 break;
4361
4362 case SYMBOL_SMALL_TLSGD:
4363 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4364 break;
4365
4366 case SYMBOL_SMALL_TLSDESC:
4367 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4368 break;
4369
4370 case SYMBOL_SMALL_GOTTPREL:
4371 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4372 break;
4373
4374 case SYMBOL_SMALL_TPREL:
4375 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4376 break;
4377
4378 case SYMBOL_TINY_GOT:
4379 asm_fprintf (asm_out_file, ":got:");
4380 break;
4381
4382 default:
4383 break;
4384 }
4385 output_addr_const (asm_out_file, x);
4386 break;
4387
4388 case 'G':
4389
4390 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4391 {
4392 case SYMBOL_SMALL_TPREL:
4393 asm_fprintf (asm_out_file, ":tprel_hi12:");
4394 break;
4395 default:
4396 break;
4397 }
4398 output_addr_const (asm_out_file, x);
4399 break;
4400
4401 case 'K':
4402 {
4403 int cond_code;
4404 /* Print nzcv. */
4405
4406 if (!COMPARISON_P (x))
4407 {
4408 output_operand_lossage ("invalid operand for '%%%c'", code);
4409 return;
4410 }
4411
4412 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4413 gcc_assert (cond_code >= 0);
4414 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
4415 }
4416 break;
4417
4418 case 'k':
4419 {
4420 int cond_code;
4421 /* Print nzcv. */
4422
4423 if (!COMPARISON_P (x))
4424 {
4425 output_operand_lossage ("invalid operand for '%%%c'", code);
4426 return;
4427 }
4428
4429 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4430 gcc_assert (cond_code >= 0);
4431 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
4432 }
4433 break;
4434
4435 default:
4436 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4437 return;
4438 }
4439}
4440
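/* Print address expression X to file F, using the syntax of an AArch64
   memory operand.  */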
4441void
4442aarch64_print_operand_address (FILE *f, rtx x)
4443{
4444 struct aarch64_address_info addr;
4445
4446 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4447 MEM, true))
4448 switch (addr.type)
4449 {
4450 case ADDRESS_REG_IMM:
4451 if (addr.offset == const0_rtx)
01a3a324 4452 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 4453 else
16a3246f 4454 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
4455 INTVAL (addr.offset));
4456 return;
4457
4458 case ADDRESS_REG_REG:
4459 if (addr.shift == 0)
16a3246f 4460 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 4461 reg_names [REGNO (addr.offset)]);
43e9d192 4462 else
16a3246f 4463 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 4464 reg_names [REGNO (addr.offset)], addr.shift);
4465 return;
4466
4467 case ADDRESS_REG_UXTW:
4468 if (addr.shift == 0)
16a3246f 4469 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
4470 REGNO (addr.offset) - R0_REGNUM);
4471 else
16a3246f 4472 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
4473 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4474 return;
4475
4476 case ADDRESS_REG_SXTW:
4477 if (addr.shift == 0)
16a3246f 4478 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
4479 REGNO (addr.offset) - R0_REGNUM);
4480 else
16a3246f 4481 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
4482 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4483 return;
4484
4485 case ADDRESS_REG_WB:
4486 switch (GET_CODE (x))
4487 {
4488 case PRE_INC:
16a3246f 4489 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
4490 GET_MODE_SIZE (aarch64_memory_reference_mode));
4491 return;
4492 case POST_INC:
16a3246f 4493 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4494 GET_MODE_SIZE (aarch64_memory_reference_mode));
4495 return;
4496 case PRE_DEC:
16a3246f 4497 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4498 GET_MODE_SIZE (aarch64_memory_reference_mode));
4499 return;
4500 case POST_DEC:
16a3246f 4501 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4502 GET_MODE_SIZE (aarch64_memory_reference_mode));
4503 return;
4504 case PRE_MODIFY:
16a3246f 4505 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4506 INTVAL (addr.offset));
4507 return;
4508 case POST_MODIFY:
16a3246f 4509 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4510 INTVAL (addr.offset));
4511 return;
4512 default:
4513 break;
4514 }
4515 break;
4516
4517 case ADDRESS_LO_SUM:
16a3246f 4518 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4519 output_addr_const (f, addr.offset);
4520 asm_fprintf (f, "]");
4521 return;
4522
4523 case ADDRESS_SYMBOLIC:
4524 break;
4525 }
4526
4527 output_addr_const (f, x);
4528}
4529
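/* Return true if X mentions a label, ignoring the LABEL_REFs contained
   in UNSPEC_TLS operands.  */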
4530bool
4531aarch64_label_mentioned_p (rtx x)
4532{
4533 const char *fmt;
4534 int i;
4535
4536 if (GET_CODE (x) == LABEL_REF)
4537 return true;
4538
4539 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4540 referencing instruction, but they are constant offsets, not
4541 symbols. */
4542 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4543 return false;
4544
4545 fmt = GET_RTX_FORMAT (GET_CODE (x));
4546 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4547 {
4548 if (fmt[i] == 'E')
4549 {
4550 int j;
4551
4552 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4553 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4554 return 1;
4555 }
4556 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4557 return 1;
4558 }
4559
4560 return 0;
4561}
4562
4563/* Implement REGNO_REG_CLASS. */
4564
4565enum reg_class
4566aarch64_regno_regclass (unsigned regno)
4567{
4568 if (GP_REGNUM_P (regno))
a4a182c6 4569 return GENERAL_REGS;
4570
4571 if (regno == SP_REGNUM)
4572 return STACK_REG;
4573
4574 if (regno == FRAME_POINTER_REGNUM
4575 || regno == ARG_POINTER_REGNUM)
f24bb080 4576 return POINTER_REGS;
4577
4578 if (FP_REGNUM_P (regno))
4579 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4580
4581 return NO_REGS;
4582}
4583
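/* Attempt to simplify a non-legitimate address X for MODE by splitting a
   large constant offset into a separate base adjustment plus a small
   residual offset.  */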
0c4ec427 4584static rtx
ef4bddc2 4585aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
4586{
4587 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4588 where mask is selected by alignment and size of the offset.
4589 We try to pick as large a range for the offset as possible to
4590 maximize the chance of a CSE. However, for aligned addresses
4591 we limit the range to 4k so that structures with different sized
4592 elements are likely to use the same base. */
4593
4594 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
4595 {
4596 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
4597 HOST_WIDE_INT base_offset;
4598
4599 /* Does it look like we'll need a load/store-pair operation? */
4600 if (GET_MODE_SIZE (mode) > 16
4601 || mode == TImode)
4602 base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
4603 & ~((128 * GET_MODE_SIZE (mode)) - 1));
 4604 /* For offsets that aren't a multiple of the access size, the limit is
 4605 -256...255. */
4606 else if (offset & (GET_MODE_SIZE (mode) - 1))
4607 base_offset = (offset + 0x100) & ~0x1ff;
4608 else
4609 base_offset = offset & ~0xfff;
4610
4611 if (base_offset == 0)
4612 return x;
4613
4614 offset -= base_offset;
4615 rtx base_reg = gen_reg_rtx (Pmode);
4616 rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
4617 NULL_RTX);
4618 emit_move_insn (base_reg, val);
4619 x = plus_constant (Pmode, base_reg, offset);
4620 }
4621
4622 return x;
4623}
4624
4625/* Try a machine-dependent way of reloading an illegitimate address
4626 operand. If we find one, push the reload and return the new rtx. */
4627
4628rtx
4629aarch64_legitimize_reload_address (rtx *x_p,
ef4bddc2 4630 machine_mode mode,
4631 int opnum, int type,
4632 int ind_levels ATTRIBUTE_UNUSED)
4633{
4634 rtx x = *x_p;
4635
4636 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4637 if (aarch64_vect_struct_mode_p (mode)
4638 && GET_CODE (x) == PLUS
4639 && REG_P (XEXP (x, 0))
4640 && CONST_INT_P (XEXP (x, 1)))
4641 {
4642 rtx orig_rtx = x;
4643 x = copy_rtx (x);
4644 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4645 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4646 opnum, (enum reload_type) type);
4647 return x;
4648 }
4649
4650 /* We must recognize output that we have already generated ourselves. */
4651 if (GET_CODE (x) == PLUS
4652 && GET_CODE (XEXP (x, 0)) == PLUS
4653 && REG_P (XEXP (XEXP (x, 0), 0))
4654 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4655 && CONST_INT_P (XEXP (x, 1)))
4656 {
4657 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4658 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4659 opnum, (enum reload_type) type);
4660 return x;
4661 }
4662
4663 /* We wish to handle large displacements off a base register by splitting
4664 the addend across an add and the mem insn. This can cut the number of
4665 extra insns needed from 3 to 1. It is only useful for load/store of a
4666 single register with 12 bit offset field. */
4667 if (GET_CODE (x) == PLUS
4668 && REG_P (XEXP (x, 0))
4669 && CONST_INT_P (XEXP (x, 1))
4670 && HARD_REGISTER_P (XEXP (x, 0))
4671 && mode != TImode
4672 && mode != TFmode
4673 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4674 {
4675 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4676 HOST_WIDE_INT low = val & 0xfff;
4677 HOST_WIDE_INT high = val - low;
4678 HOST_WIDE_INT offs;
4679 rtx cst;
ef4bddc2 4680 machine_mode xmode = GET_MODE (x);
4681
4682 /* In ILP32, xmode can be either DImode or SImode. */
4683 gcc_assert (xmode == DImode || xmode == SImode);
4684
4685 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4686 BLKmode alignment. */
4687 if (GET_MODE_SIZE (mode) == 0)
4688 return NULL_RTX;
4689
4690 offs = low % GET_MODE_SIZE (mode);
4691
4692 /* Align misaligned offset by adjusting high part to compensate. */
4693 if (offs != 0)
4694 {
4695 if (aarch64_uimm12_shift (high + offs))
4696 {
4697 /* Align down. */
4698 low = low - offs;
4699 high = high + offs;
4700 }
4701 else
4702 {
4703 /* Align up. */
4704 offs = GET_MODE_SIZE (mode) - offs;
4705 low = low + offs;
4706 high = high + (low & 0x1000) - offs;
4707 low &= 0xfff;
4708 }
4709 }
4710
4711 /* Check for overflow. */
4712 if (high + low != val)
4713 return NULL_RTX;
4714
4715 cst = GEN_INT (high);
4716 if (!aarch64_uimm12_shift (high))
28514dda 4717 cst = force_const_mem (xmode, cst);
4718
4719 /* Reload high part into base reg, leaving the low part
4720 in the mem instruction.
4721 Note that replacing this gen_rtx_PLUS with plus_constant is
4722 wrong in this case because we rely on the
4723 (plus (plus reg c1) c2) structure being preserved so that
4724 XEXP (*p, 0) in push_reload below uses the correct term. */
4725 x = gen_rtx_PLUS (xmode,
4726 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4727 GEN_INT (low));
4728
4729 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
28514dda 4730 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4731 opnum, (enum reload_type) type);
4732 return x;
4733 }
4734
4735 return NULL_RTX;
4736}
4737
4738
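/* Decide whether moving X of mode MODE into or out of class RCLASS needs
   a secondary reload; when a special reload pattern is required, its
   icode is stored in SRI.  */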
4739static reg_class_t
4740aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4741 reg_class_t rclass,
ef4bddc2 4742 machine_mode mode,
4743 secondary_reload_info *sri)
4744{
4745 /* Without the TARGET_SIMD instructions we cannot move a Q register
4746 to a Q register directly. We need a scratch. */
4747 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4748 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4749 && reg_class_subset_p (rclass, FP_REGS))
4750 {
4751 if (mode == TFmode)
4752 sri->icode = CODE_FOR_aarch64_reload_movtf;
4753 else if (mode == TImode)
4754 sri->icode = CODE_FOR_aarch64_reload_movti;
4755 return NO_REGS;
4756 }
4757
 4758 /* A TFmode or TImode memory access should be handled via an FP register
 4759 because AArch64 has richer addressing modes for LDR/STR instructions
 4760 than LDP/STP instructions. */
a4a182c6 4761 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
4762 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4763 return FP_REGS;
4764
4765 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 4766 return GENERAL_REGS;
4767
4768 return NO_REGS;
4769}
4770
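/* Return true if register FROM may be eliminated in favour of register
   TO, given the current frame pointer requirements.  */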
4771static bool
4772aarch64_can_eliminate (const int from, const int to)
4773{
4774 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4775 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4776
4777 if (frame_pointer_needed)
4778 {
4779 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4780 return true;
4781 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4782 return false;
4783 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4784 && !cfun->calls_alloca)
4785 return true;
4786 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4787 return true;
4788
4789 return false;
43e9d192 4790 }
4791 else
4792 {
4793 /* If we decided that we didn't need a leaf frame pointer but then used
4794 LR in the function, then we'll want a frame pointer after all, so
4795 prevent this elimination to ensure a frame pointer is used. */
4796 if (to == STACK_POINTER_REGNUM
4797 && flag_omit_leaf_frame_pointer
4798 && df_regs_ever_live_p (LR_REGNUM))
4799 return false;
4800 }
777e6976 4801
4802 return true;
4803}
4804
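/* Return the offset to apply when eliminating register FROM in favour of
   register TO, based on the frame layout computed by
   aarch64_layout_frame.  */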
4805HOST_WIDE_INT
4806aarch64_initial_elimination_offset (unsigned from, unsigned to)
4807{
43e9d192 4808 aarch64_layout_frame ();
4809
4810 if (to == HARD_FRAME_POINTER_REGNUM)
4811 {
4812 if (from == ARG_POINTER_REGNUM)
1c960e02 4813 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
4814
4815 if (from == FRAME_POINTER_REGNUM)
4816 return (cfun->machine->frame.hard_fp_offset
4817 - cfun->machine->frame.saved_varargs_size);
4818 }
4819
4820 if (to == STACK_POINTER_REGNUM)
4821 {
4822 if (from == FRAME_POINTER_REGNUM)
4823 return (cfun->machine->frame.frame_size
4824 - cfun->machine->frame.saved_varargs_size);
4825 }
4826
1c960e02 4827 return cfun->machine->frame.frame_size;
4828}
4829
4830/* Implement RETURN_ADDR_RTX. We do not support moving back to a
4831 previous frame. */
4832
4833rtx
4834aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4835{
4836 if (count != 0)
4837 return const0_rtx;
4838 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4839}
4840
4841
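/* Output the trampoline code template: two PC-relative literal loads and
   an indirect branch, followed by slots for the target function address
   and the static chain value.  */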
4842static void
4843aarch64_asm_trampoline_template (FILE *f)
4844{
4845 if (TARGET_ILP32)
4846 {
4847 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4848 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4849 }
4850 else
4851 {
4852 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4853 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4854 }
01a3a324 4855 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 4856 assemble_aligned_integer (4, const0_rtx);
4857 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4858 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4859}
4860
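/* Initialize trampoline M_TRAMP: copy in the code template, store the
   address of FNDECL and CHAIN_VALUE in its data slots, then flush the
   instruction cache with __clear_cache.  */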
4861static void
4862aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4863{
4864 rtx fnaddr, mem, a_tramp;
28514dda 4865 const int tramp_code_sz = 16;
4866
4867 /* Don't need to copy the trailing D-words, we fill those in below. */
4868 emit_block_move (m_tramp, assemble_trampoline_template (),
4869 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4870 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 4871 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4872 if (GET_MODE (fnaddr) != ptr_mode)
4873 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4874 emit_move_insn (mem, fnaddr);
4875
28514dda 4876 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4877 emit_move_insn (mem, chain_value);
4878
4879 /* XXX We should really define a "clear_cache" pattern and use
4880 gen_clear_cache(). */
4881 a_tramp = XEXP (m_tramp, 0);
4882 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4883 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4884 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4885 ptr_mode);
4886}
4887
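/* Return the maximum number of consecutive registers of class REGCLASS
   needed to hold a value of mode MODE.  */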
4888static unsigned char
ef4bddc2 4889aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
4890{
4891 switch (regclass)
4892 {
fee9ba42 4893 case CALLER_SAVE_REGS:
4894 case POINTER_REGS:
4895 case GENERAL_REGS:
4896 case ALL_REGS:
4897 case FP_REGS:
4898 case FP_LO_REGS:
4899 return
4900 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
aef66c94 4901 (GET_MODE_SIZE (mode) + 7) / 8;
4902 case STACK_REG:
4903 return 1;
4904
4905 case NO_REGS:
4906 return 0;
4907
4908 default:
4909 break;
4910 }
4911 gcc_unreachable ();
4912}
4913
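/* Return the register class that X should preferably be reloaded into
   when class REGCLASS was requested.  */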
4914static reg_class_t
78d8b9f0 4915aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 4916{
51bb310d 4917 if (regclass == POINTER_REGS)
4918 return GENERAL_REGS;
4919
4920 if (regclass == STACK_REG)
4921 {
4922 if (REG_P(x)
4923 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4924 return regclass;
4925
4926 return NO_REGS;
4927 }
4928
4929 /* If it's an integer immediate that MOVI can't handle, then
4930 FP_REGS is not an option, so we return NO_REGS instead. */
4931 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4932 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4933 return NO_REGS;
4934
 4935 /* Register elimination can result in a request for
 4936 SP+constant->FP_REGS. We cannot support such operations which
 4937 use SP as source and an FP_REG as destination, so reject it
 4938 right now. */
4939 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4940 {
4941 rtx lhs = XEXP (x, 0);
4942
4943 /* Look through a possible SUBREG introduced by ILP32. */
4944 if (GET_CODE (lhs) == SUBREG)
4945 lhs = SUBREG_REG (lhs);
4946
4947 gcc_assert (REG_P (lhs));
4948 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4949 POINTER_REGS));
4950 return NO_REGS;
4951 }
4952
78d8b9f0 4953 return regclass;
4954}
4955
4956void
4957aarch64_asm_output_labelref (FILE* f, const char *name)
4958{
4959 asm_fprintf (f, "%U%s", name);
4960}
4961
4962static void
4963aarch64_elf_asm_constructor (rtx symbol, int priority)
4964{
4965 if (priority == DEFAULT_INIT_PRIORITY)
4966 default_ctor_section_asm_out_constructor (symbol, priority);
4967 else
4968 {
4969 section *s;
4970 char buf[18];
4971 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4972 s = get_section (buf, SECTION_WRITE, NULL);
4973 switch_to_section (s);
4974 assemble_align (POINTER_SIZE);
28514dda 4975 assemble_aligned_integer (POINTER_BYTES, symbol);
4976 }
4977}
4978
4979static void
4980aarch64_elf_asm_destructor (rtx symbol, int priority)
4981{
4982 if (priority == DEFAULT_INIT_PRIORITY)
4983 default_dtor_section_asm_out_destructor (symbol, priority);
4984 else
4985 {
4986 section *s;
4987 char buf[18];
4988 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4989 s = get_section (buf, SECTION_WRITE, NULL);
4990 switch_to_section (s);
4991 assemble_align (POINTER_SIZE);
28514dda 4992 assemble_aligned_integer (POINTER_BYTES, symbol);
4993 }
4994}
4995
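/* Output the dispatch sequence for a casesi jump table: load the table
   entry selected by the index, form the target address and branch to it.
   OPERANDS are those of the casesi pattern.  */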
4996const char*
4997aarch64_output_casesi (rtx *operands)
4998{
4999 char buf[100];
5000 char label[100];
b32d5189 5001 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
5002 int index;
5003 static const char *const patterns[4][2] =
5004 {
5005 {
5006 "ldrb\t%w3, [%0,%w1,uxtw]",
5007 "add\t%3, %4, %w3, sxtb #2"
5008 },
5009 {
5010 "ldrh\t%w3, [%0,%w1,uxtw #1]",
5011 "add\t%3, %4, %w3, sxth #2"
5012 },
5013 {
5014 "ldr\t%w3, [%0,%w1,uxtw #2]",
5015 "add\t%3, %4, %w3, sxtw #2"
5016 },
5017 /* We assume that DImode is only generated when not optimizing and
5018 that we don't really need 64-bit address offsets. That would
5019 imply an object file with 8GB of code in a single function! */
5020 {
5021 "ldr\t%w3, [%0,%w1,uxtw #2]",
5022 "add\t%3, %4, %w3, sxtw #2"
5023 }
5024 };
5025
5026 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
5027
5028 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
5029
5030 gcc_assert (index >= 0 && index <= 3);
5031
 5032 /* Need to implement table size reduction, by changing the code below. */
5033 output_asm_insn (patterns[index][0], operands);
5034 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
5035 snprintf (buf, sizeof (buf),
5036 "adr\t%%4, %s", targetm.strip_name_encoding (label));
5037 output_asm_insn (buf, operands);
5038 output_asm_insn (patterns[index][1], operands);
5039 output_asm_insn ("br\t%3", operands);
5040 assemble_label (asm_out_file, label);
5041 return "";
5042}
5043
5044
5045/* Return size in bits of an arithmetic operand which is shifted/scaled and
5046 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
5047 operator. */
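/* For example, SHIFT == 1 with MASK == 0xff << 1 returns 8 (a UXTB
   operand scaled by 1), while a mask that is not a contiguous low-order
   run shifted by SHIFT returns 0.  */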
5048
5049int
5050aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
5051{
5052 if (shift >= 0 && shift <= 3)
5053 {
5054 int size;
5055 for (size = 8; size <= 32; size *= 2)
5056 {
5057 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
5058 if (mask == bits << shift)
5059 return size;
5060 }
5061 }
5062 return 0;
5063}
5064
5065static bool
ef4bddc2 5066aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
5067 const_rtx x ATTRIBUTE_UNUSED)
5068{
5069 /* We can't use blocks for constants when we're using a per-function
5070 constant pool. */
5071 return false;
5072}
5073
5074static section *
ef4bddc2 5075aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
5076 rtx x ATTRIBUTE_UNUSED,
5077 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
5078{
5079 /* Force all constant pool entries into the current function section. */
5080 return function_section (current_function_decl);
5081}
5082
5083
5084/* Costs. */
5085
5086/* Helper function for rtx cost calculation. Strip a shift expression
5087 from X. Returns the inner operand if successful, or the original
5088 expression on failure. */
5089static rtx
5090aarch64_strip_shift (rtx x)
5091{
5092 rtx op = x;
5093
5094 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
5095 we can convert both to ROR during final output. */
5096 if ((GET_CODE (op) == ASHIFT
5097 || GET_CODE (op) == ASHIFTRT
5098 || GET_CODE (op) == LSHIFTRT
5099 || GET_CODE (op) == ROTATERT
5100 || GET_CODE (op) == ROTATE)
5101 && CONST_INT_P (XEXP (op, 1)))
5102 return XEXP (op, 0);
5103
5104 if (GET_CODE (op) == MULT
5105 && CONST_INT_P (XEXP (op, 1))
5106 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
5107 return XEXP (op, 0);
5108
5109 return x;
5110}
5111
4745e701 5112/* Helper function for rtx cost calculation. Strip an extend
5113 expression from X. Returns the inner operand if successful, or the
5114 original expression on failure. We deal with a number of possible
5115 canonicalization variations here. */
5116static rtx
4745e701 5117aarch64_strip_extend (rtx x)
5118{
5119 rtx op = x;
5120
5121 /* Zero and sign extraction of a widened value. */
5122 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
5123 && XEXP (op, 2) == const0_rtx
4745e701 5124 && GET_CODE (XEXP (op, 0)) == MULT
5125 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
5126 XEXP (op, 1)))
5127 return XEXP (XEXP (op, 0), 0);
5128
5129 /* It can also be represented (for zero-extend) as an AND with an
5130 immediate. */
5131 if (GET_CODE (op) == AND
5132 && GET_CODE (XEXP (op, 0)) == MULT
5133 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
5134 && CONST_INT_P (XEXP (op, 1))
5135 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
5136 INTVAL (XEXP (op, 1))) != 0)
5137 return XEXP (XEXP (op, 0), 0);
5138
5139 /* Now handle extended register, as this may also have an optional
5140 left shift by 1..4. */
5141 if (GET_CODE (op) == ASHIFT
5142 && CONST_INT_P (XEXP (op, 1))
5143 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
5144 op = XEXP (op, 0);
5145
5146 if (GET_CODE (op) == ZERO_EXTEND
5147 || GET_CODE (op) == SIGN_EXTEND)
5148 op = XEXP (op, 0);
5149
5150 if (op != x)
5151 return op;
5152
4745e701
JG
5153 return x;
5154}
5155
5156/* Helper function for rtx cost calculation. Calculate the cost of
5157 a MULT, which may be part of a multiply-accumulate rtx. Return
5158 the calculated cost of the expression, recursing manually in to
5159 operands where needed. */
5160
5161static int
5162aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
5163{
5164 rtx op0, op1;
5165 const struct cpu_cost_table *extra_cost
5166 = aarch64_tune_params->insn_extra_cost;
5167 int cost = 0;
5168 bool maybe_fma = (outer == PLUS || outer == MINUS);
ef4bddc2 5169 machine_mode mode = GET_MODE (x);
5170
5171 gcc_checking_assert (code == MULT);
5172
5173 op0 = XEXP (x, 0);
5174 op1 = XEXP (x, 1);
5175
5176 if (VECTOR_MODE_P (mode))
5177 mode = GET_MODE_INNER (mode);
5178
5179 /* Integer multiply/fma. */
5180 if (GET_MODE_CLASS (mode) == MODE_INT)
5181 {
5182 /* The multiply will be canonicalized as a shift, cost it as such. */
5183 if (CONST_INT_P (op1)
5184 && exact_log2 (INTVAL (op1)) > 0)
5185 {
5186 if (speed)
5187 {
5188 if (maybe_fma)
5189 /* ADD (shifted register). */
5190 cost += extra_cost->alu.arith_shift;
5191 else
5192 /* LSL (immediate). */
5193 cost += extra_cost->alu.shift;
5194 }
5195
5196 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
5197
5198 return cost;
5199 }
5200
5201 /* Integer multiplies or FMAs have zero/sign extending variants. */
5202 if ((GET_CODE (op0) == ZERO_EXTEND
5203 && GET_CODE (op1) == ZERO_EXTEND)
5204 || (GET_CODE (op0) == SIGN_EXTEND
5205 && GET_CODE (op1) == SIGN_EXTEND))
5206 {
5207 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
5208 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
5209
5210 if (speed)
5211 {
5212 if (maybe_fma)
5213 /* MADD/SMADDL/UMADDL. */
5214 cost += extra_cost->mult[0].extend_add;
5215 else
5216 /* MUL/SMULL/UMULL. */
5217 cost += extra_cost->mult[0].extend;
5218 }
5219
5220 return cost;
5221 }
5222
5223 /* This is either an integer multiply or an FMA. In both cases
5224 we want to recurse and cost the operands. */
5225 cost += rtx_cost (op0, MULT, 0, speed)
5226 + rtx_cost (op1, MULT, 1, speed);
5227
5228 if (speed)
5229 {
5230 if (maybe_fma)
5231 /* MADD. */
5232 cost += extra_cost->mult[mode == DImode].add;
5233 else
5234 /* MUL. */
5235 cost += extra_cost->mult[mode == DImode].simple;
5236 }
5237
5238 return cost;
5239 }
5240 else
5241 {
5242 if (speed)
5243 {
3d840f7d 5244 /* Floating-point FMA/FMUL can also support negations of the
5245 operands. */
5246 if (GET_CODE (op0) == NEG)
3d840f7d 5247 op0 = XEXP (op0, 0);
4745e701 5248 if (GET_CODE (op1) == NEG)
3d840f7d 5249 op1 = XEXP (op1, 0);
5250
5251 if (maybe_fma)
5252 /* FMADD/FNMADD/FNMSUB/FMSUB. */
5253 cost += extra_cost->fp[mode == DFmode].fma;
5254 else
3d840f7d 5255 /* FMUL/FNMUL. */
5256 cost += extra_cost->fp[mode == DFmode].mult;
5257 }
5258
5259 cost += rtx_cost (op0, MULT, 0, speed)
5260 + rtx_cost (op1, MULT, 1, speed);
5261 return cost;
5262 }
5263}
5264
5265static int
5266aarch64_address_cost (rtx x,
ef4bddc2 5267 machine_mode mode,
5268 addr_space_t as ATTRIBUTE_UNUSED,
5269 bool speed)
5270{
5271 enum rtx_code c = GET_CODE (x);
5272 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
5273 struct aarch64_address_info info;
5274 int cost = 0;
5275 info.shift = 0;
5276
5277 if (!aarch64_classify_address (&info, x, mode, c, false))
5278 {
5279 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
5280 {
5281 /* This is a CONST or SYMBOL ref which will be split
5282 in a different way depending on the code model in use.
5283 Cost it through the generic infrastructure. */
5284 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
5285 /* Divide through by the cost of one instruction to
5286 bring it to the same units as the address costs. */
5287 cost_symbol_ref /= COSTS_N_INSNS (1);
5288 /* The cost is then the cost of preparing the address,
5289 followed by an immediate (possibly 0) offset. */
5290 return cost_symbol_ref + addr_cost->imm_offset;
5291 }
5292 else
5293 {
5294 /* This is most likely a jump table from a case
5295 statement. */
5296 return addr_cost->register_offset;
5297 }
5298 }
5299
5300 switch (info.type)
5301 {
5302 case ADDRESS_LO_SUM:
5303 case ADDRESS_SYMBOLIC:
5304 case ADDRESS_REG_IMM:
5305 cost += addr_cost->imm_offset;
5306 break;
5307
5308 case ADDRESS_REG_WB:
5309 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
5310 cost += addr_cost->pre_modify;
5311 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
5312 cost += addr_cost->post_modify;
5313 else
5314 gcc_unreachable ();
5315
5316 break;
5317
5318 case ADDRESS_REG_REG:
5319 cost += addr_cost->register_offset;
5320 break;
5321
5322 case ADDRESS_REG_UXTW:
5323 case ADDRESS_REG_SXTW:
5324 cost += addr_cost->register_extend;
5325 break;
5326
5327 default:
5328 gcc_unreachable ();
5329 }
5330
5331
5332 if (info.shift > 0)
5333 {
5334 /* For the sake of calculating the cost of the shifted register
5335 component, we can treat same sized modes in the same way. */
5336 switch (GET_MODE_BITSIZE (mode))
5337 {
5338 case 16:
5339 cost += addr_cost->addr_scale_costs.hi;
5340 break;
5341
5342 case 32:
5343 cost += addr_cost->addr_scale_costs.si;
5344 break;
5345
5346 case 64:
5347 cost += addr_cost->addr_scale_costs.di;
5348 break;
5349
5350 /* We can't tell, or this is a 128-bit vector. */
5351 default:
5352 cost += addr_cost->addr_scale_costs.ti;
5353 break;
5354 }
5355 }
5356
5357 return cost;
5358}
5359
5360/* Return true if the RTX X in mode MODE is a zero or sign extract
5361 usable in an ADD or SUB (extended register) instruction. */
5362static bool
ef4bddc2 5363aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
5364{
5365 /* Catch add with a sign extract.
5366 This is add_<optab><mode>_multp2. */
5367 if (GET_CODE (x) == SIGN_EXTRACT
5368 || GET_CODE (x) == ZERO_EXTRACT)
5369 {
5370 rtx op0 = XEXP (x, 0);
5371 rtx op1 = XEXP (x, 1);
5372 rtx op2 = XEXP (x, 2);
5373
5374 if (GET_CODE (op0) == MULT
5375 && CONST_INT_P (op1)
5376 && op2 == const0_rtx
5377 && CONST_INT_P (XEXP (op0, 1))
5378 && aarch64_is_extend_from_extract (mode,
5379 XEXP (op0, 1),
5380 op1))
5381 {
5382 return true;
5383 }
5384 }
5385
5386 return false;
5387}
5388
5389static bool
5390aarch64_frint_unspec_p (unsigned int u)
5391{
5392 switch (u)
5393 {
5394 case UNSPEC_FRINTZ:
5395 case UNSPEC_FRINTP:
5396 case UNSPEC_FRINTM:
5397 case UNSPEC_FRINTA:
5398 case UNSPEC_FRINTN:
5399 case UNSPEC_FRINTX:
5400 case UNSPEC_FRINTI:
5401 return true;
5402
5403 default:
5404 return false;
5405 }
5406}
5407
5408/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5409 storing it in *COST. Result is true if the total cost of the operation
5410 has now been calculated. */
5411static bool
5412aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
5413{
5414 rtx inner;
5415 rtx comparator;
5416 enum rtx_code cmpcode;
5417
5418 if (COMPARISON_P (op0))
5419 {
5420 inner = XEXP (op0, 0);
5421 comparator = XEXP (op0, 1);
5422 cmpcode = GET_CODE (op0);
5423 }
5424 else
5425 {
5426 inner = op0;
5427 comparator = const0_rtx;
5428 cmpcode = NE;
5429 }
5430
5431 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5432 {
5433 /* Conditional branch. */
b9e3afe9 5434 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5435 return true;
5436 else
5437 {
b9e3afe9 5438 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 5439 {
5440 if (comparator == const0_rtx)
5441 {
5442 /* TBZ/TBNZ/CBZ/CBNZ. */
5443 if (GET_CODE (inner) == ZERO_EXTRACT)
5444 /* TBZ/TBNZ. */
5445 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
5446 0, speed);
5447 else
5448 /* CBZ/CBNZ. */
b9e3afe9 5449 *cost += rtx_cost (inner, cmpcode, 0, speed);
5450
5451 return true;
5452 }
5453 }
b9e3afe9 5454 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 5455 {
5456 /* TBZ/TBNZ. */
5457 if (comparator == const0_rtx)
5458 return true;
5459 }
5460 }
5461 }
b9e3afe9 5462 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5463 {
5464 /* It's a conditional operation based on the status flags,
5465 so it must be some flavor of CSEL. */
5466
5467 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5468 if (GET_CODE (op1) == NEG
5469 || GET_CODE (op1) == NOT
5470 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5471 op1 = XEXP (op1, 0);
5472
5473 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
5474 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
5475 return true;
5476 }
5477
5478 /* We don't know what this is, cost all operands. */
5479 return false;
5480}
5481
5482/* Calculate the cost of calculating X, storing it in *COST. Result
5483 is true if the total cost of the operation has now been calculated. */
5484static bool
5485aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
5486 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5487{
a8eecd00 5488 rtx op0, op1, op2;
73250c4c 5489 const struct cpu_cost_table *extra_cost
43e9d192 5490 = aarch64_tune_params->insn_extra_cost;
ef4bddc2 5491 machine_mode mode = GET_MODE (x);
43e9d192 5492
5493 /* By default, assume that everything has equivalent cost to the
5494 cheapest instruction. Any additional costs are applied as a delta
5495 above this default. */
5496 *cost = COSTS_N_INSNS (1);
5497
5498 /* TODO: The cost infrastructure currently does not handle
5499 vector operations. Assume that all vector operations
5500 are equally expensive. */
5501 if (VECTOR_MODE_P (mode))
5502 {
5503 if (speed)
5504 *cost += extra_cost->vect.alu;
5505 return true;
5506 }
5507
5508 switch (code)
5509 {
5510 case SET:
5511 /* The cost depends entirely on the operands to SET. */
5512 *cost = 0;
5513 op0 = SET_DEST (x);
5514 op1 = SET_SRC (x);
5515
5516 switch (GET_CODE (op0))
5517 {
5518 case MEM:
5519 if (speed)
5520 {
5521 rtx address = XEXP (op0, 0);
5522 if (GET_MODE_CLASS (mode) == MODE_INT)
5523 *cost += extra_cost->ldst.store;
5524 else if (mode == SFmode)
5525 *cost += extra_cost->ldst.storef;
5526 else if (mode == DFmode)
5527 *cost += extra_cost->ldst.stored;
5528
5529 *cost +=
5530 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5531 0, speed));
5532 }
43e9d192 5533
ba123b0d 5534 *cost += rtx_cost (op1, SET, 1, speed);
5535 return true;
5536
5537 case SUBREG:
5538 if (! REG_P (SUBREG_REG (op0)))
5539 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
ba123b0d 5540
5541 /* Fall through. */
5542 case REG:
5543 /* const0_rtx is in general free, but we will use an
5544 instruction to set a register to 0. */
5545 if (REG_P (op1) || op1 == const0_rtx)
5546 {
5547 /* The cost is 1 per register copied. */
5548 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5549 / UNITS_PER_WORD;
5550 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5551 }
5552 else
5553 /* Cost is just the cost of the RHS of the set. */
5554 *cost += rtx_cost (op1, SET, 1, speed);
5555 return true;
5556
ba123b0d 5557 case ZERO_EXTRACT:
43e9d192 5558 case SIGN_EXTRACT:
5559 /* Bit-field insertion. Strip any redundant widening of
5560 the RHS to meet the width of the target. */
5561 if (GET_CODE (op1) == SUBREG)
5562 op1 = SUBREG_REG (op1);
5563 if ((GET_CODE (op1) == ZERO_EXTEND
5564 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 5565 && CONST_INT_P (XEXP (op0, 1))
5566 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5567 >= INTVAL (XEXP (op0, 1))))
5568 op1 = XEXP (op1, 0);
5569
5570 if (CONST_INT_P (op1))
5571 {
5572 /* MOV immediate is assumed to always be cheap. */
5573 *cost = COSTS_N_INSNS (1);
5574 }
5575 else
5576 {
5577 /* BFM. */
5578 if (speed)
5579 *cost += extra_cost->alu.bfi;
5580 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5581 }
5582
5583 return true;
5584
5585 default:
5586 /* We can't make sense of this, assume default cost. */
5587 *cost = COSTS_N_INSNS (1);
61263118 5588 return false;
5589 }
5590 return false;
5591
5592 case CONST_INT:
5593 /* If an instruction can incorporate a constant within the
5594 instruction, the instruction's expression avoids calling
5595 rtx_cost() on the constant. If rtx_cost() is called on a
5596 constant, then it is usually because the constant must be
5597 moved into a register by one or more instructions.
5598
5599 The exception is constant 0, which can be expressed
 5600	 as XZR/WZR and is therefore free.  The exception to that is
 5601	 when we have (set (reg) (const0_rtx)), in which case we must cost
5602 the move. However, we can catch that when we cost the SET, so
5603 we don't need to consider that here. */
5604 if (x == const0_rtx)
5605 *cost = 0;
5606 else
5607 {
5608 /* To an approximation, building any other constant is
5609 proportionally expensive to the number of instructions
5610 required to build that constant. This is true whether we
5611 are compiling for SPEED or otherwise. */
82614948
RR
5612 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
5613 (NULL_RTX, x, false, mode));
9dfc162c
JG
5614 }
5615 return true;
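      /* Illustrative note, not part of the original source: for example, a
	 constant such as 0x1234567890ab has three non-zero 16-bit chunks and
	 would typically be built as

	     mov  x0, 0x90ab
	     movk x0, 0x5678, lsl 16
	     movk x0, 0x1234, lsl 32

	 so aarch64_internal_mov_immediate reports 3 instructions and the
	 constant is costed as COSTS_N_INSNS (3).  */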
5616
5617 case CONST_DOUBLE:
5618 if (speed)
5619 {
5620 /* mov[df,sf]_aarch64. */
5621 if (aarch64_float_const_representable_p (x))
5622 /* FMOV (scalar immediate). */
5623 *cost += extra_cost->fp[mode == DFmode].fpconst;
5624 else if (!aarch64_float_const_zero_rtx_p (x))
5625 {
5626 /* This will be a load from memory. */
5627 if (mode == DFmode)
5628 *cost += extra_cost->ldst.loadd;
5629 else
5630 *cost += extra_cost->ldst.loadf;
5631 }
5632 else
5633 /* Otherwise this is +0.0. We get this using MOVI d0, #0
 5634	       or MOV v0.s[0], wzr - neither of which is modeled by the
5635 cost tables. Just use the default cost. */
5636 {
5637 }
5638 }
5639
5640 return true;
5641
43e9d192
IB
5642 case MEM:
5643 if (speed)
2961177e
JG
5644 {
5645 /* For loads we want the base cost of a load, plus an
5646 approximation for the additional cost of the addressing
5647 mode. */
5648 rtx address = XEXP (x, 0);
5649 if (GET_MODE_CLASS (mode) == MODE_INT)
5650 *cost += extra_cost->ldst.load;
5651 else if (mode == SFmode)
5652 *cost += extra_cost->ldst.loadf;
5653 else if (mode == DFmode)
5654 *cost += extra_cost->ldst.loadd;
5655
5656 *cost +=
5657 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5658 0, speed));
5659 }
43e9d192
IB
5660
5661 return true;
5662
5663 case NEG:
4745e701
JG
5664 op0 = XEXP (x, 0);
5665
5666 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5667 {
5668 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5669 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5670 {
5671 /* CSETM. */
5672 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5673 return true;
5674 }
5675
5676 /* Cost this as SUB wzr, X. */
5677 op0 = CONST0_RTX (GET_MODE (x));
5678 op1 = XEXP (x, 0);
5679 goto cost_minus;
5680 }
5681
5682 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5683 {
5684 /* Support (neg(fma...)) as a single instruction only if
5685 sign of zeros is unimportant. This matches the decision
5686 making in aarch64.md. */
5687 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5688 {
5689 /* FNMADD. */
5690 *cost = rtx_cost (op0, NEG, 0, speed);
5691 return true;
5692 }
5693 if (speed)
5694 /* FNEG. */
5695 *cost += extra_cost->fp[mode == DFmode].neg;
5696 return false;
5697 }
5698
5699 return false;
43e9d192 5700
781aeb73
KT
5701 case CLRSB:
5702 case CLZ:
5703 if (speed)
5704 *cost += extra_cost->alu.clz;
5705
5706 return false;
5707
43e9d192
IB
5708 case COMPARE:
5709 op0 = XEXP (x, 0);
5710 op1 = XEXP (x, 1);
5711
5712 if (op1 == const0_rtx
5713 && GET_CODE (op0) == AND)
5714 {
5715 x = op0;
5716 goto cost_logic;
5717 }
5718
a8eecd00
JG
5719 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5720 {
5721 /* TODO: A write to the CC flags possibly costs extra, this
5722 needs encoding in the cost tables. */
5723
5724 /* CC_ZESWPmode supports zero extend for free. */
5725 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5726 op0 = XEXP (op0, 0);
5727
5728 /* ANDS. */
5729 if (GET_CODE (op0) == AND)
5730 {
5731 x = op0;
5732 goto cost_logic;
5733 }
5734
5735 if (GET_CODE (op0) == PLUS)
5736 {
5737 /* ADDS (and CMN alias). */
5738 x = op0;
5739 goto cost_plus;
5740 }
5741
5742 if (GET_CODE (op0) == MINUS)
5743 {
5744 /* SUBS. */
5745 x = op0;
5746 goto cost_minus;
5747 }
5748
5749 if (GET_CODE (op1) == NEG)
5750 {
5751 /* CMN. */
5752 if (speed)
5753 *cost += extra_cost->alu.arith;
5754
5755 *cost += rtx_cost (op0, COMPARE, 0, speed);
5756 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5757 return true;
5758 }
5759
5760 /* CMP.
5761
5762 Compare can freely swap the order of operands, and
5763 canonicalization puts the more complex operation first.
5764 But the integer MINUS logic expects the shift/extend
5765 operation in op1. */
5766 if (! (REG_P (op0)
5767 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5768 {
5769 op0 = XEXP (x, 1);
5770 op1 = XEXP (x, 0);
5771 }
5772 goto cost_minus;
5773 }
5774
5775 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5776 {
5777 /* FCMP. */
5778 if (speed)
5779 *cost += extra_cost->fp[mode == DFmode].compare;
5780
5781 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5782 {
5783 /* FCMP supports constant 0.0 for no extra cost. */
5784 return true;
5785 }
5786 return false;
5787 }
5788
5789 return false;
43e9d192
IB
5790
5791 case MINUS:
4745e701
JG
5792 {
5793 op0 = XEXP (x, 0);
5794 op1 = XEXP (x, 1);
5795
5796cost_minus:
5797 /* Detect valid immediates. */
5798 if ((GET_MODE_CLASS (mode) == MODE_INT
5799 || (GET_MODE_CLASS (mode) == MODE_CC
5800 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5801 && CONST_INT_P (op1)
5802 && aarch64_uimm12_shift (INTVAL (op1)))
5803 {
5804 *cost += rtx_cost (op0, MINUS, 0, speed);
43e9d192 5805
4745e701
JG
5806 if (speed)
5807 /* SUB(S) (immediate). */
5808 *cost += extra_cost->alu.arith;
5809 return true;
5810
5811 }
5812
7cc2145f
JG
5813 /* Look for SUB (extended register). */
5814 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5815 {
5816 if (speed)
5817 *cost += extra_cost->alu.arith_shift;
5818
5819 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5820 (enum rtx_code) GET_CODE (op1),
5821 0, speed);
5822 return true;
5823 }
5824
4745e701
JG
5825 rtx new_op1 = aarch64_strip_extend (op1);
5826
5827 /* Cost this as an FMA-alike operation. */
5828 if ((GET_CODE (new_op1) == MULT
5829 || GET_CODE (new_op1) == ASHIFT)
5830 && code != COMPARE)
5831 {
5832 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5833 (enum rtx_code) code,
5834 speed);
43e9d192 5835 *cost += rtx_cost (op0, MINUS, 0, speed);
4745e701
JG
5836 return true;
5837 }
43e9d192 5838
4745e701 5839 *cost += rtx_cost (new_op1, MINUS, 1, speed);
43e9d192 5840
4745e701
JG
5841 if (speed)
5842 {
5843 if (GET_MODE_CLASS (mode) == MODE_INT)
5844 /* SUB(S). */
5845 *cost += extra_cost->alu.arith;
5846 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5847 /* FSUB. */
5848 *cost += extra_cost->fp[mode == DFmode].addsub;
5849 }
5850 return true;
5851 }
43e9d192
IB
5852
5853 case PLUS:
4745e701
JG
5854 {
5855 rtx new_op0;
43e9d192 5856
4745e701
JG
5857 op0 = XEXP (x, 0);
5858 op1 = XEXP (x, 1);
43e9d192 5859
a8eecd00 5860cost_plus:
4745e701
JG
5861 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5862 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5863 {
5864 /* CSINC. */
5865 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5866 *cost += rtx_cost (op1, PLUS, 1, speed);
5867 return true;
5868 }
43e9d192 5869
4745e701
JG
5870 if (GET_MODE_CLASS (mode) == MODE_INT
5871 && CONST_INT_P (op1)
5872 && aarch64_uimm12_shift (INTVAL (op1)))
5873 {
5874 *cost += rtx_cost (op0, PLUS, 0, speed);
43e9d192 5875
4745e701
JG
5876 if (speed)
5877 /* ADD (immediate). */
5878 *cost += extra_cost->alu.arith;
5879 return true;
5880 }
5881
7cc2145f
JG
5882 /* Look for ADD (extended register). */
5883 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5884 {
5885 if (speed)
5886 *cost += extra_cost->alu.arith_shift;
5887
5888 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5889 (enum rtx_code) GET_CODE (op0),
5890 0, speed);
5891 return true;
5892 }
5893
4745e701
JG
5894 /* Strip any extend, leave shifts behind as we will
5895 cost them through mult_cost. */
5896 new_op0 = aarch64_strip_extend (op0);
5897
5898 if (GET_CODE (new_op0) == MULT
5899 || GET_CODE (new_op0) == ASHIFT)
5900 {
5901 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5902 speed);
5903 *cost += rtx_cost (op1, PLUS, 1, speed);
5904 return true;
5905 }
5906
5907 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5908 + rtx_cost (op1, PLUS, 1, speed));
5909
5910 if (speed)
5911 {
5912 if (GET_MODE_CLASS (mode) == MODE_INT)
5913 /* ADD. */
5914 *cost += extra_cost->alu.arith;
5915 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5916 /* FADD. */
5917 *cost += extra_cost->fp[mode == DFmode].addsub;
5918 }
5919 return true;
5920 }
43e9d192 5921
18b42b2a
KT
5922 case BSWAP:
5923 *cost = COSTS_N_INSNS (1);
5924
5925 if (speed)
5926 *cost += extra_cost->alu.rev;
5927
5928 return false;
5929
43e9d192 5930 case IOR:
f7d5cf8d
KT
5931 if (aarch_rev16_p (x))
5932 {
5933 *cost = COSTS_N_INSNS (1);
5934
5935 if (speed)
5936 *cost += extra_cost->alu.rev;
5937
5938 return true;
5939 }
5940 /* Fall through. */
43e9d192
IB
5941 case XOR:
5942 case AND:
5943 cost_logic:
5944 op0 = XEXP (x, 0);
5945 op1 = XEXP (x, 1);
5946
268c3b47
JG
5947 if (code == AND
5948 && GET_CODE (op0) == MULT
5949 && CONST_INT_P (XEXP (op0, 1))
5950 && CONST_INT_P (op1)
5951 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5952 INTVAL (op1)) != 0)
5953 {
5954 /* This is a UBFM/SBFM. */
5955 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5956 if (speed)
5957 *cost += extra_cost->alu.bfx;
5958 return true;
5959 }
5960
43e9d192
IB
5961 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5962 {
268c3b47
JG
5963 /* We possibly get the immediate for free, this is not
5964 modelled. */
43e9d192
IB
5965 if (CONST_INT_P (op1)
5966 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5967 {
268c3b47
JG
5968 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5969
5970 if (speed)
5971 *cost += extra_cost->alu.logical;
5972
5973 return true;
43e9d192
IB
5974 }
5975 else
5976 {
268c3b47
JG
5977 rtx new_op0 = op0;
5978
5979 /* Handle ORN, EON, or BIC. */
43e9d192
IB
5980 if (GET_CODE (op0) == NOT)
5981 op0 = XEXP (op0, 0);
268c3b47
JG
5982
5983 new_op0 = aarch64_strip_shift (op0);
5984
5985 /* If we had a shift on op0 then this is a logical-shift-
5986 by-register/immediate operation. Otherwise, this is just
5987 a logical operation. */
5988 if (speed)
5989 {
5990 if (new_op0 != op0)
5991 {
5992 /* Shift by immediate. */
5993 if (CONST_INT_P (XEXP (op0, 1)))
5994 *cost += extra_cost->alu.log_shift;
5995 else
5996 *cost += extra_cost->alu.log_shift_reg;
5997 }
5998 else
5999 *cost += extra_cost->alu.logical;
6000 }
6001
6002 /* In both cases we want to cost both operands. */
6003 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
6004 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
6005
6006 return true;
43e9d192 6007 }
43e9d192
IB
6008 }
6009 return false;
6010
268c3b47
JG
6011 case NOT:
6012 /* MVN. */
6013 if (speed)
6014 *cost += extra_cost->alu.logical;
6015
6016 /* The logical instruction could have the shifted register form,
6017 but the cost is the same if the shift is processed as a separate
6018 instruction, so we don't bother with it here. */
6019 return false;
6020
43e9d192 6021 case ZERO_EXTEND:
b1685e62
JG
6022
6023 op0 = XEXP (x, 0);
6024 /* If a value is written in SI mode, then zero extended to DI
6025 mode, the operation will in general be free as a write to
6026 a 'w' register implicitly zeroes the upper bits of an 'x'
6027 register. However, if this is
6028
6029 (set (reg) (zero_extend (reg)))
6030
6031 we must cost the explicit register move. */
6032 if (mode == DImode
6033 && GET_MODE (op0) == SImode
6034 && outer == SET)
6035 {
6036 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
6037
6038 if (!op_cost && speed)
6039 /* MOV. */
6040 *cost += extra_cost->alu.extend;
6041 else
6042 /* Free, the cost is that of the SI mode operation. */
6043 *cost = op_cost;
6044
6045 return true;
6046 }
6047 else if (MEM_P (XEXP (x, 0)))
43e9d192 6048 {
b1685e62
JG
6049 /* All loads can zero extend to any size for free. */
6050 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
43e9d192
IB
6051 return true;
6052 }
b1685e62
JG
6053
6054 /* UXTB/UXTH. */
6055 if (speed)
6056 *cost += extra_cost->alu.extend;
6057
43e9d192
IB
6058 return false;
6059
6060 case SIGN_EXTEND:
b1685e62 6061 if (MEM_P (XEXP (x, 0)))
43e9d192 6062 {
b1685e62
JG
6063 /* LDRSH. */
6064 if (speed)
6065 {
6066 rtx address = XEXP (XEXP (x, 0), 0);
6067 *cost += extra_cost->ldst.load_sign_extend;
6068
6069 *cost +=
6070 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6071 0, speed));
6072 }
43e9d192
IB
6073 return true;
6074 }
b1685e62
JG
6075
6076 if (speed)
6077 *cost += extra_cost->alu.extend;
43e9d192
IB
6078 return false;
6079
ba0cfa17
JG
6080 case ASHIFT:
6081 op0 = XEXP (x, 0);
6082 op1 = XEXP (x, 1);
6083
6084 if (CONST_INT_P (op1))
6085 {
 6086	  /* LSL (immediate), UBFM, UBFIZ and friends.  These are all
6087 aliases. */
6088 if (speed)
6089 *cost += extra_cost->alu.shift;
6090
6091 /* We can incorporate zero/sign extend for free. */
6092 if (GET_CODE (op0) == ZERO_EXTEND
6093 || GET_CODE (op0) == SIGN_EXTEND)
6094 op0 = XEXP (op0, 0);
6095
6096 *cost += rtx_cost (op0, ASHIFT, 0, speed);
6097 return true;
6098 }
6099 else
6100 {
6101 /* LSLV. */
6102 if (speed)
6103 *cost += extra_cost->alu.shift_reg;
6104
6105 return false; /* All arguments need to be in registers. */
6106 }
6107
43e9d192 6108 case ROTATE:
43e9d192
IB
6109 case ROTATERT:
6110 case LSHIFTRT:
43e9d192 6111 case ASHIFTRT:
ba0cfa17
JG
6112 op0 = XEXP (x, 0);
6113 op1 = XEXP (x, 1);
43e9d192 6114
ba0cfa17
JG
6115 if (CONST_INT_P (op1))
6116 {
6117 /* ASR (immediate) and friends. */
6118 if (speed)
6119 *cost += extra_cost->alu.shift;
43e9d192 6120
ba0cfa17
JG
6121 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
6122 return true;
6123 }
6124 else
6125 {
6126
6127 /* ASR (register) and friends. */
6128 if (speed)
6129 *cost += extra_cost->alu.shift_reg;
6130
6131 return false; /* All arguments need to be in registers. */
6132 }
43e9d192 6133
909734be
JG
6134 case SYMBOL_REF:
6135
6136 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6137 {
6138 /* LDR. */
6139 if (speed)
6140 *cost += extra_cost->ldst.load;
6141 }
6142 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
6143 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
6144 {
6145 /* ADRP, followed by ADD. */
6146 *cost += COSTS_N_INSNS (1);
6147 if (speed)
6148 *cost += 2 * extra_cost->alu.arith;
6149 }
6150 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
6151 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
6152 {
6153 /* ADR. */
6154 if (speed)
6155 *cost += extra_cost->alu.arith;
6156 }
6157
6158 if (flag_pic)
6159 {
6160 /* One extra load instruction, after accessing the GOT. */
6161 *cost += COSTS_N_INSNS (1);
6162 if (speed)
6163 *cost += extra_cost->ldst.load;
6164 }
43e9d192
IB
6165 return true;
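      /* Illustrative note, not part of the original source: in the small code
	 model a symbol address is typically materialised as

	     adrp x0, sym
	     add  x0, x0, :lo12:sym

	 which is why the cost above is bumped by one extra instruction plus
	 two ALU operations, while the tiny model needs only a single ADR.  */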
6166
909734be 6167 case HIGH:
43e9d192 6168 case LO_SUM:
909734be
JG
6169 /* ADRP/ADD (immediate). */
6170 if (speed)
6171 *cost += extra_cost->alu.arith;
43e9d192
IB
6172 return true;
6173
6174 case ZERO_EXTRACT:
6175 case SIGN_EXTRACT:
7cc2145f
JG
6176 /* UBFX/SBFX. */
6177 if (speed)
6178 *cost += extra_cost->alu.bfx;
6179
6180 /* We can trust that the immediates used will be correct (there
6181 are no by-register forms), so we need only cost op0. */
6182 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
43e9d192
IB
6183 return true;
6184
6185 case MULT:
4745e701
JG
6186 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
6187 /* aarch64_rtx_mult_cost always handles recursion to its
6188 operands. */
6189 return true;
43e9d192
IB
6190
6191 case MOD:
6192 case UMOD:
43e9d192
IB
6193 if (speed)
6194 {
6195 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
73250c4c
KT
6196 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
6197 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
43e9d192 6198 else if (GET_MODE (x) == DFmode)
73250c4c
KT
6199 *cost += (extra_cost->fp[1].mult
6200 + extra_cost->fp[1].div);
43e9d192 6201 else if (GET_MODE (x) == SFmode)
73250c4c
KT
6202 *cost += (extra_cost->fp[0].mult
6203 + extra_cost->fp[0].div);
43e9d192
IB
6204 }
6205 return false; /* All arguments need to be in registers. */
6206
6207 case DIV:
6208 case UDIV:
4105fe38 6209 case SQRT:
43e9d192
IB
6210 if (speed)
6211 {
4105fe38
JG
6212 if (GET_MODE_CLASS (mode) == MODE_INT)
6213 /* There is no integer SQRT, so only DIV and UDIV can get
6214 here. */
6215 *cost += extra_cost->mult[mode == DImode].idiv;
6216 else
6217 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
6218 }
6219 return false; /* All arguments need to be in registers. */
6220
a8eecd00 6221 case IF_THEN_ELSE:
2d5ffe46
AP
6222 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
6223 XEXP (x, 2), cost, speed);
a8eecd00
JG
6224
6225 case EQ:
6226 case NE:
6227 case GT:
6228 case GTU:
6229 case LT:
6230 case LTU:
6231 case GE:
6232 case GEU:
6233 case LE:
6234 case LEU:
6235
6236 return false; /* All arguments must be in registers. */
6237
b292109f
JG
6238 case FMA:
6239 op0 = XEXP (x, 0);
6240 op1 = XEXP (x, 1);
6241 op2 = XEXP (x, 2);
6242
6243 if (speed)
6244 *cost += extra_cost->fp[mode == DFmode].fma;
6245
6246 /* FMSUB, FNMADD, and FNMSUB are free. */
6247 if (GET_CODE (op0) == NEG)
6248 op0 = XEXP (op0, 0);
6249
6250 if (GET_CODE (op2) == NEG)
6251 op2 = XEXP (op2, 0);
6252
6253 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
6254 and the by-element operand as operand 0. */
6255 if (GET_CODE (op1) == NEG)
6256 op1 = XEXP (op1, 0);
6257
6258 /* Catch vector-by-element operations. The by-element operand can
6259 either be (vec_duplicate (vec_select (x))) or just
6260 (vec_select (x)), depending on whether we are multiplying by
6261 a vector or a scalar.
6262
 6263	 Canonicalization is not very good in these cases: FMA4 will put the
 6264	 by-element operand as operand 0, while FNMA4 will have it as operand 1.  */
6265 if (GET_CODE (op0) == VEC_DUPLICATE)
6266 op0 = XEXP (op0, 0);
6267 else if (GET_CODE (op1) == VEC_DUPLICATE)
6268 op1 = XEXP (op1, 0);
6269
6270 if (GET_CODE (op0) == VEC_SELECT)
6271 op0 = XEXP (op0, 0);
6272 else if (GET_CODE (op1) == VEC_SELECT)
6273 op1 = XEXP (op1, 0);
6274
6275 /* If the remaining parameters are not registers,
6276 get the cost to put them into registers. */
6277 *cost += rtx_cost (op0, FMA, 0, speed);
6278 *cost += rtx_cost (op1, FMA, 1, speed);
6279 *cost += rtx_cost (op2, FMA, 2, speed);
6280 return true;
6281
6282 case FLOAT_EXTEND:
6283 if (speed)
6284 *cost += extra_cost->fp[mode == DFmode].widen;
6285 return false;
6286
6287 case FLOAT_TRUNCATE:
6288 if (speed)
6289 *cost += extra_cost->fp[mode == DFmode].narrow;
6290 return false;
6291
61263118
KT
6292 case FIX:
6293 case UNSIGNED_FIX:
6294 x = XEXP (x, 0);
6295 /* Strip the rounding part. They will all be implemented
6296 by the fcvt* family of instructions anyway. */
6297 if (GET_CODE (x) == UNSPEC)
6298 {
6299 unsigned int uns_code = XINT (x, 1);
6300
6301 if (uns_code == UNSPEC_FRINTA
6302 || uns_code == UNSPEC_FRINTM
6303 || uns_code == UNSPEC_FRINTN
6304 || uns_code == UNSPEC_FRINTP
6305 || uns_code == UNSPEC_FRINTZ)
6306 x = XVECEXP (x, 0, 0);
6307 }
6308
6309 if (speed)
6310 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
6311
6312 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
6313 return true;
6314
b292109f
JG
6315 case ABS:
6316 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6317 {
6318 /* FABS and FNEG are analogous. */
6319 if (speed)
6320 *cost += extra_cost->fp[mode == DFmode].neg;
6321 }
6322 else
6323 {
6324 /* Integer ABS will either be split to
6325 two arithmetic instructions, or will be an ABS
6326 (scalar), which we don't model. */
6327 *cost = COSTS_N_INSNS (2);
6328 if (speed)
6329 *cost += 2 * extra_cost->alu.arith;
6330 }
6331 return false;
6332
6333 case SMAX:
6334 case SMIN:
6335 if (speed)
6336 {
6337 /* FMAXNM/FMINNM/FMAX/FMIN.
6338 TODO: This may not be accurate for all implementations, but
6339 we do not model this in the cost tables. */
6340 *cost += extra_cost->fp[mode == DFmode].addsub;
6341 }
6342 return false;
6343
61263118
KT
6344 case UNSPEC:
6345 /* The floating point round to integer frint* instructions. */
6346 if (aarch64_frint_unspec_p (XINT (x, 1)))
6347 {
6348 if (speed)
6349 *cost += extra_cost->fp[mode == DFmode].roundint;
6350
6351 return false;
6352 }
781aeb73
KT
6353
6354 if (XINT (x, 1) == UNSPEC_RBIT)
6355 {
6356 if (speed)
6357 *cost += extra_cost->alu.rev;
6358
6359 return false;
6360 }
61263118
KT
6361 break;
6362
fb620c4a
JG
6363 case TRUNCATE:
6364
6365 /* Decompose <su>muldi3_highpart. */
6366 if (/* (truncate:DI */
6367 mode == DImode
6368 /* (lshiftrt:TI */
6369 && GET_MODE (XEXP (x, 0)) == TImode
6370 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6371 /* (mult:TI */
6372 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6373 /* (ANY_EXTEND:TI (reg:DI))
6374 (ANY_EXTEND:TI (reg:DI))) */
6375 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6376 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
6377 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
6378 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
6379 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
6380 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
6381 /* (const_int 64) */
6382 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6383 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
6384 {
6385 /* UMULH/SMULH. */
6386 if (speed)
6387 *cost += extra_cost->mult[mode == DImode].extend;
6388 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
6389 MULT, 0, speed);
6390 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
6391 MULT, 1, speed);
6392 return true;
6393 }
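      /* Illustrative note, not part of the original source: the test above
	 matches RTL of the shape

	     (truncate:DI
	       (lshiftrt:TI
		 (mult:TI (sign_extend:TI (reg:DI x))
			  (sign_extend:TI (reg:DI y)))
		 (const_int 64)))

	 (or the zero_extend variant), i.e. the high half of a 64x64->128
	 multiply, which maps to a single SMULH or UMULH instruction.  */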
6394
6395 /* Fall through. */
43e9d192 6396 default:
61263118 6397 break;
43e9d192 6398 }
61263118
KT
6399
6400 if (dump_file && (dump_flags & TDF_DETAILS))
6401 fprintf (dump_file,
6402 "\nFailed to cost RTX. Assuming default cost.\n");
6403
6404 return true;
43e9d192
IB
6405}
6406
0ee859b5
JG
6407/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
6408 calculated for X. This cost is stored in *COST. Returns true
6409 if the total cost of X was calculated. */
6410static bool
6411aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
6412 int param, int *cost, bool speed)
6413{
6414 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
6415
6416 if (dump_file && (dump_flags & TDF_DETAILS))
6417 {
6418 print_rtl_single (dump_file, x);
6419 fprintf (dump_file, "\n%s cost: %d (%s)\n",
6420 speed ? "Hot" : "Cold",
6421 *cost, result ? "final" : "partial");
6422 }
6423
6424 return result;
6425}
6426
43e9d192 6427static int
ef4bddc2 6428aarch64_register_move_cost (machine_mode mode,
8a3a7e67 6429 reg_class_t from_i, reg_class_t to_i)
43e9d192 6430{
8a3a7e67
RH
6431 enum reg_class from = (enum reg_class) from_i;
6432 enum reg_class to = (enum reg_class) to_i;
43e9d192
IB
6433 const struct cpu_regmove_cost *regmove_cost
6434 = aarch64_tune_params->regmove_cost;
6435
3be07662
WD
6436 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
6437 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
6438 to = GENERAL_REGS;
6439
6440 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
6441 from = GENERAL_REGS;
6442
6ee70f81
AP
6443 /* Moving between GPR and stack cost is the same as GP2GP. */
6444 if ((from == GENERAL_REGS && to == STACK_REG)
6445 || (to == GENERAL_REGS && from == STACK_REG))
6446 return regmove_cost->GP2GP;
6447
6448 /* To/From the stack register, we move via the gprs. */
6449 if (to == STACK_REG || from == STACK_REG)
6450 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
6451 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
6452
8919453c
WD
6453 if (GET_MODE_SIZE (mode) == 16)
6454 {
6455 /* 128-bit operations on general registers require 2 instructions. */
6456 if (from == GENERAL_REGS && to == GENERAL_REGS)
6457 return regmove_cost->GP2GP * 2;
6458 else if (from == GENERAL_REGS)
6459 return regmove_cost->GP2FP * 2;
6460 else if (to == GENERAL_REGS)
6461 return regmove_cost->FP2GP * 2;
6462
6463 /* When AdvSIMD instructions are disabled it is not possible to move
6464 a 128-bit value directly between Q registers. This is handled in
6465 secondary reload. A general register is used as a scratch to move
6466 the upper DI value and the lower DI value is moved directly,
6467 hence the cost is the sum of three moves. */
6468 if (! TARGET_SIMD)
6469 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6470
6471 return regmove_cost->FP2FP;
6472 }
6473
43e9d192
IB
6474 if (from == GENERAL_REGS && to == GENERAL_REGS)
6475 return regmove_cost->GP2GP;
6476 else if (from == GENERAL_REGS)
6477 return regmove_cost->GP2FP;
6478 else if (to == GENERAL_REGS)
6479 return regmove_cost->FP2GP;
6480
43e9d192
IB
6481 return regmove_cost->FP2FP;
6482}
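/* Worked example, illustrative only and not part of the original source: for
   a 16-byte (e.g. TImode) move between two general registers the function
   above returns 2 * GP2GP, since two X-register moves are needed; with
   AdvSIMD disabled, a 128-bit FP-to-FP move is costed as
   GP2FP + FP2GP + FP2FP, because secondary reload bounces the upper
   doubleword through a general-purpose scratch register.  */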
6483
6484static int
ef4bddc2 6485aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
6486 reg_class_t rclass ATTRIBUTE_UNUSED,
6487 bool in ATTRIBUTE_UNUSED)
6488{
6489 return aarch64_tune_params->memmov_cost;
6490}
6491
d126a4ae
AP
6492/* Return the number of instructions that can be issued per cycle. */
6493static int
6494aarch64_sched_issue_rate (void)
6495{
6496 return aarch64_tune_params->issue_rate;
6497}
6498
d03f7e44
MK
6499static int
6500aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
6501{
6502 int issue_rate = aarch64_sched_issue_rate ();
6503
6504 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
6505}
6506
8990e73a
TB
6507/* Vectorizer cost model target hooks. */
6508
6509/* Implement targetm.vectorize.builtin_vectorization_cost. */
6510static int
6511aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6512 tree vectype,
6513 int misalign ATTRIBUTE_UNUSED)
6514{
6515 unsigned elements;
6516
6517 switch (type_of_cost)
6518 {
6519 case scalar_stmt:
6520 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
6521
6522 case scalar_load:
6523 return aarch64_tune_params->vec_costs->scalar_load_cost;
6524
6525 case scalar_store:
6526 return aarch64_tune_params->vec_costs->scalar_store_cost;
6527
6528 case vector_stmt:
6529 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6530
6531 case vector_load:
6532 return aarch64_tune_params->vec_costs->vec_align_load_cost;
6533
6534 case vector_store:
6535 return aarch64_tune_params->vec_costs->vec_store_cost;
6536
6537 case vec_to_scalar:
6538 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6539
6540 case scalar_to_vec:
6541 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6542
6543 case unaligned_load:
6544 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6545
6546 case unaligned_store:
6547 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6548
6549 case cond_branch_taken:
6550 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6551
6552 case cond_branch_not_taken:
6553 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6554
6555 case vec_perm:
6556 case vec_promote_demote:
6557 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6558
6559 case vec_construct:
6560 elements = TYPE_VECTOR_SUBPARTS (vectype);
6561 return elements / 2 + 1;
6562
6563 default:
6564 gcc_unreachable ();
6565 }
6566}
6567
6568/* Implement targetm.vectorize.add_stmt_cost. */
6569static unsigned
6570aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6571 struct _stmt_vec_info *stmt_info, int misalign,
6572 enum vect_cost_model_location where)
6573{
6574 unsigned *cost = (unsigned *) data;
6575 unsigned retval = 0;
6576
6577 if (flag_vect_cost_model)
6578 {
6579 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6580 int stmt_cost =
6581 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6582
6583 /* Statements in an inner loop relative to the loop being
6584 vectorized are weighted more heavily. The value here is
6585 a function (linear for now) of the loop nest level. */
6586 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6587 {
6588 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6589 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
6590 unsigned nest_level = loop_depth (loop);
6591
6592 count *= nest_level;
6593 }
6594
6595 retval = (unsigned) (count * stmt_cost);
6596 cost[where] += retval;
6597 }
6598
6599 return retval;
6600}
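/* Worked example, illustrative only and not part of the original source: if a
   statement with a base cost of 1 is reported by stmt_in_inner_loop_p to sit
   in an inner loop, and loop_depth of the loop being vectorized is 2, then
   COUNT is doubled and cost[vect_body] grows by count * 2 rather than
   count * 1.  */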
6601
43e9d192
IB
6602static void initialize_aarch64_code_model (void);
6603
6604/* Parse the architecture extension string. */
6605
6606static void
6607aarch64_parse_extension (char *str)
6608{
6609 /* The extension string is parsed left to right. */
6610 const struct aarch64_option_extension *opt = NULL;
6611
6612 /* Flag to say whether we are adding or removing an extension. */
6613 int adding_ext = -1;
6614
6615 while (str != NULL && *str != 0)
6616 {
6617 char *ext;
6618 size_t len;
6619
6620 str++;
6621 ext = strchr (str, '+');
6622
6623 if (ext != NULL)
6624 len = ext - str;
6625 else
6626 len = strlen (str);
6627
6628 if (len >= 2 && strncmp (str, "no", 2) == 0)
6629 {
6630 adding_ext = 0;
6631 len -= 2;
6632 str += 2;
6633 }
6634 else if (len > 0)
6635 adding_ext = 1;
6636
6637 if (len == 0)
6638 {
217d0904
KT
6639 error ("missing feature modifier after %qs", adding_ext ? "+"
6640 : "+no");
43e9d192
IB
6641 return;
6642 }
6643
6644 /* Scan over the extensions table trying to find an exact match. */
6645 for (opt = all_extensions; opt->name != NULL; opt++)
6646 {
6647 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6648 {
6649 /* Add or remove the extension. */
6650 if (adding_ext)
6651 aarch64_isa_flags |= opt->flags_on;
6652 else
6653 aarch64_isa_flags &= ~(opt->flags_off);
6654 break;
6655 }
6656 }
6657
6658 if (opt->name == NULL)
6659 {
6660 /* Extension not found in list. */
6661 error ("unknown feature modifier %qs", str);
6662 return;
6663 }
6664
6665 str = ext;
6666 };
6667
6668 return;
6669}
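/* Illustrative example, not part of the original source: for an option such
   as "-march=armv8-a+crc+nocrypto" this function receives the string
   "+crc+nocrypto" and handles one "+<ext>" or "+no<ext>" chunk per
   iteration:

     +crc      -> adding_ext = 1, aarch64_isa_flags |= flags_on of "crc"
     +nocrypto -> adding_ext = 0, aarch64_isa_flags &= ~flags_off of "crypto"

   Any modifier that does not match an entry in all_extensions is rejected
   with the "unknown feature modifier" error above.  */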
6670
6671/* Parse the ARCH string. */
6672
6673static void
6674aarch64_parse_arch (void)
6675{
6676 char *ext;
6677 const struct processor *arch;
6678 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6679 size_t len;
6680
6681 strcpy (str, aarch64_arch_string);
6682
6683 ext = strchr (str, '+');
6684
6685 if (ext != NULL)
6686 len = ext - str;
6687 else
6688 len = strlen (str);
6689
6690 if (len == 0)
6691 {
6692 error ("missing arch name in -march=%qs", str);
6693 return;
6694 }
6695
6696 /* Loop through the list of supported ARCHs to find a match. */
6697 for (arch = all_architectures; arch->name != NULL; arch++)
6698 {
6699 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6700 {
6701 selected_arch = arch;
6702 aarch64_isa_flags = selected_arch->flags;
ffee7aa9
JG
6703
6704 if (!selected_cpu)
6705 selected_cpu = &all_cores[selected_arch->core];
43e9d192
IB
6706
6707 if (ext != NULL)
6708 {
6709 /* ARCH string contains at least one extension. */
6710 aarch64_parse_extension (ext);
6711 }
6712
ffee7aa9
JG
6713 if (strcmp (selected_arch->arch, selected_cpu->arch))
6714 {
6715 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6716 selected_cpu->name, selected_arch->name);
6717 }
6718
43e9d192
IB
6719 return;
6720 }
6721 }
6722
6723 /* ARCH name not found in list. */
6724 error ("unknown value %qs for -march", str);
6725 return;
6726}
6727
6728/* Parse the CPU string. */
6729
6730static void
6731aarch64_parse_cpu (void)
6732{
6733 char *ext;
6734 const struct processor *cpu;
6735 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6736 size_t len;
6737
6738 strcpy (str, aarch64_cpu_string);
6739
6740 ext = strchr (str, '+');
6741
6742 if (ext != NULL)
6743 len = ext - str;
6744 else
6745 len = strlen (str);
6746
6747 if (len == 0)
6748 {
6749 error ("missing cpu name in -mcpu=%qs", str);
6750 return;
6751 }
6752
6753 /* Loop through the list of supported CPUs to find a match. */
6754 for (cpu = all_cores; cpu->name != NULL; cpu++)
6755 {
6756 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6757 {
6758 selected_cpu = cpu;
6759 aarch64_isa_flags = selected_cpu->flags;
6760
6761 if (ext != NULL)
6762 {
6763 /* CPU string contains at least one extension. */
6764 aarch64_parse_extension (ext);
6765 }
6766
6767 return;
6768 }
6769 }
6770
6771 /* CPU name not found in list. */
6772 error ("unknown value %qs for -mcpu", str);
6773 return;
6774}
6775
6776/* Parse the TUNE string. */
6777
6778static void
6779aarch64_parse_tune (void)
6780{
6781 const struct processor *cpu;
6782 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6783 strcpy (str, aarch64_tune_string);
6784
6785 /* Loop through the list of supported CPUs to find a match. */
6786 for (cpu = all_cores; cpu->name != NULL; cpu++)
6787 {
6788 if (strcmp (cpu->name, str) == 0)
6789 {
6790 selected_tune = cpu;
6791 return;
6792 }
6793 }
6794
6795 /* CPU name not found in list. */
6796 error ("unknown value %qs for -mtune", str);
6797 return;
6798}
6799
6800
6801/* Implement TARGET_OPTION_OVERRIDE. */
6802
6803static void
6804aarch64_override_options (void)
6805{
ffee7aa9
JG
6806 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6807 If either of -march or -mtune is given, they override their
6808 respective component of -mcpu.
43e9d192 6809
ffee7aa9
JG
 6810	     So, first parse AARCH64_CPU_STRING, then the others; be careful
 6811	     with -march because, if -mcpu is not present on the command line,
 6812	     -march must set a sensible default CPU.  */
6813 if (aarch64_cpu_string)
43e9d192 6814 {
ffee7aa9 6815 aarch64_parse_cpu ();
43e9d192
IB
6816 }
6817
ffee7aa9 6818 if (aarch64_arch_string)
43e9d192 6819 {
ffee7aa9 6820 aarch64_parse_arch ();
43e9d192
IB
6821 }
6822
6823 if (aarch64_tune_string)
6824 {
6825 aarch64_parse_tune ();
6826 }
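  /* Illustrative example, not part of the original source: given
     "-mcpu=cortex-a57 -mtune=cortex-a53", aarch64_parse_cpu first selects
     cortex-a57 and its ISA flags, and aarch64_parse_tune then overrides only
     the tuning target, so code is generated for the -mcpu architecture but
     scheduled and tuned for cortex-a53.  */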
6827
63892fa2
KV
6828#ifndef HAVE_AS_MABI_OPTION
6829 /* The compiler may have been configured with 2.23.* binutils, which does
6830 not have support for ILP32. */
6831 if (TARGET_ILP32)
6832 error ("Assembler does not support -mabi=ilp32");
6833#endif
6834
43e9d192
IB
6835 initialize_aarch64_code_model ();
6836
6837 aarch64_build_bitmask_table ();
6838
6839 /* This target defaults to strict volatile bitfields. */
6840 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6841 flag_strict_volatile_bitfields = 1;
6842
6843 /* If the user did not specify a processor, choose the default
6844 one for them. This will be the CPU set during configuration using
a3cd0246 6845 --with-cpu, otherwise it is "generic". */
43e9d192
IB
6846 if (!selected_cpu)
6847 {
6848 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6849 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6850 }
6851
6852 gcc_assert (selected_cpu);
6853
43e9d192 6854 if (!selected_tune)
3edaf26d 6855 selected_tune = selected_cpu;
43e9d192
IB
6856
6857 aarch64_tune_flags = selected_tune->flags;
6858 aarch64_tune = selected_tune->core;
6859 aarch64_tune_params = selected_tune->tune;
0c6caaf8 6860 aarch64_architecture_version = selected_cpu->architecture_version;
43e9d192 6861
5e396da6
KT
6862 if (aarch64_fix_a53_err835769 == 2)
6863 {
6864#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
6865 aarch64_fix_a53_err835769 = 1;
6866#else
6867 aarch64_fix_a53_err835769 = 0;
6868#endif
6869 }
6870
b4917c98
AP
6871 /* If not opzimizing for size, set the default
6872 alignment to what the target wants */
6873 if (!optimize_size)
6874 {
6875 if (align_loops <= 0)
0b82a5a2 6876 align_loops = aarch64_tune_params->loop_align;
b4917c98 6877 if (align_jumps <= 0)
0b82a5a2 6878 align_jumps = aarch64_tune_params->jump_align;
b4917c98 6879 if (align_functions <= 0)
0b82a5a2 6880 align_functions = aarch64_tune_params->function_align;
b4917c98
AP
6881 }
6882
43e9d192
IB
6883 aarch64_override_options_after_change ();
6884}
6885
6886/* Implement targetm.override_options_after_change. */
6887
6888static void
6889aarch64_override_options_after_change (void)
6890{
0b7f8166
MS
6891 if (flag_omit_frame_pointer)
6892 flag_omit_leaf_frame_pointer = false;
6893 else if (flag_omit_leaf_frame_pointer)
6894 flag_omit_frame_pointer = true;
43e9d192
IB
6895}
6896
6897static struct machine_function *
6898aarch64_init_machine_status (void)
6899{
6900 struct machine_function *machine;
766090c2 6901 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
6902 return machine;
6903}
6904
6905void
6906aarch64_init_expanders (void)
6907{
6908 init_machine_status = aarch64_init_machine_status;
6909}
6910
6911/* A checking mechanism for the implementation of the various code models. */
6912static void
6913initialize_aarch64_code_model (void)
6914{
6915 if (flag_pic)
6916 {
6917 switch (aarch64_cmodel_var)
6918 {
6919 case AARCH64_CMODEL_TINY:
6920 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6921 break;
6922 case AARCH64_CMODEL_SMALL:
6923 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6924 break;
6925 case AARCH64_CMODEL_LARGE:
6926 sorry ("code model %qs with -f%s", "large",
6927 flag_pic > 1 ? "PIC" : "pic");
6928 default:
6929 gcc_unreachable ();
6930 }
6931 }
6932 else
6933 aarch64_cmodel = aarch64_cmodel_var;
6934}
6935
6936/* Return true if SYMBOL_REF X binds locally. */
6937
6938static bool
6939aarch64_symbol_binds_local_p (const_rtx x)
6940{
6941 return (SYMBOL_REF_DECL (x)
6942 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6943 : SYMBOL_REF_LOCAL_P (x));
6944}
6945
6946/* Return true if SYMBOL_REF X is thread local */
6947static bool
6948aarch64_tls_symbol_p (rtx x)
6949{
6950 if (! TARGET_HAVE_TLS)
6951 return false;
6952
6953 if (GET_CODE (x) != SYMBOL_REF)
6954 return false;
6955
6956 return SYMBOL_REF_TLS_MODEL (x) != 0;
6957}
6958
6959/* Classify a TLS symbol into one of the TLS kinds. */
6960enum aarch64_symbol_type
6961aarch64_classify_tls_symbol (rtx x)
6962{
6963 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6964
6965 switch (tls_kind)
6966 {
6967 case TLS_MODEL_GLOBAL_DYNAMIC:
6968 case TLS_MODEL_LOCAL_DYNAMIC:
6969 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6970
6971 case TLS_MODEL_INITIAL_EXEC:
6972 return SYMBOL_SMALL_GOTTPREL;
6973
6974 case TLS_MODEL_LOCAL_EXEC:
6975 return SYMBOL_SMALL_TPREL;
6976
6977 case TLS_MODEL_EMULATED:
6978 case TLS_MODEL_NONE:
6979 return SYMBOL_FORCE_TO_MEM;
6980
6981 default:
6982 gcc_unreachable ();
6983 }
6984}
6985
6986/* Return the method that should be used to access SYMBOL_REF or
6987 LABEL_REF X in context CONTEXT. */
17f4d4bf 6988
43e9d192 6989enum aarch64_symbol_type
f8b756b7 6990aarch64_classify_symbol (rtx x, rtx offset,
43e9d192
IB
6991 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6992{
6993 if (GET_CODE (x) == LABEL_REF)
6994 {
6995 switch (aarch64_cmodel)
6996 {
6997 case AARCH64_CMODEL_LARGE:
6998 return SYMBOL_FORCE_TO_MEM;
6999
7000 case AARCH64_CMODEL_TINY_PIC:
7001 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
7002 return SYMBOL_TINY_ABSOLUTE;
7003
43e9d192
IB
7004 case AARCH64_CMODEL_SMALL_PIC:
7005 case AARCH64_CMODEL_SMALL:
7006 return SYMBOL_SMALL_ABSOLUTE;
7007
7008 default:
7009 gcc_unreachable ();
7010 }
7011 }
7012
17f4d4bf 7013 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 7014 {
4a985a37
MS
7015 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
7016 return SYMBOL_FORCE_TO_MEM;
43e9d192
IB
7017
7018 if (aarch64_tls_symbol_p (x))
7019 return aarch64_classify_tls_symbol (x);
7020
17f4d4bf
CSS
7021 switch (aarch64_cmodel)
7022 {
7023 case AARCH64_CMODEL_TINY:
f8b756b7
TB
 7024	  /* When we retrieve a symbol + offset address, we have to make sure
 7025	     the offset does not cause overflow of the final address.  But
 7026	     we have no way of knowing the address of the symbol at compile time,
 7027	     so we can't accurately say whether the distance between the PC and
 7028	     symbol + offset is outside the addressable range of +/-1M in the
 7029	     TINY code model.  So we rely on images not being greater than
 7030	     1M, cap the offset at 1M, and anything beyond 1M will have to
 7031	     be loaded using an alternative mechanism.  */
7032 if (SYMBOL_REF_WEAK (x)
7033 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
a5350ddc
CSS
7034 return SYMBOL_FORCE_TO_MEM;
7035 return SYMBOL_TINY_ABSOLUTE;
7036
17f4d4bf 7037 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
7038 /* Same reasoning as the tiny code model, but the offset cap here is
7039 4G. */
7040 if (SYMBOL_REF_WEAK (x)
3ff5d1f0
TB
7041 || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
7042 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
7043 return SYMBOL_FORCE_TO_MEM;
7044 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 7045
17f4d4bf 7046 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 7047 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 7048 return SYMBOL_TINY_GOT;
38e6c9a6
MS
7049 return SYMBOL_TINY_ABSOLUTE;
7050
17f4d4bf
CSS
7051 case AARCH64_CMODEL_SMALL_PIC:
7052 if (!aarch64_symbol_binds_local_p (x))
7053 return SYMBOL_SMALL_GOT;
7054 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 7055
17f4d4bf
CSS
7056 default:
7057 gcc_unreachable ();
7058 }
43e9d192 7059 }
17f4d4bf 7060
43e9d192
IB
7061 /* By default push everything into the constant pool. */
7062 return SYMBOL_FORCE_TO_MEM;
7063}
7064
43e9d192
IB
7065bool
7066aarch64_constant_address_p (rtx x)
7067{
7068 return (CONSTANT_P (x) && memory_address_p (DImode, x));
7069}
7070
7071bool
7072aarch64_legitimate_pic_operand_p (rtx x)
7073{
7074 if (GET_CODE (x) == SYMBOL_REF
7075 || (GET_CODE (x) == CONST
7076 && GET_CODE (XEXP (x, 0)) == PLUS
7077 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7078 return false;
7079
7080 return true;
7081}
7082
3520f7cc
JG
7083/* Return true if X holds either a quarter-precision or
7084 floating-point +0.0 constant. */
7085static bool
ef4bddc2 7086aarch64_valid_floating_const (machine_mode mode, rtx x)
3520f7cc
JG
7087{
7088 if (!CONST_DOUBLE_P (x))
7089 return false;
7090
 7091  /* TODO: We could handle moving 0.0 to a TFmode register,
 7092     but first we would like to refactor the movtf_aarch64
 7093     pattern to be more amenable to splitting moves properly and
 7094     to gating correctly on TARGET_SIMD.  For now, reject all
 7095     constants that are not destined for SFmode or DFmode registers.  */
7096 if (!(mode == SFmode || mode == DFmode))
7097 return false;
7098
7099 if (aarch64_float_const_zero_rtx_p (x))
7100 return true;
7101 return aarch64_float_const_representable_p (x);
7102}
7103
43e9d192 7104static bool
ef4bddc2 7105aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192
IB
7106{
7107 /* Do not allow vector struct mode constants. We could support
7108 0 and -1 easily, but they need support in aarch64-simd.md. */
7109 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
7110 return false;
7111
7112 /* This could probably go away because
7113 we now decompose CONST_INTs according to expand_mov_immediate. */
7114 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 7115 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
7116 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
7117 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
7118
7119 if (GET_CODE (x) == HIGH
7120 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7121 return true;
7122
7123 return aarch64_constant_address_p (x);
7124}
7125
a5bc806c 7126rtx
43e9d192
IB
7127aarch64_load_tp (rtx target)
7128{
7129 if (!target
7130 || GET_MODE (target) != Pmode
7131 || !register_operand (target, Pmode))
7132 target = gen_reg_rtx (Pmode);
7133
7134 /* Can return in any reg. */
7135 emit_insn (gen_aarch64_load_tp_hard (target));
7136 return target;
7137}
7138
43e9d192
IB
7139/* On AAPCS systems, this is the "struct __va_list". */
7140static GTY(()) tree va_list_type;
7141
7142/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
7143 Return the type to use as __builtin_va_list.
7144
7145 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
7146
7147 struct __va_list
7148 {
7149 void *__stack;
7150 void *__gr_top;
7151 void *__vr_top;
7152 int __gr_offs;
7153 int __vr_offs;
7154 }; */
7155
7156static tree
7157aarch64_build_builtin_va_list (void)
7158{
7159 tree va_list_name;
7160 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7161
7162 /* Create the type. */
7163 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
7164 /* Give it the required name. */
7165 va_list_name = build_decl (BUILTINS_LOCATION,
7166 TYPE_DECL,
7167 get_identifier ("__va_list"),
7168 va_list_type);
7169 DECL_ARTIFICIAL (va_list_name) = 1;
7170 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 7171 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
7172
7173 /* Create the fields. */
7174 f_stack = build_decl (BUILTINS_LOCATION,
7175 FIELD_DECL, get_identifier ("__stack"),
7176 ptr_type_node);
7177 f_grtop = build_decl (BUILTINS_LOCATION,
7178 FIELD_DECL, get_identifier ("__gr_top"),
7179 ptr_type_node);
7180 f_vrtop = build_decl (BUILTINS_LOCATION,
7181 FIELD_DECL, get_identifier ("__vr_top"),
7182 ptr_type_node);
7183 f_groff = build_decl (BUILTINS_LOCATION,
7184 FIELD_DECL, get_identifier ("__gr_offs"),
7185 integer_type_node);
7186 f_vroff = build_decl (BUILTINS_LOCATION,
7187 FIELD_DECL, get_identifier ("__vr_offs"),
7188 integer_type_node);
7189
7190 DECL_ARTIFICIAL (f_stack) = 1;
7191 DECL_ARTIFICIAL (f_grtop) = 1;
7192 DECL_ARTIFICIAL (f_vrtop) = 1;
7193 DECL_ARTIFICIAL (f_groff) = 1;
7194 DECL_ARTIFICIAL (f_vroff) = 1;
7195
7196 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
7197 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
7198 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
7199 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
7200 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
7201
7202 TYPE_FIELDS (va_list_type) = f_stack;
7203 DECL_CHAIN (f_stack) = f_grtop;
7204 DECL_CHAIN (f_grtop) = f_vrtop;
7205 DECL_CHAIN (f_vrtop) = f_groff;
7206 DECL_CHAIN (f_groff) = f_vroff;
7207
7208 /* Compute its layout. */
7209 layout_type (va_list_type);
7210
7211 return va_list_type;
7212}
7213
7214/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
7215static void
7216aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
7217{
7218 const CUMULATIVE_ARGS *cum;
7219 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7220 tree stack, grtop, vrtop, groff, vroff;
7221 tree t;
7222 int gr_save_area_size;
7223 int vr_save_area_size;
7224 int vr_offset;
7225
7226 cum = &crtl->args.info;
7227 gr_save_area_size
7228 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
7229 vr_save_area_size
7230 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
7231
7232 if (TARGET_GENERAL_REGS_ONLY)
7233 {
7234 if (cum->aapcs_nvrn > 0)
7235 sorry ("%qs and floating point or vector arguments",
7236 "-mgeneral-regs-only");
7237 vr_save_area_size = 0;
7238 }
7239
7240 f_stack = TYPE_FIELDS (va_list_type_node);
7241 f_grtop = DECL_CHAIN (f_stack);
7242 f_vrtop = DECL_CHAIN (f_grtop);
7243 f_groff = DECL_CHAIN (f_vrtop);
7244 f_vroff = DECL_CHAIN (f_groff);
7245
7246 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
7247 NULL_TREE);
7248 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
7249 NULL_TREE);
7250 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
7251 NULL_TREE);
7252 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
7253 NULL_TREE);
7254 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
7255 NULL_TREE);
7256
7257 /* Emit code to initialize STACK, which points to the next varargs stack
7258 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
7259 by named arguments. STACK is 8-byte aligned. */
7260 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
7261 if (cum->aapcs_stack_size > 0)
7262 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
7263 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
7264 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7265
7266 /* Emit code to initialize GRTOP, the top of the GR save area.
7267 virtual_incoming_args_rtx should have been 16 byte aligned. */
7268 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
7269 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
7270 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7271
7272 /* Emit code to initialize VRTOP, the top of the VR save area.
7273 This address is gr_save_area_bytes below GRTOP, rounded
7274 down to the next 16-byte boundary. */
7275 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
7276 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
7277 STACK_BOUNDARY / BITS_PER_UNIT);
7278
7279 if (vr_offset)
7280 t = fold_build_pointer_plus_hwi (t, -vr_offset);
7281 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
7282 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7283
7284 /* Emit code to initialize GROFF, the offset from GRTOP of the
7285 next GPR argument. */
7286 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
7287 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
7288 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7289
7290 /* Likewise emit code to initialize VROFF, the offset from FTOP
7291 of the next VR argument. */
7292 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
7293 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
7294 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7295}
7296
7297/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
7298
7299static tree
7300aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7301 gimple_seq *post_p ATTRIBUTE_UNUSED)
7302{
7303 tree addr;
7304 bool indirect_p;
7305 bool is_ha; /* is HFA or HVA. */
7306 bool dw_align; /* double-word align. */
ef4bddc2 7307 machine_mode ag_mode = VOIDmode;
43e9d192 7308 int nregs;
ef4bddc2 7309 machine_mode mode;
43e9d192
IB
7310
7311 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7312 tree stack, f_top, f_off, off, arg, roundup, on_stack;
7313 HOST_WIDE_INT size, rsize, adjust, align;
7314 tree t, u, cond1, cond2;
7315
7316 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7317 if (indirect_p)
7318 type = build_pointer_type (type);
7319
7320 mode = TYPE_MODE (type);
7321
7322 f_stack = TYPE_FIELDS (va_list_type_node);
7323 f_grtop = DECL_CHAIN (f_stack);
7324 f_vrtop = DECL_CHAIN (f_grtop);
7325 f_groff = DECL_CHAIN (f_vrtop);
7326 f_vroff = DECL_CHAIN (f_groff);
7327
7328 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
7329 f_stack, NULL_TREE);
7330 size = int_size_in_bytes (type);
7331 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
7332
7333 dw_align = false;
7334 adjust = 0;
7335 if (aarch64_vfp_is_call_or_return_candidate (mode,
7336 type,
7337 &ag_mode,
7338 &nregs,
7339 &is_ha))
7340 {
7341 /* TYPE passed in fp/simd registers. */
7342 if (TARGET_GENERAL_REGS_ONLY)
7343 sorry ("%qs and floating point or vector arguments",
7344 "-mgeneral-regs-only");
7345
7346 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
7347 unshare_expr (valist), f_vrtop, NULL_TREE);
7348 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
7349 unshare_expr (valist), f_vroff, NULL_TREE);
7350
7351 rsize = nregs * UNITS_PER_VREG;
7352
7353 if (is_ha)
7354 {
7355 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
7356 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
7357 }
7358 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
7359 && size < UNITS_PER_VREG)
7360 {
7361 adjust = UNITS_PER_VREG - size;
7362 }
7363 }
7364 else
7365 {
7366 /* TYPE passed in general registers. */
7367 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
7368 unshare_expr (valist), f_grtop, NULL_TREE);
7369 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
7370 unshare_expr (valist), f_groff, NULL_TREE);
7371 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7372 nregs = rsize / UNITS_PER_WORD;
7373
7374 if (align > 8)
7375 dw_align = true;
7376
7377 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7378 && size < UNITS_PER_WORD)
7379 {
7380 adjust = UNITS_PER_WORD - size;
7381 }
7382 }
7383
7384 /* Get a local temporary for the field value. */
7385 off = get_initialized_tmp_var (f_off, pre_p, NULL);
7386
7387 /* Emit code to branch if off >= 0. */
7388 t = build2 (GE_EXPR, boolean_type_node, off,
7389 build_int_cst (TREE_TYPE (off), 0));
7390 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
7391
7392 if (dw_align)
7393 {
7394 /* Emit: offs = (offs + 15) & -16. */
7395 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7396 build_int_cst (TREE_TYPE (off), 15));
7397 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
7398 build_int_cst (TREE_TYPE (off), -16));
7399 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
7400 }
7401 else
7402 roundup = NULL;
7403
7404 /* Update ap.__[g|v]r_offs */
7405 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7406 build_int_cst (TREE_TYPE (off), rsize));
7407 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
7408
7409 /* String up. */
7410 if (roundup)
7411 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7412
7413 /* [cond2] if (ap.__[g|v]r_offs > 0) */
7414 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
7415 build_int_cst (TREE_TYPE (f_off), 0));
7416 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
7417
7418 /* String up: make sure the assignment happens before the use. */
7419 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
7420 COND_EXPR_ELSE (cond1) = t;
7421
7422 /* Prepare the trees handling the argument that is passed on the stack;
 7423     the top-level node will be stored in ON_STACK.  */
7424 arg = get_initialized_tmp_var (stack, pre_p, NULL);
7425 if (align > 8)
7426 {
7427 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
7428 t = fold_convert (intDI_type_node, arg);
7429 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7430 build_int_cst (TREE_TYPE (t), 15));
7431 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7432 build_int_cst (TREE_TYPE (t), -16));
7433 t = fold_convert (TREE_TYPE (arg), t);
7434 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
7435 }
7436 else
7437 roundup = NULL;
7438 /* Advance ap.__stack */
7439 t = fold_convert (intDI_type_node, arg);
7440 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7441 build_int_cst (TREE_TYPE (t), size + 7));
7442 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7443 build_int_cst (TREE_TYPE (t), -8));
7444 t = fold_convert (TREE_TYPE (arg), t);
7445 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
7446 /* String up roundup and advance. */
7447 if (roundup)
7448 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7449 /* String up with arg */
7450 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
7451 /* Big-endianness related address adjustment. */
7452 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7453 && size < UNITS_PER_WORD)
7454 {
7455 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
7456 size_int (UNITS_PER_WORD - size));
7457 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
7458 }
7459
7460 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
7461 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
7462
7463 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
7464 t = off;
7465 if (adjust)
7466 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
7467 build_int_cst (TREE_TYPE (off), adjust));
7468
7469 t = fold_convert (sizetype, t);
7470 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
7471
7472 if (is_ha)
7473 {
7474 /* type ha; // treat as "struct {ftype field[n];}"
7475 ... [computing offs]
7476 for (i = 0; i <nregs; ++i, offs += 16)
7477 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
7478 return ha; */
7479 int i;
7480 tree tmp_ha, field_t, field_ptr_t;
7481
7482 /* Declare a local variable. */
7483 tmp_ha = create_tmp_var_raw (type, "ha");
7484 gimple_add_tmp_var (tmp_ha);
7485
7486 /* Establish the base type. */
7487 switch (ag_mode)
7488 {
7489 case SFmode:
7490 field_t = float_type_node;
7491 field_ptr_t = float_ptr_type_node;
7492 break;
7493 case DFmode:
7494 field_t = double_type_node;
7495 field_ptr_t = double_ptr_type_node;
7496 break;
7497 case TFmode:
7498 field_t = long_double_type_node;
7499 field_ptr_t = long_double_ptr_type_node;
7500 break;
 7501/* Half-precision and quad-precision types are not fully supported yet.  Enable
7502 the following code after the support is complete. Need to find the correct
7503 type node for __fp16 *. */
7504#if 0
7505 case HFmode:
7506 field_t = float_type_node;
7507 field_ptr_t = float_ptr_type_node;
7508 break;
7509#endif
7510 case V2SImode:
7511 case V4SImode:
7512 {
7513 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
7514 field_t = build_vector_type_for_mode (innertype, ag_mode);
7515 field_ptr_t = build_pointer_type (field_t);
7516 }
7517 break;
7518 default:
7519 gcc_assert (0);
7520 }
7521
7522 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
7523 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
7524 addr = t;
7525 t = fold_convert (field_ptr_t, addr);
7526 t = build2 (MODIFY_EXPR, field_t,
7527 build1 (INDIRECT_REF, field_t, tmp_ha),
7528 build1 (INDIRECT_REF, field_t, t));
7529
7530 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
7531 for (i = 1; i < nregs; ++i)
7532 {
7533 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
7534 u = fold_convert (field_ptr_t, addr);
7535 u = build2 (MODIFY_EXPR, field_t,
7536 build2 (MEM_REF, field_t, tmp_ha,
7537 build_int_cst (field_ptr_t,
7538 (i *
7539 int_size_in_bytes (field_t)))),
7540 build1 (INDIRECT_REF, field_t, u));
7541 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
7542 }
7543
7544 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
7545 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
7546 }
7547
7548 COND_EXPR_ELSE (cond2) = t;
7549 addr = fold_convert (build_pointer_type (type), cond1);
7550 addr = build_va_arg_indirect_ref (addr);
7551
7552 if (indirect_p)
7553 addr = build_va_arg_indirect_ref (addr);
7554
7555 return addr;
7556}
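/* Illustrative example: for va_arg (ap, T) with T a homogeneous aggregate
   of three floats, the is_ha path above copies three SFmode fields from
   ap.__vr_top at 16-byte strides into a local temporary, since each HFA
   member was passed in its own V register.  */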
7557
7558/* Implement TARGET_SETUP_INCOMING_VARARGS. */
7559
7560static void
ef4bddc2 7561aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
7562 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7563 int no_rtl)
7564{
7565 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7566 CUMULATIVE_ARGS local_cum;
7567 int gr_saved, vr_saved;
7568
7569 /* The caller has advanced CUM up to, but not beyond, the last named
7570 argument. Advance a local copy of CUM past the last "real" named
7571 argument, to find out how many registers are left over. */
7572 local_cum = *cum;
7573 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
7574
7575 /* Find out how many registers we need to save. */
7576 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7577 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7578
7579 if (TARGET_GENERAL_REGS_ONLY)
7580 {
7581 if (local_cum.aapcs_nvrn > 0)
7582 sorry ("%qs and floating point or vector arguments",
7583 "-mgeneral-regs-only");
7584 vr_saved = 0;
7585 }
7586
7587 if (!no_rtl)
7588 {
7589 if (gr_saved > 0)
7590 {
7591 rtx ptr, mem;
7592
7593 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7594 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7595 - gr_saved * UNITS_PER_WORD);
7596 mem = gen_frame_mem (BLKmode, ptr);
7597 set_mem_alias_set (mem, get_varargs_alias_set ());
7598
7599 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7600 mem, gr_saved);
7601 }
7602 if (vr_saved > 0)
7603 {
7604 /* We can't use move_block_from_reg, because it will use
7605 the wrong mode, storing D regs only. */
ef4bddc2 7606 machine_mode mode = TImode;
43e9d192
IB
7607 int off, i;
7608
7609 /* Set OFF to the offset from virtual_incoming_args_rtx of
7610 the first vector register. The VR save area lies below
7611 the GR one, and is aligned to 16 bytes. */
7612 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7613 STACK_BOUNDARY / BITS_PER_UNIT);
7614 off -= vr_saved * UNITS_PER_VREG;
7615
7616 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7617 {
7618 rtx ptr, mem;
7619
7620 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7621 mem = gen_frame_mem (mode, ptr);
7622 set_mem_alias_set (mem, get_varargs_alias_set ());
7623 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7624 off += UNITS_PER_VREG;
7625 }
7626 }
7627 }
7628
7629 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7630 any complication of having crtl->args.pretend_args_size changed. */
8799637a 7631 cfun->machine->frame.saved_varargs_size
43e9d192
IB
7632 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7633 STACK_BOUNDARY / BITS_PER_UNIT)
7634 + vr_saved * UNITS_PER_VREG);
7635}
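/* Worked example (illustrative): for a variadic function whose named
   arguments use x0-x2 and v0, say int f (int, void *, long, double, ...),
   gr_saved is 5 and vr_saved is 7, so the code above stores x3-x7 below
   virtual_incoming_args_rtx and v1-v7 (as TImode values) below that, and
   records ROUND_UP (5 * 8, 16) + 7 * 16 == 160 bytes of save area.  */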
7636
7637static void
7638aarch64_conditional_register_usage (void)
7639{
7640 int i;
7641 if (!TARGET_FLOAT)
7642 {
7643 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7644 {
7645 fixed_regs[i] = 1;
7646 call_used_regs[i] = 1;
7647 }
7648 }
7649}
7650
7651/* Walk down the type tree of TYPE counting consecutive base elements.
7652 If *MODEP is VOIDmode, then set it to the first valid floating point
7653 type. If a non-floating point type is found, or if a floating point
7654 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7655 otherwise return the count in the sub-tree. */
7656static int
ef4bddc2 7657aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 7658{
ef4bddc2 7659 machine_mode mode;
43e9d192
IB
7660 HOST_WIDE_INT size;
7661
7662 switch (TREE_CODE (type))
7663 {
7664 case REAL_TYPE:
7665 mode = TYPE_MODE (type);
7666 if (mode != DFmode && mode != SFmode && mode != TFmode)
7667 return -1;
7668
7669 if (*modep == VOIDmode)
7670 *modep = mode;
7671
7672 if (*modep == mode)
7673 return 1;
7674
7675 break;
7676
7677 case COMPLEX_TYPE:
7678 mode = TYPE_MODE (TREE_TYPE (type));
7679 if (mode != DFmode && mode != SFmode && mode != TFmode)
7680 return -1;
7681
7682 if (*modep == VOIDmode)
7683 *modep = mode;
7684
7685 if (*modep == mode)
7686 return 2;
7687
7688 break;
7689
7690 case VECTOR_TYPE:
7691 /* Use V2SImode and V4SImode as representatives of all 64-bit
7692 and 128-bit vector types. */
7693 size = int_size_in_bytes (type);
7694 switch (size)
7695 {
7696 case 8:
7697 mode = V2SImode;
7698 break;
7699 case 16:
7700 mode = V4SImode;
7701 break;
7702 default:
7703 return -1;
7704 }
7705
7706 if (*modep == VOIDmode)
7707 *modep = mode;
7708
7709 /* Vector modes are considered to be opaque: two vectors are
7710 equivalent for the purposes of being homogeneous aggregates
7711 if they are the same size. */
7712 if (*modep == mode)
7713 return 1;
7714
7715 break;
7716
7717 case ARRAY_TYPE:
7718 {
7719 int count;
7720 tree index = TYPE_DOMAIN (type);
7721
807e902e
KZ
7722 /* Can't handle incomplete types nor sizes that are not
7723 fixed. */
7724 if (!COMPLETE_TYPE_P (type)
7725 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7726 return -1;
7727
7728 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7729 if (count == -1
7730 || !index
7731 || !TYPE_MAX_VALUE (index)
cc269bb6 7732 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 7733 || !TYPE_MIN_VALUE (index)
cc269bb6 7734 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
7735 || count < 0)
7736 return -1;
7737
ae7e9ddd
RS
7738 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7739 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
7740
7741 /* There must be no padding. */
807e902e 7742 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7743 return -1;
7744
7745 return count;
7746 }
7747
7748 case RECORD_TYPE:
7749 {
7750 int count = 0;
7751 int sub_count;
7752 tree field;
7753
807e902e
KZ
7754 /* Can't handle incomplete types nor sizes that are not
7755 fixed. */
7756 if (!COMPLETE_TYPE_P (type)
7757 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7758 return -1;
7759
7760 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7761 {
7762 if (TREE_CODE (field) != FIELD_DECL)
7763 continue;
7764
7765 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7766 if (sub_count < 0)
7767 return -1;
7768 count += sub_count;
7769 }
7770
7771 /* There must be no padding. */
807e902e 7772 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7773 return -1;
7774
7775 return count;
7776 }
7777
7778 case UNION_TYPE:
7779 case QUAL_UNION_TYPE:
7780 {
7781 /* These aren't very interesting except in a degenerate case. */
7782 int count = 0;
7783 int sub_count;
7784 tree field;
7785
807e902e
KZ
7786 /* Can't handle incomplete types nor sizes that are not
7787 fixed. */
7788 if (!COMPLETE_TYPE_P (type)
7789 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7790 return -1;
7791
7792 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7793 {
7794 if (TREE_CODE (field) != FIELD_DECL)
7795 continue;
7796
7797 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7798 if (sub_count < 0)
7799 return -1;
7800 count = count > sub_count ? count : sub_count;
7801 }
7802
7803 /* There must be no padding. */
807e902e 7804 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7805 return -1;
7806
7807 return count;
7808 }
7809
7810 default:
7811 break;
7812 }
7813
7814 return -1;
7815}
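/* Illustrative examples of the classification performed above:

     struct { float x, y, z; }          -> 3, *modep == SFmode
     _Complex double                    -> 2, *modep == DFmode
     struct { float32x4_t a, b; }       -> 2, *modep == V4SImode
       (vectors are compared by size only, as noted above)
     struct { float f; double d; }      -> -1 (mixed element modes)  */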
7816
7817/* Return TRUE if the type, as described by TYPE and MODE, is a composite
7818 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7819 array types. The C99 floating-point complex types are also considered
7820 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7821 types, which are GCC extensions and out of the scope of AAPCS64, are
7822 treated as composite types here as well.
7823
7824 Note that MODE itself is not sufficient in determining whether a type
7825 is such a composite type or not. This is because
7826 stor-layout.c:compute_record_mode may have already changed the MODE
7827 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7828 structure with only one field may have its MODE set to the mode of the
7829 field. Also an integer mode whose size matches the size of the
7830 RECORD_TYPE type may be used to substitute the original mode
7831 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7832 solely relied on. */
7833
7834static bool
7835aarch64_composite_type_p (const_tree type,
ef4bddc2 7836 machine_mode mode)
43e9d192
IB
7837{
7838 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7839 return true;
7840
7841 if (mode == BLKmode
7842 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7843 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7844 return true;
7845
7846 return false;
7847}
7848
7849/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7850 type as described in AAPCS64 \S 4.1.2.
7851
7852 See the comment above aarch64_composite_type_p for the notes on MODE. */
7853
7854static bool
7855aarch64_short_vector_p (const_tree type,
ef4bddc2 7856 machine_mode mode)
43e9d192
IB
7857{
7858 HOST_WIDE_INT size = -1;
7859
7860 if (type && TREE_CODE (type) == VECTOR_TYPE)
7861 size = int_size_in_bytes (type);
7862 else if (!aarch64_composite_type_p (type, mode)
7863 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7864 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7865 size = GET_MODE_SIZE (mode);
7866
7867 return (size == 8 || size == 16) ? true : false;
7868}
7869
7870/* Return TRUE if an argument, whose type is described by TYPE and MODE,
7871 shall be passed or returned in simd/fp register(s) (providing these
7872 parameter passing registers are available).
7873
7874 Upon successful return, *COUNT returns the number of needed registers,
7875 *BASE_MODE returns the mode of the individual register and when IS_HA
7876 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7877 floating-point aggregate or a homogeneous short-vector aggregate. */
7878
7879static bool
ef4bddc2 7880aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 7881 const_tree type,
ef4bddc2 7882 machine_mode *base_mode,
43e9d192
IB
7883 int *count,
7884 bool *is_ha)
7885{
ef4bddc2 7886 machine_mode new_mode = VOIDmode;
43e9d192
IB
7887 bool composite_p = aarch64_composite_type_p (type, mode);
7888
7889 if (is_ha != NULL) *is_ha = false;
7890
7891 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7892 || aarch64_short_vector_p (type, mode))
7893 {
7894 *count = 1;
7895 new_mode = mode;
7896 }
7897 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7898 {
7899 if (is_ha != NULL) *is_ha = true;
7900 *count = 2;
7901 new_mode = GET_MODE_INNER (mode);
7902 }
7903 else if (type && composite_p)
7904 {
7905 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7906
7907 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7908 {
7909 if (is_ha != NULL) *is_ha = true;
7910 *count = ag_count;
7911 }
7912 else
7913 return false;
7914 }
7915 else
7916 return false;
7917
7918 *base_mode = new_mode;
7919 return true;
7920}
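/* For illustration: a structure of four doubles is accepted with *count == 4,
   *base_mode == DFmode and *is_ha set; a structure of five floats exceeds
   HA_MAX_NUM_FLDS and is rejected; a plain double (scalar MODE_FLOAT, not a
   composite) is accepted with *count == 1 and *is_ha left false.  */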
7921
7922/* Implement TARGET_STRUCT_VALUE_RTX. */
7923
7924static rtx
7925aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7926 int incoming ATTRIBUTE_UNUSED)
7927{
7928 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7929}
7930
7931/* Implements target hook vector_mode_supported_p. */
7932static bool
ef4bddc2 7933aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192
IB
7934{
7935 if (TARGET_SIMD
7936 && (mode == V4SImode || mode == V8HImode
7937 || mode == V16QImode || mode == V2DImode
7938 || mode == V2SImode || mode == V4HImode
7939 || mode == V8QImode || mode == V2SFmode
ad7d90cc
AL
7940 || mode == V4SFmode || mode == V2DFmode
7941 || mode == V1DFmode))
43e9d192
IB
7942 return true;
7943
7944 return false;
7945}
7946
b7342d25
IB
7947/* Return appropriate SIMD container
7948 for MODE within a vector of WIDTH bits. */
ef4bddc2
RS
7949static machine_mode
7950aarch64_simd_container_mode (machine_mode mode, unsigned width)
43e9d192 7951{
b7342d25 7952 gcc_assert (width == 64 || width == 128);
43e9d192 7953 if (TARGET_SIMD)
b7342d25
IB
7954 {
7955 if (width == 128)
7956 switch (mode)
7957 {
7958 case DFmode:
7959 return V2DFmode;
7960 case SFmode:
7961 return V4SFmode;
7962 case SImode:
7963 return V4SImode;
7964 case HImode:
7965 return V8HImode;
7966 case QImode:
7967 return V16QImode;
7968 case DImode:
7969 return V2DImode;
7970 default:
7971 break;
7972 }
7973 else
7974 switch (mode)
7975 {
7976 case SFmode:
7977 return V2SFmode;
7978 case SImode:
7979 return V2SImode;
7980 case HImode:
7981 return V4HImode;
7982 case QImode:
7983 return V8QImode;
7984 default:
7985 break;
7986 }
7987 }
43e9d192
IB
7988 return word_mode;
7989}
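/* For example: aarch64_simd_container_mode (SImode, 128) yields V4SImode,
   aarch64_simd_container_mode (HImode, 64) yields V4HImode, and without
   TARGET_SIMD the function falls back to word_mode.  */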
7990
b7342d25 7991/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2
RS
7992static machine_mode
7993aarch64_preferred_simd_mode (machine_mode mode)
b7342d25
IB
7994{
7995 return aarch64_simd_container_mode (mode, 128);
7996}
7997
3b357264
JG
7998/* Return the bitmask of possible vector sizes for the vectorizer
7999 to iterate over. */
8000static unsigned int
8001aarch64_autovectorize_vector_sizes (void)
8002{
8003 return (16 | 8);
8004}
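/* 16 | 8 encodes the candidate vector sizes in bytes, so the vectorizer
   first tries 128-bit vectors and can then retry with 64-bit vectors.  */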
8005
ac2b960f
YZ
8006/* Implement TARGET_MANGLE_TYPE. */
8007
6f549691 8008static const char *
ac2b960f
YZ
8009aarch64_mangle_type (const_tree type)
8010{
8011 /* The AArch64 ABI documents say that "__va_list" has to be
8012 mangled as if it is in the "std" namespace. */
8013 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
8014 return "St9__va_list";
8015
f9d53c27
TB
8016 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
8017 builtin types. */
8018 if (TYPE_NAME (type) != NULL)
8019 return aarch64_mangle_builtin_type (type);
c6fc9e43 8020
ac2b960f
YZ
8021 /* Use the default mangling. */
8022 return NULL;
8023}
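/* Illustrative example: with this hook a declaration such as
   void f (__builtin_va_list) mangles as _Z1fSt9__va_list, exactly as if
   the argument type were std::__va_list.  */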
8024
8baff86e
KT
8025
8026/* Return true if the rtx_insn contains a MEM RTX somewhere
8027 in it. */
75cf1494
KT
8028
8029static bool
8baff86e 8030has_memory_op (rtx_insn *mem_insn)
75cf1494 8031{
8baff86e
KT
8032 subrtx_iterator::array_type array;
8033 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
8034 if (MEM_P (*iter))
8035 return true;
8036
8037 return false;
75cf1494
KT
8038}
8039
8040/* Find the first rtx_insn before insn that will generate an assembly
8041 instruction. */
8042
8043static rtx_insn *
8044aarch64_prev_real_insn (rtx_insn *insn)
8045{
8046 if (!insn)
8047 return NULL;
8048
8049 do
8050 {
8051 insn = prev_real_insn (insn);
8052 }
8053 while (insn && recog_memoized (insn) < 0);
8054
8055 return insn;
8056}
8057
8058static bool
8059is_madd_op (enum attr_type t1)
8060{
8061 unsigned int i;
8062 /* A number of these may be AArch32 only. */
8063 enum attr_type mlatypes[] = {
8064 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
8065 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
8066 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
8067 };
8068
8069 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
8070 {
8071 if (t1 == mlatypes[i])
8072 return true;
8073 }
8074
8075 return false;
8076}
8077
8078/* Check if there is a register dependency between a load and the insn
8079 for which we hold recog_data. */
8080
8081static bool
8082dep_between_memop_and_curr (rtx memop)
8083{
8084 rtx load_reg;
8085 int opno;
8086
8baff86e 8087 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
8088
8089 if (!REG_P (SET_DEST (memop)))
8090 return false;
8091
8092 load_reg = SET_DEST (memop);
8baff86e 8093 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
8094 {
8095 rtx operand = recog_data.operand[opno];
8096 if (REG_P (operand)
8097 && reg_overlap_mentioned_p (load_reg, operand))
8098 return true;
8099
8100 }
8101 return false;
8102}
8103
8baff86e
KT
8104
8105/* When working around the Cortex-A53 erratum 835769,
8106 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
8107 instruction and has a preceding memory instruction such that a NOP
8108 should be inserted between them. */
8109
75cf1494
KT
8110bool
8111aarch64_madd_needs_nop (rtx_insn* insn)
8112{
8113 enum attr_type attr_type;
8114 rtx_insn *prev;
8115 rtx body;
8116
8117 if (!aarch64_fix_a53_err835769)
8118 return false;
8119
8120 if (recog_memoized (insn) < 0)
8121 return false;
8122
8123 attr_type = get_attr_type (insn);
8124 if (!is_madd_op (attr_type))
8125 return false;
8126
8127 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
8128 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
8129 Restore recog state to INSN to avoid state corruption. */
8130 extract_constrain_insn_cached (insn);
8131
8baff86e 8132 if (!prev || !has_memory_op (prev))
75cf1494
KT
8133 return false;
8134
8135 body = single_set (prev);
8136
8137 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
8138 it and the DImode madd, emit a NOP between them. If body is NULL then we
8139 have a complex memory operation, probably a load/store pair.
8140 Be conservative for now and emit a NOP. */
8141 if (GET_MODE (recog_data.operand[0]) == DImode
8142 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
8143 return true;
8144
8145 return false;
8146
8147}
8148
8baff86e
KT
8149
8150/* Implement FINAL_PRESCAN_INSN. */
8151
75cf1494
KT
8152void
8153aarch64_final_prescan_insn (rtx_insn *insn)
8154{
8155 if (aarch64_madd_needs_nop (insn))
8156 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
8157}
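/* For illustration, with -mfix-cortex-a53-835769 a sequence such as

       ldr   x1, [x2]
       madd  x0, x3, x4, x5

   is emitted as

       ldr   x1, [x2]
       nop                        // between mem op and mult-accumulate
       madd  x0, x3, x4, x5

   (the register choice here is purely illustrative).  */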
8158
8159
43e9d192 8160/* Return the equivalent letter for size. */
81c2dfb9 8161static char
43e9d192
IB
8162sizetochar (int size)
8163{
8164 switch (size)
8165 {
8166 case 64: return 'd';
8167 case 32: return 's';
8168 case 16: return 'h';
8169 case 8 : return 'b';
8170 default: gcc_unreachable ();
8171 }
8172}
8173
3520f7cc
JG
8174/* Return true iff x is a uniform vector of floating-point
8175 constants, and the constant can be represented in
8176 quarter-precision form. Note, as aarch64_float_const_representable_p
8177 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
8178static bool
8179aarch64_vect_float_const_representable_p (rtx x)
8180{
8181 int i = 0;
8182 REAL_VALUE_TYPE r0, ri;
8183 rtx x0, xi;
8184
8185 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
8186 return false;
8187
8188 x0 = CONST_VECTOR_ELT (x, 0);
8189 if (!CONST_DOUBLE_P (x0))
8190 return false;
8191
8192 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
8193
8194 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
8195 {
8196 xi = CONST_VECTOR_ELT (x, i);
8197 if (!CONST_DOUBLE_P (xi))
8198 return false;
8199
8200 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
8201 if (!REAL_VALUES_EQUAL (r0, ri))
8202 return false;
8203 }
8204
8205 return aarch64_float_const_representable_p (x0);
8206}
8207
d8edd899 8208/* Return true for valid and false for invalid. */
3ea63f60 8209bool
ef4bddc2 8210aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
48063b9d 8211 struct simd_immediate_info *info)
43e9d192
IB
8212{
8213#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
8214 matches = 1; \
8215 for (i = 0; i < idx; i += (STRIDE)) \
8216 if (!(TEST)) \
8217 matches = 0; \
8218 if (matches) \
8219 { \
8220 immtype = (CLASS); \
8221 elsize = (ELSIZE); \
43e9d192
IB
8222 eshift = (SHIFT); \
8223 emvn = (NEG); \
8224 break; \
8225 }
8226
8227 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8228 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8229 unsigned char bytes[16];
43e9d192
IB
8230 int immtype = -1, matches;
8231 unsigned int invmask = inverse ? 0xff : 0;
8232 int eshift, emvn;
8233
43e9d192 8234 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 8235 {
81c2dfb9
IB
8236 if (! (aarch64_simd_imm_zero_p (op, mode)
8237 || aarch64_vect_float_const_representable_p (op)))
d8edd899 8238 return false;
3520f7cc 8239
48063b9d
IB
8240 if (info)
8241 {
8242 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 8243 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
8244 info->mvn = false;
8245 info->shift = 0;
8246 }
3520f7cc 8247
d8edd899 8248 return true;
3520f7cc 8249 }
43e9d192
IB
8250
8251 /* Splat vector constant out into a byte vector. */
8252 for (i = 0; i < n_elts; i++)
8253 {
4b1e108c
AL
8254 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
8255 it must be laid out in the vector register in reverse order. */
8256 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192
IB
8257 unsigned HOST_WIDE_INT elpart;
8258 unsigned int part, parts;
8259
4aa81c2e 8260 if (CONST_INT_P (el))
43e9d192
IB
8261 {
8262 elpart = INTVAL (el);
8263 parts = 1;
8264 }
8265 else if (GET_CODE (el) == CONST_DOUBLE)
8266 {
8267 elpart = CONST_DOUBLE_LOW (el);
8268 parts = 2;
8269 }
8270 else
8271 gcc_unreachable ();
8272
8273 for (part = 0; part < parts; part++)
8274 {
8275 unsigned int byte;
8276 for (byte = 0; byte < innersize; byte++)
8277 {
8278 bytes[idx++] = (elpart & 0xff) ^ invmask;
8279 elpart >>= BITS_PER_UNIT;
8280 }
8281 if (GET_CODE (el) == CONST_DOUBLE)
8282 elpart = CONST_DOUBLE_HIGH (el);
8283 }
8284 }
8285
8286 /* Sanity check. */
8287 gcc_assert (idx == GET_MODE_SIZE (mode));
8288
8289 do
8290 {
8291 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8292 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
8293
8294 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8295 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
8296
8297 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8298 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
8299
8300 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8301 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
8302
8303 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
8304
8305 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
8306
8307 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8308 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
8309
8310 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8311 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
8312
8313 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8314 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
8315
8316 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8317 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
8318
8319 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
8320
8321 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
8322
8323 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 8324 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
8325
8326 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 8327 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
8328
8329 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 8330 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
8331
8332 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 8333 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
8334
8335 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
8336
8337 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8338 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
8339 }
8340 while (0);
8341
e4f0f84d 8342 if (immtype == -1)
d8edd899 8343 return false;
43e9d192 8344
48063b9d 8345 if (info)
43e9d192 8346 {
48063b9d 8347 info->element_width = elsize;
48063b9d
IB
8348 info->mvn = emvn != 0;
8349 info->shift = eshift;
8350
43e9d192
IB
8351 unsigned HOST_WIDE_INT imm = 0;
8352
e4f0f84d
TB
8353 if (immtype >= 12 && immtype <= 15)
8354 info->msl = true;
8355
43e9d192
IB
8356 /* Un-invert bytes of recognized vector, if necessary. */
8357 if (invmask != 0)
8358 for (i = 0; i < idx; i++)
8359 bytes[i] ^= invmask;
8360
8361 if (immtype == 17)
8362 {
8363 /* FIXME: Broken on 32-bit H_W_I hosts. */
8364 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8365
8366 for (i = 0; i < 8; i++)
8367 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8368 << (i * BITS_PER_UNIT);
8369
43e9d192 8370
48063b9d
IB
8371 info->value = GEN_INT (imm);
8372 }
8373 else
8374 {
8375 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8376 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
8377
8378 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
8379 generic constants. */
8380 if (info->mvn)
43e9d192 8381 imm = ~imm;
48063b9d
IB
8382 imm = (imm >> info->shift) & 0xff;
8383 info->value = GEN_INT (imm);
8384 }
43e9d192
IB
8385 }
8386
48063b9d 8387 return true;
43e9d192
IB
8388#undef CHECK
8389}
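/* Worked examples (illustrative): a V4SImode vector with every element
   equal to 0x0000ff00 has the per-element byte pattern 00 ff 00 00 and is
   matched by the second CHECK above, giving "movi ..., 0xff, lsl 8"; a
   V2DImode vector whose elements are 0x00ff00ffff0000ff matches the final
   64-bit case, where every byte must be either 0x00 or 0xff.  */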
8390
43e9d192
IB
8391/* Check that immediate shift constants are within range. */
8392bool
ef4bddc2 8393aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
8394{
8395 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
8396 if (left)
ddeabd3e 8397 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 8398 else
ddeabd3e 8399 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
8400}
8401
3520f7cc
JG
8402/* Return true if X is a uniform vector where all elements
8403 are either the floating-point constant 0.0 or the
8404 integer constant 0. */
43e9d192 8405bool
ef4bddc2 8406aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
43e9d192 8407{
3520f7cc 8408 return x == CONST0_RTX (mode);
43e9d192
IB
8409}
8410
8411bool
ef4bddc2 8412aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
43e9d192
IB
8413{
8414 HOST_WIDE_INT imm = INTVAL (x);
8415 int i;
8416
8417 for (i = 0; i < 8; i++)
8418 {
8419 unsigned int byte = imm & 0xff;
8420 if (byte != 0xff && byte != 0)
8421 return false;
8422 imm >>= 8;
8423 }
8424
8425 return true;
8426}
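/* Illustratively, this accepts 64-bit values in which every byte is either
   0x00 or 0xff, e.g. 0x00ff00ffff0000ff, and rejects anything else, such
   as 0x1ff.  */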
8427
83f8c414
CSS
8428bool
8429aarch64_mov_operand_p (rtx x,
a5350ddc 8430 enum aarch64_symbol_context context,
ef4bddc2 8431 machine_mode mode)
83f8c414 8432{
83f8c414
CSS
8433 if (GET_CODE (x) == HIGH
8434 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
8435 return true;
8436
82614948 8437 if (CONST_INT_P (x))
83f8c414
CSS
8438 return true;
8439
8440 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
8441 return true;
8442
a5350ddc
CSS
8443 return aarch64_classify_symbolic_expression (x, context)
8444 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
8445}
8446
43e9d192
IB
8447/* Return a const_int vector of VAL. */
8448rtx
ef4bddc2 8449aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
43e9d192
IB
8450{
8451 int nunits = GET_MODE_NUNITS (mode);
8452 rtvec v = rtvec_alloc (nunits);
8453 int i;
8454
8455 for (i = 0; i < nunits; i++)
8456 RTVEC_ELT (v, i) = GEN_INT (val);
8457
8458 return gen_rtx_CONST_VECTOR (mode, v);
8459}
8460
051d0e2f
SN
8461/* Check OP is a legal scalar immediate for the MOVI instruction. */
8462
8463bool
ef4bddc2 8464aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
051d0e2f 8465{
ef4bddc2 8466 machine_mode vmode;
051d0e2f
SN
8467
8468 gcc_assert (!VECTOR_MODE_P (mode));
8469 vmode = aarch64_preferred_simd_mode (mode);
8470 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 8471 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
8472}
8473
988fa693
JG
8474/* Construct and return a PARALLEL RTX vector with elements numbering the
8475 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
8476 the vector - from the perspective of the architecture. This does not
8477 line up with GCC's perspective on lane numbers, so we end up with
8478 different masks depending on our target endian-ness. The diagram
8479 below may help. We must draw the distinction when building masks
8480 which select one half of the vector. An instruction selecting
8481 architectural low-lanes for a big-endian target must be described using
8482 a mask selecting GCC high-lanes.
8483
8484 Big-Endian Little-Endian
8485
8486GCC 0 1 2 3 3 2 1 0
8487 | x | x | x | x | | x | x | x | x |
8488Architecture 3 2 1 0 3 2 1 0
8489
8490Low Mask: { 2, 3 } { 0, 1 }
8491High Mask: { 0, 1 } { 2, 3 }
8492*/
8493
43e9d192 8494rtx
ef4bddc2 8495aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
43e9d192
IB
8496{
8497 int nunits = GET_MODE_NUNITS (mode);
8498 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
8499 int high_base = nunits / 2;
8500 int low_base = 0;
8501 int base;
43e9d192
IB
8502 rtx t1;
8503 int i;
8504
988fa693
JG
8505 if (BYTES_BIG_ENDIAN)
8506 base = high ? low_base : high_base;
8507 else
8508 base = high ? high_base : low_base;
8509
8510 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
8511 RTVEC_ELT (v, i) = GEN_INT (base + i);
8512
8513 t1 = gen_rtx_PARALLEL (mode, v);
8514 return t1;
8515}
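/* For example (illustrative): aarch64_simd_vect_par_cnst_half (V4SImode,
   false) builds the PARALLEL (0 1) on a little-endian target and (2 3) on
   a big-endian target, matching the "Low Mask" row of the diagram above.  */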
8516
988fa693
JG
8517/* Check OP for validity as a PARALLEL RTX vector with elements
8518 numbering the lanes of either the high (HIGH == TRUE) or low half,
8519 from the perspective of the architecture. See the diagram above
8520 aarch64_simd_vect_par_cnst_half for more details. */
8521
8522bool
ef4bddc2 8523aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
8524 bool high)
8525{
8526 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
8527 HOST_WIDE_INT count_op = XVECLEN (op, 0);
8528 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
8529 int i = 0;
8530
8531 if (!VECTOR_MODE_P (mode))
8532 return false;
8533
8534 if (count_op != count_ideal)
8535 return false;
8536
8537 for (i = 0; i < count_ideal; i++)
8538 {
8539 rtx elt_op = XVECEXP (op, 0, i);
8540 rtx elt_ideal = XVECEXP (ideal, 0, i);
8541
4aa81c2e 8542 if (!CONST_INT_P (elt_op)
988fa693
JG
8543 || INTVAL (elt_ideal) != INTVAL (elt_op))
8544 return false;
8545 }
8546 return true;
8547}
8548
43e9d192
IB
8549/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
8550 HIGH (exclusive). */
8551void
46ed6024
CB
8552aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8553 const_tree exp)
43e9d192
IB
8554{
8555 HOST_WIDE_INT lane;
4aa81c2e 8556 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
8557 lane = INTVAL (operand);
8558
8559 if (lane < low || lane >= high)
46ed6024
CB
8560 {
8561 if (exp)
8562 error ("%Klane %ld out of range %ld - %ld", exp, lane, low, high - 1);
8563 else
8564 error ("lane %ld out of range %ld - %ld", lane, low, high - 1);
8565 }
43e9d192
IB
8566}
8567
43e9d192
IB
8568/* Emit code to place an AdvSIMD pair result in memory locations (with equal
8569 registers). */
8570void
ef4bddc2 8571aarch64_simd_emit_pair_result_insn (machine_mode mode,
43e9d192
IB
8572 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
8573 rtx op1)
8574{
8575 rtx mem = gen_rtx_MEM (mode, destaddr);
8576 rtx tmp1 = gen_reg_rtx (mode);
8577 rtx tmp2 = gen_reg_rtx (mode);
8578
8579 emit_insn (intfn (tmp1, op1, tmp2));
8580
8581 emit_move_insn (mem, tmp1);
8582 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
8583 emit_move_insn (mem, tmp2);
8584}
8585
8586/* Return TRUE if OP is a valid vector addressing mode. */
8587bool
8588aarch64_simd_mem_operand_p (rtx op)
8589{
8590 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 8591 || REG_P (XEXP (op, 0)));
43e9d192
IB
8592}
8593
2d8c6dc1
AH
8594/* Emit a register copy from operand to operand, taking care not to
8595 early-clobber source registers in the process.
43e9d192 8596
2d8c6dc1
AH
8597 COUNT is the number of components into which the copy needs to be
8598 decomposed. */
43e9d192 8599void
2d8c6dc1
AH
8600aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
8601 unsigned int count)
43e9d192
IB
8602{
8603 unsigned int i;
2d8c6dc1
AH
8604 int rdest = REGNO (operands[0]);
8605 int rsrc = REGNO (operands[1]);
43e9d192
IB
8606
8607 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
8608 || rdest < rsrc)
8609 for (i = 0; i < count; i++)
8610 emit_move_insn (gen_rtx_REG (mode, rdest + i),
8611 gen_rtx_REG (mode, rsrc + i));
43e9d192 8612 else
2d8c6dc1
AH
8613 for (i = 0; i < count; i++)
8614 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
8615 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
8616}
8617
8618/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8619 one of VSTRUCT modes: OI, CI or XI. */
8620int
647d790d 8621aarch64_simd_attr_length_move (rtx_insn *insn)
43e9d192 8622{
ef4bddc2 8623 machine_mode mode;
43e9d192
IB
8624
8625 extract_insn_cached (insn);
8626
8627 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
8628 {
8629 mode = GET_MODE (recog_data.operand[0]);
8630 switch (mode)
8631 {
8632 case OImode:
8633 return 8;
8634 case CImode:
8635 return 12;
8636 case XImode:
8637 return 16;
8638 default:
8639 gcc_unreachable ();
8640 }
8641 }
8642 return 4;
8643}
8644
668046d1
DS
8645/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
8646 one of VSTRUCT modes: OI, CI, EI, or XI. */
8647int
8648aarch64_simd_attr_length_rglist (enum machine_mode mode)
8649{
8650 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
8651}
8652
db0253a4
TB
8653/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8654 alignment of a vector to 128 bits. */
8655static HOST_WIDE_INT
8656aarch64_simd_vector_alignment (const_tree type)
8657{
9439e9a1 8658 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
8659 return MIN (align, 128);
8660}
8661
8662/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8663static bool
8664aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8665{
8666 if (is_packed)
8667 return false;
8668
8669 /* We guarantee alignment for vectors up to 128-bits. */
8670 if (tree_int_cst_compare (TYPE_SIZE (type),
8671 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8672 return false;
8673
8674 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8675 return true;
8676}
8677
4369c11e
TB
8678/* If VALS is a vector constant that can be loaded into a register
8679 using DUP, generate instructions to do so and return an RTX to
8680 assign to the register. Otherwise return NULL_RTX. */
8681static rtx
8682aarch64_simd_dup_constant (rtx vals)
8683{
ef4bddc2
RS
8684 machine_mode mode = GET_MODE (vals);
8685 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e
TB
8686 int n_elts = GET_MODE_NUNITS (mode);
8687 bool all_same = true;
8688 rtx x;
8689 int i;
8690
8691 if (GET_CODE (vals) != CONST_VECTOR)
8692 return NULL_RTX;
8693
8694 for (i = 1; i < n_elts; ++i)
8695 {
8696 x = CONST_VECTOR_ELT (vals, i);
8697 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8698 all_same = false;
8699 }
8700
8701 if (!all_same)
8702 return NULL_RTX;
8703
8704 /* We can load this constant by using DUP and a constant in a
8705 single ARM register. This will be cheaper than a vector
8706 load. */
8707 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8708 return gen_rtx_VEC_DUPLICATE (mode, x);
8709}
8710
8711
8712/* Generate code to load VALS, which is a PARALLEL containing only
8713 constants (for vec_init) or CONST_VECTOR, efficiently into a
8714 register. Returns an RTX to copy into the register, or NULL_RTX
8715 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 8716static rtx
4369c11e
TB
8717aarch64_simd_make_constant (rtx vals)
8718{
ef4bddc2 8719 machine_mode mode = GET_MODE (vals);
4369c11e
TB
8720 rtx const_dup;
8721 rtx const_vec = NULL_RTX;
8722 int n_elts = GET_MODE_NUNITS (mode);
8723 int n_const = 0;
8724 int i;
8725
8726 if (GET_CODE (vals) == CONST_VECTOR)
8727 const_vec = vals;
8728 else if (GET_CODE (vals) == PARALLEL)
8729 {
8730 /* A CONST_VECTOR must contain only CONST_INTs and
8731 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8732 Only store valid constants in a CONST_VECTOR. */
8733 for (i = 0; i < n_elts; ++i)
8734 {
8735 rtx x = XVECEXP (vals, 0, i);
8736 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8737 n_const++;
8738 }
8739 if (n_const == n_elts)
8740 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8741 }
8742 else
8743 gcc_unreachable ();
8744
8745 if (const_vec != NULL_RTX
48063b9d 8746 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
8747 /* Load using MOVI/MVNI. */
8748 return const_vec;
8749 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8750 /* Loaded using DUP. */
8751 return const_dup;
8752 else if (const_vec != NULL_RTX)
8753 /* Load from constant pool. We can not take advantage of single-cycle
8754 LD1 because we need a PC-relative addressing mode. */
8755 return const_vec;
8756 else
8757 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8758 We can not construct an initializer. */
8759 return NULL_RTX;
8760}
8761
8762void
8763aarch64_expand_vector_init (rtx target, rtx vals)
8764{
ef4bddc2
RS
8765 machine_mode mode = GET_MODE (target);
8766 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e
TB
8767 int n_elts = GET_MODE_NUNITS (mode);
8768 int n_var = 0, one_var = -1;
8769 bool all_same = true;
8770 rtx x, mem;
8771 int i;
8772
8773 x = XVECEXP (vals, 0, 0);
8774 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8775 n_var = 1, one_var = 0;
8776
8777 for (i = 1; i < n_elts; ++i)
8778 {
8779 x = XVECEXP (vals, 0, i);
8780 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8781 ++n_var, one_var = i;
8782
8783 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8784 all_same = false;
8785 }
8786
8787 if (n_var == 0)
8788 {
8789 rtx constant = aarch64_simd_make_constant (vals);
8790 if (constant != NULL_RTX)
8791 {
8792 emit_move_insn (target, constant);
8793 return;
8794 }
8795 }
8796
8797 /* Splat a single non-constant element if we can. */
8798 if (all_same)
8799 {
8800 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8801 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8802 return;
8803 }
8804
8805 /* One field is non-constant. Load constant then overwrite varying
8806 field. This is more efficient than using the stack. */
8807 if (n_var == 1)
8808 {
8809 rtx copy = copy_rtx (vals);
8810 rtx index = GEN_INT (one_var);
8811 enum insn_code icode;
8812
8813 /* Load constant part of vector, substitute neighboring value for
8814 varying element. */
8815 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8816 aarch64_expand_vector_init (target, copy);
8817
8818 /* Insert variable. */
8819 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8820 icode = optab_handler (vec_set_optab, mode);
8821 gcc_assert (icode != CODE_FOR_nothing);
8822 emit_insn (GEN_FCN (icode) (target, x, index));
8823 return;
8824 }
8825
8826 /* Construct the vector in memory one field at a time
8827 and load the whole vector. */
8828 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8829 for (i = 0; i < n_elts; i++)
8830 emit_move_insn (adjust_address_nv (mem, inner_mode,
8831 i * GET_MODE_SIZE (inner_mode)),
8832 XVECEXP (vals, 0, i));
8833 emit_move_insn (target, mem);
8834
8835}
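/* For illustration: an initialiser such as {x, x, x, x} takes the splat
   path via DUP; {1, 2, 3, 4} becomes a constant load; and {1, 2, x, 4}
   first loads the constant {1, 2, 4, 4} and then inserts x with vec_set,
   avoiding a trip through the stack.  */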
8836
43e9d192 8837static unsigned HOST_WIDE_INT
ef4bddc2 8838aarch64_shift_truncation_mask (machine_mode mode)
43e9d192
IB
8839{
8840 return
8841 (aarch64_vector_mode_supported_p (mode)
8842 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8843}
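/* Illustratively: this returns 63 for DImode and 31 for SImode, allowing
   scalar shift counts to be truncated to the register width, but 0 for
   vector and vector-struct modes, where no such truncation may be
   assumed.  */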
8844
8845#ifndef TLS_SECTION_ASM_FLAG
8846#define TLS_SECTION_ASM_FLAG 'T'
8847#endif
8848
8849void
8850aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8851 tree decl ATTRIBUTE_UNUSED)
8852{
8853 char flagchars[10], *f = flagchars;
8854
8855 /* If we have already declared this section, we can use an
8856 abbreviated form to switch back to it -- unless this section is
8857 part of a COMDAT groups, in which case GAS requires the full
8858 declaration every time. */
8859 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8860 && (flags & SECTION_DECLARED))
8861 {
8862 fprintf (asm_out_file, "\t.section\t%s\n", name);
8863 return;
8864 }
8865
8866 if (!(flags & SECTION_DEBUG))
8867 *f++ = 'a';
8868 if (flags & SECTION_WRITE)
8869 *f++ = 'w';
8870 if (flags & SECTION_CODE)
8871 *f++ = 'x';
8872 if (flags & SECTION_SMALL)
8873 *f++ = 's';
8874 if (flags & SECTION_MERGE)
8875 *f++ = 'M';
8876 if (flags & SECTION_STRINGS)
8877 *f++ = 'S';
8878 if (flags & SECTION_TLS)
8879 *f++ = TLS_SECTION_ASM_FLAG;
8880 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8881 *f++ = 'G';
8882 *f = '\0';
8883
8884 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8885
8886 if (!(flags & SECTION_NOTYPE))
8887 {
8888 const char *type;
8889 const char *format;
8890
8891 if (flags & SECTION_BSS)
8892 type = "nobits";
8893 else
8894 type = "progbits";
8895
8896#ifdef TYPE_OPERAND_FMT
8897 format = "," TYPE_OPERAND_FMT;
8898#else
8899 format = ",@%s";
8900#endif
8901
8902 fprintf (asm_out_file, format, type);
8903
8904 if (flags & SECTION_ENTSIZE)
8905 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8906 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8907 {
8908 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8909 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8910 else
8911 fprintf (asm_out_file, ",%s,comdat",
8912 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8913 }
8914 }
8915
8916 putc ('\n', asm_out_file);
8917}
8918
8919/* Select a format to encode pointers in exception handling data. */
8920int
8921aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8922{
8923 int type;
8924 switch (aarch64_cmodel)
8925 {
8926 case AARCH64_CMODEL_TINY:
8927 case AARCH64_CMODEL_TINY_PIC:
8928 case AARCH64_CMODEL_SMALL:
8929 case AARCH64_CMODEL_SMALL_PIC:
8930 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8931 for everything. */
8932 type = DW_EH_PE_sdata4;
8933 break;
8934 default:
8935 /* No assumptions here. 8-byte relocs required. */
8936 type = DW_EH_PE_sdata8;
8937 break;
8938 }
8939 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8940}
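/* For illustration: under the tiny and small code models this selects
   DW_EH_PE_pcrel | DW_EH_PE_sdata4 (with DW_EH_PE_indirect added for
   global symbols); other code models fall back to 8-byte pc-relative
   encodings.  */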
8941
0462169c
SN
8942/* Emit load exclusive. */
8943
8944static void
ef4bddc2 8945aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
8946 rtx mem, rtx model_rtx)
8947{
8948 rtx (*gen) (rtx, rtx, rtx);
8949
8950 switch (mode)
8951 {
8952 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8953 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8954 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8955 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8956 default:
8957 gcc_unreachable ();
8958 }
8959
8960 emit_insn (gen (rval, mem, model_rtx));
8961}
8962
8963/* Emit store exclusive. */
8964
8965static void
ef4bddc2 8966aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
8967 rtx rval, rtx mem, rtx model_rtx)
8968{
8969 rtx (*gen) (rtx, rtx, rtx, rtx);
8970
8971 switch (mode)
8972 {
8973 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8974 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8975 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8976 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8977 default:
8978 gcc_unreachable ();
8979 }
8980
8981 emit_insn (gen (bval, rval, mem, model_rtx));
8982}
8983
8984/* Mark the previous jump instruction as unlikely. */
8985
8986static void
8987aarch64_emit_unlikely_jump (rtx insn)
8988{
e5af9ddd 8989 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c
SN
8990
8991 insn = emit_jump_insn (insn);
e5af9ddd 8992 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
0462169c
SN
8993}
8994
8995/* Expand a compare and swap pattern. */
8996
8997void
8998aarch64_expand_compare_and_swap (rtx operands[])
8999{
9000 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
ef4bddc2 9001 machine_mode mode, cmp_mode;
0462169c
SN
9002 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
9003
9004 bval = operands[0];
9005 rval = operands[1];
9006 mem = operands[2];
9007 oldval = operands[3];
9008 newval = operands[4];
9009 is_weak = operands[5];
9010 mod_s = operands[6];
9011 mod_f = operands[7];
9012 mode = GET_MODE (mem);
9013 cmp_mode = mode;
9014
9015 /* Normally the succ memory model must be stronger than fail, but in the
9016 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
9017 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
9018
9019 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
9020 && INTVAL (mod_s) == MEMMODEL_RELEASE)
9021 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
9022
9023 switch (mode)
9024 {
9025 case QImode:
9026 case HImode:
9027 /* For short modes, we're going to perform the comparison in SImode,
9028 so do the zero-extension now. */
9029 cmp_mode = SImode;
9030 rval = gen_reg_rtx (SImode);
9031 oldval = convert_modes (SImode, mode, oldval, true);
9032 /* Fall through. */
9033
9034 case SImode:
9035 case DImode:
9036 /* Force the value into a register if needed. */
9037 if (!aarch64_plus_operand (oldval, mode))
9038 oldval = force_reg (cmp_mode, oldval);
9039 break;
9040
9041 default:
9042 gcc_unreachable ();
9043 }
9044
9045 switch (mode)
9046 {
9047 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
9048 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
9049 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
9050 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
9051 default:
9052 gcc_unreachable ();
9053 }
9054
9055 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
9056
9057 if (mode == QImode || mode == HImode)
9058 emit_move_insn (operands[1], gen_lowpart (mode, rval));
9059
9060 x = gen_rtx_REG (CCmode, CC_REGNUM);
9061 x = gen_rtx_EQ (SImode, x, const0_rtx);
9062 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
9063}
9064
9065/* Split a compare and swap pattern. */
9066
9067void
9068aarch64_split_compare_and_swap (rtx operands[])
9069{
9070 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 9071 machine_mode mode;
0462169c 9072 bool is_weak;
5d8a22a5
DM
9073 rtx_code_label *label1, *label2;
9074 rtx x, cond;
0462169c
SN
9075
9076 rval = operands[0];
9077 mem = operands[1];
9078 oldval = operands[2];
9079 newval = operands[3];
9080 is_weak = (operands[4] != const0_rtx);
0462169c
SN
9081 scratch = operands[7];
9082 mode = GET_MODE (mem);
9083
5d8a22a5 9084 label1 = NULL;
0462169c
SN
9085 if (!is_weak)
9086 {
9087 label1 = gen_label_rtx ();
9088 emit_label (label1);
9089 }
9090 label2 = gen_label_rtx ();
9091
9092 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
9093
9094 cond = aarch64_gen_compare_reg (NE, rval, oldval);
9095 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9096 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9097 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
9098 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9099
9100 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
9101
9102 if (!is_weak)
9103 {
9104 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
9105 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9106 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
9107 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9108 }
9109 else
9110 {
9111 cond = gen_rtx_REG (CCmode, CC_REGNUM);
9112 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
9113 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
9114 }
9115
9116 emit_label (label2);
9117}
9118
9119/* Split an atomic operation. */
9120
9121void
9122aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9123 rtx value, rtx model_rtx, rtx cond)
9124{
ef4bddc2
RS
9125 machine_mode mode = GET_MODE (mem);
9126 machine_mode wmode = (mode == DImode ? DImode : SImode);
5d8a22a5
DM
9127 rtx_code_label *label;
9128 rtx x;
0462169c
SN
9129
9130 label = gen_label_rtx ();
9131 emit_label (label);
9132
9133 if (new_out)
9134 new_out = gen_lowpart (wmode, new_out);
9135 if (old_out)
9136 old_out = gen_lowpart (wmode, old_out);
9137 else
9138 old_out = new_out;
9139 value = simplify_gen_subreg (wmode, value, mode, 0);
9140
9141 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
9142
9143 switch (code)
9144 {
9145 case SET:
9146 new_out = value;
9147 break;
9148
9149 case NOT:
9150 x = gen_rtx_AND (wmode, old_out, value);
9151 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9152 x = gen_rtx_NOT (wmode, new_out);
9153 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9154 break;
9155
9156 case MINUS:
9157 if (CONST_INT_P (value))
9158 {
9159 value = GEN_INT (-INTVAL (value));
9160 code = PLUS;
9161 }
9162 /* Fall through. */
9163
9164 default:
9165 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
9166 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9167 break;
9168 }
9169
9170 aarch64_emit_store_exclusive (mode, cond, mem,
9171 gen_lowpart (mode, new_out), model_rtx);
9172
9173 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9174 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9175 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
9176 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9177}
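/* Illustratively, for an SImode atomic add the split above produces a
   load-exclusive/store-exclusive retry loop along the lines of

     .Lretry:
       ldxr   w0, [x2]          // aarch64_emit_load_exclusive
       add    w1, w0, w3        // the requested operation
       stxr   w4, w1, [x2]      // aarch64_emit_store_exclusive
       cbnz   w4, .Lretry       // retry while the store-exclusive failed

   with acquire/release variants of the exclusives chosen from MODEL_RTX
   (register numbers here are purely illustrative).  */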
9178
95ca411e
YZ
9179static void
9180aarch64_print_extension (void)
9181{
9182 const struct aarch64_option_extension *opt = NULL;
9183
9184 for (opt = all_extensions; opt->name != NULL; opt++)
9185 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
9186 asm_fprintf (asm_out_file, "+%s", opt->name);
9187
9188 asm_fprintf (asm_out_file, "\n");
9189}
9190
43e9d192
IB
9191static void
9192aarch64_start_file (void)
9193{
9194 if (selected_arch)
95ca411e
YZ
9195 {
9196 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
9197 aarch64_print_extension ();
9198 }
43e9d192 9199 else if (selected_cpu)
95ca411e 9200 {
682287fb
JG
9201 const char *truncated_name
9202 = aarch64_rewrite_selected_cpu (selected_cpu->name);
9203 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
95ca411e
YZ
9204 aarch64_print_extension ();
9205 }
43e9d192
IB
9206 default_file_start ();
9207}
9208
9209/* Target hook for c_mode_for_suffix. */
ef4bddc2 9210static machine_mode
43e9d192
IB
9211aarch64_c_mode_for_suffix (char suffix)
9212{
9213 if (suffix == 'q')
9214 return TFmode;
9215
9216 return VOIDmode;
9217}
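/* Illustrative example: a constant written as 1.0q therefore gets TFmode,
   the 128-bit IEEE binary128 format that AArch64 also uses for
   long double.  */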
9218
3520f7cc
JG
9219/* We can only represent floating point constants which will fit in
9220 "quarter-precision" values. These values are characterised by
9221 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
9222 by:
9223
9224 (-1)^s * (n/16) * 2^r
9225
9226 Where:
9227 's' is the sign bit.
9228 'n' is an integer in the range 16 <= n <= 31.
9229 'r' is an integer in the range -3 <= r <= 4. */
9230
9231/* Return true iff X can be represented by a quarter-precision
9232 floating point immediate operand. Note, we cannot represent 0.0. */
9233bool
9234aarch64_float_const_representable_p (rtx x)
9235{
9236 /* This represents our current view of how many bits
9237 make up the mantissa. */
9238 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 9239 int exponent;
3520f7cc 9240 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 9241 REAL_VALUE_TYPE r, m;
807e902e 9242 bool fail;
3520f7cc
JG
9243
9244 if (!CONST_DOUBLE_P (x))
9245 return false;
9246
94bfa2da
TV
9247 if (GET_MODE (x) == VOIDmode)
9248 return false;
9249
3520f7cc
JG
9250 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9251
9252 /* We cannot represent infinities, NaNs or +/-zero. We won't
9253 know if we have +zero until we analyse the mantissa, but we
9254 can reject the other invalid values. */
9255 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
9256 || REAL_VALUE_MINUS_ZERO (r))
9257 return false;
9258
ba96cdfb 9259 /* Extract exponent. */
3520f7cc
JG
9260 r = real_value_abs (&r);
9261 exponent = REAL_EXP (&r);
9262
9263 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9264 highest (sign) bit, with a fixed binary point at bit point_pos.
9265 m1 holds the low part of the mantissa, m2 the high part.
9266 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
9267 bits for the mantissa, this can fail (low bits will be lost). */
9268 real_ldexp (&m, &r, point_pos - exponent);
807e902e 9269 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
9270
9271 /* If the low part of the mantissa has bits set we cannot represent
9272 the value. */
807e902e 9273 if (w.elt (0) != 0)
3520f7cc
JG
9274 return false;
9275 /* We have rejected the lower HOST_WIDE_INT, so update our
9276 understanding of how many bits lie in the mantissa and
9277 look only at the high HOST_WIDE_INT. */
807e902e 9278 mantissa = w.elt (1);
3520f7cc
JG
9279 point_pos -= HOST_BITS_PER_WIDE_INT;
9280
9281 /* We can only represent values with a mantissa of the form 1.xxxx. */
9282 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
9283 if ((mantissa & mask) != 0)
9284 return false;
9285
9286 /* Having filtered unrepresentable values, we may now remove all
9287 but the highest 5 bits. */
9288 mantissa >>= point_pos - 5;
9289
9290 /* We cannot represent the value 0.0, so reject it. This is handled
9291 elsewhere. */
9292 if (mantissa == 0)
9293 return false;
9294
9295 /* Then, as bit 4 is always set, we can mask it off, leaving
9296 the mantissa in the range [0, 15]. */
9297 mantissa &= ~(1 << 4);
9298 gcc_assert (mantissa <= 15);
9299
9300 /* GCC internally does not use IEEE754-like encoding (where normalized
9301 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
9302 Our mantissa values are shifted 4 places to the left relative to
9303 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
9304 by 5 places to correct for GCC's representation. */
9305 exponent = 5 - exponent;
9306
9307 return (exponent >= 0 && exponent <= 7);
9308}
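/* Worked examples (illustrative): 0.125 = (16/16) * 2^-3 and 31.0 =
   (31/16) * 2^4 are the smallest and largest positive magnitudes accepted;
   1.0, 0.25 and -2.5 are representable, whereas 0.2 and 128.0 are not, and
   0.0 is rejected here and handled elsewhere.  */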
9309
9310char*
81c2dfb9 9311aarch64_output_simd_mov_immediate (rtx const_vector,
ef4bddc2 9312 machine_mode mode,
3520f7cc
JG
9313 unsigned width)
9314{
3ea63f60 9315 bool is_valid;
3520f7cc 9316 static char templ[40];
3520f7cc 9317 const char *mnemonic;
e4f0f84d 9318 const char *shift_op;
3520f7cc 9319 unsigned int lane_count = 0;
81c2dfb9 9320 char element_char;
3520f7cc 9321
e4f0f84d 9322 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
9323
9324 /* This will return true to show const_vector is legal for use as either
9325 a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
9326 also update INFO to show how the immediate should be generated. */
81c2dfb9 9327 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
9328 gcc_assert (is_valid);
9329
81c2dfb9 9330 element_char = sizetochar (info.element_width);
48063b9d
IB
9331 lane_count = width / info.element_width;
9332
3520f7cc
JG
9333 mode = GET_MODE_INNER (mode);
9334 if (mode == SFmode || mode == DFmode)
9335 {
48063b9d
IB
9336 gcc_assert (info.shift == 0 && ! info.mvn);
9337 if (aarch64_float_const_zero_rtx_p (info.value))
9338 info.value = GEN_INT (0);
9339 else
9340 {
9341#define buf_size 20
9342 REAL_VALUE_TYPE r;
9343 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
9344 char float_buf[buf_size] = {'\0'};
9345 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
9346#undef buf_size
9347
9348 if (lane_count == 1)
9349 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
9350 else
9351 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 9352 lane_count, element_char, float_buf);
48063b9d
IB
9353 return templ;
9354 }
3520f7cc 9355 }
3520f7cc 9356
48063b9d 9357 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 9358 shift_op = info.msl ? "msl" : "lsl";
3520f7cc
JG
9359
9360 if (lane_count == 1)
48063b9d
IB
9361 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
9362 mnemonic, UINTVAL (info.value));
9363 else if (info.shift)
9364 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
9365 ", %s %d", mnemonic, lane_count, element_char,
9366 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 9367 else
48063b9d 9368 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 9369 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
9370 return templ;
9371}
9372
b7342d25
IB
9373char*
9374aarch64_output_scalar_simd_mov_immediate (rtx immediate,
ef4bddc2 9375 machine_mode mode)
b7342d25 9376{
ef4bddc2 9377 machine_mode vmode;
b7342d25
IB
9378
9379 gcc_assert (!VECTOR_MODE_P (mode));
9380 vmode = aarch64_simd_container_mode (mode, 64);
9381 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
9382 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
9383}
9384
88b08073
JG
9385/* Split operands into moves from op[1] + op[2] into op[0]. */
9386
9387void
9388aarch64_split_combinev16qi (rtx operands[3])
9389{
9390 unsigned int dest = REGNO (operands[0]);
9391 unsigned int src1 = REGNO (operands[1]);
9392 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 9393 machine_mode halfmode = GET_MODE (operands[1]);
88b08073
JG
9394 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
9395 rtx destlo, desthi;
9396
9397 gcc_assert (halfmode == V16QImode);
9398
9399 if (src1 == dest && src2 == dest + halfregs)
9400 {
9401 /* No-op move. Can't split to nothing; emit something. */
9402 emit_note (NOTE_INSN_DELETED);
9403 return;
9404 }
9405
9406 /* Preserve register attributes for variable tracking. */
9407 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
9408 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
9409 GET_MODE_SIZE (halfmode));
9410
9411 /* Special case of reversed high/low parts. */
9412 if (reg_overlap_mentioned_p (operands[2], destlo)
9413 && reg_overlap_mentioned_p (operands[1], desthi))
9414 {
9415 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9416 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
9417 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9418 }
9419 else if (!reg_overlap_mentioned_p (operands[2], destlo))
9420 {
9421 /* Try to avoid unnecessary moves if part of the result
9422 is in the right place already. */
9423 if (src1 != dest)
9424 emit_move_insn (destlo, operands[1]);
9425 if (src2 != dest + halfregs)
9426 emit_move_insn (desthi, operands[2]);
9427 }
9428 else
9429 {
9430 if (src2 != dest + halfregs)
9431 emit_move_insn (desthi, operands[2]);
9432 if (src1 != dest)
9433 emit_move_insn (destlo, operands[1]);
9434 }
9435}
9436
9437/* vec_perm support. */
9438
9439#define MAX_VECT_LEN 16
9440
9441struct expand_vec_perm_d
9442{
9443 rtx target, op0, op1;
9444 unsigned char perm[MAX_VECT_LEN];
ef4bddc2 9445 machine_mode vmode;
88b08073
JG
9446 unsigned char nelt;
9447 bool one_vector_p;
9448 bool testing_p;
9449};
9450
9451/* Generate a variable permutation. */
9452
9453static void
9454aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
9455{
ef4bddc2 9456 machine_mode vmode = GET_MODE (target);
88b08073
JG
9457 bool one_vector_p = rtx_equal_p (op0, op1);
9458
9459 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
9460 gcc_checking_assert (GET_MODE (op0) == vmode);
9461 gcc_checking_assert (GET_MODE (op1) == vmode);
9462 gcc_checking_assert (GET_MODE (sel) == vmode);
9463 gcc_checking_assert (TARGET_SIMD);
9464
9465 if (one_vector_p)
9466 {
9467 if (vmode == V8QImode)
9468 {
9469 /* Expand the argument to a V16QI mode by duplicating it. */
9470 rtx pair = gen_reg_rtx (V16QImode);
9471 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
9472 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9473 }
9474 else
9475 {
9476 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
9477 }
9478 }
9479 else
9480 {
9481 rtx pair;
9482
9483 if (vmode == V8QImode)
9484 {
9485 pair = gen_reg_rtx (V16QImode);
9486 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
9487 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9488 }
9489 else
9490 {
9491 pair = gen_reg_rtx (OImode);
9492 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
9493 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
9494 }
9495 }
9496}
9497
9498void
9499aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
9500{
ef4bddc2 9501 machine_mode vmode = GET_MODE (target);
c9d1a16a 9502 unsigned int nelt = GET_MODE_NUNITS (vmode);
88b08073 9503 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 9504 rtx mask;
88b08073
JG
9505
9506 /* The TBL instruction does not use a modulo index, so we must take care
9507 of that ourselves. */
f7c4e5b8
AL
9508 mask = aarch64_simd_gen_const_vector_dup (vmode,
9509 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
9510 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
9511
f7c4e5b8
AL
9512 /* For big-endian, we also need to reverse the index within the vector
9513 (but not which vector). */
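  /* Illustrative example: for V16QImode with two input vectors, XORing a
     selector value of 19 (lane 3 of the second vector) with 15 gives 28
     (lane 12 of the second vector): the lane order within each vector is
     reversed while the choice of vector is preserved.  */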
9514 if (BYTES_BIG_ENDIAN)
9515 {
9516 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
9517 if (!one_vector_p)
9518 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
9519 sel = expand_simple_binop (vmode, XOR, sel, mask,
9520 NULL, 0, OPTAB_LIB_WIDEN);
9521 }
88b08073
JG
9522 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
9523}
9524
cc4d934f
JG
9525/* Recognize patterns suitable for the TRN instructions. */
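/* Illustrative example (little-endian indexing): for V4SImode with two
   input vectors, the permutation {0, 4, 2, 6} is matched as TRN1 and
   {1, 5, 3, 7} as TRN2.  */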
9526static bool
9527aarch64_evpc_trn (struct expand_vec_perm_d *d)
9528{
9529 unsigned int i, odd, mask, nelt = d->nelt;
9530 rtx out, in0, in1, x;
9531 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 9532 machine_mode vmode = d->vmode;
cc4d934f
JG
9533
9534 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9535 return false;
9536
9537 /* Note that these are little-endian tests.
9538 We correct for big-endian later. */
9539 if (d->perm[0] == 0)
9540 odd = 0;
9541 else if (d->perm[0] == 1)
9542 odd = 1;
9543 else
9544 return false;
9545 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9546
9547 for (i = 0; i < nelt; i += 2)
9548 {
9549 if (d->perm[i] != i + odd)
9550 return false;
9551 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
9552 return false;
9553 }
9554
9555 /* Success! */
9556 if (d->testing_p)
9557 return true;
9558
9559 in0 = d->op0;
9560 in1 = d->op1;
9561 if (BYTES_BIG_ENDIAN)
9562 {
9563 x = in0, in0 = in1, in1 = x;
9564 odd = !odd;
9565 }
9566 out = d->target;
9567
9568 if (odd)
9569 {
9570 switch (vmode)
9571 {
9572 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
9573 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
9574 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
9575 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
9576 case V4SImode: gen = gen_aarch64_trn2v4si; break;
9577 case V2SImode: gen = gen_aarch64_trn2v2si; break;
9578 case V2DImode: gen = gen_aarch64_trn2v2di; break;
9579 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
9580 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
9581 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
9582 default:
9583 return false;
9584 }
9585 }
9586 else
9587 {
9588 switch (vmode)
9589 {
9590 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
9591 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
9592 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
9593 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
9594 case V4SImode: gen = gen_aarch64_trn1v4si; break;
9595 case V2SImode: gen = gen_aarch64_trn1v2si; break;
9596 case V2DImode: gen = gen_aarch64_trn1v2di; break;
9597 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
9598 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
9599 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
9600 default:
9601 return false;
9602 }
9603 }
9604
9605 emit_insn (gen (out, in0, in1));
9606 return true;
9607}
9608
9609/* Recognize patterns suitable for the UZP instructions. */
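/* Illustrative example (little-endian indexing): for V4SImode with two
   input vectors, the permutation {0, 2, 4, 6} is matched as UZP1 and
   {1, 3, 5, 7} as UZP2.  */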
9610static bool
9611aarch64_evpc_uzp (struct expand_vec_perm_d *d)
9612{
9613 unsigned int i, odd, mask, nelt = d->nelt;
9614 rtx out, in0, in1, x;
9615 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 9616 machine_mode vmode = d->vmode;
cc4d934f
JG
9617
9618 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9619 return false;
9620
9621 /* Note that these are little-endian tests.
9622 We correct for big-endian later. */
9623 if (d->perm[0] == 0)
9624 odd = 0;
9625 else if (d->perm[0] == 1)
9626 odd = 1;
9627 else
9628 return false;
9629 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9630
9631 for (i = 0; i < nelt; i++)
9632 {
9633 unsigned elt = (i * 2 + odd) & mask;
9634 if (d->perm[i] != elt)
9635 return false;
9636 }
9637
9638 /* Success! */
9639 if (d->testing_p)
9640 return true;
9641
9642 in0 = d->op0;
9643 in1 = d->op1;
9644 if (BYTES_BIG_ENDIAN)
9645 {
9646 x = in0, in0 = in1, in1 = x;
9647 odd = !odd;
9648 }
9649 out = d->target;
9650
9651 if (odd)
9652 {
9653 switch (vmode)
9654 {
9655 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9656 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9657 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9658 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9659 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9660 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9661 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9662 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9663 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9664 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9665 default:
9666 return false;
9667 }
9668 }
9669 else
9670 {
9671 switch (vmode)
9672 {
9673 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9674 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9675 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9676 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9677 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9678 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9679 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9680 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9681 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9682 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9683 default:
9684 return false;
9685 }
9686 }
9687
9688 emit_insn (gen (out, in0, in1));
9689 return true;
9690}
9691
9692/* Recognize patterns suitable for the ZIP instructions. */
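/* Illustrative example (little-endian indexing): for V4SImode with two
   input vectors, the permutation {0, 4, 1, 5} is matched as ZIP1 and
   {2, 6, 3, 7} as ZIP2.  */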
9693static bool
9694aarch64_evpc_zip (struct expand_vec_perm_d *d)
9695{
9696 unsigned int i, high, mask, nelt = d->nelt;
9697 rtx out, in0, in1, x;
9698 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 9699 machine_mode vmode = d->vmode;
cc4d934f
JG
9700
9701 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9702 return false;
9703
9704 /* Note that these are little-endian tests.
9705 We correct for big-endian later. */
9706 high = nelt / 2;
9707 if (d->perm[0] == high)
9708 /* Do Nothing. */
9709 ;
9710 else if (d->perm[0] == 0)
9711 high = 0;
9712 else
9713 return false;
9714 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9715
9716 for (i = 0; i < nelt / 2; i++)
9717 {
9718 unsigned elt = (i + high) & mask;
9719 if (d->perm[i * 2] != elt)
9720 return false;
9721 elt = (elt + nelt) & mask;
9722 if (d->perm[i * 2 + 1] != elt)
9723 return false;
9724 }
9725
9726 /* Success! */
9727 if (d->testing_p)
9728 return true;
9729
9730 in0 = d->op0;
9731 in1 = d->op1;
9732 if (BYTES_BIG_ENDIAN)
9733 {
9734 x = in0, in0 = in1, in1 = x;
9735 high = !high;
9736 }
9737 out = d->target;
9738
9739 if (high)
9740 {
9741 switch (vmode)
9742 {
9743 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9744 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9745 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9746 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9747 case V4SImode: gen = gen_aarch64_zip2v4si; break;
9748 case V2SImode: gen = gen_aarch64_zip2v2si; break;
9749 case V2DImode: gen = gen_aarch64_zip2v2di; break;
9750 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9751 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9752 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9753 default:
9754 return false;
9755 }
9756 }
9757 else
9758 {
9759 switch (vmode)
9760 {
9761 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9762 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9763 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9764 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9765 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9766 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9767 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9768 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9769 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9770 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9771 default:
9772 return false;
9773 }
9774 }
9775
9776 emit_insn (gen (out, in0, in1));
9777 return true;
9778}
9779
ae0533da
AL
9780/* Recognize patterns for the EXT insn. */
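/* Illustrative example (little-endian indexing): for V4SImode with two
   input vectors, the permutation {1, 2, 3, 4} is matched as an EXT of
   the concatenated operands with an element offset of 1.  */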
9781
9782static bool
9783aarch64_evpc_ext (struct expand_vec_perm_d *d)
9784{
9785 unsigned int i, nelt = d->nelt;
9786 rtx (*gen) (rtx, rtx, rtx, rtx);
9787 rtx offset;
9788
9789 unsigned int location = d->perm[0]; /* Always < nelt. */
9790
9791 /* Check if the extracted indices are increasing by one. */
9792 for (i = 1; i < nelt; i++)
9793 {
9794 unsigned int required = location + i;
9795 if (d->one_vector_p)
9796 {
9797 /* We'll pass the same vector in twice, so allow indices to wrap. */
9798 required &= (nelt - 1);
9799 }
9800 if (d->perm[i] != required)
9801 return false;
9802 }
9803
ae0533da
AL
9804 switch (d->vmode)
9805 {
9806 case V16QImode: gen = gen_aarch64_extv16qi; break;
9807 case V8QImode: gen = gen_aarch64_extv8qi; break;
9808 case V4HImode: gen = gen_aarch64_extv4hi; break;
9809 case V8HImode: gen = gen_aarch64_extv8hi; break;
9810 case V2SImode: gen = gen_aarch64_extv2si; break;
9811 case V4SImode: gen = gen_aarch64_extv4si; break;
9812 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9813 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9814 case V2DImode: gen = gen_aarch64_extv2di; break;
9815 case V2DFmode: gen = gen_aarch64_extv2df; break;
9816 default:
9817 return false;
9818 }
9819
9820 /* Success! */
9821 if (d->testing_p)
9822 return true;
9823
b31e65bb
AL
9824 /* The case where (location == 0) is a no-op for both big- and little-endian,
9825 and is removed by the mid-end at optimization levels -O1 and higher. */
9826
9827 if (BYTES_BIG_ENDIAN && (location != 0))
ae0533da
AL
9828 {
9829 /* After setup, we want the high elements of the first vector (stored
9830 at the LSB end of the register), and the low elements of the second
9831 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 9832 std::swap (d->op0, d->op1);
ae0533da
AL
9833 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9834 location = nelt - location;
9835 }
9836
9837 offset = GEN_INT (location);
9838 emit_insn (gen (d->target, d->op0, d->op1, offset));
9839 return true;
9840}
9841
923fcec3
AL
9842/* Recognize patterns for the REV insns. */
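/* Illustrative example: for V8QImode, the permutation
   {7, 6, 5, 4, 3, 2, 1, 0} has diff == 7 and is matched as a byte-wise
   REV64.  */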
9843
9844static bool
9845aarch64_evpc_rev (struct expand_vec_perm_d *d)
9846{
9847 unsigned int i, j, diff, nelt = d->nelt;
9848 rtx (*gen) (rtx, rtx);
9849
9850 if (!d->one_vector_p)
9851 return false;
9852
9853 diff = d->perm[0];
9854 switch (diff)
9855 {
9856 case 7:
9857 switch (d->vmode)
9858 {
9859 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9860 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
9861 default:
9862 return false;
9863 }
9864 break;
9865 case 3:
9866 switch (d->vmode)
9867 {
9868 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9869 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
9870 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
9871 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
9872 default:
9873 return false;
9874 }
9875 break;
9876 case 1:
9877 switch (d->vmode)
9878 {
9879 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9880 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
9881 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
9882 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
9883 case V4SImode: gen = gen_aarch64_rev64v4si; break;
9884 case V2SImode: gen = gen_aarch64_rev64v2si; break;
9885 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
9886 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
9887 default:
9888 return false;
9889 }
9890 break;
9891 default:
9892 return false;
9893 }
9894
9895 for (i = 0; i < nelt ; i += diff + 1)
9896 for (j = 0; j <= diff; j += 1)
9897 {
 9898	      /* This is guaranteed to be true as the value of diff
 9899		 is 7, 3 or 1, and we should have enough elements in the
 9900		 queue to generate this. Getting a vector mask with a
 9901		 value of diff other than these implies that
 9902		 something has gone wrong by the time we get here. */
9903 gcc_assert (i + j < nelt);
9904 if (d->perm[i + j] != i + diff - j)
9905 return false;
9906 }
9907
9908 /* Success! */
9909 if (d->testing_p)
9910 return true;
9911
9912 emit_insn (gen (d->target, d->op0));
9913 return true;
9914}
9915
91bd4114
JG
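/* Recognize patterns suitable for the DUP (element) instruction, i.e.
   permutations in which every index selects the same lane.  */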
9916static bool
9917aarch64_evpc_dup (struct expand_vec_perm_d *d)
9918{
9919 rtx (*gen) (rtx, rtx, rtx);
9920 rtx out = d->target;
9921 rtx in0;
ef4bddc2 9922 machine_mode vmode = d->vmode;
91bd4114
JG
9923 unsigned int i, elt, nelt = d->nelt;
9924 rtx lane;
9925
91bd4114
JG
9926 elt = d->perm[0];
9927 for (i = 1; i < nelt; i++)
9928 {
9929 if (elt != d->perm[i])
9930 return false;
9931 }
9932
9933 /* The generic preparation in aarch64_expand_vec_perm_const_1
9934 swaps the operand order and the permute indices if it finds
9935 d->perm[0] to be in the second operand. Thus, we can always
9936 use d->op0 and need not do any extra arithmetic to get the
9937 correct lane number. */
9938 in0 = d->op0;
f901401e 9939 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114
JG
9940
9941 switch (vmode)
9942 {
9943 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9944 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9945 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9946 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9947 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9948 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9949 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9950 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9951 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9952 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9953 default:
9954 return false;
9955 }
9956
9957 emit_insn (gen (out, in0, lane));
9958 return true;
9959}
9960
88b08073
JG
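/* Fall back to a TBL-based permute using a constant selector vector.
   Only V8QImode and V16QImode are handled here; for other modes the
   generic code retries with the elements lowered to QImode.  */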
9961static bool
9962aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9963{
9964 rtx rperm[MAX_VECT_LEN], sel;
ef4bddc2 9965 machine_mode vmode = d->vmode;
88b08073
JG
9966 unsigned int i, nelt = d->nelt;
9967
88b08073
JG
9968 if (d->testing_p)
9969 return true;
9970
 9971  /* Generic code will try constant permutation twice: once with the
 9972     original mode and again with the elements lowered to QImode.
 9973     So wait and don't do the selector expansion ourselves. */
9974 if (vmode != V8QImode && vmode != V16QImode)
9975 return false;
9976
9977 for (i = 0; i < nelt; ++i)
bbcc9c00
TB
9978 {
9979 int nunits = GET_MODE_NUNITS (vmode);
9980
9981 /* If big-endian and two vectors we end up with a weird mixed-endian
9982 mode on NEON. Reverse the index within each word but not the word
9983 itself. */
9984 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9985 : d->perm[i]);
9986 }
88b08073
JG
9987 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9988 sel = force_reg (vmode, sel);
9989
9990 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9991 return true;
9992}
9993
9994static bool
9995aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9996{
9997 /* The pattern matching functions above are written to look for a small
9998 number to begin the sequence (0, 1, N/2). If we begin with an index
9999 from the second operand, we can swap the operands. */
10000 if (d->perm[0] >= d->nelt)
10001 {
10002 unsigned i, nelt = d->nelt;
88b08073 10003
0696116a 10004 gcc_assert (nelt == (nelt & -nelt));
88b08073 10005 for (i = 0; i < nelt; ++i)
0696116a 10006 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
88b08073 10007
cb5c6c29 10008 std::swap (d->op0, d->op1);
88b08073
JG
10009 }
10010
10011 if (TARGET_SIMD)
cc4d934f 10012 {
923fcec3
AL
10013 if (aarch64_evpc_rev (d))
10014 return true;
10015 else if (aarch64_evpc_ext (d))
ae0533da 10016 return true;
f901401e
AL
10017 else if (aarch64_evpc_dup (d))
10018 return true;
ae0533da 10019 else if (aarch64_evpc_zip (d))
cc4d934f
JG
10020 return true;
10021 else if (aarch64_evpc_uzp (d))
10022 return true;
10023 else if (aarch64_evpc_trn (d))
10024 return true;
10025 return aarch64_evpc_tbl (d);
10026 }
88b08073
JG
10027 return false;
10028}
10029
10030/* Expand a vec_perm_const pattern. */
10031
10032bool
10033aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
10034{
10035 struct expand_vec_perm_d d;
10036 int i, nelt, which;
10037
10038 d.target = target;
10039 d.op0 = op0;
10040 d.op1 = op1;
10041
10042 d.vmode = GET_MODE (target);
10043 gcc_assert (VECTOR_MODE_P (d.vmode));
10044 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
10045 d.testing_p = false;
10046
10047 for (i = which = 0; i < nelt; ++i)
10048 {
10049 rtx e = XVECEXP (sel, 0, i);
10050 int ei = INTVAL (e) & (2 * nelt - 1);
10051 which |= (ei < nelt ? 1 : 2);
10052 d.perm[i] = ei;
10053 }
10054
10055 switch (which)
10056 {
10057 default:
10058 gcc_unreachable ();
10059
10060 case 3:
10061 d.one_vector_p = false;
10062 if (!rtx_equal_p (op0, op1))
10063 break;
10064
10065 /* The elements of PERM do not suggest that only the first operand
10066 is used, but both operands are identical. Allow easier matching
10067 of the permutation by folding the permutation into the single
10068 input vector. */
10069 /* Fall Through. */
10070 case 2:
10071 for (i = 0; i < nelt; ++i)
10072 d.perm[i] &= nelt - 1;
10073 d.op0 = op1;
10074 d.one_vector_p = true;
10075 break;
10076
10077 case 1:
10078 d.op1 = op0;
10079 d.one_vector_p = true;
10080 break;
10081 }
10082
10083 return aarch64_expand_vec_perm_const_1 (&d);
10084}
10085
10086static bool
ef4bddc2 10087aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
88b08073
JG
10088 const unsigned char *sel)
10089{
10090 struct expand_vec_perm_d d;
10091 unsigned int i, nelt, which;
10092 bool ret;
10093
10094 d.vmode = vmode;
10095 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
10096 d.testing_p = true;
10097 memcpy (d.perm, sel, nelt);
10098
10099 /* Calculate whether all elements are in one vector. */
10100 for (i = which = 0; i < nelt; ++i)
10101 {
10102 unsigned char e = d.perm[i];
10103 gcc_assert (e < 2 * nelt);
10104 which |= (e < nelt ? 1 : 2);
10105 }
10106
10107 /* If all elements are from the second vector, reindex as if from the
10108 first vector. */
10109 if (which == 2)
10110 for (i = 0; i < nelt; ++i)
10111 d.perm[i] -= nelt;
10112
10113 /* Check whether the mask can be applied to a single vector. */
10114 d.one_vector_p = (which != 3);
10115
10116 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
10117 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
10118 if (!d.one_vector_p)
10119 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
10120
10121 start_sequence ();
10122 ret = aarch64_expand_vec_perm_const_1 (&d);
10123 end_sequence ();
10124
10125 return ret;
10126}
10127
668046d1
DS
10128rtx
10129aarch64_reverse_mask (enum machine_mode mode)
10130{
10131 /* We have to reverse each vector because we don't have
10132 a permuted load that can reverse-load according to ABI rules. */
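  /* Illustrative example: for V4SImode this builds the V16QImode byte mask
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }.  */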
10133 rtx mask;
10134 rtvec v = rtvec_alloc (16);
10135 int i, j;
10136 int nunits = GET_MODE_NUNITS (mode);
10137 int usize = GET_MODE_UNIT_SIZE (mode);
10138
10139 gcc_assert (BYTES_BIG_ENDIAN);
10140 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
10141
10142 for (i = 0; i < nunits; i++)
10143 for (j = 0; j < usize; j++)
10144 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
10145 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
10146 return force_reg (V16QImode, mask);
10147}
10148
97e1ad78
JG
10149/* Implement MODES_TIEABLE_P. */
10150
10151bool
ef4bddc2 10152aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
10153{
10154 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
10155 return true;
10156
10157 /* We specifically want to allow elements of "structure" modes to
10158 be tieable to the structure. This more general condition allows
10159 other rarer situations too. */
10160 if (TARGET_SIMD
10161 && aarch64_vector_mode_p (mode1)
10162 && aarch64_vector_mode_p (mode2))
10163 return true;
10164
10165 return false;
10166}
10167
e2c75eea
JG
10168/* Return a new RTX holding the result of moving POINTER forward by
10169 AMOUNT bytes. */
10170
10171static rtx
10172aarch64_move_pointer (rtx pointer, int amount)
10173{
10174 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
10175
10176 return adjust_automodify_address (pointer, GET_MODE (pointer),
10177 next, amount);
10178}
10179
10180/* Return a new RTX holding the result of moving POINTER forward by the
10181 size of the mode it points to. */
10182
10183static rtx
10184aarch64_progress_pointer (rtx pointer)
10185{
10186 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
10187
10188 return aarch64_move_pointer (pointer, amount);
10189}
10190
10191/* Copy one MODE-sized block from SRC to DST, then progress SRC and DST by
10192   the size of MODE in bytes. */
10193
10194static void
10195aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 10196 machine_mode mode)
e2c75eea
JG
10197{
10198 rtx reg = gen_reg_rtx (mode);
10199
10200 /* "Cast" the pointers to the correct mode. */
10201 *src = adjust_address (*src, mode, 0);
10202 *dst = adjust_address (*dst, mode, 0);
10203 /* Emit the memcpy. */
10204 emit_move_insn (reg, *src);
10205 emit_move_insn (*dst, reg);
10206 /* Move the pointers forward. */
10207 *src = aarch64_progress_pointer (*src);
10208 *dst = aarch64_progress_pointer (*dst);
10209}
10210
10211/* Expand movmem, as if from a __builtin_memcpy. Return true if
10212 we succeed, otherwise return false. */
10213
10214bool
10215aarch64_expand_movmem (rtx *operands)
10216{
10217 unsigned int n;
10218 rtx dst = operands[0];
10219 rtx src = operands[1];
10220 rtx base;
10221 bool speed_p = !optimize_function_for_size_p (cfun);
10222
10223 /* When optimizing for size, give a better estimate of the length of a
10224 memcpy call, but use the default otherwise. */
10225 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
10226
10227 /* We can't do anything smart if the amount to copy is not constant. */
10228 if (!CONST_INT_P (operands[2]))
10229 return false;
10230
10231 n = UINTVAL (operands[2]);
10232
10233 /* Try to keep the number of instructions low. For cases below 16 bytes we
10234 need to make at most two moves. For cases above 16 bytes it will be one
10235 move for each 16 byte chunk, then at most two additional moves. */
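  /* Illustrative example: n == 35 is estimated as 35 / 16 + 2 == 4 moves,
     which fits the speed budget of 15 / 2 == 7, so the copy is expanded
     inline as two 16-byte chunks plus an overlapping tail copy.  */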
10236 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
10237 return false;
10238
10239 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10240 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
10241
10242 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
10243 src = adjust_automodify_address (src, VOIDmode, base, 0);
10244
10245 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
10246 1-byte chunk. */
10247 if (n < 4)
10248 {
10249 if (n >= 2)
10250 {
10251 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10252 n -= 2;
10253 }
10254
10255 if (n == 1)
10256 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10257
10258 return true;
10259 }
10260
10261 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
10262 4-byte chunk, partially overlapping with the previously copied chunk. */
10263 if (n < 8)
10264 {
10265 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10266 n -= 4;
10267 if (n > 0)
10268 {
10269 int move = n - 4;
10270
10271 src = aarch64_move_pointer (src, move);
10272 dst = aarch64_move_pointer (dst, move);
10273 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10274 }
10275 return true;
10276 }
10277
10278 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
10279 them, then (if applicable) an 8-byte chunk. */
10280 while (n >= 8)
10281 {
10282 if (n / 16)
10283 {
10284 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
10285 n -= 16;
10286 }
10287 else
10288 {
10289 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10290 n -= 8;
10291 }
10292 }
10293
10294 /* Finish the final bytes of the copy. We can always do this in one
10295 instruction. We either copy the exact amount we need, or partially
10296     overlap with the previous chunk we copied and copy 8 bytes. */
10297 if (n == 0)
10298 return true;
10299 else if (n == 1)
10300 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10301 else if (n == 2)
10302 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10303 else if (n == 4)
10304 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10305 else
10306 {
10307 if (n == 3)
10308 {
10309 src = aarch64_move_pointer (src, -1);
10310 dst = aarch64_move_pointer (dst, -1);
10311 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10312 }
10313 else
10314 {
10315 int move = n - 8;
10316
10317 src = aarch64_move_pointer (src, move);
10318 dst = aarch64_move_pointer (dst, move);
10319 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10320 }
10321 }
10322
10323 return true;
10324}
10325
a3125fc2
CL
10326/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
10327
10328static unsigned HOST_WIDE_INT
10329aarch64_asan_shadow_offset (void)
10330{
10331 return (HOST_WIDE_INT_1 << 36);
10332}
10333
d3006da6 10334static bool
445d7826 10335aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
d3006da6
JG
10336 unsigned int align,
10337 enum by_pieces_operation op,
10338 bool speed_p)
10339{
10340 /* STORE_BY_PIECES can be used when copying a constant string, but
10341 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
10342 For now we always fail this and let the move_by_pieces code copy
10343 the string from read-only memory. */
10344 if (op == STORE_BY_PIECES)
10345 return false;
10346
10347 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
10348}
10349
5f3bc026
ZC
10350static enum machine_mode
10351aarch64_code_to_ccmode (enum rtx_code code)
10352{
10353 switch (code)
10354 {
10355 case NE:
10356 return CC_DNEmode;
10357
10358 case EQ:
10359 return CC_DEQmode;
10360
10361 case LE:
10362 return CC_DLEmode;
10363
10364 case LT:
10365 return CC_DLTmode;
10366
10367 case GE:
10368 return CC_DGEmode;
10369
10370 case GT:
10371 return CC_DGTmode;
10372
10373 case LEU:
10374 return CC_DLEUmode;
10375
10376 case LTU:
10377 return CC_DLTUmode;
10378
10379 case GEU:
10380 return CC_DGEUmode;
10381
10382 case GTU:
10383 return CC_DGTUmode;
10384
10385 default:
10386 return CCmode;
10387 }
10388}
10389
10390static rtx
10391aarch64_gen_ccmp_first (rtx *prep_seq, rtx *gen_seq,
10392 int code, tree treeop0, tree treeop1)
10393{
10394 enum machine_mode op_mode, cmp_mode, cc_mode;
10395 rtx op0, op1, cmp, target;
10396 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
10397 enum insn_code icode;
10398 struct expand_operand ops[4];
10399
10400 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) code);
10401 if (cc_mode == CCmode)
10402 return NULL_RTX;
10403
10404 start_sequence ();
10405 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
10406
10407 op_mode = GET_MODE (op0);
10408 if (op_mode == VOIDmode)
10409 op_mode = GET_MODE (op1);
10410
10411 switch (op_mode)
10412 {
10413 case QImode:
10414 case HImode:
10415 case SImode:
10416 cmp_mode = SImode;
10417 icode = CODE_FOR_cmpsi;
10418 break;
10419
10420 case DImode:
10421 cmp_mode = DImode;
10422 icode = CODE_FOR_cmpdi;
10423 break;
10424
10425 default:
10426 end_sequence ();
10427 return NULL_RTX;
10428 }
10429
10430 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
10431 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
10432 if (!op0 || !op1)
10433 {
10434 end_sequence ();
10435 return NULL_RTX;
10436 }
10437 *prep_seq = get_insns ();
10438 end_sequence ();
10439
10440 cmp = gen_rtx_fmt_ee ((enum rtx_code) code, cmp_mode, op0, op1);
10441 target = gen_rtx_REG (CCmode, CC_REGNUM);
10442
10443 create_output_operand (&ops[0], target, CCmode);
10444 create_fixed_operand (&ops[1], cmp);
10445 create_fixed_operand (&ops[2], op0);
10446 create_fixed_operand (&ops[3], op1);
10447
10448 start_sequence ();
10449 if (!maybe_expand_insn (icode, 4, ops))
10450 {
10451 end_sequence ();
10452 return NULL_RTX;
10453 }
10454 *gen_seq = get_insns ();
10455 end_sequence ();
10456
10457 return gen_rtx_REG (cc_mode, CC_REGNUM);
10458}
10459
10460static rtx
10461aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code,
10462 tree treeop0, tree treeop1, int bit_code)
10463{
10464 rtx op0, op1, cmp0, cmp1, target;
10465 enum machine_mode op_mode, cmp_mode, cc_mode;
10466 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
10467 enum insn_code icode = CODE_FOR_ccmp_andsi;
10468 struct expand_operand ops[6];
10469
10470 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) cmp_code);
10471 if (cc_mode == CCmode)
10472 return NULL_RTX;
10473
10474 push_to_sequence ((rtx_insn*) *prep_seq);
10475 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
10476
10477 op_mode = GET_MODE (op0);
10478 if (op_mode == VOIDmode)
10479 op_mode = GET_MODE (op1);
10480
10481 switch (op_mode)
10482 {
10483 case QImode:
10484 case HImode:
10485 case SImode:
10486 cmp_mode = SImode;
10487 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_andsi
10488 : CODE_FOR_ccmp_iorsi;
10489 break;
10490
10491 case DImode:
10492 cmp_mode = DImode;
10493 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_anddi
10494 : CODE_FOR_ccmp_iordi;
10495 break;
10496
10497 default:
10498 end_sequence ();
10499 return NULL_RTX;
10500 }
10501
10502 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
10503 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
10504 if (!op0 || !op1)
10505 {
10506 end_sequence ();
10507 return NULL_RTX;
10508 }
10509 *prep_seq = get_insns ();
10510 end_sequence ();
10511
10512 target = gen_rtx_REG (cc_mode, CC_REGNUM);
10513 cmp1 = gen_rtx_fmt_ee ((enum rtx_code) cmp_code, cmp_mode, op0, op1);
10514 cmp0 = gen_rtx_fmt_ee (NE, cmp_mode, prev, const0_rtx);
10515
10516 create_fixed_operand (&ops[0], prev);
10517 create_fixed_operand (&ops[1], target);
10518 create_fixed_operand (&ops[2], op0);
10519 create_fixed_operand (&ops[3], op1);
10520 create_fixed_operand (&ops[4], cmp0);
10521 create_fixed_operand (&ops[5], cmp1);
10522
10523 push_to_sequence ((rtx_insn*) *gen_seq);
10524 if (!maybe_expand_insn (icode, 6, ops))
10525 {
10526 end_sequence ();
10527 return NULL_RTX;
10528 }
10529
10530 *gen_seq = get_insns ();
10531 end_sequence ();
10532
10533 return target;
10534}
10535
10536#undef TARGET_GEN_CCMP_FIRST
10537#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
10538
10539#undef TARGET_GEN_CCMP_NEXT
10540#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
10541
6a569cdd
KT
10542/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
10543 instruction fusion of some sort. */
10544
10545static bool
10546aarch64_macro_fusion_p (void)
10547{
10548 return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
10549}
10550
10551
10552/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
10553 should be kept together during scheduling. */
10554
10555static bool
10556aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
10557{
10558 rtx set_dest;
10559 rtx prev_set = single_set (prev);
10560 rtx curr_set = single_set (curr);
10561 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
10562 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
10563
10564 if (!aarch64_macro_fusion_p ())
10565 return false;
10566
10567 if (simple_sets_p
10568 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
10569 {
10570 /* We are trying to match:
10571 prev (mov) == (set (reg r0) (const_int imm16))
10572 curr (movk) == (set (zero_extract (reg r0)
10573 (const_int 16)
10574 (const_int 16))
10575 (const_int imm16_1)) */
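      /* In assembly terms this matches a pair such as (illustrative):
	   mov  w0, #0x1234
	   movk w0, #0x5678, lsl #16  */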
10576
10577 set_dest = SET_DEST (curr_set);
10578
10579 if (GET_CODE (set_dest) == ZERO_EXTRACT
10580 && CONST_INT_P (SET_SRC (curr_set))
10581 && CONST_INT_P (SET_SRC (prev_set))
10582 && CONST_INT_P (XEXP (set_dest, 2))
10583 && INTVAL (XEXP (set_dest, 2)) == 16
10584 && REG_P (XEXP (set_dest, 0))
10585 && REG_P (SET_DEST (prev_set))
10586 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
10587 {
10588 return true;
10589 }
10590 }
10591
9bbe08fe
KT
10592 if (simple_sets_p
10593 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
10594 {
10595
10596 /* We're trying to match:
10597 prev (adrp) == (set (reg r1)
10598 (high (symbol_ref ("SYM"))))
10599 curr (add) == (set (reg r0)
10600 (lo_sum (reg r1)
10601 (symbol_ref ("SYM"))))
10602 Note that r0 need not necessarily be the same as r1, especially
10603 during pre-regalloc scheduling. */
10604
10605 if (satisfies_constraint_Ush (SET_SRC (prev_set))
10606 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10607 {
10608 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
10609 && REG_P (XEXP (SET_SRC (curr_set), 0))
10610 && REGNO (XEXP (SET_SRC (curr_set), 0))
10611 == REGNO (SET_DEST (prev_set))
10612 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
10613 XEXP (SET_SRC (curr_set), 1)))
10614 return true;
10615 }
10616 }
10617
cd0cb232
KT
10618 if (simple_sets_p
10619 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK))
10620 {
10621
10622 /* We're trying to match:
10623 prev (movk) == (set (zero_extract (reg r0)
10624 (const_int 16)
10625 (const_int 32))
10626 (const_int imm16_1))
10627 curr (movk) == (set (zero_extract (reg r0)
10628 (const_int 16)
10629 (const_int 48))
10630 (const_int imm16_2)) */
10631
10632 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
10633 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
10634 && REG_P (XEXP (SET_DEST (prev_set), 0))
10635 && REG_P (XEXP (SET_DEST (curr_set), 0))
10636 && REGNO (XEXP (SET_DEST (prev_set), 0))
10637 == REGNO (XEXP (SET_DEST (curr_set), 0))
10638 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
10639 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
10640 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
10641 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
10642 && CONST_INT_P (SET_SRC (prev_set))
10643 && CONST_INT_P (SET_SRC (curr_set)))
10644 return true;
10645
10646 }
d8354ad7
KT
10647 if (simple_sets_p
10648 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_LDR))
10649 {
10650 /* We're trying to match:
10651 prev (adrp) == (set (reg r0)
10652 (high (symbol_ref ("SYM"))))
10653 curr (ldr) == (set (reg r1)
10654 (mem (lo_sum (reg r0)
10655 (symbol_ref ("SYM")))))
10656 or
10657 curr (ldr) == (set (reg r1)
10658 (zero_extend (mem
10659 (lo_sum (reg r0)
10660 (symbol_ref ("SYM")))))) */
10661 if (satisfies_constraint_Ush (SET_SRC (prev_set))
10662 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10663 {
10664 rtx curr_src = SET_SRC (curr_set);
10665
10666 if (GET_CODE (curr_src) == ZERO_EXTEND)
10667 curr_src = XEXP (curr_src, 0);
10668
10669 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
10670 && REG_P (XEXP (XEXP (curr_src, 0), 0))
10671 && REGNO (XEXP (XEXP (curr_src, 0), 0))
10672 == REGNO (SET_DEST (prev_set))
10673 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
10674 XEXP (SET_SRC (prev_set), 0)))
10675 return true;
10676 }
10677 }
cd0cb232 10678
3759108f
AP
10679 if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
10680 && any_condjump_p (curr))
10681 {
10682 enum attr_type prev_type = get_attr_type (prev);
10683
10684      /* FIXME: this misses some instructions which ThunderX considers
10685	  simple arithmetic. Simple shifts are missed here. */
10686 if (prev_type == TYPE_ALUS_SREG
10687 || prev_type == TYPE_ALUS_IMM
10688 || prev_type == TYPE_LOGICS_REG
10689 || prev_type == TYPE_LOGICS_IMM)
10690 return true;
10691 }
10692
6a569cdd
KT
10693 return false;
10694}
10695
350013bc
BC
10696/* If MEM is in the form of [base+offset], extract the two parts
10697   of the address and store them in BASE and OFFSET, otherwise return false
10698 after clearing BASE and OFFSET. */
10699
10700bool
10701extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
10702{
10703 rtx addr;
10704
10705 gcc_assert (MEM_P (mem));
10706
10707 addr = XEXP (mem, 0);
10708
10709 if (REG_P (addr))
10710 {
10711 *base = addr;
10712 *offset = const0_rtx;
10713 return true;
10714 }
10715
10716 if (GET_CODE (addr) == PLUS
10717 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
10718 {
10719 *base = XEXP (addr, 0);
10720 *offset = XEXP (addr, 1);
10721 return true;
10722 }
10723
10724 *base = NULL_RTX;
10725 *offset = NULL_RTX;
10726
10727 return false;
10728}
10729
10730/* Types for scheduling fusion. */
10731enum sched_fusion_type
10732{
10733 SCHED_FUSION_NONE = 0,
10734 SCHED_FUSION_LD_SIGN_EXTEND,
10735 SCHED_FUSION_LD_ZERO_EXTEND,
10736 SCHED_FUSION_LD,
10737 SCHED_FUSION_ST,
10738 SCHED_FUSION_NUM
10739};
10740
10741/* If INSN is a load or store of address in the form of [base+offset],
10742 extract the two parts and set to BASE and OFFSET. Return scheduling
10743 fusion type this INSN is. */
10744
10745static enum sched_fusion_type
10746fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
10747{
10748 rtx x, dest, src;
10749 enum sched_fusion_type fusion = SCHED_FUSION_LD;
10750
10751 gcc_assert (INSN_P (insn));
10752 x = PATTERN (insn);
10753 if (GET_CODE (x) != SET)
10754 return SCHED_FUSION_NONE;
10755
10756 src = SET_SRC (x);
10757 dest = SET_DEST (x);
10758
1f46bd52
AP
10759 if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
10760 && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
350013bc
BC
10761 return SCHED_FUSION_NONE;
10762
10763 if (GET_CODE (src) == SIGN_EXTEND)
10764 {
10765 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
10766 src = XEXP (src, 0);
10767 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10768 return SCHED_FUSION_NONE;
10769 }
10770 else if (GET_CODE (src) == ZERO_EXTEND)
10771 {
10772 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
10773 src = XEXP (src, 0);
10774 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10775 return SCHED_FUSION_NONE;
10776 }
10777
10778 if (GET_CODE (src) == MEM && REG_P (dest))
10779 extract_base_offset_in_addr (src, base, offset);
10780 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
10781 {
10782 fusion = SCHED_FUSION_ST;
10783 extract_base_offset_in_addr (dest, base, offset);
10784 }
10785 else
10786 return SCHED_FUSION_NONE;
10787
10788 if (*base == NULL_RTX || *offset == NULL_RTX)
10789 fusion = SCHED_FUSION_NONE;
10790
10791 return fusion;
10792}
10793
10794/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
10795
10796   Currently we only support fusing ldr and str instructions, so FUSION_PRI
10797   and PRI are only calculated for these instructions. For other instructions,
10798   FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, fusion of
10799   other instruction types can be added by returning different priorities.
10800
10801 It's important that irrelevant instructions get the largest FUSION_PRI. */
10802
10803static void
10804aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
10805 int *fusion_pri, int *pri)
10806{
10807 int tmp, off_val;
10808 rtx base, offset;
10809 enum sched_fusion_type fusion;
10810
10811 gcc_assert (INSN_P (insn));
10812
10813 tmp = max_pri - 1;
10814 fusion = fusion_load_store (insn, &base, &offset);
10815 if (fusion == SCHED_FUSION_NONE)
10816 {
10817 *pri = tmp;
10818 *fusion_pri = tmp;
10819 return;
10820 }
10821
10822 /* Set FUSION_PRI according to fusion type and base register. */
10823 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
10824
10825 /* Calculate PRI. */
10826 tmp /= 2;
10827
10828 /* INSN with smaller offset goes first. */
10829 off_val = (int)(INTVAL (offset));
10830 if (off_val >= 0)
10831 tmp -= (off_val & 0xfffff);
10832 else
10833 tmp += ((- off_val) & 0xfffff);
10834
10835 *pri = tmp;
10836 return;
10837}
10838
10839/* Given OPERANDS of consecutive load/store, check if we can merge
10840 them into ldp/stp. LOAD is true if they are load instructions.
10841 MODE is the mode of memory operands. */
10842
10843bool
10844aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
10845 enum machine_mode mode)
10846{
10847 HOST_WIDE_INT offval_1, offval_2, msize;
10848 enum reg_class rclass_1, rclass_2;
10849 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
10850
10851 if (load)
10852 {
10853 mem_1 = operands[1];
10854 mem_2 = operands[3];
10855 reg_1 = operands[0];
10856 reg_2 = operands[2];
10857 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
10858 if (REGNO (reg_1) == REGNO (reg_2))
10859 return false;
10860 }
10861 else
10862 {
10863 mem_1 = operands[0];
10864 mem_2 = operands[2];
10865 reg_1 = operands[1];
10866 reg_2 = operands[3];
10867 }
10868
bf84ac44
AP
10869 /* The mems cannot be volatile. */
10870 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
10871 return false;
10872
350013bc
BC
10873 /* Check if the addresses are in the form of [base+offset]. */
10874 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
10875 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
10876 return false;
10877 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
10878 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
10879 return false;
10880
10881   /* Check if the bases are the same. */
10882 if (!rtx_equal_p (base_1, base_2))
10883 return false;
10884
10885 offval_1 = INTVAL (offset_1);
10886 offval_2 = INTVAL (offset_2);
10887 msize = GET_MODE_SIZE (mode);
10888 /* Check if the offsets are consecutive. */
10889 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
10890 return false;
10891
10892 /* Check if the addresses are clobbered by load. */
10893 if (load)
10894 {
10895 if (reg_mentioned_p (reg_1, mem_1))
10896 return false;
10897
10898 /* In increasing order, the last load can clobber the address. */
10899 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
10900 return false;
10901 }
10902
10903 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
10904 rclass_1 = FP_REGS;
10905 else
10906 rclass_1 = GENERAL_REGS;
10907
10908 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
10909 rclass_2 = FP_REGS;
10910 else
10911 rclass_2 = GENERAL_REGS;
10912
10913   /* Check if the registers are of the same class. */
10914 if (rclass_1 != rclass_2)
10915 return false;
10916
10917 return true;
10918}
10919
10920/* Given OPERANDS of consecutive load/store, check if we can merge
10921 them into ldp/stp by adjusting the offset. LOAD is true if they
10922 are load instructions. MODE is the mode of memory operands.
10923
10924   Given the consecutive stores below:
10925
10926 str w1, [xb, 0x100]
10927 str w1, [xb, 0x104]
10928 str w1, [xb, 0x108]
10929 str w1, [xb, 0x10c]
10930
10931 Though the offsets are out of the range supported by stp, we can
10932 still pair them after adjusting the offset, like:
10933
10934 add scratch, xb, 0x100
10935 stp w1, w1, [scratch]
10936 stp w1, w1, [scratch, 0x8]
10937
10938 The peephole patterns detecting this opportunity should guarantee
10939   the scratch register is available. */
10940
10941bool
10942aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
10943 enum machine_mode mode)
10944{
10945 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
10946 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
10947 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
10948 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
10949
10950 if (load)
10951 {
10952 reg_1 = operands[0];
10953 mem_1 = operands[1];
10954 reg_2 = operands[2];
10955 mem_2 = operands[3];
10956 reg_3 = operands[4];
10957 mem_3 = operands[5];
10958 reg_4 = operands[6];
10959 mem_4 = operands[7];
10960 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
10961 && REG_P (reg_3) && REG_P (reg_4));
10962 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
10963 return false;
10964 }
10965 else
10966 {
10967 mem_1 = operands[0];
10968 reg_1 = operands[1];
10969 mem_2 = operands[2];
10970 reg_2 = operands[3];
10971 mem_3 = operands[4];
10972 reg_3 = operands[5];
10973 mem_4 = operands[6];
10974 reg_4 = operands[7];
10975 }
10976   /* Skip if the memory operand is by itself valid for ldp/stp. */
10977 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
10978 return false;
10979
bf84ac44
AP
10980 /* The mems cannot be volatile. */
10981 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
10982 || MEM_VOLATILE_P (mem_3) ||MEM_VOLATILE_P (mem_4))
10983 return false;
10984
350013bc
BC
10985 /* Check if the addresses are in the form of [base+offset]. */
10986 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
10987 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
10988 return false;
10989 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
10990 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
10991 return false;
10992 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
10993 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
10994 return false;
10995 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
10996 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
10997 return false;
10998
10999   /* Check if the bases are the same. */
11000 if (!rtx_equal_p (base_1, base_2)
11001 || !rtx_equal_p (base_2, base_3)
11002 || !rtx_equal_p (base_3, base_4))
11003 return false;
11004
11005 offval_1 = INTVAL (offset_1);
11006 offval_2 = INTVAL (offset_2);
11007 offval_3 = INTVAL (offset_3);
11008 offval_4 = INTVAL (offset_4);
11009 msize = GET_MODE_SIZE (mode);
11010 /* Check if the offsets are consecutive. */
11011 if ((offval_1 != (offval_2 + msize)
11012 || offval_1 != (offval_3 + msize * 2)
11013 || offval_1 != (offval_4 + msize * 3))
11014 && (offval_4 != (offval_3 + msize)
11015 || offval_4 != (offval_2 + msize * 2)
11016 || offval_4 != (offval_1 + msize * 3)))
11017 return false;
11018
11019 /* Check if the addresses are clobbered by load. */
11020 if (load)
11021 {
11022 if (reg_mentioned_p (reg_1, mem_1)
11023 || reg_mentioned_p (reg_2, mem_2)
11024 || reg_mentioned_p (reg_3, mem_3))
11025 return false;
11026
11027 /* In increasing order, the last load can clobber the address. */
11028 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
11029 return false;
11030 }
11031
11032 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
11033 rclass_1 = FP_REGS;
11034 else
11035 rclass_1 = GENERAL_REGS;
11036
11037 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
11038 rclass_2 = FP_REGS;
11039 else
11040 rclass_2 = GENERAL_REGS;
11041
11042 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
11043 rclass_3 = FP_REGS;
11044 else
11045 rclass_3 = GENERAL_REGS;
11046
11047 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
11048 rclass_4 = FP_REGS;
11049 else
11050 rclass_4 = GENERAL_REGS;
11051
11052   /* Check if the registers are of the same class. */
11053 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
11054 return false;
11055
11056 return true;
11057}
11058
11059/* Given OPERANDS of consecutive load/store, this function pairs them
11060 into ldp/stp after adjusting the offset. It depends on the fact
11061 that addresses of load/store instructions are in increasing order.
11062 MODE is the mode of memory operands. CODE is the rtl operator
11063 which should be applied to all memory operands, it's SIGN_EXTEND,
11064 ZERO_EXTEND or UNKNOWN. */
11065
11066bool
11067aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
11068 enum machine_mode mode, RTX_CODE code)
11069{
11070 rtx base, offset, t1, t2;
11071 rtx mem_1, mem_2, mem_3, mem_4;
11072 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
11073
11074 if (load)
11075 {
11076 mem_1 = operands[1];
11077 mem_2 = operands[3];
11078 mem_3 = operands[5];
11079 mem_4 = operands[7];
11080 }
11081 else
11082 {
11083 mem_1 = operands[0];
11084 mem_2 = operands[2];
11085 mem_3 = operands[4];
11086 mem_4 = operands[6];
11087 gcc_assert (code == UNKNOWN);
11088 }
11089
11090 extract_base_offset_in_addr (mem_1, &base, &offset);
11091 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
11092
11093   /* Adjust the offset so that it can fit in an ldp/stp instruction. */
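  /* Illustrative example: for SImode (msize == 4) and a first offset of
     0x104, stp_off_limit is 0x100, so adj_off becomes 0x100 and new_off
     becomes 4; the scratch register is set to base + 0x100 and the two
     ldp/stp pairs then use offsets 4 and 12.  */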
11094 msize = GET_MODE_SIZE (mode);
11095 stp_off_limit = msize * 0x40;
11096 off_val = INTVAL (offset);
11097 abs_off = (off_val < 0) ? -off_val : off_val;
11098 new_off = abs_off % stp_off_limit;
11099 adj_off = abs_off - new_off;
11100
11101 /* Further adjust to make sure all offsets are OK. */
11102 if ((new_off + msize * 2) >= stp_off_limit)
11103 {
11104 adj_off += stp_off_limit;
11105 new_off -= stp_off_limit;
11106 }
11107
11108 /* Make sure the adjustment can be done with ADD/SUB instructions. */
11109 if (adj_off >= 0x1000)
11110 return false;
11111
11112 if (off_val < 0)
11113 {
11114 adj_off = -adj_off;
11115 new_off = -new_off;
11116 }
11117
11118 /* Create new memory references. */
11119 mem_1 = change_address (mem_1, VOIDmode,
11120 plus_constant (DImode, operands[8], new_off));
11121
11122 /* Check if the adjusted address is OK for ldp/stp. */
11123 if (!aarch64_mem_pair_operand (mem_1, mode))
11124 return false;
11125
11126 msize = GET_MODE_SIZE (mode);
11127 mem_2 = change_address (mem_2, VOIDmode,
11128 plus_constant (DImode,
11129 operands[8],
11130 new_off + msize));
11131 mem_3 = change_address (mem_3, VOIDmode,
11132 plus_constant (DImode,
11133 operands[8],
11134 new_off + msize * 2));
11135 mem_4 = change_address (mem_4, VOIDmode,
11136 plus_constant (DImode,
11137 operands[8],
11138 new_off + msize * 3));
11139
11140 if (code == ZERO_EXTEND)
11141 {
11142 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
11143 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
11144 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
11145 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
11146 }
11147 else if (code == SIGN_EXTEND)
11148 {
11149 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
11150 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
11151 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
11152 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
11153 }
11154
11155 if (load)
11156 {
11157 operands[1] = mem_1;
11158 operands[3] = mem_2;
11159 operands[5] = mem_3;
11160 operands[7] = mem_4;
11161 }
11162 else
11163 {
11164 operands[0] = mem_1;
11165 operands[2] = mem_2;
11166 operands[4] = mem_3;
11167 operands[6] = mem_4;
11168 }
11169
11170 /* Emit adjusting instruction. */
11171 emit_insn (gen_rtx_SET (VOIDmode, operands[8],
11172 plus_constant (DImode, base, adj_off)));
11173 /* Emit ldp/stp instructions. */
11174 t1 = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
11175 t2 = gen_rtx_SET (VOIDmode, operands[2], operands[3]);
11176 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
11177 t1 = gen_rtx_SET (VOIDmode, operands[4], operands[5]);
11178 t2 = gen_rtx_SET (VOIDmode, operands[6], operands[7]);
11179 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
11180 return true;
11181}
11182
43e9d192
IB
11183#undef TARGET_ADDRESS_COST
11184#define TARGET_ADDRESS_COST aarch64_address_cost
11185
11186/* This hook determines whether unnamed bitfields affect the alignment
11187 of the containing structure. The hook returns true if the structure
11188 should inherit the alignment requirements of an unnamed bitfield's
11189 type. */
11190#undef TARGET_ALIGN_ANON_BITFIELD
11191#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
11192
11193#undef TARGET_ASM_ALIGNED_DI_OP
11194#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
11195
11196#undef TARGET_ASM_ALIGNED_HI_OP
11197#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
11198
11199#undef TARGET_ASM_ALIGNED_SI_OP
11200#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
11201
11202#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
11203#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
11204 hook_bool_const_tree_hwi_hwi_const_tree_true
11205
11206#undef TARGET_ASM_FILE_START
11207#define TARGET_ASM_FILE_START aarch64_start_file
11208
11209#undef TARGET_ASM_OUTPUT_MI_THUNK
11210#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
11211
11212#undef TARGET_ASM_SELECT_RTX_SECTION
11213#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
11214
11215#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
11216#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
11217
11218#undef TARGET_BUILD_BUILTIN_VA_LIST
11219#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
11220
11221#undef TARGET_CALLEE_COPIES
11222#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
11223
11224#undef TARGET_CAN_ELIMINATE
11225#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
11226
11227#undef TARGET_CANNOT_FORCE_CONST_MEM
11228#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
11229
11230#undef TARGET_CONDITIONAL_REGISTER_USAGE
11231#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
11232
11233/* Only the least significant bit is used for initialization guard
11234 variables. */
11235#undef TARGET_CXX_GUARD_MASK_BIT
11236#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
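/* Illustrative consequence (a hedged sketch, not quoted from the C++ front
   end): the inline fast path for a function-local static tests only bit 0
   of the guard variable, something like

       if ((*(char *) &guard & 1) == 0)
         __cxa_guard_acquire (&guard);

   instead of testing the whole first byte for zero.  */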
11237
11238#undef TARGET_C_MODE_FOR_SUFFIX
11239#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
11240
11241#ifdef TARGET_BIG_ENDIAN_DEFAULT
11242#undef TARGET_DEFAULT_TARGET_FLAGS
11243#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
11244#endif
11245
11246#undef TARGET_CLASS_MAX_NREGS
11247#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
11248
119103ca
JG
11249#undef TARGET_BUILTIN_DECL
11250#define TARGET_BUILTIN_DECL aarch64_builtin_decl
11251
43e9d192
IB
11252#undef TARGET_EXPAND_BUILTIN
11253#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
11254
11255#undef TARGET_EXPAND_BUILTIN_VA_START
11256#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
11257
9697e620
JG
11258#undef TARGET_FOLD_BUILTIN
11259#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
11260
43e9d192
IB
11261#undef TARGET_FUNCTION_ARG
11262#define TARGET_FUNCTION_ARG aarch64_function_arg
11263
11264#undef TARGET_FUNCTION_ARG_ADVANCE
11265#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
11266
11267#undef TARGET_FUNCTION_ARG_BOUNDARY
11268#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
11269
11270#undef TARGET_FUNCTION_OK_FOR_SIBCALL
11271#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
11272
11273#undef TARGET_FUNCTION_VALUE
11274#define TARGET_FUNCTION_VALUE aarch64_function_value
11275
11276#undef TARGET_FUNCTION_VALUE_REGNO_P
11277#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
11278
11279#undef TARGET_FRAME_POINTER_REQUIRED
11280#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
11281
fc72cba7
AL
11282#undef TARGET_GIMPLE_FOLD_BUILTIN
11283#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 11284
43e9d192
IB
11285#undef TARGET_GIMPLIFY_VA_ARG_EXPR
11286#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
11287
11288#undef TARGET_INIT_BUILTINS
11289#define TARGET_INIT_BUILTINS aarch64_init_builtins
11290
11291#undef TARGET_LEGITIMATE_ADDRESS_P
11292#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
11293
11294#undef TARGET_LEGITIMATE_CONSTANT_P
11295#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
11296
11297#undef TARGET_LIBGCC_CMP_RETURN_MODE
11298#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
11299
38e8f663 11300#undef TARGET_LRA_P
98d404be 11301#define TARGET_LRA_P hook_bool_void_true
38e8f663 11302
ac2b960f
YZ
11303#undef TARGET_MANGLE_TYPE
11304#define TARGET_MANGLE_TYPE aarch64_mangle_type
11305
43e9d192
IB
11306#undef TARGET_MEMORY_MOVE_COST
11307#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
11308
26e0ff94
WD
11309#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
11310#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
11311
43e9d192
IB
11312#undef TARGET_MUST_PASS_IN_STACK
11313#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
11314
11315/* This target hook should return true if accesses to volatile bitfields
11316 should use the narrowest mode possible. It should return false if these
11317 accesses should use the bitfield container type. */
11318#undef TARGET_NARROW_VOLATILE_BITFIELD
11319#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
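/* Illustrative example (an assumption about the resulting access, not taken
   from the sources above): given

       struct { volatile unsigned int f : 8; } x;

   a read of x.f is performed with a 32-bit access matching the container
   type, rather than the narrowest (8-bit) access, because the hook above
   returns false.  */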
11320
11321#undef TARGET_OPTION_OVERRIDE
11322#define TARGET_OPTION_OVERRIDE aarch64_override_options
11323
11324#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
11325#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
11326 aarch64_override_options_after_change
11327
11328#undef TARGET_PASS_BY_REFERENCE
11329#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
11330
11331#undef TARGET_PREFERRED_RELOAD_CLASS
11332#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
11333
cee66c68
WD
11334#undef TARGET_SCHED_REASSOCIATION_WIDTH
11335#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
11336
43e9d192
IB
11337#undef TARGET_SECONDARY_RELOAD
11338#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
11339
11340#undef TARGET_SHIFT_TRUNCATION_MASK
11341#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
11342
11343#undef TARGET_SETUP_INCOMING_VARARGS
11344#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
11345
11346#undef TARGET_STRUCT_VALUE_RTX
11347#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
11348
11349#undef TARGET_REGISTER_MOVE_COST
11350#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
11351
11352#undef TARGET_RETURN_IN_MEMORY
11353#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
11354
11355#undef TARGET_RETURN_IN_MSB
11356#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
11357
11358#undef TARGET_RTX_COSTS
7cc2145f 11359#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 11360
d126a4ae
AP
11361#undef TARGET_SCHED_ISSUE_RATE
11362#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
11363
d03f7e44
MK
11364#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
11365#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
11366 aarch64_sched_first_cycle_multipass_dfa_lookahead
11367
43e9d192
IB
11368#undef TARGET_TRAMPOLINE_INIT
11369#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
11370
11371#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
11372#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
11373
11374#undef TARGET_VECTOR_MODE_SUPPORTED_P
11375#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
11376
11377#undef TARGET_ARRAY_MODE_SUPPORTED_P
11378#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
11379
8990e73a
TB
11380#undef TARGET_VECTORIZE_ADD_STMT_COST
11381#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
11382
11383#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
11384#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
11385 aarch64_builtin_vectorization_cost
11386
43e9d192
IB
11387#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
11388#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
11389
42fc9a7f
JG
11390#undef TARGET_VECTORIZE_BUILTINS
11391#define TARGET_VECTORIZE_BUILTINS
11392
11393#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
11394#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
11395 aarch64_builtin_vectorized_function
11396
3b357264
JG
11397#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
11398#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
11399 aarch64_autovectorize_vector_sizes
11400
aa87aced
KV
11401#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
11402#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
11403 aarch64_atomic_assign_expand_fenv
11404
43e9d192
IB
11405/* Section anchor support. */
11406
11407#undef TARGET_MIN_ANCHOR_OFFSET
11408#define TARGET_MIN_ANCHOR_OFFSET -256
11409
11410/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
11411 byte access; we can do much more for larger data types, but have no way
11412 to determine the size of the access. We assume accesses are aligned. */
11413#undef TARGET_MAX_ANCHOR_OFFSET
11414#define TARGET_MAX_ANCHOR_OFFSET 4095
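/* Illustrative use of these bounds (a hedged sketch; symbol and register
   names are placeholders): several nearby globals can share one anchor,

       adrp x0, .LANCHOR0
       add  x0, x0, :lo12:.LANCHOR0
       ldr  w1, [x0, #8]     ; first variable, 8 bytes past the anchor
       ldr  w2, [x0, #12]    ; second variable

   as long as every offset from the anchor stays within [-256, 4095].  */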
11415
db0253a4
TB
11416#undef TARGET_VECTOR_ALIGNMENT
11417#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
11418
11419#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
11420#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
11421 aarch64_simd_vector_alignment_reachable
11422
88b08073
JG
11423/* vec_perm support. */
11424
11425#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
11426#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
11427 aarch64_vectorize_vec_perm_const_ok
11428
70f09188 11429
706b2314 11430#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
11431#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
11432
5cb74e90
RR
11433#undef TARGET_FLAGS_REGNUM
11434#define TARGET_FLAGS_REGNUM CC_REGNUM
11435
78607708
TV
11436#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
11437#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
11438
a3125fc2
CL
11439#undef TARGET_ASAN_SHADOW_OFFSET
11440#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
11441
0c4ec427
RE
11442#undef TARGET_LEGITIMIZE_ADDRESS
11443#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
11444
d3006da6
JG
11445#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
11446#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
11447 aarch64_use_by_pieces_infrastructure_p
11448
594bdd53
FY
11449#undef TARGET_CAN_USE_DOLOOP_P
11450#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
11451
6a569cdd
KT
11452#undef TARGET_SCHED_MACRO_FUSION_P
11453#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
11454
11455#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
11456#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
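/* Illustrative fusion candidate (hedged; the exact pairs accepted depend on
   the selected tuning): keeping an address-forming pair such as

       adrp x0, some_symbol
       add  x0, x0, :lo12:some_symbol

   adjacent in the schedule lets cores that fuse the two instructions issue
   them as one.  */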
11457
350013bc
BC
11458#undef TARGET_SCHED_FUSION_PRIORITY
11459#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
11460
43e9d192
IB
11461struct gcc_target targetm = TARGET_INITIALIZER;
11462
11463#include "gt-aarch64.h"