/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2015 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-codes.h"
#include "rtl.h"
#include "insn-attr.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "regs.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "predict.h"
#include "basic-block.h"
#include "df.h"
#include "hard-reg-set.h"
#include "output.h"
#include "hashtab.h"
#include "function.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "reload.h"
#include "toplev.h"
#include "target.h"
#include "target-def.h"
#include "targhooks.h"
#include "ggc.h"
#include "tm_p.h"
#include "recog.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "optabs.h"
#include "dwarf2.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "cortex-a57-fma-steering.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

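/* For example, with the default LP64 ABI POINTER_SIZE is 64 bits, so
   POINTER_BYTES evaluates to 8; under -mabi=ilp32 it evaluates to 4.  */
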
/* Classifies an address.

     ADDRESS_REG_IMM
	 A simple base register plus immediate offset.

     ADDRESS_REG_WB
	 A base register indexed by immediate offset with writeback.

     ADDRESS_REG_REG
	 A base register indexed by (optionally scaled) register.

     ADDRESS_REG_UXTW
	 A base register indexed by (optionally scaled) zero-extended register.

     ADDRESS_REG_SXTW
	 A base register indexed by (optionally scaled) sign-extended register.

     ADDRESS_LO_SUM
	 A LO_SUM rtx with a base register and "LO12" symbol relocation.

     ADDRESS_SYMBOLIC:
	 A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
						     const_tree,
						     machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
						 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
  {
    0, /* hi  */
    0, /* si  */
    0, /* di  */
    0, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_extend  */
  0 /* imm_offset  */
};

static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_extend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  1, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  1, /* register_extend  */
  0, /* imm_offset  */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP  */
  2, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  2 /* FP2FP  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_stmt_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Costs for vector insn classes on Cortex-A57.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_stmt_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Costs for vector insn classes on XGene-1.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_stmt_cost  */
  4, /* vec_to_scalar_cost  */
  4, /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2, /* vec_unalign_store_cost  */
  2, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

#define AARCH64_FUSE_NOTHING	(0)
#define AARCH64_FUSE_MOV_MOVK	(1 << 0)
#define AARCH64_FUSE_ADRP_ADD	(1 << 1)
#define AARCH64_FUSE_MOVK_MOVK	(1 << 2)
#define AARCH64_FUSE_ADRP_LDR	(1 << 3)
#define AARCH64_FUSE_CMP_BRANCH	(1 << 4)

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  2,  /* Predictable.  */
  2   /* Unpredictable.  */
};

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fuseable_ops  */
  8,	/* function_align.  */
  8,	/* jump_align.  */
  4,	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2	/* min_div_recip_mul_df.  */
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fuseable_ops  */
  8,	/* function_align.  */
  8,	/* jump_align.  */
  4,	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2	/* min_div_recip_mul_df.  */
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops  */
  16,	/* function_align.  */
  8,	/* jump_align.  */
  4,	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2	/* min_div_recip_mul_df.  */
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fuseable_ops  */
  8,	/* function_align.  */
  8,	/* jump_align.  */
  8,	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2	/* min_div_recip_mul_df.  */
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fuseable_ops  */
  16,	/* function_align.  */
  8,	/* jump_align.  */
  16,	/* loop_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1,	/* vec_reassoc_width.  */
  2,	/* min_div_recip_mul_sf.  */
  2	/* min_div_recip_mul_df.  */
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
  {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, NULL, 0, 0, NULL}
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, NULL, 0, 0, NULL}
};

/* Target specification.  These are populated as commandline arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};

/* Used to track the size of an address when generating a pre/post
   increment address.  */
static machine_mode aarch64_memory_reference_mode;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];

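/* For example, 0x00ff00ff00ff00ff (a contiguous run of eight ones replicated
   in every 16-bit element) is representable as a bitmask immediate, whereas
   an arbitrary constant such as 0x1234 is not; there are 5334 distinct
   64-bit values of this form, hence AARCH64_NUM_BITMASKS.  */
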
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

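/* For instance, AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) is AARCH64_LT:
   each condition and its logical inverse form an even/odd pair in the
   encoding above, so flipping the low bit inverts the condition.  */
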
static unsigned int
aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params->min_div_recip_mul_sf;
  return aarch64_tune_params->min_div_recip_mul_df;
}

static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
			     enum machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params->vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params->int_reassoc_width;
  if (FLOAT_MODE_P (mode))
    return aarch64_tune_params->fp_reassoc_width;
  return 1;
}

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

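/* As an example, assuming the AAPCS64 DWARF numbering where AARCH64_DWARF_R0
   is 0, AARCH64_DWARF_SP is 31 and AARCH64_DWARF_V0 is 64, register x5 maps
   to DWARF register 5 and v3 maps to DWARF register 67.  */
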
/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

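/* For example, an array of three V4SImode vectors (a 128-bit Q-register
   mode with nelems == 3) is supported when SIMD is enabled, which is what
   underpins the OImode/CImode/XImode structure modes used by the
   LD2/LD3/LD4 and ST2/ST3/ST4 style patterns.  */
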
/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}

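/* For example, assuming UNITS_PER_WORD of 8 and UNITS_PER_VREG of 16, a
   TImode (16-byte) value occupies two X registers but only one V register,
   while an OImode (32-byte) SIMD structure occupies two V registers.  */
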
/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
				     machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
        return mode;
      else
        return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}

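/* For instance, a caller-save spill of a QImode or HImode value held in a
   single register is widened to SImode here, whereas SFmode (4 bytes) and
   any larger single-register mode keeps its own mode.  */
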
/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}

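/* As a worked example in DImode, an EXTRACT_IMM of 34 and a MULT_IMM of 4
   satisfy every test above: 34 > 8, 34 < 64, exact_log2 (34 & ~7)
   == exact_log2 (32) == 5, 34 & 7 == 2 <= 4, and 4 == 1 << 2; so the
   combination is accepted as an extend.  */
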
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

	RTL                               Absolute
	tmp = hi (symbol_ref);            adrp  x1, foo
	dest = lo_sum (tmp, symbol_ref);  add  dest, x1, :lo_12:foo
	                                  nop

	PIC                               TLS
	adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
	ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
	                                  bl   __tls_get_addr
	                                  nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
     adrp tmp, :tlsgd:imm
     add  dest, tmp, #:tlsgd_lo12:imm
     bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
     adrp dest, :tlsdesc:imm
     ldr  tmp, [dest, #:tlsdesc_lo12:imm]
     add  dest, dest, #:tlsdesc_lo12:imm
     blr  tmp
     mrs  tp, tpidr_el0
     add  dest, dest, tp

   Initial Exec:
     mrs  tp, tpidr_el0
     adrp tmp, :gottprel:imm
     ldr  dest, [tmp, #:gottprel_lo12:imm]
     add  dest, dest, tp

   Local Exec:
     mrs  tp, tpidr_el0
     add  t0, tp, #:tprel_hi12:imm, lsl #12
     add  t0, t0, #:tprel_lo12_nc:imm
*/

static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode.  */
	rtx tmp_reg = dest;
	machine_mode mode = GET_MODE (dest);

	gcc_assert (mode == Pmode || mode == ptr_mode);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (Pmode, dest, imm));
      return;

    case SYMBOL_SMALL_GOT:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */
	rtx tmp_reg = dest;
	machine_mode mode = GET_MODE (dest);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
	    else
	      emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
	  }

	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx_insn *insns;
	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

	start_sequence ();
	aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	machine_mode mode = GET_MODE (dest);
	rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
	rtx tp;

	gcc_assert (mode == Pmode || mode == ptr_mode);

	/* In ILP32, the got entry is always of SImode size.  Unlike
	   small GOT, the dest is fixed at reg 0.  */
	if (TARGET_ILP32)
	  emit_insn (gen_tlsdesc_small_si (imm));
	else
	  emit_insn (gen_tlsdesc_small_di (imm));
	tp = aarch64_load_tp (NULL);

	if (mode != Pmode)
	  tp = gen_lowpart (mode, tp);

	emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different tlsie_small
	   patterns here (two patterns for ILP32).  */
	machine_mode mode = GET_MODE (dest);
	rtx tmp_reg = gen_reg_rtx (mode);
	rtx tp = aarch64_load_tp (NULL);

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_tlsie_small_di (tmp_reg, imm));
	    else
	      {
		emit_insn (gen_tlsie_small_si (tmp_reg, imm));
		tp = gen_lowpart (mode, tp);
	      }
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
	  }

	emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TPREL:
      {
	rtx tp = aarch64_load_tp (NULL);

	if (GET_MODE (dest) != Pmode)
	  tp = gen_lowpart (GET_MODE (dest), tp);

	emit_insn (gen_tlsle_small (dest, tp, imm));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    default:
      gcc_unreachable ();
    }
}

/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}

/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  src_lo = gen_lowpart (word_mode, src);
	  src_hi = gen_highpart (word_mode, src);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
	    }
	  return;
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  dst_lo = gen_lowpart (word_mode, dst);
	  dst_hi = gen_highpart (word_mode, dst);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
	    }
	  return;
	}
    }

  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
}

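/* For example, splitting a TImode register copy whose destination pair is
   (x1, x2) and whose source pair is (x0, x1) must emit the high-part move
   first: dst_lo (x1) overlaps src_hi (x1), which is exactly the case the
   reg_overlap_mentioned_p test above reorders.  */
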
bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}

/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  machine_mode src_mode = GET_MODE (src1);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
	{
	case V8QImode:
	  gen = gen_aarch64_simd_combinev8qi;
	  break;
	case V4HImode:
	  gen = gen_aarch64_simd_combinev4hi;
	  break;
	case V2SImode:
	  gen = gen_aarch64_simd_combinev2si;
	  break;
	case V2SFmode:
	  gen = gen_aarch64_simd_combinev2sf;
	  break;
	case DImode:
	  gen = gen_aarch64_simd_combinedi;
	  break;
	case DFmode:
	  gen = gen_aarch64_simd_combinedf;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src1, src2));
      return;
    }
}

/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  machine_mode src_mode = GET_MODE (src);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
      return;
    }
}

static rtx
aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}


static rtx
aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
	 might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}

static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
				machine_mode mode)
{
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match, first_not_ffff_match;
  int num_insns = 0;

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      if (generate)
	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      num_insns++;
      return num_insns;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
	 in two; so don't mess around looking for sequences that don't buy
	 us anything.  */
      if (generate)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, dest,
				  GEN_INT (INTVAL (imm) & 0xffff)));
	  emit_insn (gen_insv_immsi (dest, GEN_INT (16),
				     GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
	}
      num_insns += 2;
      return num_insns;
    }

  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;
  first_not_ffff_match = -1;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == mask)
	one_match++;
      else
	{
	  if (first_not_ffff_match < 0)
	    first_not_ffff_match = i;
	  if ((val & mask) == 0)
	    zero_match++;
	}
    }

  if (one_match == 2)
    {
      /* Set one of the quarters and then insert back into result.  */
      mask = 0xffffll << first_not_ffff_match;
      if (generate)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
	  emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
				     GEN_INT ((val >> first_not_ffff_match)
					      & 0xffff)));
	}
      num_insns += 2;
      return num_insns;
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
	{
	  if (generate)
	    {
	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				      GEN_INT (val & mask)));
	      emit_insn (gen_adddi3 (dest, subtarget,
				     GEN_INT (val - (val & mask))));
	    }
	  num_insns += 2;
	  return num_insns;
	}
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
	{
	  if (generate)
	    {
	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				      GEN_INT ((val + comp) & mask)));
	      emit_insn (gen_adddi3 (dest, subtarget,
				     GEN_INT (val - ((val + comp) & mask))));
	    }
	  num_insns += 2;
	  return num_insns;
	}
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
	{
	  if (generate)
	    {
	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				      GEN_INT ((val - comp) | ~mask)));
	      emit_insn (gen_adddi3 (dest, subtarget,
				     GEN_INT (val - ((val - comp) | ~mask))));
	    }
	  num_insns += 2;
	  return num_insns;
	}
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
	{
	  if (generate)
	    {
	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				      GEN_INT (val | ~mask)));
	      emit_insn (gen_adddi3 (dest, subtarget,
				     GEN_INT (val - (val | ~mask))));
	    }
	  num_insns += 2;
	  return num_insns;
	}
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
	{
	  if (generate)
	    {
	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				      GEN_INT (aarch64_bitmasks[i])));
	      emit_insn (gen_adddi3 (dest, subtarget,
				     GEN_INT (val - aarch64_bitmasks[i])));
	    }
	  num_insns += 2;
	  return num_insns;
	}

      for (j = 0; j < 64; j += 16, mask <<= 16)
	{
	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
	    {
	      if (generate)
		{
		  emit_insn (gen_rtx_SET (VOIDmode, dest,
					  GEN_INT (aarch64_bitmasks[i])));
		  emit_insn (gen_insv_immdi (dest, GEN_INT (j),
					     GEN_INT ((val >> j) & 0xffff)));
		}
	      num_insns += 2;
	      return num_insns;
	    }
	}
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
	      {
		if (generate)
		  {
		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		    emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					    GEN_INT (aarch64_bitmasks[i])));
		    emit_insn (gen_iordi3 (dest, subtarget,
					   GEN_INT (aarch64_bitmasks[j])));
		  }
		num_insns += 2;
		return num_insns;
	      }
	}
      else if ((val & aarch64_bitmasks[i]) == val)
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
	      {
		if (generate)
		  {
		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		    emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					    GEN_INT (aarch64_bitmasks[j])));
		    emit_insn (gen_anddi3 (dest, subtarget,
					   GEN_INT (aarch64_bitmasks[i])));
		  }
		num_insns += 2;
		return num_insns;
	      }
	}
    }

  if (one_match > zero_match)
    {
      /* Set either first three quarters or all but the third.  */
      mask = 0xffffll << (16 - first_not_ffff_match);
      if (generate)
	emit_insn (gen_rtx_SET (VOIDmode, dest,
				GEN_INT (val | mask | 0xffffffff00000000ull)));
      num_insns ++;

      /* Now insert other two quarters.  */
      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
	   i < 64; i += 16, mask <<= 16)
	{
	  if ((val & mask) != mask)
	    {
	      if (generate)
		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					   GEN_INT ((val >> i) & 0xffff)));
	      num_insns ++;
	    }
	}
      return num_insns;
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
	{
	  if (first)
	    {
	      if (generate)
		emit_insn (gen_rtx_SET (VOIDmode, dest,
					GEN_INT (val & mask)));
	      num_insns ++;
	      first = false;
	    }
	  else
	    {
	      if (generate)
		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					   GEN_INT ((val >> i) & 0xffff)));
	      num_insns ++;
	    }
	}
    }

  return num_insns;
}

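/* As a concrete example, moving 0x1234000000005678 into an X register takes
   the simple_sequence path above (two of its four 16-bit quarters are zero)
   and costs two instructions: a MOVZ of 0x5678 followed by a MOVK of 0x1234
   into bits 48-63.  */
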

void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  machine_mode mode = GET_MODE (dest);

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
	 before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
      switch (sty)
	{
	case SYMBOL_FORCE_TO_MEM:
	  if (offset != const0_rtx
	      && targetm.cannot_force_const_mem (mode, imm))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  mem = force_const_mem (ptr_mode, imm);
	  gcc_assert (mem);
	  if (mode != ptr_mode)
	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	  return;

	case SYMBOL_SMALL_TLSGD:
	case SYMBOL_SMALL_TLSDESC:
	case SYMBOL_SMALL_GOTTPREL:
	case SYMBOL_SMALL_GOT:
	case SYMBOL_TINY_GOT:
	  if (offset != const0_rtx)
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  /* FALLTHRU */

	case SYMBOL_SMALL_TPREL:
	case SYMBOL_SMALL_ABSOLUTE:
	case SYMBOL_TINY_ABSOLUTE:
	  aarch64_load_symref_appropriately (dest, imm, sty);
	  return;

	default:
	  gcc_unreachable ();
	}
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      else
	{
	  rtx mem = force_const_mem (mode, imm);
	  gcc_assert (mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	}

      return;
    }

  aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
}

static bool
aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
				 tree exp ATTRIBUTE_UNUSED)
{
  /* Currently, always true.  */
  return true;
}

/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   const_tree type,
			   bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
    ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  /* Aggregates are passed by reference based on their size.  */
  if (type && AGGREGATE_TYPE_P (type))
    {
      size = int_size_in_bytes (type);
    }

  /* Variable sized arguments are always passed by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &dummymode, &nregs,
					       NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating-point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}

/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
					       &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}

/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp;
  int count;
  machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
	{
	  gcc_assert (count == 1 && mode == ag_mode);
	  return gen_rtx_REG (mode, V0_REGNUM);
	}
      else
	{
	  int i;
	  rtx par;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
	  for (i = 0; i < count; i++)
	    {
	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  return par;
	}
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}

/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return !TARGET_GENERAL_REGS_ONLY;

  return false;
}

/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
					       type,
					       &ag_mode,
					       &count,
					       NULL))
    return false;

  /* Types larger than 2 registers returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}

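/* For example, a struct of four doubles forms a homogeneous floating-point
   aggregate and is returned in registers v0-v3, while a struct of five
   doubles (40 bytes, not an HFA candidate) is returned in memory.  */
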
1712static bool
ef4bddc2 1713aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
1714 const_tree type, int *nregs)
1715{
1716 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1717 return aarch64_vfp_is_call_or_return_candidate (mode,
1718 type,
1719 &pcum->aapcs_vfp_rmode,
1720 nregs,
1721 NULL);
1722}
1723
1724/* Given MODE and TYPE of a function argument, return the alignment in
1725 bits. The idea is to suppress any stronger alignment requested by
1726 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1727 This is a helper function for local use only. */
1728
1729static unsigned int
ef4bddc2 1730aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192
IB
1731{
1732 unsigned int alignment;
1733
1734 if (type)
1735 {
1736 if (!integer_zerop (TYPE_SIZE (type)))
1737 {
1738 if (TYPE_MODE (type) == mode)
1739 alignment = TYPE_ALIGN (type);
1740 else
1741 alignment = GET_MODE_ALIGNMENT (mode);
1742 }
1743 else
1744 alignment = 0;
1745 }
1746 else
1747 alignment = GET_MODE_ALIGNMENT (mode);
1748
1749 return alignment;
1750}
1751
1752/* Layout a function argument according to the AAPCS64 rules. The rule
1753 numbers refer to the rule numbers in the AAPCS64. */
1754
1755static void
ef4bddc2 1756aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
1757 const_tree type,
1758 bool named ATTRIBUTE_UNUSED)
1759{
1760 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1761 int ncrn, nvrn, nregs;
1762 bool allocate_ncrn, allocate_nvrn;
3abf17cf 1763 HOST_WIDE_INT size;
43e9d192
IB
1764
1765 /* We need to do this once per argument. */
1766 if (pcum->aapcs_arg_processed)
1767 return;
1768
1769 pcum->aapcs_arg_processed = true;
1770
3abf17cf
YZ
1771 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1772 size
1773 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1774 UNITS_PER_WORD);
1775
43e9d192
IB
1776 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1777 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1778 mode,
1779 type,
1780 &nregs);
1781
1782 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1783 The following code thus handles passing by SIMD/FP registers first. */
1784
1785 nvrn = pcum->aapcs_nvrn;
1786
1787 /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1788 and homogenous short-vector aggregates (HVA). */
1789 if (allocate_nvrn)
1790 {
1791 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1792 {
1793 pcum->aapcs_nextnvrn = nvrn + nregs;
1794 if (!aarch64_composite_type_p (type, mode))
1795 {
1796 gcc_assert (nregs == 1);
1797 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1798 }
1799 else
1800 {
1801 rtx par;
1802 int i;
1803 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1804 for (i = 0; i < nregs; i++)
1805 {
1806 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1807 V0_REGNUM + nvrn + i);
1808 tmp = gen_rtx_EXPR_LIST
1809 (VOIDmode, tmp,
1810 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1811 XVECEXP (par, 0, i) = tmp;
1812 }
1813 pcum->aapcs_reg = par;
1814 }
1815 return;
1816 }
1817 else
1818 {
1819 /* C.3 NSRN is set to 8. */
1820 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1821 goto on_stack;
1822 }
1823 }
1824
1825 ncrn = pcum->aapcs_ncrn;
3abf17cf 1826 nregs = size / UNITS_PER_WORD;
43e9d192
IB
1827
1828 /* C6 - C9. though the sign and zero extension semantics are
1829 handled elsewhere. This is the case where the argument fits
1830 entirely general registers. */
1831 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1832 {
1833 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1834
1835 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1836
1837 /* C.8 if the argument has an alignment of 16 then the NGRN is
1838 rounded up to the next even number. */
1839 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1840 {
1841 ++ncrn;
1842 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1843 }
1844 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1845 A reg is still generated for it, but the caller should be smart
1846 enough not to use it. */
1847 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1848 {
1849 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1850 }
1851 else
1852 {
1853 rtx par;
1854 int i;
1855
1856 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1857 for (i = 0; i < nregs; i++)
1858 {
1859 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1860 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1861 GEN_INT (i * UNITS_PER_WORD));
1862 XVECEXP (par, 0, i) = tmp;
1863 }
1864 pcum->aapcs_reg = par;
1865 }
1866
1867 pcum->aapcs_nextncrn = ncrn + nregs;
1868 return;
1869 }
1870
1871 /* C.11 */
1872 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1873
1874 /* The argument is passed on stack; record the needed number of words for
3abf17cf 1875 this argument and align the total size if necessary. */
43e9d192 1876on_stack:
3abf17cf 1877 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
43e9d192
IB
1878 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1879 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
3abf17cf 1880 16 / UNITS_PER_WORD);
43e9d192
IB
1881 return;
1882}
1883
1884/* Implement TARGET_FUNCTION_ARG. */
1885
1886static rtx
ef4bddc2 1887aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
1888 const_tree type, bool named)
1889{
1890 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1891 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1892
1893 if (mode == VOIDmode)
1894 return NULL_RTX;
1895
1896 aarch64_layout_arg (pcum_v, mode, type, named);
1897 return pcum->aapcs_reg;
1898}
1899
1900void
1901aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1902 const_tree fntype ATTRIBUTE_UNUSED,
1903 rtx libname ATTRIBUTE_UNUSED,
1904 const_tree fndecl ATTRIBUTE_UNUSED,
1905 unsigned n_named ATTRIBUTE_UNUSED)
1906{
1907 pcum->aapcs_ncrn = 0;
1908 pcum->aapcs_nvrn = 0;
1909 pcum->aapcs_nextncrn = 0;
1910 pcum->aapcs_nextnvrn = 0;
1911 pcum->pcs_variant = ARM_PCS_AAPCS64;
1912 pcum->aapcs_reg = NULL_RTX;
1913 pcum->aapcs_arg_processed = false;
1914 pcum->aapcs_stack_words = 0;
1915 pcum->aapcs_stack_size = 0;
1916
1917 return;
1918}
1919
1920static void
1921aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 1922 machine_mode mode,
43e9d192
IB
1923 const_tree type,
1924 bool named)
1925{
1926 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1927 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1928 {
1929 aarch64_layout_arg (pcum_v, mode, type, named);
1930 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1931 != (pcum->aapcs_stack_words != 0));
1932 pcum->aapcs_arg_processed = false;
1933 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1934 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1935 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1936 pcum->aapcs_stack_words = 0;
1937 pcum->aapcs_reg = NULL_RTX;
1938 }
1939}
1940
1941bool
1942aarch64_function_arg_regno_p (unsigned regno)
1943{
1944 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1945 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1946}
1947
1948/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1949 PARM_BOUNDARY bits of alignment, but will be given anything up
1950 to STACK_BOUNDARY bits if the type requires it. This makes sure
1951 that both before and after the layout of each argument, the Next
1952 Stacked Argument Address (NSAA) will have a minimum alignment of
1953 8 bytes. */
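/* A brief illustration (assuming the usual AArch64 values PARM_BOUNDARY == 64
   and STACK_BOUNDARY == 128): a plain 'int' argument is reported as 64-bit
   aligned, while a 16-byte-aligned type such as __int128 is reported as
   128-bit aligned and never more.  */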
1954
1955static unsigned int
ef4bddc2 1956aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192
IB
1957{
1958 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1959
1960 if (alignment < PARM_BOUNDARY)
1961 alignment = PARM_BOUNDARY;
1962 if (alignment > STACK_BOUNDARY)
1963 alignment = STACK_BOUNDARY;
1964 return alignment;
1965}
1966
1967/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1968
1969 Return true if an argument passed on the stack should be padded upwards,
1970 i.e. if the least-significant byte of the stack slot has useful data.
1971
1972 Small aggregate types are placed in the lowest memory address.
1973
1974 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
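/* A sketch of the effect on big-endian (assumed example): a 'char' argument
   is integral and therefore padded downward, ending up in the highest byte
   of its 8-byte slot, whereas a 3-byte struct is a small aggregate and is
   padded upward, starting at the lowest byte of the slot.  */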
1975
1976bool
ef4bddc2 1977aarch64_pad_arg_upward (machine_mode mode, const_tree type)
43e9d192
IB
1978{
1979 /* On little-endian targets, the least significant byte of every stack
1980 argument is passed at the lowest byte address of the stack slot. */
1981 if (!BYTES_BIG_ENDIAN)
1982 return true;
1983
00edcfbe 1984 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
1985 the least significant byte of a stack argument is passed at the highest
1986 byte address of the stack slot. */
1987 if (type
00edcfbe
YZ
1988 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1989 || POINTER_TYPE_P (type))
43e9d192
IB
1990 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1991 return false;
1992
1993 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1994 return true;
1995}
1996
1997/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1998
1999     It specifies padding for the last element (which may also be the
2000     only element) of a block move between registers and memory.  If
2001     the block is assumed to be in memory, padding upward means that
2002     the last element is padded after its most significant byte,
2003     while with downward padding the last element is padded at
2004     its least significant byte side.
2005
2006 Small aggregates and small complex types are always padded
2007 upwards.
2008
2009 We don't need to worry about homogeneous floating-point or
2010 short-vector aggregates; their move is not affected by the
2011 padding direction determined here. Regardless of endianness,
2012 each element of such an aggregate is put in the least
2013 significant bits of a fp/simd register.
2014
2015 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2016 register has useful data, and return the opposite if the most
2017 significant byte does. */
2018
2019bool
ef4bddc2 2020aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
2021 bool first ATTRIBUTE_UNUSED)
2022{
2023
2024 /* Small composite types are always padded upward. */
2025 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2026 {
2027 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2028 : GET_MODE_SIZE (mode));
2029 if (size < 2 * UNITS_PER_WORD)
2030 return true;
2031 }
2032
2033 /* Otherwise, use the default padding. */
2034 return !BYTES_BIG_ENDIAN;
2035}
2036
ef4bddc2 2037static machine_mode
43e9d192
IB
2038aarch64_libgcc_cmp_return_mode (void)
2039{
2040 return SImode;
2041}
2042
2043static bool
2044aarch64_frame_pointer_required (void)
2045{
0b7f8166
MS
2046 /* In aarch64_override_options_after_change
2047 flag_omit_leaf_frame_pointer turns off the frame pointer by
2048 default. Turn it back on now if we've not got a leaf
2049 function. */
2050 if (flag_omit_leaf_frame_pointer
2051 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2052 return true;
43e9d192 2053
0b7f8166 2054 return false;
43e9d192
IB
2055}
2056
2057/* Mark the registers that need to be saved by the callee and calculate
2058 the size of the callee-saved registers area and frame record (both FP
2059 and LR may be omitted). */
2060static void
2061aarch64_layout_frame (void)
2062{
2063 HOST_WIDE_INT offset = 0;
2064 int regno;
2065
2066 if (reload_completed && cfun->machine->frame.laid_out)
2067 return;
2068
97826595
MS
2069#define SLOT_NOT_REQUIRED (-2)
2070#define SLOT_REQUIRED (-1)
2071
363ffa50
JW
2072 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
2073 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
2074
43e9d192
IB
2075 /* First mark all the registers that really need to be saved... */
2076 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2077 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2078
2079 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2080 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2081
2082 /* ... that includes the eh data registers (if needed)... */
2083 if (crtl->calls_eh_return)
2084 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
2085 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2086 = SLOT_REQUIRED;
43e9d192
IB
2087
2088 /* ... and any callee saved register that dataflow says is live. */
2089 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2090 if (df_regs_ever_live_p (regno)
1c923b60
JW
2091 && (regno == R30_REGNUM
2092 || !call_used_regs[regno]))
97826595 2093 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2094
2095 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2096 if (df_regs_ever_live_p (regno)
2097 && !call_used_regs[regno])
97826595 2098 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2099
2100 if (frame_pointer_needed)
2101 {
2e1cdae5 2102 /* FP and LR are placed in the linkage record. */
43e9d192 2103 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 2104 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 2105 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 2106 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
43e9d192 2107 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
2e1cdae5 2108 offset += 2 * UNITS_PER_WORD;
43e9d192
IB
2109 }
2110
2111 /* Now assign stack slots for them. */
2e1cdae5 2112 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2113 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2114 {
2115 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
2116 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2117 cfun->machine->frame.wb_candidate1 = regno;
2118 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
2119 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2120 offset += UNITS_PER_WORD;
2121 }
2122
2123 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2124 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2125 {
2126 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
2127 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2128 cfun->machine->frame.wb_candidate1 = regno;
2129 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
2130 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2131 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2132 offset += UNITS_PER_WORD;
2133 }
2134
43e9d192
IB
2135 cfun->machine->frame.padding0 =
2136 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
2137 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2138
2139 cfun->machine->frame.saved_regs_size = offset;
1c960e02
MS
2140
2141 cfun->machine->frame.hard_fp_offset
2142 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
2143 + get_frame_size ()
2144 + cfun->machine->frame.saved_regs_size,
2145 STACK_BOUNDARY / BITS_PER_UNIT);
2146
2147 cfun->machine->frame.frame_size
2148 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
2149 + crtl->outgoing_args_size,
2150 STACK_BOUNDARY / BITS_PER_UNIT);
2151
43e9d192
IB
2152 cfun->machine->frame.laid_out = true;
2153}
2154
43e9d192
IB
2155static bool
2156aarch64_register_saved_on_entry (int regno)
2157{
97826595 2158 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
2159}
2160
64dedd72
JW
2161static unsigned
2162aarch64_next_callee_save (unsigned regno, unsigned limit)
2163{
2164 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2165 regno ++;
2166 return regno;
2167}
43e9d192 2168
c5e1f66e 2169static void
ef4bddc2 2170aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
2171 HOST_WIDE_INT adjustment)
2172 {
2173 rtx base_rtx = stack_pointer_rtx;
2174 rtx insn, reg, mem;
2175
2176 reg = gen_rtx_REG (mode, regno);
2177 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2178 plus_constant (Pmode, base_rtx, -adjustment));
2179 mem = gen_rtx_MEM (mode, mem);
2180
2181 insn = emit_move_insn (mem, reg);
2182 RTX_FRAME_RELATED_P (insn) = 1;
2183}
2184
80c11907 2185static rtx
ef4bddc2 2186aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
2187 HOST_WIDE_INT adjustment)
2188{
2189 switch (mode)
2190 {
2191 case DImode:
2192 return gen_storewb_pairdi_di (base, base, reg, reg2,
2193 GEN_INT (-adjustment),
2194 GEN_INT (UNITS_PER_WORD - adjustment));
2195 case DFmode:
2196 return gen_storewb_pairdf_di (base, base, reg, reg2,
2197 GEN_INT (-adjustment),
2198 GEN_INT (UNITS_PER_WORD - adjustment));
2199 default:
2200 gcc_unreachable ();
2201 }
2202}
2203
2204static void
ef4bddc2 2205aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
80c11907
JW
2206 unsigned regno2, HOST_WIDE_INT adjustment)
2207{
5d8a22a5 2208 rtx_insn *insn;
80c11907
JW
2209 rtx reg1 = gen_rtx_REG (mode, regno1);
2210 rtx reg2 = gen_rtx_REG (mode, regno2);
2211
2212 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2213 reg2, adjustment));
2214 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
2215 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2216 RTX_FRAME_RELATED_P (insn) = 1;
2217}
2218
159313d9 2219static rtx
ef4bddc2 2220aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
2221 HOST_WIDE_INT adjustment)
2222{
2223 switch (mode)
2224 {
2225 case DImode:
2226 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2227 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2228 case DFmode:
2229 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2230 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2231 default:
2232 gcc_unreachable ();
2233 }
2234}
2235
72df5c1f 2236static rtx
ef4bddc2 2237aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
2238 rtx reg2)
2239{
2240 switch (mode)
2241 {
2242 case DImode:
2243 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2244
2245 case DFmode:
2246 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2247
2248 default:
2249 gcc_unreachable ();
2250 }
2251}
2252
2253static rtx
ef4bddc2 2254aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
2255 rtx mem2)
2256{
2257 switch (mode)
2258 {
2259 case DImode:
2260 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2261
2262 case DFmode:
2263 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2264
2265 default:
2266 gcc_unreachable ();
2267 }
2268}
2269
43e9d192 2270
43e9d192 2271static void
ef4bddc2 2272aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
ae13fce3 2273 unsigned start, unsigned limit, bool skip_wb)
43e9d192 2274{
5d8a22a5 2275 rtx_insn *insn;
ef4bddc2 2276 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
a007a21c 2277 ? gen_frame_mem : gen_rtx_MEM);
43e9d192
IB
2278 unsigned regno;
2279 unsigned regno2;
2280
0ec74a1e 2281 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
2282 regno <= limit;
2283 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 2284 {
ae13fce3
JW
2285 rtx reg, mem;
2286 HOST_WIDE_INT offset;
64dedd72 2287
ae13fce3
JW
2288 if (skip_wb
2289 && (regno == cfun->machine->frame.wb_candidate1
2290 || regno == cfun->machine->frame.wb_candidate2))
2291 continue;
2292
2293 reg = gen_rtx_REG (mode, regno);
2294 offset = start_offset + cfun->machine->frame.reg_offset[regno];
0ec74a1e
JW
2295 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2296 offset));
64dedd72
JW
2297
2298 regno2 = aarch64_next_callee_save (regno + 1, limit);
2299
2300 if (regno2 <= limit
2301 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2302 == cfun->machine->frame.reg_offset[regno2]))
2303
43e9d192 2304 {
0ec74a1e 2305 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
2306 rtx mem2;
2307
2308 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
8ed2fc62
JW
2309 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2310 offset));
2311 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2312 reg2));
0b4a9743 2313
64dedd72
JW
2314 /* The first part of a frame-related parallel insn is
2315 always assumed to be relevant to the frame
2316	 calculations; subsequent parts are only
2317 frame-related if explicitly marked. */
2318 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2319 regno = regno2;
2320 }
2321 else
8ed2fc62
JW
2322 insn = emit_move_insn (mem, reg);
2323
2324 RTX_FRAME_RELATED_P (insn) = 1;
2325 }
2326}
2327
2328static void
ef4bddc2 2329aarch64_restore_callee_saves (machine_mode mode,
8ed2fc62 2330 HOST_WIDE_INT start_offset, unsigned start,
dd991abb 2331 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 2332{
8ed2fc62 2333 rtx base_rtx = stack_pointer_rtx;
ef4bddc2 2334 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
8ed2fc62
JW
2335 ? gen_frame_mem : gen_rtx_MEM);
2336 unsigned regno;
2337 unsigned regno2;
2338 HOST_WIDE_INT offset;
2339
2340 for (regno = aarch64_next_callee_save (start, limit);
2341 regno <= limit;
2342 regno = aarch64_next_callee_save (regno + 1, limit))
2343 {
ae13fce3 2344 rtx reg, mem;
8ed2fc62 2345
ae13fce3
JW
2346 if (skip_wb
2347 && (regno == cfun->machine->frame.wb_candidate1
2348 || regno == cfun->machine->frame.wb_candidate2))
2349 continue;
2350
2351 reg = gen_rtx_REG (mode, regno);
8ed2fc62
JW
2352 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2353 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2354
2355 regno2 = aarch64_next_callee_save (regno + 1, limit);
2356
2357 if (regno2 <= limit
2358 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2359 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 2360 {
8ed2fc62
JW
2361 rtx reg2 = gen_rtx_REG (mode, regno2);
2362 rtx mem2;
2363
2364 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2365 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 2366 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 2367
dd991abb 2368 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 2369 regno = regno2;
43e9d192 2370 }
8ed2fc62 2371 else
dd991abb
RH
2372 emit_move_insn (reg, mem);
2373 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 2374 }
43e9d192
IB
2375}
2376
2377/* AArch64 stack frames generated by this compiler look like:
2378
2379 +-------------------------------+
2380 | |
2381 | incoming stack arguments |
2382 | |
34834420
MS
2383 +-------------------------------+
2384 | | <-- incoming stack pointer (aligned)
43e9d192
IB
2385 | callee-allocated save area |
2386 | for register varargs |
2387 | |
34834420
MS
2388 +-------------------------------+
2389 | local variables | <-- frame_pointer_rtx
43e9d192
IB
2390 | |
2391 +-------------------------------+
454fdba9
RL
2392 | padding0 | \
2393 +-------------------------------+ |
454fdba9 2394 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
2395 +-------------------------------+ |
2396 | LR' | |
2397 +-------------------------------+ |
34834420
MS
2398 | FP' | / <- hard_frame_pointer_rtx (aligned)
2399 +-------------------------------+
43e9d192
IB
2400 | dynamic allocation |
2401 +-------------------------------+
34834420
MS
2402 | padding |
2403 +-------------------------------+
2404 | outgoing stack arguments | <-- arg_pointer
2405 | |
2406 +-------------------------------+
2407 | | <-- stack_pointer_rtx (aligned)
43e9d192 2408
34834420
MS
2409 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2410 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2411 unchanged. */
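/* A worked sketch of the layout above (assumed example: 16 bytes of locals,
   no varargs, no outgoing arguments, only FP and LR saved): then
   saved_regs_size == 16, hard_fp_offset == 32, frame_size == 32 and
   fp_offset == 0, so the prologue emits roughly
	stp	x29, x30, [sp, #-32]!
	add	x29, sp, #0
   with the locals occupying [sp, #16]..[sp, #31], above the frame record.  */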
43e9d192
IB
2412
2413/* Generate the prologue instructions for entry into a function.
2414 Establish the stack frame by decreasing the stack pointer with a
2415 properly calculated size and, if necessary, create a frame record
2416 filled with the values of LR and previous frame pointer. The
6991c977 2417 current FP is also set up if it is in use. */
43e9d192
IB
2418
2419void
2420aarch64_expand_prologue (void)
2421{
2422 /* sub sp, sp, #<frame_size>
2423 stp {fp, lr}, [sp, #<frame_size> - 16]
2424 add fp, sp, #<frame_size> - hardfp_offset
2425 stp {cs_reg}, [fp, #-16] etc.
2426
2427 sub sp, sp, <final_adjustment_if_any>
2428 */
43e9d192 2429 HOST_WIDE_INT frame_size, offset;
1c960e02 2430 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
dd991abb 2431 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2432 rtx_insn *insn;
43e9d192
IB
2433
2434 aarch64_layout_frame ();
43e9d192 2435
dd991abb
RH
2436 offset = frame_size = cfun->machine->frame.frame_size;
2437 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2438 fp_offset = frame_size - hard_fp_offset;
43e9d192 2439
dd991abb
RH
2440 if (flag_stack_usage_info)
2441 current_function_static_stack_size = frame_size;
43e9d192 2442
44c0e7b9 2443 /* Store pairs and load pairs have a range only -512 to 504. */
43e9d192
IB
2444 if (offset >= 512)
2445 {
2446 /* When the frame has a large size, an initial decrease is done on
2447 the stack pointer to jump over the callee-allocated save area for
2448 register varargs, the local variable area and/or the callee-saved
2449 register area. This will allow the pre-index write-back
2450 store pair instructions to be used for setting up the stack frame
2451 efficiently. */
dd991abb 2452 offset = hard_fp_offset;
43e9d192
IB
2453 if (offset >= 512)
2454 offset = cfun->machine->frame.saved_regs_size;
2455
2456 frame_size -= (offset + crtl->outgoing_args_size);
2457 fp_offset = 0;
2458
2459 if (frame_size >= 0x1000000)
2460 {
2461 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2462 emit_move_insn (op0, GEN_INT (-frame_size));
dd991abb
RH
2463 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2464
2465 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2466 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2467 plus_constant (Pmode, stack_pointer_rtx,
2468 -frame_size)));
2469 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
2470 }
2471 else if (frame_size > 0)
2472 {
dd991abb
RH
2473 int hi_ofs = frame_size & 0xfff000;
2474 int lo_ofs = frame_size & 0x000fff;
2475
2476 if (hi_ofs)
43e9d192
IB
2477 {
2478 insn = emit_insn (gen_add2_insn
dd991abb 2479 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
43e9d192
IB
2480 RTX_FRAME_RELATED_P (insn) = 1;
2481 }
dd991abb 2482 if (lo_ofs)
43e9d192
IB
2483 {
2484 insn = emit_insn (gen_add2_insn
dd991abb 2485 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
43e9d192
IB
2486 RTX_FRAME_RELATED_P (insn) = 1;
2487 }
2488 }
2489 }
2490 else
2491 frame_size = -1;
2492
2493 if (offset > 0)
2494 {
ae13fce3
JW
2495 bool skip_wb = false;
2496
43e9d192
IB
2497 if (frame_pointer_needed)
2498 {
c5e1f66e
JW
2499 skip_wb = true;
2500
43e9d192
IB
2501 if (fp_offset)
2502 {
2503 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2504 GEN_INT (-offset)));
2505 RTX_FRAME_RELATED_P (insn) = 1;
80c11907
JW
2506
2507 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
c5e1f66e 2508 R30_REGNUM, false);
43e9d192
IB
2509 }
2510 else
80c11907 2511 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
43e9d192
IB
2512
2513 /* Set up frame pointer to point to the location of the
2514 previous frame pointer on the stack. */
2515 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2516 stack_pointer_rtx,
2517 GEN_INT (fp_offset)));
43e9d192 2518 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 2519 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192
IB
2520 }
2521 else
2522 {
c5e1f66e
JW
2523 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2524 unsigned reg2 = cfun->machine->frame.wb_candidate2;
80c11907 2525
c5e1f66e
JW
2526 if (fp_offset
2527 || reg1 == FIRST_PSEUDO_REGISTER
2528 || (reg2 == FIRST_PSEUDO_REGISTER
2529 && offset >= 256))
2530 {
2531 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2532 GEN_INT (-offset)));
2533 RTX_FRAME_RELATED_P (insn) = 1;
2534 }
2535 else
2536 {
ef4bddc2 2537 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
c5e1f66e
JW
2538
2539 skip_wb = true;
2540
2541 if (reg2 == FIRST_PSEUDO_REGISTER)
2542 aarch64_pushwb_single_reg (mode1, reg1, offset);
2543 else
2544 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2545 }
43e9d192
IB
2546 }
2547
c5e1f66e
JW
2548 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2549 skip_wb);
ae13fce3
JW
2550 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2551 skip_wb);
43e9d192
IB
2552 }
2553
2554  /* When offset >= 512,
2555 sub sp, sp, #<outgoing_args_size> */
2556 if (frame_size > -1)
2557 {
2558 if (crtl->outgoing_args_size > 0)
2559 {
2560 insn = emit_insn (gen_add2_insn
2561 (stack_pointer_rtx,
2562 GEN_INT (- crtl->outgoing_args_size)));
2563 RTX_FRAME_RELATED_P (insn) = 1;
2564 }
2565 }
2566}
2567
4f942779
RL
2568/* Return TRUE if we can use a simple_return insn.
2569
2570 This function checks whether the callee saved stack is empty, which
2571    means no restore actions are needed.  The pro_and_epilogue pass will
2572    use this to check whether the shrink-wrapping optimization is feasible.
2573
2574bool
2575aarch64_use_return_insn_p (void)
2576{
2577 if (!reload_completed)
2578 return false;
2579
2580 if (crtl->profile)
2581 return false;
2582
2583 aarch64_layout_frame ();
2584
2585 return cfun->machine->frame.frame_size == 0;
2586}
2587
43e9d192
IB
2588/* Generate the epilogue instructions for returning from a function. */
2589void
2590aarch64_expand_epilogue (bool for_sibcall)
2591{
1c960e02 2592 HOST_WIDE_INT frame_size, offset;
43e9d192 2593 HOST_WIDE_INT fp_offset;
dd991abb 2594 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2595 rtx_insn *insn;
7e8c2bd5
JW
2596  /* We need to add a memory barrier to prevent reads from the deallocated stack. */
2597 bool need_barrier_p = (get_frame_size () != 0
2598 || cfun->machine->frame.saved_varargs_size);
43e9d192
IB
2599
2600 aarch64_layout_frame ();
43e9d192 2601
1c960e02 2602 offset = frame_size = cfun->machine->frame.frame_size;
dd991abb
RH
2603 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2604 fp_offset = frame_size - hard_fp_offset;
44c0e7b9
YZ
2605
2606 /* Store pairs and load pairs have a range only -512 to 504. */
43e9d192
IB
2607 if (offset >= 512)
2608 {
dd991abb 2609 offset = hard_fp_offset;
43e9d192
IB
2610 if (offset >= 512)
2611 offset = cfun->machine->frame.saved_regs_size;
2612
2613 frame_size -= (offset + crtl->outgoing_args_size);
2614 fp_offset = 0;
2615 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2616 {
2617 insn = emit_insn (gen_add2_insn
2618 (stack_pointer_rtx,
2619 GEN_INT (crtl->outgoing_args_size)));
2620 RTX_FRAME_RELATED_P (insn) = 1;
2621 }
2622 }
2623 else
2624 frame_size = -1;
2625
2626 /* If there were outgoing arguments or we've done dynamic stack
2627 allocation, then restore the stack pointer from the frame
2628 pointer. This is at most one insn and more efficient than using
2629 GCC's internal mechanism. */
2630 if (frame_pointer_needed
2631 && (crtl->outgoing_args_size || cfun->calls_alloca))
2632 {
7e8c2bd5
JW
2633 if (cfun->calls_alloca)
2634 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2635
43e9d192
IB
2636 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2637 hard_frame_pointer_rtx,
8f454e9f
JW
2638 GEN_INT (0)));
2639 offset = offset - fp_offset;
43e9d192
IB
2640 }
2641
43e9d192
IB
2642 if (offset > 0)
2643 {
4b92caa1
JW
2644 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2645 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2646 bool skip_wb = true;
dd991abb 2647 rtx cfi_ops = NULL;
4b92caa1 2648
43e9d192 2649 if (frame_pointer_needed)
4b92caa1
JW
2650 fp_offset = 0;
2651 else if (fp_offset
2652 || reg1 == FIRST_PSEUDO_REGISTER
2653 || (reg2 == FIRST_PSEUDO_REGISTER
2654 && offset >= 256))
2655 skip_wb = false;
2656
2657 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
dd991abb 2658 skip_wb, &cfi_ops);
4b92caa1 2659 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
dd991abb 2660 skip_wb, &cfi_ops);
4b92caa1 2661
7e8c2bd5
JW
2662 if (need_barrier_p)
2663 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2664
4b92caa1 2665 if (skip_wb)
43e9d192 2666 {
ef4bddc2 2667 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
dd991abb 2668 rtx rreg1 = gen_rtx_REG (mode1, reg1);
4b92caa1 2669
dd991abb 2670 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
4b92caa1 2671 if (reg2 == FIRST_PSEUDO_REGISTER)
dd991abb
RH
2672 {
2673 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2674 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2675 mem = gen_rtx_MEM (mode1, mem);
2676 insn = emit_move_insn (rreg1, mem);
2677 }
4b92caa1
JW
2678 else
2679 {
dd991abb 2680 rtx rreg2 = gen_rtx_REG (mode1, reg2);
4b92caa1 2681
dd991abb
RH
2682 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2683 insn = emit_insn (aarch64_gen_loadwb_pair
2684 (mode1, stack_pointer_rtx, rreg1,
2685 rreg2, offset));
4b92caa1 2686 }
43e9d192 2687 }
43e9d192
IB
2688 else
2689 {
2690 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2691 GEN_INT (offset)));
43e9d192 2692 }
43e9d192 2693
dd991abb
RH
2694 /* Reset the CFA to be SP + FRAME_SIZE. */
2695 rtx new_cfa = stack_pointer_rtx;
2696 if (frame_size > 0)
2697 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2698 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2699 REG_NOTES (insn) = cfi_ops;
43e9d192 2700 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
2701 }
2702
dd991abb 2703 if (frame_size > 0)
43e9d192 2704 {
7e8c2bd5
JW
2705 if (need_barrier_p)
2706 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2707
43e9d192
IB
2708 if (frame_size >= 0x1000000)
2709 {
2710 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2711 emit_move_insn (op0, GEN_INT (frame_size));
dd991abb 2712 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
43e9d192 2713 }
dd991abb 2714 else
43e9d192 2715 {
dd991abb
RH
2716 int hi_ofs = frame_size & 0xfff000;
2717 int lo_ofs = frame_size & 0x000fff;
2718
2719 if (hi_ofs && lo_ofs)
43e9d192
IB
2720 {
2721 insn = emit_insn (gen_add2_insn
dd991abb 2722 (stack_pointer_rtx, GEN_INT (hi_ofs)));
43e9d192 2723 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 2724 frame_size = lo_ofs;
43e9d192 2725 }
dd991abb
RH
2726 insn = emit_insn (gen_add2_insn
2727 (stack_pointer_rtx, GEN_INT (frame_size)));
43e9d192
IB
2728 }
2729
dd991abb
RH
2730 /* Reset the CFA to be SP + 0. */
2731 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2732 RTX_FRAME_RELATED_P (insn) = 1;
2733 }
2734
2735 /* Stack adjustment for exception handler. */
2736 if (crtl->calls_eh_return)
2737 {
2738 /* We need to unwind the stack by the offset computed by
2739 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2740 to be SP; letting the CFA move during this adjustment
2741 is just as correct as retaining the CFA from the body
2742 of the function. Therefore, do nothing special. */
2743 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
2744 }
2745
2746 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2747 if (!for_sibcall)
2748 emit_jump_insn (ret_rtx);
2749}
2750
2751/* Return the place to copy the exception unwinding return address to.
2752    This will probably be a stack slot, but could (in theory) be the
2753    return register. */
2754rtx
2755aarch64_final_eh_return_addr (void)
2756{
1c960e02
MS
2757 HOST_WIDE_INT fp_offset;
2758
43e9d192 2759 aarch64_layout_frame ();
1c960e02
MS
2760
2761 fp_offset = cfun->machine->frame.frame_size
2762 - cfun->machine->frame.hard_fp_offset;
43e9d192
IB
2763
2764 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2765 return gen_rtx_REG (DImode, LR_REGNUM);
2766
2767 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2768 result in a store to save LR introduced by builtin_eh_return () being
2769 incorrectly deleted because the alias is not detected.
2770 So in the calculation of the address to copy the exception unwinding
2771    return address to, we consider two cases.
2772 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2773 we return a SP-relative location since all the addresses are SP-relative
2774 in this case. This prevents the store from being optimized away.
2775 If the fp_offset is not 0, then the addresses will be FP-relative and
2776 therefore we return a FP-relative location. */
2777
2778 if (frame_pointer_needed)
2779 {
2780 if (fp_offset)
2781 return gen_frame_mem (DImode,
2782 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2783 else
2784 return gen_frame_mem (DImode,
2785 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2786 }
2787
2788 /* If FP is not needed, we calculate the location of LR, which would be
2789 at the top of the saved registers block. */
2790
2791 return gen_frame_mem (DImode,
2792 plus_constant (Pmode,
2793 stack_pointer_rtx,
2794 fp_offset
2795 + cfun->machine->frame.saved_regs_size
2796 - 2 * UNITS_PER_WORD));
2797}
2798
9dfc162c
JG
2799/* Possibly output code to build up a constant in a register. For
2800 the benefit of the costs infrastructure, returns the number of
2801 instructions which would be emitted. GENERATE inhibits or
2802 enables code generation. */
2803
2804static int
2805aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
43e9d192 2806{
9dfc162c
JG
2807 int insns = 0;
2808
43e9d192 2809 if (aarch64_bitmask_imm (val, DImode))
9dfc162c
JG
2810 {
2811 if (generate)
2812 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2813 insns = 1;
2814 }
43e9d192
IB
2815 else
2816 {
2817 int i;
2818 int ncount = 0;
2819 int zcount = 0;
2820 HOST_WIDE_INT valp = val >> 16;
2821 HOST_WIDE_INT valm;
2822 HOST_WIDE_INT tval;
2823
2824 for (i = 16; i < 64; i += 16)
2825 {
2826 valm = (valp & 0xffff);
2827
2828 if (valm != 0)
2829 ++ zcount;
2830
2831 if (valm != 0xffff)
2832 ++ ncount;
2833
2834 valp >>= 16;
2835 }
2836
2837 /* zcount contains the number of additional MOVK instructions
2838 required if the constant is built up with an initial MOVZ instruction,
2839 while ncount is the number of MOVK instructions required if starting
2840	 with a MOVN instruction.  Choose the sequence that yields the fewest
2841	 instructions, preferring MOVZ instructions when the two counts are
2842	 equal. */
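      /* A worked sketch with an assumed value: for val == 0x1234000000005678
	 the three upper 16-bit chunks are 0x0000, 0x0000 and 0x1234, giving
	 zcount == 1 and ncount == 3, so the MOVZ path wins and the sequence
	 is "movz reg, #0x5678" followed by "movk reg, #0x1234, lsl #48",
	 i.e. two instructions in total.  */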
2843 if (ncount < zcount)
2844 {
9dfc162c
JG
2845 if (generate)
2846 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2847 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
43e9d192 2848 tval = 0xffff;
9dfc162c 2849 insns++;
43e9d192
IB
2850 }
2851 else
2852 {
9dfc162c
JG
2853 if (generate)
2854 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2855 GEN_INT (val & 0xffff));
43e9d192 2856 tval = 0;
9dfc162c 2857 insns++;
43e9d192
IB
2858 }
2859
2860 val >>= 16;
2861
2862 for (i = 16; i < 64; i += 16)
2863 {
2864 if ((val & 0xffff) != tval)
9dfc162c
JG
2865 {
2866 if (generate)
2867 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2868 GEN_INT (i),
2869 GEN_INT (val & 0xffff)));
2870 insns++;
2871 }
43e9d192
IB
2872 val >>= 16;
2873 }
2874 }
9dfc162c 2875 return insns;
43e9d192
IB
2876}
2877
2878static void
d9600ae5 2879aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
43e9d192
IB
2880{
2881 HOST_WIDE_INT mdelta = delta;
d9600ae5
SN
2882 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2883 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
43e9d192
IB
2884
2885 if (mdelta < 0)
2886 mdelta = -mdelta;
2887
2888 if (mdelta >= 4096 * 4096)
2889 {
9dfc162c 2890 (void) aarch64_build_constant (scratchreg, delta, true);
d9600ae5 2891 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
43e9d192
IB
2892 }
2893 else if (mdelta > 0)
2894 {
43e9d192 2895 if (mdelta >= 4096)
d9600ae5
SN
2896 {
2897 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2898 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2899 if (delta < 0)
2900 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2901 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2902 else
2903 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2904 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2905 }
43e9d192 2906 if (mdelta % 4096 != 0)
d9600ae5
SN
2907 {
2908 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2909 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2910 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2911 }
43e9d192
IB
2912 }
2913}
2914
2915/* Output code to add DELTA to the first argument, and then jump
2916 to FUNCTION. Used for C++ multiple inheritance. */
2917static void
2918aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2919 HOST_WIDE_INT delta,
2920 HOST_WIDE_INT vcall_offset,
2921 tree function)
2922{
2923 /* The this pointer is always in x0. Note that this differs from
2924    Arm where the this pointer may be bumped to r1 if r0 is required
2925 to return a pointer to an aggregate. On AArch64 a result value
2926 pointer will be in x8. */
2927 int this_regno = R0_REGNUM;
5d8a22a5
DM
2928 rtx this_rtx, temp0, temp1, addr, funexp;
2929 rtx_insn *insn;
43e9d192 2930
75f1d6fc
SN
2931 reload_completed = 1;
2932 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
2933
2934 if (vcall_offset == 0)
d9600ae5 2935 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2936 else
2937 {
28514dda 2938 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 2939
75f1d6fc
SN
2940 this_rtx = gen_rtx_REG (Pmode, this_regno);
2941 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2942 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 2943
75f1d6fc
SN
2944 addr = this_rtx;
2945 if (delta != 0)
2946 {
2947 if (delta >= -256 && delta < 256)
2948 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2949 plus_constant (Pmode, this_rtx, delta));
2950 else
d9600ae5 2951 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2952 }
2953
28514dda
YZ
2954 if (Pmode == ptr_mode)
2955 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2956 else
2957 aarch64_emit_move (temp0,
2958 gen_rtx_ZERO_EXTEND (Pmode,
2959 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 2960
28514dda 2961 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 2962 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
2963 else
2964 {
9dfc162c 2965 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
75f1d6fc 2966 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
2967 }
2968
28514dda
YZ
2969 if (Pmode == ptr_mode)
2970 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2971 else
2972 aarch64_emit_move (temp1,
2973 gen_rtx_SIGN_EXTEND (Pmode,
2974 gen_rtx_MEM (ptr_mode, addr)));
2975
75f1d6fc 2976 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
2977 }
2978
75f1d6fc
SN
2979 /* Generate a tail call to the target function. */
2980 if (!TREE_USED (function))
2981 {
2982 assemble_external (function);
2983 TREE_USED (function) = 1;
2984 }
2985 funexp = XEXP (DECL_RTL (function), 0);
2986 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2987 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2988 SIBLING_CALL_P (insn) = 1;
2989
2990 insn = get_insns ();
2991 shorten_branches (insn);
2992 final_start_function (insn, file, 1);
2993 final (insn, file, 1);
43e9d192 2994 final_end_function ();
75f1d6fc
SN
2995
2996 /* Stop pretending to be a post-reload pass. */
2997 reload_completed = 0;
43e9d192
IB
2998}
2999
43e9d192
IB
3000static bool
3001aarch64_tls_referenced_p (rtx x)
3002{
3003 if (!TARGET_HAVE_TLS)
3004 return false;
e7de8563
RS
3005 subrtx_iterator::array_type array;
3006 FOR_EACH_SUBRTX (iter, array, x, ALL)
3007 {
3008 const_rtx x = *iter;
3009 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
3010 return true;
3011 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3012 TLS offsets, not real symbol references. */
3013 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3014 iter.skip_subrtxes ();
3015 }
3016 return false;
43e9d192
IB
3017}
3018
3019
3020static int
3021aarch64_bitmasks_cmp (const void *i1, const void *i2)
3022{
3023 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
3024 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
3025
3026 if (*imm1 < *imm2)
3027 return -1;
3028 if (*imm1 > *imm2)
3029 return +1;
3030 return 0;
3031}
3032
3033
3034static void
3035aarch64_build_bitmask_table (void)
3036{
3037 unsigned HOST_WIDE_INT mask, imm;
3038 unsigned int log_e, e, s, r;
3039 unsigned int nimms = 0;
3040
3041 for (log_e = 1; log_e <= 6; log_e++)
3042 {
3043 e = 1 << log_e;
3044 if (e == 64)
3045 mask = ~(HOST_WIDE_INT) 0;
3046 else
3047 mask = ((HOST_WIDE_INT) 1 << e) - 1;
3048 for (s = 1; s < e; s++)
3049 {
3050 for (r = 0; r < e; r++)
3051 {
3052 /* set s consecutive bits to 1 (s < 64) */
3053 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
3054 /* rotate right by r */
3055 if (r != 0)
3056 imm = ((imm >> r) | (imm << (e - r))) & mask;
3057 /* replicate the constant depending on SIMD size */
3058 switch (log_e) {
3059 case 1: imm |= (imm << 2);
3060 case 2: imm |= (imm << 4);
3061 case 3: imm |= (imm << 8);
3062 case 4: imm |= (imm << 16);
3063 case 5: imm |= (imm << 32);
3064 case 6:
3065 break;
3066 default:
3067 gcc_unreachable ();
3068 }
3069 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
3070 aarch64_bitmasks[nimms++] = imm;
3071 }
3072 }
3073 }
3074
3075 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
3076 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
3077 aarch64_bitmasks_cmp);
3078}
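/* A worked sketch of one table entry (assumed values): with element size
   e == 8, s == 3 set bits and rotation r == 1, the element 0b00000111 is
   rotated to 0b10000011 (0x83) and then replicated across all eight bytes,
   yielding the 64-bit bitmask immediate 0x8383838383838383.  */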
3079
3080
3081/* Return true if val can be encoded as a 12-bit unsigned immediate with
3082 a left shift of 0 or 12 bits. */
3083bool
3084aarch64_uimm12_shift (HOST_WIDE_INT val)
3085{
3086 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3087 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3088 );
3089}
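/* For instance (assumed values): 0xfff and 0xabc000 both satisfy the check
   above (shift 0 and shift 12 respectively), while 0x1001 does not, since
   its set bits straddle the two 12-bit fields.  */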
3090
3091
3092/* Return true if val is an immediate that can be loaded into a
3093 register by a MOVZ instruction. */
3094static bool
ef4bddc2 3095aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3096{
3097 if (GET_MODE_SIZE (mode) > 4)
3098 {
3099 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3100 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3101 return 1;
3102 }
3103 else
3104 {
3105 /* Ignore sign extension. */
3106 val &= (HOST_WIDE_INT) 0xffffffff;
3107 }
3108 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3109 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3110}
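/* For example (assumed values): 0xffff0000 in SImode and 0xabcd00000000 in
   DImode pass the check above, since each is a single 16-bit chunk at a
   16-bit-aligned position (a MOVZ with LSL #16 or #32), while 0x12345678
   does not.  */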
3111
3112
3113/* Return true if val is a valid bitmask immediate. */
3114bool
ef4bddc2 3115aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3116{
3117 if (GET_MODE_SIZE (mode) < 8)
3118 {
3119 /* Replicate bit pattern. */
3120 val &= (HOST_WIDE_INT) 0xffffffff;
3121 val |= val << 32;
3122 }
3123 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
3124 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
3125}
3126
3127
3128/* Return true if val is an immediate that can be loaded into a
3129 register in a single instruction. */
3130bool
ef4bddc2 3131aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3132{
3133 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
3134 return 1;
3135 return aarch64_bitmask_imm (val, mode);
3136}
3137
3138static bool
ef4bddc2 3139aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
3140{
3141 rtx base, offset;
7eda14e1 3142
43e9d192
IB
3143 if (GET_CODE (x) == HIGH)
3144 return true;
3145
3146 split_const (x, &base, &offset);
3147 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 3148 {
f8b756b7 3149 if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
28514dda
YZ
3150 != SYMBOL_FORCE_TO_MEM)
3151 return true;
3152 else
3153 /* Avoid generating a 64-bit relocation in ILP32; leave
3154 to aarch64_expand_mov_immediate to handle it properly. */
3155 return mode != ptr_mode;
3156 }
43e9d192
IB
3157
3158 return aarch64_tls_referenced_p (x);
3159}
3160
3161/* Return true if register REGNO is a valid index register.
3162 STRICT_P is true if REG_OK_STRICT is in effect. */
3163
3164bool
3165aarch64_regno_ok_for_index_p (int regno, bool strict_p)
3166{
3167 if (!HARD_REGISTER_NUM_P (regno))
3168 {
3169 if (!strict_p)
3170 return true;
3171
3172 if (!reg_renumber)
3173 return false;
3174
3175 regno = reg_renumber[regno];
3176 }
3177 return GP_REGNUM_P (regno);
3178}
3179
3180/* Return true if register REGNO is a valid base register for mode MODE.
3181 STRICT_P is true if REG_OK_STRICT is in effect. */
3182
3183bool
3184aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3185{
3186 if (!HARD_REGISTER_NUM_P (regno))
3187 {
3188 if (!strict_p)
3189 return true;
3190
3191 if (!reg_renumber)
3192 return false;
3193
3194 regno = reg_renumber[regno];
3195 }
3196
3197 /* The fake registers will be eliminated to either the stack or
3198 hard frame pointer, both of which are usually valid base registers.
3199 Reload deals with the cases where the eliminated form isn't valid. */
3200 return (GP_REGNUM_P (regno)
3201 || regno == SP_REGNUM
3202 || regno == FRAME_POINTER_REGNUM
3203 || regno == ARG_POINTER_REGNUM);
3204}
3205
3206/* Return true if X is a valid base register for mode MODE.
3207 STRICT_P is true if REG_OK_STRICT is in effect. */
3208
3209static bool
3210aarch64_base_register_rtx_p (rtx x, bool strict_p)
3211{
3212 if (!strict_p && GET_CODE (x) == SUBREG)
3213 x = SUBREG_REG (x);
3214
3215 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3216}
3217
3218/* Return true if address offset is a valid index. If it is, fill in INFO
3219 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
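/* As a sketch (assumed RTL): for a DImode access, an index such as
   (mult:DI (reg:DI x1) (const_int 8)) is classified as ADDRESS_REG_REG with
   shift == 3, corresponding to the addressing form [base, x1, lsl #3].  */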
3220
3221static bool
3222aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 3223 machine_mode mode, bool strict_p)
43e9d192
IB
3224{
3225 enum aarch64_address_type type;
3226 rtx index;
3227 int shift;
3228
3229 /* (reg:P) */
3230 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3231 && GET_MODE (x) == Pmode)
3232 {
3233 type = ADDRESS_REG_REG;
3234 index = x;
3235 shift = 0;
3236 }
3237 /* (sign_extend:DI (reg:SI)) */
3238 else if ((GET_CODE (x) == SIGN_EXTEND
3239 || GET_CODE (x) == ZERO_EXTEND)
3240 && GET_MODE (x) == DImode
3241 && GET_MODE (XEXP (x, 0)) == SImode)
3242 {
3243 type = (GET_CODE (x) == SIGN_EXTEND)
3244 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3245 index = XEXP (x, 0);
3246 shift = 0;
3247 }
3248 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3249 else if (GET_CODE (x) == MULT
3250 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3251 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3252 && GET_MODE (XEXP (x, 0)) == DImode
3253 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3254 && CONST_INT_P (XEXP (x, 1)))
3255 {
3256 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3257 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3258 index = XEXP (XEXP (x, 0), 0);
3259 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3260 }
3261 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3262 else if (GET_CODE (x) == ASHIFT
3263 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3264 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3265 && GET_MODE (XEXP (x, 0)) == DImode
3266 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3267 && CONST_INT_P (XEXP (x, 1)))
3268 {
3269 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3270 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3271 index = XEXP (XEXP (x, 0), 0);
3272 shift = INTVAL (XEXP (x, 1));
3273 }
3274 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3275 else if ((GET_CODE (x) == SIGN_EXTRACT
3276 || GET_CODE (x) == ZERO_EXTRACT)
3277 && GET_MODE (x) == DImode
3278 && GET_CODE (XEXP (x, 0)) == MULT
3279 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3280 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3281 {
3282 type = (GET_CODE (x) == SIGN_EXTRACT)
3283 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3284 index = XEXP (XEXP (x, 0), 0);
3285 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3286 if (INTVAL (XEXP (x, 1)) != 32 + shift
3287 || INTVAL (XEXP (x, 2)) != 0)
3288 shift = -1;
3289 }
3290 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3291 (const_int 0xffffffff<<shift)) */
3292 else if (GET_CODE (x) == AND
3293 && GET_MODE (x) == DImode
3294 && GET_CODE (XEXP (x, 0)) == MULT
3295 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3296 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3297 && CONST_INT_P (XEXP (x, 1)))
3298 {
3299 type = ADDRESS_REG_UXTW;
3300 index = XEXP (XEXP (x, 0), 0);
3301 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3302 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3303 shift = -1;
3304 }
3305 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3306 else if ((GET_CODE (x) == SIGN_EXTRACT
3307 || GET_CODE (x) == ZERO_EXTRACT)
3308 && GET_MODE (x) == DImode
3309 && GET_CODE (XEXP (x, 0)) == ASHIFT
3310 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3311 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3312 {
3313 type = (GET_CODE (x) == SIGN_EXTRACT)
3314 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3315 index = XEXP (XEXP (x, 0), 0);
3316 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3317 if (INTVAL (XEXP (x, 1)) != 32 + shift
3318 || INTVAL (XEXP (x, 2)) != 0)
3319 shift = -1;
3320 }
3321 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3322 (const_int 0xffffffff<<shift)) */
3323 else if (GET_CODE (x) == AND
3324 && GET_MODE (x) == DImode
3325 && GET_CODE (XEXP (x, 0)) == ASHIFT
3326 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3327 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3328 && CONST_INT_P (XEXP (x, 1)))
3329 {
3330 type = ADDRESS_REG_UXTW;
3331 index = XEXP (XEXP (x, 0), 0);
3332 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3333 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3334 shift = -1;
3335 }
3336 /* (mult:P (reg:P) (const_int scale)) */
3337 else if (GET_CODE (x) == MULT
3338 && GET_MODE (x) == Pmode
3339 && GET_MODE (XEXP (x, 0)) == Pmode
3340 && CONST_INT_P (XEXP (x, 1)))
3341 {
3342 type = ADDRESS_REG_REG;
3343 index = XEXP (x, 0);
3344 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3345 }
3346 /* (ashift:P (reg:P) (const_int shift)) */
3347 else if (GET_CODE (x) == ASHIFT
3348 && GET_MODE (x) == Pmode
3349 && GET_MODE (XEXP (x, 0)) == Pmode
3350 && CONST_INT_P (XEXP (x, 1)))
3351 {
3352 type = ADDRESS_REG_REG;
3353 index = XEXP (x, 0);
3354 shift = INTVAL (XEXP (x, 1));
3355 }
3356 else
3357 return false;
3358
3359 if (GET_CODE (index) == SUBREG)
3360 index = SUBREG_REG (index);
3361
3362 if ((shift == 0 ||
3363 (shift > 0 && shift <= 3
3364 && (1 << shift) == GET_MODE_SIZE (mode)))
3365 && REG_P (index)
3366 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3367 {
3368 info->type = type;
3369 info->offset = index;
3370 info->shift = shift;
3371 return true;
3372 }
3373
3374 return false;
3375}
3376
44707478 3377bool
ef4bddc2 3378aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3379{
3380 return (offset >= -64 * GET_MODE_SIZE (mode)
3381 && offset < 64 * GET_MODE_SIZE (mode)
3382 && offset % GET_MODE_SIZE (mode) == 0);
3383}
3384
3385static inline bool
ef4bddc2 3386offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
3387 HOST_WIDE_INT offset)
3388{
3389 return offset >= -256 && offset < 256;
3390}
3391
3392static inline bool
ef4bddc2 3393offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3394{
3395 return (offset >= 0
3396 && offset < 4096 * GET_MODE_SIZE (mode)
3397 && offset % GET_MODE_SIZE (mode) == 0);
3398}
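/* For a DImode (8-byte) access, the three predicates above accept,
   respectively: multiples of 8 in [-512, 504], any offset in [-256, 255],
   and multiples of 8 in [0, 32760] (arithmetic assumed from the formulas
   above).  */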
3399
3400/* Return true if X is a valid address for machine mode MODE. If it is,
3401 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3402 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3403
3404static bool
3405aarch64_classify_address (struct aarch64_address_info *info,
ef4bddc2 3406 rtx x, machine_mode mode,
43e9d192
IB
3407 RTX_CODE outer_code, bool strict_p)
3408{
3409 enum rtx_code code = GET_CODE (x);
3410 rtx op0, op1;
2d8c6dc1
AH
3411
3412 /* On BE, we use load/store pair for all large int mode load/stores. */
3413 bool load_store_pair_p = (outer_code == PARALLEL
3414 || (BYTES_BIG_ENDIAN
3415 && aarch64_vect_struct_mode_p (mode)));
3416
43e9d192 3417 bool allow_reg_index_p =
2d8c6dc1
AH
3418 !load_store_pair_p
3419 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
3420 && !aarch64_vect_struct_mode_p (mode);
3421
3422 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
3423 REG addressing. */
3424 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
43e9d192
IB
3425 && (code != POST_INC && code != REG))
3426 return false;
3427
3428 switch (code)
3429 {
3430 case REG:
3431 case SUBREG:
3432 info->type = ADDRESS_REG_IMM;
3433 info->base = x;
3434 info->offset = const0_rtx;
3435 return aarch64_base_register_rtx_p (x, strict_p);
3436
3437 case PLUS:
3438 op0 = XEXP (x, 0);
3439 op1 = XEXP (x, 1);
15c0c5c9
JW
3440
3441 if (! strict_p
4aa81c2e 3442 && REG_P (op0)
15c0c5c9
JW
3443 && (op0 == virtual_stack_vars_rtx
3444 || op0 == frame_pointer_rtx
3445 || op0 == arg_pointer_rtx)
4aa81c2e 3446 && CONST_INT_P (op1))
15c0c5c9
JW
3447 {
3448 info->type = ADDRESS_REG_IMM;
3449 info->base = op0;
3450 info->offset = op1;
3451
3452 return true;
3453 }
3454
43e9d192
IB
3455 if (GET_MODE_SIZE (mode) != 0
3456 && CONST_INT_P (op1)
3457 && aarch64_base_register_rtx_p (op0, strict_p))
3458 {
3459 HOST_WIDE_INT offset = INTVAL (op1);
3460
3461 info->type = ADDRESS_REG_IMM;
3462 info->base = op0;
3463 info->offset = op1;
3464
3465 /* TImode and TFmode values are allowed in both pairs of X
3466 registers and individual Q registers. The available
3467 address modes are:
3468 X,X: 7-bit signed scaled offset
3469 Q: 9-bit signed offset
3470 We conservatively require an offset representable in either mode.
3471 */
3472 if (mode == TImode || mode == TFmode)
44707478 3473 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3474 && offset_9bit_signed_unscaled_p (mode, offset));
3475
2d8c6dc1
AH
3476 /* A 7bit offset check because OImode will emit a ldp/stp
3477 instruction (only big endian will get here).
3478 For ldp/stp instructions, the offset is scaled for the size of a
3479 single element of the pair. */
3480 if (mode == OImode)
3481 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
3482
3483 /* Three 9/12 bit offsets checks because CImode will emit three
3484 ldr/str instructions (only big endian will get here). */
3485 if (mode == CImode)
3486 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3487 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
3488 || offset_12bit_unsigned_scaled_p (V16QImode,
3489 offset + 32)));
3490
3491 /* Two 7bit offsets checks because XImode will emit two ldp/stp
3492 instructions (only big endian will get here). */
3493 if (mode == XImode)
3494 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3495 && aarch64_offset_7bit_signed_scaled_p (TImode,
3496 offset + 32));
3497
3498 if (load_store_pair_p)
43e9d192 3499 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3500 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3501 else
3502 return (offset_9bit_signed_unscaled_p (mode, offset)
3503 || offset_12bit_unsigned_scaled_p (mode, offset));
3504 }
3505
3506 if (allow_reg_index_p)
3507 {
3508 /* Look for base + (scaled/extended) index register. */
3509 if (aarch64_base_register_rtx_p (op0, strict_p)
3510 && aarch64_classify_index (info, op1, mode, strict_p))
3511 {
3512 info->base = op0;
3513 return true;
3514 }
3515 if (aarch64_base_register_rtx_p (op1, strict_p)
3516 && aarch64_classify_index (info, op0, mode, strict_p))
3517 {
3518 info->base = op1;
3519 return true;
3520 }
3521 }
3522
3523 return false;
3524
3525 case POST_INC:
3526 case POST_DEC:
3527 case PRE_INC:
3528 case PRE_DEC:
3529 info->type = ADDRESS_REG_WB;
3530 info->base = XEXP (x, 0);
3531 info->offset = NULL_RTX;
3532 return aarch64_base_register_rtx_p (info->base, strict_p);
3533
3534 case POST_MODIFY:
3535 case PRE_MODIFY:
3536 info->type = ADDRESS_REG_WB;
3537 info->base = XEXP (x, 0);
3538 if (GET_CODE (XEXP (x, 1)) == PLUS
3539 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3540 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3541 && aarch64_base_register_rtx_p (info->base, strict_p))
3542 {
3543 HOST_WIDE_INT offset;
3544 info->offset = XEXP (XEXP (x, 1), 1);
3545 offset = INTVAL (info->offset);
3546
3547 /* TImode and TFmode values are allowed in both pairs of X
3548 registers and individual Q registers. The available
3549 address modes are:
3550 X,X: 7-bit signed scaled offset
3551 Q: 9-bit signed offset
3552 We conservatively require an offset representable in either mode.
3553 */
3554 if (mode == TImode || mode == TFmode)
44707478 3555 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3556 && offset_9bit_signed_unscaled_p (mode, offset));
3557
2d8c6dc1 3558 if (load_store_pair_p)
43e9d192 3559 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3560 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3561 else
3562 return offset_9bit_signed_unscaled_p (mode, offset);
3563 }
3564 return false;
3565
3566 case CONST:
3567 case SYMBOL_REF:
3568 case LABEL_REF:
79517551
SN
3569 /* load literal: pc-relative constant pool entry. Only supported
3570 for SI mode or larger. */
43e9d192 3571 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1
AH
3572
3573 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
3574 {
3575 rtx sym, addend;
3576
3577 split_const (x, &sym, &addend);
3578 return (GET_CODE (sym) == LABEL_REF
3579 || (GET_CODE (sym) == SYMBOL_REF
3580 && CONSTANT_POOL_ADDRESS_P (sym)));
3581 }
3582 return false;
3583
3584 case LO_SUM:
3585 info->type = ADDRESS_LO_SUM;
3586 info->base = XEXP (x, 0);
3587 info->offset = XEXP (x, 1);
3588 if (allow_reg_index_p
3589 && aarch64_base_register_rtx_p (info->base, strict_p))
3590 {
3591 rtx sym, offs;
3592 split_const (info->offset, &sym, &offs);
3593 if (GET_CODE (sym) == SYMBOL_REF
f8b756b7 3594 && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
43e9d192
IB
3595 == SYMBOL_SMALL_ABSOLUTE))
3596 {
3597 /* The symbol and offset must be aligned to the access size. */
3598 unsigned int align;
3599 unsigned int ref_size;
3600
3601 if (CONSTANT_POOL_ADDRESS_P (sym))
3602 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3603 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3604 {
3605 tree exp = SYMBOL_REF_DECL (sym);
3606 align = TYPE_ALIGN (TREE_TYPE (exp));
3607 align = CONSTANT_ALIGNMENT (exp, align);
3608 }
3609 else if (SYMBOL_REF_DECL (sym))
3610 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
3611 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3612 && SYMBOL_REF_BLOCK (sym) != NULL)
3613 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
3614 else
3615 align = BITS_PER_UNIT;
3616
3617 ref_size = GET_MODE_SIZE (mode);
3618 if (ref_size == 0)
3619 ref_size = GET_MODE_SIZE (DImode);
3620
3621 return ((INTVAL (offs) & (ref_size - 1)) == 0
3622 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3623 }
3624 }
3625 return false;
3626
3627 default:
3628 return false;
3629 }
3630}
3631
3632bool
3633aarch64_symbolic_address_p (rtx x)
3634{
3635 rtx offset;
3636
3637 split_const (x, &x, &offset);
3638 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3639}
3640
3641/* Classify the base of symbolic expression X, given that X appears in
3642 context CONTEXT. */
da4f13a4
MS
3643
3644enum aarch64_symbol_type
3645aarch64_classify_symbolic_expression (rtx x,
3646 enum aarch64_symbol_context context)
43e9d192
IB
3647{
3648 rtx offset;
da4f13a4 3649
43e9d192 3650 split_const (x, &x, &offset);
f8b756b7 3651 return aarch64_classify_symbol (x, offset, context);
43e9d192
IB
3652}
3653
3654
3655/* Return TRUE if X is a legitimate address for accessing memory in
3656 mode MODE. */
3657static bool
ef4bddc2 3658aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
3659{
3660 struct aarch64_address_info addr;
3661
3662 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3663}
3664
3665/* Return TRUE if X is a legitimate address for accessing memory in
3666 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3667 pair operation. */
3668bool
ef4bddc2 3669aarch64_legitimate_address_p (machine_mode mode, rtx x,
aef66c94 3670 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
3671{
3672 struct aarch64_address_info addr;
3673
3674 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3675}
3676
3677/* Return TRUE if rtx X is immediate constant 0.0 */
3678bool
3520f7cc 3679aarch64_float_const_zero_rtx_p (rtx x)
43e9d192
IB
3680{
3681 REAL_VALUE_TYPE r;
3682
3683 if (GET_MODE (x) == VOIDmode)
3684 return false;
3685
3686 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3687 if (REAL_VALUE_MINUS_ZERO (r))
3688 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3689 return REAL_VALUES_EQUAL (r, dconst0);
3690}
3691
70f09188
AP
3692/* Return the fixed registers used for condition codes. */
3693
3694static bool
3695aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3696{
3697 *p1 = CC_REGNUM;
3698 *p2 = INVALID_REGNUM;
3699 return true;
3700}
3701
78607708
TV
3702/* Emit call insn with PAT and do aarch64-specific handling. */
3703
d07a3fed 3704void
78607708
TV
3705aarch64_emit_call_insn (rtx pat)
3706{
3707 rtx insn = emit_call_insn (pat);
3708
3709 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3710 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3711 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3712}
3713
ef4bddc2 3714machine_mode
43e9d192
IB
3715aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3716{
3717 /* All floating point compares return CCFP if it is an equality
3718 comparison, and CCFPE otherwise. */
3719 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3720 {
3721 switch (code)
3722 {
3723 case EQ:
3724 case NE:
3725 case UNORDERED:
3726 case ORDERED:
3727 case UNLT:
3728 case UNLE:
3729 case UNGT:
3730 case UNGE:
3731 case UNEQ:
3732 case LTGT:
3733 return CCFPmode;
3734
3735 case LT:
3736 case LE:
3737 case GT:
3738 case GE:
3739 return CCFPEmode;
3740
3741 default:
3742 gcc_unreachable ();
3743 }
3744 }
3745
3746 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3747 && y == const0_rtx
3748 && (code == EQ || code == NE || code == LT || code == GE)
b056c910
N
3749 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3750 || GET_CODE (x) == NEG))
43e9d192
IB
3751 return CC_NZmode;
3752
1c992d1e 3753 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
3754 the comparison will have to be swapped when we emit the assembly
3755 code. */
3756 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 3757 && (REG_P (y) || GET_CODE (y) == SUBREG)
43e9d192
IB
3758 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3759 || GET_CODE (x) == LSHIFTRT
1c992d1e 3760 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
3761 return CC_SWPmode;
3762
1c992d1e
RE
3763 /* Similarly for a negated operand, but we can only do this for
3764 equalities. */
3765 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 3766 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
3767 && (code == EQ || code == NE)
3768 && GET_CODE (x) == NEG)
3769 return CC_Zmode;
3770
43e9d192
IB
3771 /* A compare of a mode narrower than SI mode against zero can be done
3772 by extending the value in the comparison. */
3773 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3774 && y == const0_rtx)
3775 /* Only use sign-extension if we really need it. */
3776 return ((code == GT || code == GE || code == LE || code == LT)
3777 ? CC_SESWPmode : CC_ZESWPmode);
3778
3779 /* For everything else, return CCmode. */
3780 return CCmode;
3781}
3782
3dfa7055
ZC
3783static int
3784aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
3785
cd5660ab 3786int
43e9d192
IB
3787aarch64_get_condition_code (rtx x)
3788{
ef4bddc2 3789 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
3790 enum rtx_code comp_code = GET_CODE (x);
3791
3792 if (GET_MODE_CLASS (mode) != MODE_CC)
3793 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
3794 return aarch64_get_condition_code_1 (mode, comp_code);
3795}
43e9d192 3796
3dfa7055
ZC
3797static int
3798aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
3799{
3800 int ne = -1, eq = -1;
43e9d192
IB
3801 switch (mode)
3802 {
3803 case CCFPmode:
3804 case CCFPEmode:
3805 switch (comp_code)
3806 {
3807 case GE: return AARCH64_GE;
3808 case GT: return AARCH64_GT;
3809 case LE: return AARCH64_LS;
3810 case LT: return AARCH64_MI;
3811 case NE: return AARCH64_NE;
3812 case EQ: return AARCH64_EQ;
3813 case ORDERED: return AARCH64_VC;
3814 case UNORDERED: return AARCH64_VS;
3815 case UNLT: return AARCH64_LT;
3816 case UNLE: return AARCH64_LE;
3817 case UNGT: return AARCH64_HI;
3818 case UNGE: return AARCH64_PL;
cd5660ab 3819 default: return -1;
43e9d192
IB
3820 }
3821 break;
3822
3dfa7055
ZC
3823 case CC_DNEmode:
3824 ne = AARCH64_NE;
3825 eq = AARCH64_EQ;
3826 break;
3827
3828 case CC_DEQmode:
3829 ne = AARCH64_EQ;
3830 eq = AARCH64_NE;
3831 break;
3832
3833 case CC_DGEmode:
3834 ne = AARCH64_GE;
3835 eq = AARCH64_LT;
3836 break;
3837
3838 case CC_DLTmode:
3839 ne = AARCH64_LT;
3840 eq = AARCH64_GE;
3841 break;
3842
3843 case CC_DGTmode:
3844 ne = AARCH64_GT;
3845 eq = AARCH64_LE;
3846 break;
3847
3848 case CC_DLEmode:
3849 ne = AARCH64_LE;
3850 eq = AARCH64_GT;
3851 break;
3852
3853 case CC_DGEUmode:
3854 ne = AARCH64_CS;
3855 eq = AARCH64_CC;
3856 break;
3857
3858 case CC_DLTUmode:
3859 ne = AARCH64_CC;
3860 eq = AARCH64_CS;
3861 break;
3862
3863 case CC_DGTUmode:
3864 ne = AARCH64_HI;
3865 eq = AARCH64_LS;
3866 break;
3867
3868 case CC_DLEUmode:
3869 ne = AARCH64_LS;
3870 eq = AARCH64_HI;
3871 break;
3872
43e9d192
IB
3873 case CCmode:
3874 switch (comp_code)
3875 {
3876 case NE: return AARCH64_NE;
3877 case EQ: return AARCH64_EQ;
3878 case GE: return AARCH64_GE;
3879 case GT: return AARCH64_GT;
3880 case LE: return AARCH64_LE;
3881 case LT: return AARCH64_LT;
3882 case GEU: return AARCH64_CS;
3883 case GTU: return AARCH64_HI;
3884 case LEU: return AARCH64_LS;
3885 case LTU: return AARCH64_CC;
cd5660ab 3886 default: return -1;
43e9d192
IB
3887 }
3888 break;
3889
3890 case CC_SWPmode:
3891 case CC_ZESWPmode:
3892 case CC_SESWPmode:
3893 switch (comp_code)
3894 {
3895 case NE: return AARCH64_NE;
3896 case EQ: return AARCH64_EQ;
3897 case GE: return AARCH64_LE;
3898 case GT: return AARCH64_LT;
3899 case LE: return AARCH64_GE;
3900 case LT: return AARCH64_GT;
3901 case GEU: return AARCH64_LS;
3902 case GTU: return AARCH64_CC;
3903 case LEU: return AARCH64_CS;
3904 case LTU: return AARCH64_HI;
cd5660ab 3905 default: return -1;
43e9d192
IB
3906 }
3907 break;
3908
3909 case CC_NZmode:
3910 switch (comp_code)
3911 {
3912 case NE: return AARCH64_NE;
3913 case EQ: return AARCH64_EQ;
3914 case GE: return AARCH64_PL;
3915 case LT: return AARCH64_MI;
cd5660ab 3916 default: return -1;
43e9d192
IB
3917 }
3918 break;
3919
1c992d1e
RE
3920 case CC_Zmode:
3921 switch (comp_code)
3922 {
3923 case NE: return AARCH64_NE;
3924 case EQ: return AARCH64_EQ;
cd5660ab 3925 default: return -1;
1c992d1e
RE
3926 }
3927 break;
3928
43e9d192 3929 default:
cd5660ab 3930 return -1;
43e9d192
IB
3931 break;
3932 }
3dfa7055
ZC
3933
3934 if (comp_code == NE)
3935 return ne;
3936
3937 if (comp_code == EQ)
3938 return eq;
3939
3940 return -1;
43e9d192
IB
3941}
3942
ddeabd3e
AL
3943bool
3944aarch64_const_vec_all_same_in_range_p (rtx x,
3945 HOST_WIDE_INT minval,
3946 HOST_WIDE_INT maxval)
3947{
3948 HOST_WIDE_INT firstval;
3949 int count, i;
3950
3951 if (GET_CODE (x) != CONST_VECTOR
3952 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
3953 return false;
3954
3955 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
3956 if (firstval < minval || firstval > maxval)
3957 return false;
3958
3959 count = CONST_VECTOR_NUNITS (x);
3960 for (i = 1; i < count; i++)
3961 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
3962 return false;
3963
3964 return true;
3965}
3966
3967bool
3968aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
3969{
3970 return aarch64_const_vec_all_same_in_range_p (x, val, val);
3971}
3972
43e9d192
IB
3973static unsigned
3974bit_count (unsigned HOST_WIDE_INT value)
3975{
3976 unsigned count = 0;
3977
3978 while (value)
3979 {
3980 count++;
3981 value &= value - 1;
3982 }
3983
3984 return count;
3985}
3986
cf670503
ZC
3987/* N Z C V. */
3988#define AARCH64_CC_V 1
3989#define AARCH64_CC_C (1 << 1)
3990#define AARCH64_CC_Z (1 << 2)
3991#define AARCH64_CC_N (1 << 3)
3992
 3993 /* N Z C V flags for ccmp. The first entry is for the AND op and the other
 3994 is for the IOR op. Indexed by AARCH64_COND_CODE. */
3995static const int aarch64_nzcv_codes[][2] =
3996{
3997 {AARCH64_CC_Z, 0}, /* EQ, Z == 1. */
3998 {0, AARCH64_CC_Z}, /* NE, Z == 0. */
3999 {AARCH64_CC_C, 0}, /* CS, C == 1. */
4000 {0, AARCH64_CC_C}, /* CC, C == 0. */
4001 {AARCH64_CC_N, 0}, /* MI, N == 1. */
4002 {0, AARCH64_CC_N}, /* PL, N == 0. */
4003 {AARCH64_CC_V, 0}, /* VS, V == 1. */
4004 {0, AARCH64_CC_V}, /* VC, V == 0. */
 4005 {AARCH64_CC_C, 0}, /* HI, C == 1 && Z == 0. */
4006 {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0). */
4007 {0, AARCH64_CC_V}, /* GE, N == V. */
4008 {AARCH64_CC_V, 0}, /* LT, N != V. */
4009 {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V. */
4010 {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V). */
4011 {0, 0}, /* AL, Any. */
4012 {0, 0}, /* NV, Any. */
4013};
4014
4015int
4016aarch64_ccmp_mode_to_code (enum machine_mode mode)
4017{
4018 switch (mode)
4019 {
4020 case CC_DNEmode:
4021 return NE;
4022
4023 case CC_DEQmode:
4024 return EQ;
4025
4026 case CC_DLEmode:
4027 return LE;
4028
4029 case CC_DGTmode:
4030 return GT;
4031
4032 case CC_DLTmode:
4033 return LT;
4034
4035 case CC_DGEmode:
4036 return GE;
4037
4038 case CC_DLEUmode:
4039 return LEU;
4040
4041 case CC_DGTUmode:
4042 return GTU;
4043
4044 case CC_DLTUmode:
4045 return LTU;
4046
4047 case CC_DGEUmode:
4048 return GEU;
4049
4050 default:
4051 gcc_unreachable ();
4052 }
4053}
4054
4055
43e9d192
IB
4056void
4057aarch64_print_operand (FILE *f, rtx x, char code)
4058{
4059 switch (code)
4060 {
f541a481
KT
4061 /* An integer or symbol address without a preceding # sign. */
4062 case 'c':
4063 switch (GET_CODE (x))
4064 {
4065 case CONST_INT:
4066 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4067 break;
4068
4069 case SYMBOL_REF:
4070 output_addr_const (f, x);
4071 break;
4072
4073 case CONST:
4074 if (GET_CODE (XEXP (x, 0)) == PLUS
4075 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4076 {
4077 output_addr_const (f, x);
4078 break;
4079 }
4080 /* Fall through. */
4081
4082 default:
4083 output_operand_lossage ("Unsupported operand for code '%c'", code);
4084 }
4085 break;
4086
43e9d192
IB
4087 case 'e':
4088 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
4089 {
4090 int n;
4091
4aa81c2e 4092 if (!CONST_INT_P (x)
43e9d192
IB
4093 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
4094 {
4095 output_operand_lossage ("invalid operand for '%%%c'", code);
4096 return;
4097 }
4098
4099 switch (n)
4100 {
4101 case 3:
4102 fputc ('b', f);
4103 break;
4104 case 4:
4105 fputc ('h', f);
4106 break;
4107 case 5:
4108 fputc ('w', f);
4109 break;
4110 default:
4111 output_operand_lossage ("invalid operand for '%%%c'", code);
4112 return;
4113 }
4114 }
4115 break;
4116
4117 case 'p':
4118 {
4119 int n;
4120
4121 /* Print N such that 2^N == X. */
4aa81c2e 4122 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
4123 {
4124 output_operand_lossage ("invalid operand for '%%%c'", code);
4125 return;
4126 }
4127
4128 asm_fprintf (f, "%d", n);
4129 }
4130 break;
4131
4132 case 'P':
4133 /* Print the number of non-zero bits in X (a const_int). */
4aa81c2e 4134 if (!CONST_INT_P (x))
43e9d192
IB
4135 {
4136 output_operand_lossage ("invalid operand for '%%%c'", code);
4137 return;
4138 }
4139
4140 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
4141 break;
4142
4143 case 'H':
4144 /* Print the higher numbered register of a pair (TImode) of regs. */
4aa81c2e 4145 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
4146 {
4147 output_operand_lossage ("invalid operand for '%%%c'", code);
4148 return;
4149 }
4150
01a3a324 4151 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
4152 break;
4153
43e9d192 4154 case 'm':
cd5660ab
KT
4155 {
4156 int cond_code;
4157 /* Print a condition (eq, ne, etc). */
43e9d192 4158
cd5660ab
KT
4159 /* CONST_TRUE_RTX means always -- that's the default. */
4160 if (x == const_true_rtx)
43e9d192 4161 return;
43e9d192 4162
cd5660ab
KT
4163 if (!COMPARISON_P (x))
4164 {
4165 output_operand_lossage ("invalid operand for '%%%c'", code);
4166 return;
4167 }
4168
4169 cond_code = aarch64_get_condition_code (x);
4170 gcc_assert (cond_code >= 0);
4171 fputs (aarch64_condition_codes[cond_code], f);
4172 }
43e9d192
IB
4173 break;
4174
4175 case 'M':
cd5660ab
KT
4176 {
4177 int cond_code;
4178 /* Print the inverse of a condition (eq <-> ne, etc). */
43e9d192 4179
cd5660ab
KT
4180 /* CONST_TRUE_RTX means never -- that's the default. */
4181 if (x == const_true_rtx)
4182 {
4183 fputs ("nv", f);
4184 return;
4185 }
43e9d192 4186
cd5660ab
KT
4187 if (!COMPARISON_P (x))
4188 {
4189 output_operand_lossage ("invalid operand for '%%%c'", code);
4190 return;
4191 }
4192 cond_code = aarch64_get_condition_code (x);
4193 gcc_assert (cond_code >= 0);
4194 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
4195 (cond_code)], f);
4196 }
43e9d192
IB
4197 break;
4198
4199 case 'b':
4200 case 'h':
4201 case 's':
4202 case 'd':
4203 case 'q':
4204 /* Print a scalar FP/SIMD register name. */
4205 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4206 {
4207 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4208 return;
4209 }
50ce6f88 4210 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
4211 break;
4212
4213 case 'S':
4214 case 'T':
4215 case 'U':
4216 case 'V':
4217 /* Print the first FP/SIMD register name in a list. */
4218 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4219 {
4220 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4221 return;
4222 }
50ce6f88 4223 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
4224 break;
4225
2d8c6dc1
AH
4226 case 'R':
4227 /* Print a scalar FP/SIMD register name + 1. */
4228 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4229 {
4230 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4231 return;
4232 }
4233 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
4234 break;
4235
a05c0ddf 4236 case 'X':
50d38551 4237 /* Print bottom 16 bits of integer constant in hex. */
4aa81c2e 4238 if (!CONST_INT_P (x))
a05c0ddf
IB
4239 {
4240 output_operand_lossage ("invalid operand for '%%%c'", code);
4241 return;
4242 }
50d38551 4243 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
4244 break;
4245
43e9d192
IB
4246 case 'w':
4247 case 'x':
4248 /* Print a general register name or the zero register (32-bit or
4249 64-bit). */
3520f7cc
JG
4250 if (x == const0_rtx
4251 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 4252 {
50ce6f88 4253 asm_fprintf (f, "%czr", code);
43e9d192
IB
4254 break;
4255 }
4256
4257 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
4258 {
50ce6f88 4259 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
4260 break;
4261 }
4262
4263 if (REG_P (x) && REGNO (x) == SP_REGNUM)
4264 {
50ce6f88 4265 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
4266 break;
4267 }
4268
4269 /* Fall through */
4270
4271 case 0:
 4272 /* Print a normal operand. If it's a general register, then we
 4273 assume DImode. */
4274 if (x == NULL)
4275 {
4276 output_operand_lossage ("missing operand");
4277 return;
4278 }
4279
4280 switch (GET_CODE (x))
4281 {
4282 case REG:
01a3a324 4283 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
4284 break;
4285
4286 case MEM:
4287 aarch64_memory_reference_mode = GET_MODE (x);
4288 output_address (XEXP (x, 0));
4289 break;
4290
4291 case LABEL_REF:
4292 case SYMBOL_REF:
4293 output_addr_const (asm_out_file, x);
4294 break;
4295
4296 case CONST_INT:
4297 asm_fprintf (f, "%wd", INTVAL (x));
4298 break;
4299
4300 case CONST_VECTOR:
3520f7cc
JG
4301 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
4302 {
ddeabd3e
AL
4303 gcc_assert (
4304 aarch64_const_vec_all_same_in_range_p (x,
4305 HOST_WIDE_INT_MIN,
4306 HOST_WIDE_INT_MAX));
3520f7cc
JG
4307 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
4308 }
4309 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
4310 {
4311 fputc ('0', f);
4312 }
4313 else
4314 gcc_unreachable ();
43e9d192
IB
4315 break;
4316
3520f7cc
JG
4317 case CONST_DOUBLE:
4318 /* CONST_DOUBLE can represent a double-width integer.
4319 In this case, the mode of x is VOIDmode. */
4320 if (GET_MODE (x) == VOIDmode)
4321 ; /* Do Nothing. */
4322 else if (aarch64_float_const_zero_rtx_p (x))
4323 {
4324 fputc ('0', f);
4325 break;
4326 }
4327 else if (aarch64_float_const_representable_p (x))
4328 {
4329#define buf_size 20
4330 char float_buf[buf_size] = {'\0'};
4331 REAL_VALUE_TYPE r;
4332 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4333 real_to_decimal_for_mode (float_buf, &r,
4334 buf_size, buf_size,
4335 1, GET_MODE (x));
4336 asm_fprintf (asm_out_file, "%s", float_buf);
4337 break;
4338#undef buf_size
4339 }
4340 output_operand_lossage ("invalid constant");
4341 return;
43e9d192
IB
4342 default:
4343 output_operand_lossage ("invalid operand");
4344 return;
4345 }
4346 break;
4347
4348 case 'A':
4349 if (GET_CODE (x) == HIGH)
4350 x = XEXP (x, 0);
4351
4352 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4353 {
4354 case SYMBOL_SMALL_GOT:
4355 asm_fprintf (asm_out_file, ":got:");
4356 break;
4357
4358 case SYMBOL_SMALL_TLSGD:
4359 asm_fprintf (asm_out_file, ":tlsgd:");
4360 break;
4361
4362 case SYMBOL_SMALL_TLSDESC:
4363 asm_fprintf (asm_out_file, ":tlsdesc:");
4364 break;
4365
4366 case SYMBOL_SMALL_GOTTPREL:
4367 asm_fprintf (asm_out_file, ":gottprel:");
4368 break;
4369
4370 case SYMBOL_SMALL_TPREL:
4371 asm_fprintf (asm_out_file, ":tprel:");
4372 break;
4373
87dd8ab0
MS
4374 case SYMBOL_TINY_GOT:
4375 gcc_unreachable ();
4376 break;
4377
43e9d192
IB
4378 default:
4379 break;
4380 }
4381 output_addr_const (asm_out_file, x);
4382 break;
4383
4384 case 'L':
4385 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4386 {
4387 case SYMBOL_SMALL_GOT:
4388 asm_fprintf (asm_out_file, ":lo12:");
4389 break;
4390
4391 case SYMBOL_SMALL_TLSGD:
4392 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4393 break;
4394
4395 case SYMBOL_SMALL_TLSDESC:
4396 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4397 break;
4398
4399 case SYMBOL_SMALL_GOTTPREL:
4400 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4401 break;
4402
4403 case SYMBOL_SMALL_TPREL:
4404 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4405 break;
4406
87dd8ab0
MS
4407 case SYMBOL_TINY_GOT:
4408 asm_fprintf (asm_out_file, ":got:");
4409 break;
4410
43e9d192
IB
4411 default:
4412 break;
4413 }
4414 output_addr_const (asm_out_file, x);
4415 break;
4416
4417 case 'G':
4418
4419 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4420 {
4421 case SYMBOL_SMALL_TPREL:
4422 asm_fprintf (asm_out_file, ":tprel_hi12:");
4423 break;
4424 default:
4425 break;
4426 }
4427 output_addr_const (asm_out_file, x);
4428 break;
4429
cf670503
ZC
4430 case 'K':
4431 {
4432 int cond_code;
4433 /* Print nzcv. */
4434
4435 if (!COMPARISON_P (x))
4436 {
4437 output_operand_lossage ("invalid operand for '%%%c'", code);
4438 return;
4439 }
4440
4441 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4442 gcc_assert (cond_code >= 0);
4443 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
4444 }
4445 break;
4446
4447 case 'k':
4448 {
4449 int cond_code;
4450 /* Print nzcv. */
4451
4452 if (!COMPARISON_P (x))
4453 {
4454 output_operand_lossage ("invalid operand for '%%%c'", code);
4455 return;
4456 }
4457
4458 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4459 gcc_assert (cond_code >= 0);
4460 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
4461 }
4462 break;
4463
43e9d192
IB
4464 default:
4465 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4466 return;
4467 }
4468}
4469
4470void
4471aarch64_print_operand_address (FILE *f, rtx x)
4472{
4473 struct aarch64_address_info addr;
4474
4475 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4476 MEM, true))
4477 switch (addr.type)
4478 {
4479 case ADDRESS_REG_IMM:
4480 if (addr.offset == const0_rtx)
01a3a324 4481 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 4482 else
16a3246f 4483 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
4484 INTVAL (addr.offset));
4485 return;
4486
4487 case ADDRESS_REG_REG:
4488 if (addr.shift == 0)
16a3246f 4489 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 4490 reg_names [REGNO (addr.offset)]);
43e9d192 4491 else
16a3246f 4492 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 4493 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
4494 return;
4495
4496 case ADDRESS_REG_UXTW:
4497 if (addr.shift == 0)
16a3246f 4498 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4499 REGNO (addr.offset) - R0_REGNUM);
4500 else
16a3246f 4501 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4502 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4503 return;
4504
4505 case ADDRESS_REG_SXTW:
4506 if (addr.shift == 0)
16a3246f 4507 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4508 REGNO (addr.offset) - R0_REGNUM);
4509 else
16a3246f 4510 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4511 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4512 return;
4513
4514 case ADDRESS_REG_WB:
4515 switch (GET_CODE (x))
4516 {
4517 case PRE_INC:
16a3246f 4518 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4519 GET_MODE_SIZE (aarch64_memory_reference_mode));
4520 return;
4521 case POST_INC:
16a3246f 4522 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
43e9d192
IB
4523 GET_MODE_SIZE (aarch64_memory_reference_mode));
4524 return;
4525 case PRE_DEC:
16a3246f 4526 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4527 GET_MODE_SIZE (aarch64_memory_reference_mode));
4528 return;
4529 case POST_DEC:
16a3246f 4530 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
43e9d192
IB
4531 GET_MODE_SIZE (aarch64_memory_reference_mode));
4532 return;
4533 case PRE_MODIFY:
16a3246f 4534 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4535 INTVAL (addr.offset));
4536 return;
4537 case POST_MODIFY:
16a3246f 4538 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
4539 INTVAL (addr.offset));
4540 return;
4541 default:
4542 break;
4543 }
4544 break;
4545
4546 case ADDRESS_LO_SUM:
16a3246f 4547 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
4548 output_addr_const (f, addr.offset);
4549 asm_fprintf (f, "]");
4550 return;
4551
4552 case ADDRESS_SYMBOLIC:
4553 break;
4554 }
4555
4556 output_addr_const (f, x);
4557}
4558
43e9d192
IB
4559bool
4560aarch64_label_mentioned_p (rtx x)
4561{
4562 const char *fmt;
4563 int i;
4564
4565 if (GET_CODE (x) == LABEL_REF)
4566 return true;
4567
4568 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4569 referencing instruction, but they are constant offsets, not
4570 symbols. */
4571 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4572 return false;
4573
4574 fmt = GET_RTX_FORMAT (GET_CODE (x));
4575 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4576 {
4577 if (fmt[i] == 'E')
4578 {
4579 int j;
4580
4581 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4582 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4583 return 1;
4584 }
4585 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4586 return 1;
4587 }
4588
4589 return 0;
4590}
4591
4592/* Implement REGNO_REG_CLASS. */
4593
4594enum reg_class
4595aarch64_regno_regclass (unsigned regno)
4596{
4597 if (GP_REGNUM_P (regno))
a4a182c6 4598 return GENERAL_REGS;
43e9d192
IB
4599
4600 if (regno == SP_REGNUM)
4601 return STACK_REG;
4602
4603 if (regno == FRAME_POINTER_REGNUM
4604 || regno == ARG_POINTER_REGNUM)
f24bb080 4605 return POINTER_REGS;
43e9d192
IB
4606
4607 if (FP_REGNUM_P (regno))
4608 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4609
4610 return NO_REGS;
4611}
4612
0c4ec427 4613static rtx
ef4bddc2 4614aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
4615{
4616 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4617 where mask is selected by alignment and size of the offset.
4618 We try to pick as large a range for the offset as possible to
4619 maximize the chance of a CSE. However, for aligned addresses
 4620 we limit the range to 4k so that structures with differently sized
4621 elements are likely to use the same base. */
4622
4623 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
4624 {
4625 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
4626 HOST_WIDE_INT base_offset;
4627
4628 /* Does it look like we'll need a load/store-pair operation? */
4629 if (GET_MODE_SIZE (mode) > 16
4630 || mode == TImode)
4631 base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
4632 & ~((128 * GET_MODE_SIZE (mode)) - 1));
 4633 /* For offsets that aren't a multiple of the access size, the limit is
 4634 -256...255. */
4635 else if (offset & (GET_MODE_SIZE (mode) - 1))
4636 base_offset = (offset + 0x100) & ~0x1ff;
4637 else
4638 base_offset = offset & ~0xfff;
4639
4640 if (base_offset == 0)
4641 return x;
4642
4643 offset -= base_offset;
4644 rtx base_reg = gen_reg_rtx (Pmode);
4645 rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
4646 NULL_RTX);
4647 emit_move_insn (base_reg, val);
4648 x = plus_constant (Pmode, base_reg, offset);
4649 }
4650
4651 return x;
4652}
4653
43e9d192
IB
4654/* Try a machine-dependent way of reloading an illegitimate address
4655 operand. If we find one, push the reload and return the new rtx. */
4656
4657rtx
4658aarch64_legitimize_reload_address (rtx *x_p,
ef4bddc2 4659 machine_mode mode,
43e9d192
IB
4660 int opnum, int type,
4661 int ind_levels ATTRIBUTE_UNUSED)
4662{
4663 rtx x = *x_p;
4664
348d4b0a
BC
4665 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4666 if (aarch64_vect_struct_mode_p (mode)
43e9d192
IB
4667 && GET_CODE (x) == PLUS
4668 && REG_P (XEXP (x, 0))
4669 && CONST_INT_P (XEXP (x, 1)))
4670 {
4671 rtx orig_rtx = x;
4672 x = copy_rtx (x);
4673 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4674 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4675 opnum, (enum reload_type) type);
4676 return x;
4677 }
4678
4679 /* We must recognize output that we have already generated ourselves. */
4680 if (GET_CODE (x) == PLUS
4681 && GET_CODE (XEXP (x, 0)) == PLUS
4682 && REG_P (XEXP (XEXP (x, 0), 0))
4683 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4684 && CONST_INT_P (XEXP (x, 1)))
4685 {
4686 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4687 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4688 opnum, (enum reload_type) type);
4689 return x;
4690 }
4691
4692 /* We wish to handle large displacements off a base register by splitting
4693 the addend across an add and the mem insn. This can cut the number of
4694 extra insns needed from 3 to 1. It is only useful for load/store of a
 4695 single register with a 12-bit offset field. */
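  /* For example, assuming a DImode access at base + 0x13008: the addend is
     split into high = 0x13000 (which should be a valid shifted 12-bit
     immediate and is reloaded into a scratch base register by the add) and
     low = 0x8, which stays as the offset in the memory reference.  */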
4696 if (GET_CODE (x) == PLUS
4697 && REG_P (XEXP (x, 0))
4698 && CONST_INT_P (XEXP (x, 1))
4699 && HARD_REGISTER_P (XEXP (x, 0))
4700 && mode != TImode
4701 && mode != TFmode
4702 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4703 {
4704 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4705 HOST_WIDE_INT low = val & 0xfff;
4706 HOST_WIDE_INT high = val - low;
4707 HOST_WIDE_INT offs;
4708 rtx cst;
ef4bddc2 4709 machine_mode xmode = GET_MODE (x);
28514dda
YZ
4710
4711 /* In ILP32, xmode can be either DImode or SImode. */
4712 gcc_assert (xmode == DImode || xmode == SImode);
43e9d192
IB
4713
4714 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4715 BLKmode alignment. */
4716 if (GET_MODE_SIZE (mode) == 0)
4717 return NULL_RTX;
4718
4719 offs = low % GET_MODE_SIZE (mode);
4720
4721 /* Align misaligned offset by adjusting high part to compensate. */
4722 if (offs != 0)
4723 {
4724 if (aarch64_uimm12_shift (high + offs))
4725 {
4726 /* Align down. */
4727 low = low - offs;
4728 high = high + offs;
4729 }
4730 else
4731 {
4732 /* Align up. */
4733 offs = GET_MODE_SIZE (mode) - offs;
4734 low = low + offs;
4735 high = high + (low & 0x1000) - offs;
4736 low &= 0xfff;
4737 }
4738 }
4739
4740 /* Check for overflow. */
4741 if (high + low != val)
4742 return NULL_RTX;
4743
4744 cst = GEN_INT (high);
4745 if (!aarch64_uimm12_shift (high))
28514dda 4746 cst = force_const_mem (xmode, cst);
43e9d192
IB
4747
4748 /* Reload high part into base reg, leaving the low part
acf67b8e
KT
4749 in the mem instruction.
4750 Note that replacing this gen_rtx_PLUS with plus_constant is
4751 wrong in this case because we rely on the
4752 (plus (plus reg c1) c2) structure being preserved so that
4753 XEXP (*p, 0) in push_reload below uses the correct term. */
4d0e5b54
JG
4754 x = gen_rtx_PLUS (xmode,
4755 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4756 GEN_INT (low));
43e9d192
IB
4757
4758 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
28514dda 4759 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
43e9d192
IB
4760 opnum, (enum reload_type) type);
4761 return x;
4762 }
4763
4764 return NULL_RTX;
4765}
4766
4767
4768static reg_class_t
4769aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4770 reg_class_t rclass,
ef4bddc2 4771 machine_mode mode,
43e9d192
IB
4772 secondary_reload_info *sri)
4773{
43e9d192
IB
4774 /* Without the TARGET_SIMD instructions we cannot move a Q register
4775 to a Q register directly. We need a scratch. */
4776 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4777 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4778 && reg_class_subset_p (rclass, FP_REGS))
4779 {
4780 if (mode == TFmode)
4781 sri->icode = CODE_FOR_aarch64_reload_movtf;
4782 else if (mode == TImode)
4783 sri->icode = CODE_FOR_aarch64_reload_movti;
4784 return NO_REGS;
4785 }
4786
 4787 /* A TFmode or TImode memory access should be handled via FP_REGS
4788 because AArch64 has richer addressing modes for LDR/STR instructions
4789 than LDP/STP instructions. */
a4a182c6 4790 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
43e9d192
IB
4791 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4792 return FP_REGS;
4793
4794 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 4795 return GENERAL_REGS;
43e9d192
IB
4796
4797 return NO_REGS;
4798}
4799
4800static bool
4801aarch64_can_eliminate (const int from, const int to)
4802{
4803 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4804 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4805
4806 if (frame_pointer_needed)
4807 {
4808 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4809 return true;
4810 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4811 return false;
4812 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4813 && !cfun->calls_alloca)
4814 return true;
4815 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4816 return true;
0b7f8166
MS
4817
4818 return false;
43e9d192 4819 }
1c923b60
JW
4820 else
4821 {
4822 /* If we decided that we didn't need a leaf frame pointer but then used
4823 LR in the function, then we'll want a frame pointer after all, so
4824 prevent this elimination to ensure a frame pointer is used. */
4825 if (to == STACK_POINTER_REGNUM
4826 && flag_omit_leaf_frame_pointer
4827 && df_regs_ever_live_p (LR_REGNUM))
4828 return false;
4829 }
777e6976 4830
43e9d192
IB
4831 return true;
4832}
4833
4834HOST_WIDE_INT
4835aarch64_initial_elimination_offset (unsigned from, unsigned to)
4836{
43e9d192 4837 aarch64_layout_frame ();
78c29983
MS
4838
4839 if (to == HARD_FRAME_POINTER_REGNUM)
4840 {
4841 if (from == ARG_POINTER_REGNUM)
1c960e02 4842 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
78c29983
MS
4843
4844 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
4845 return (cfun->machine->frame.hard_fp_offset
4846 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
4847 }
4848
4849 if (to == STACK_POINTER_REGNUM)
4850 {
4851 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
4852 return (cfun->machine->frame.frame_size
4853 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
4854 }
4855
1c960e02 4856 return cfun->machine->frame.frame_size;
43e9d192
IB
4857}
4858
43e9d192
IB
4859/* Implement RETURN_ADDR_RTX. We do not support moving back to a
4860 previous frame. */
4861
4862rtx
4863aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4864{
4865 if (count != 0)
4866 return const0_rtx;
4867 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4868}
4869
4870
4871static void
4872aarch64_asm_trampoline_template (FILE *f)
4873{
28514dda
YZ
4874 if (TARGET_ILP32)
4875 {
4876 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4877 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4878 }
4879 else
4880 {
4881 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4882 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4883 }
01a3a324 4884 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 4885 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
4886 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4887 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
4888}
4889
4890static void
4891aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4892{
4893 rtx fnaddr, mem, a_tramp;
28514dda 4894 const int tramp_code_sz = 16;
43e9d192
IB
4895
 4896 /* Don't need to copy the trailing D-words; we fill those in below. */
4897 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
4898 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4899 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 4900 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
4901 if (GET_MODE (fnaddr) != ptr_mode)
4902 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
4903 emit_move_insn (mem, fnaddr);
4904
28514dda 4905 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
4906 emit_move_insn (mem, chain_value);
4907
4908 /* XXX We should really define a "clear_cache" pattern and use
4909 gen_clear_cache(). */
4910 a_tramp = XEXP (m_tramp, 0);
4911 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
4912 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4913 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4914 ptr_mode);
43e9d192
IB
4915}
4916
4917static unsigned char
ef4bddc2 4918aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192
IB
4919{
4920 switch (regclass)
4921 {
fee9ba42 4922 case CALLER_SAVE_REGS:
43e9d192
IB
4923 case POINTER_REGS:
4924 case GENERAL_REGS:
4925 case ALL_REGS:
4926 case FP_REGS:
4927 case FP_LO_REGS:
4928 return
4929 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
aef66c94 4930 (GET_MODE_SIZE (mode) + 7) / 8;
43e9d192
IB
4931 case STACK_REG:
4932 return 1;
4933
4934 case NO_REGS:
4935 return 0;
4936
4937 default:
4938 break;
4939 }
4940 gcc_unreachable ();
4941}
4942
4943static reg_class_t
78d8b9f0 4944aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 4945{
51bb310d 4946 if (regclass == POINTER_REGS)
78d8b9f0
IB
4947 return GENERAL_REGS;
4948
51bb310d
MS
4949 if (regclass == STACK_REG)
4950 {
4951 if (REG_P(x)
4952 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4953 return regclass;
4954
4955 return NO_REGS;
4956 }
4957
78d8b9f0
IB
4958 /* If it's an integer immediate that MOVI can't handle, then
4959 FP_REGS is not an option, so we return NO_REGS instead. */
4960 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4961 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4962 return NO_REGS;
4963
27bd251b
IB
 4964 /* Register elimination can result in a request for
 4965 SP+constant->FP_REGS. We cannot support such operations, which
 4966 use SP as the source and an FP_REG as the destination, so reject
 4967 them right now. */
4968 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4969 {
4970 rtx lhs = XEXP (x, 0);
4971
4972 /* Look through a possible SUBREG introduced by ILP32. */
4973 if (GET_CODE (lhs) == SUBREG)
4974 lhs = SUBREG_REG (lhs);
4975
4976 gcc_assert (REG_P (lhs));
4977 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4978 POINTER_REGS));
4979 return NO_REGS;
4980 }
4981
78d8b9f0 4982 return regclass;
43e9d192
IB
4983}
4984
4985void
4986aarch64_asm_output_labelref (FILE* f, const char *name)
4987{
4988 asm_fprintf (f, "%U%s", name);
4989}
4990
4991static void
4992aarch64_elf_asm_constructor (rtx symbol, int priority)
4993{
4994 if (priority == DEFAULT_INIT_PRIORITY)
4995 default_ctor_section_asm_out_constructor (symbol, priority);
4996 else
4997 {
4998 section *s;
4999 char buf[18];
5000 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
5001 s = get_section (buf, SECTION_WRITE, NULL);
5002 switch_to_section (s);
5003 assemble_align (POINTER_SIZE);
28514dda 5004 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
5005 }
5006}
5007
5008static void
5009aarch64_elf_asm_destructor (rtx symbol, int priority)
5010{
5011 if (priority == DEFAULT_INIT_PRIORITY)
5012 default_dtor_section_asm_out_destructor (symbol, priority);
5013 else
5014 {
5015 section *s;
5016 char buf[18];
5017 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
5018 s = get_section (buf, SECTION_WRITE, NULL);
5019 switch_to_section (s);
5020 assemble_align (POINTER_SIZE);
28514dda 5021 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
5022 }
5023}
5024
5025const char*
5026aarch64_output_casesi (rtx *operands)
5027{
5028 char buf[100];
5029 char label[100];
b32d5189 5030 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
5031 int index;
5032 static const char *const patterns[4][2] =
5033 {
5034 {
5035 "ldrb\t%w3, [%0,%w1,uxtw]",
5036 "add\t%3, %4, %w3, sxtb #2"
5037 },
5038 {
5039 "ldrh\t%w3, [%0,%w1,uxtw #1]",
5040 "add\t%3, %4, %w3, sxth #2"
5041 },
5042 {
5043 "ldr\t%w3, [%0,%w1,uxtw #2]",
5044 "add\t%3, %4, %w3, sxtw #2"
5045 },
5046 /* We assume that DImode is only generated when not optimizing and
5047 that we don't really need 64-bit address offsets. That would
5048 imply an object file with 8GB of code in a single function! */
5049 {
5050 "ldr\t%w3, [%0,%w1,uxtw #2]",
5051 "add\t%3, %4, %w3, sxtw #2"
5052 }
5053 };
5054
5055 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
5056
5057 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
5058
5059 gcc_assert (index >= 0 && index <= 3);
5060
 5061 /* Need to implement table size reduction, by changing the code below. */
5062 output_asm_insn (patterns[index][0], operands);
5063 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
5064 snprintf (buf, sizeof (buf),
5065 "adr\t%%4, %s", targetm.strip_name_encoding (label));
5066 output_asm_insn (buf, operands);
5067 output_asm_insn (patterns[index][1], operands);
5068 output_asm_insn ("br\t%3", operands);
5069 assemble_label (asm_out_file, label);
5070 return "";
5071}
5072
5073
5074/* Return size in bits of an arithmetic operand which is shifted/scaled and
5075 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
5076 operator. */
5077
5078int
5079aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
5080{
5081 if (shift >= 0 && shift <= 3)
5082 {
5083 int size;
5084 for (size = 8; size <= 32; size *= 2)
5085 {
5086 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
5087 if (mask == bits << shift)
5088 return size;
5089 }
5090 }
5091 return 0;
5092}
5093
5094static bool
ef4bddc2 5095aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
5096 const_rtx x ATTRIBUTE_UNUSED)
5097{
5098 /* We can't use blocks for constants when we're using a per-function
5099 constant pool. */
5100 return false;
5101}
5102
5103static section *
ef4bddc2 5104aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
5105 rtx x ATTRIBUTE_UNUSED,
5106 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
5107{
5108 /* Force all constant pool entries into the current function section. */
5109 return function_section (current_function_decl);
5110}
5111
5112
5113/* Costs. */
5114
5115/* Helper function for rtx cost calculation. Strip a shift expression
5116 from X. Returns the inner operand if successful, or the original
5117 expression on failure. */
5118static rtx
5119aarch64_strip_shift (rtx x)
5120{
5121 rtx op = x;
5122
57b77d46
RE
5123 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
5124 we can convert both to ROR during final output. */
43e9d192
IB
5125 if ((GET_CODE (op) == ASHIFT
5126 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
5127 || GET_CODE (op) == LSHIFTRT
5128 || GET_CODE (op) == ROTATERT
5129 || GET_CODE (op) == ROTATE)
43e9d192
IB
5130 && CONST_INT_P (XEXP (op, 1)))
5131 return XEXP (op, 0);
5132
5133 if (GET_CODE (op) == MULT
5134 && CONST_INT_P (XEXP (op, 1))
5135 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
5136 return XEXP (op, 0);
5137
5138 return x;
5139}
5140
4745e701 5141/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
5142 expression from X. Returns the inner operand if successful, or the
5143 original expression on failure. We deal with a number of possible
5144 canonicalization variations here. */
5145static rtx
4745e701 5146aarch64_strip_extend (rtx x)
43e9d192
IB
5147{
5148 rtx op = x;
5149
5150 /* Zero and sign extraction of a widened value. */
5151 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
5152 && XEXP (op, 2) == const0_rtx
4745e701 5153 && GET_CODE (XEXP (op, 0)) == MULT
43e9d192
IB
5154 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
5155 XEXP (op, 1)))
5156 return XEXP (XEXP (op, 0), 0);
5157
5158 /* It can also be represented (for zero-extend) as an AND with an
5159 immediate. */
5160 if (GET_CODE (op) == AND
5161 && GET_CODE (XEXP (op, 0)) == MULT
5162 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
5163 && CONST_INT_P (XEXP (op, 1))
5164 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
5165 INTVAL (XEXP (op, 1))) != 0)
5166 return XEXP (XEXP (op, 0), 0);
5167
5168 /* Now handle extended register, as this may also have an optional
5169 left shift by 1..4. */
5170 if (GET_CODE (op) == ASHIFT
5171 && CONST_INT_P (XEXP (op, 1))
5172 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
5173 op = XEXP (op, 0);
5174
5175 if (GET_CODE (op) == ZERO_EXTEND
5176 || GET_CODE (op) == SIGN_EXTEND)
5177 op = XEXP (op, 0);
5178
5179 if (op != x)
5180 return op;
5181
4745e701
JG
5182 return x;
5183}
5184
0a78ebe4
KT
5185/* Return true iff CODE is a shift supported in combination
5186 with arithmetic instructions. */
4d1919ed 5187
0a78ebe4
KT
5188static bool
5189aarch64_shift_p (enum rtx_code code)
5190{
5191 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
5192}
5193
4745e701 5194/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
5195 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
5196 Return the calculated cost of the expression, recursing manually in to
4745e701
JG
5197 operands where needed. */
5198
5199static int
5200aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
5201{
5202 rtx op0, op1;
5203 const struct cpu_cost_table *extra_cost
5204 = aarch64_tune_params->insn_extra_cost;
5205 int cost = 0;
0a78ebe4 5206 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 5207 machine_mode mode = GET_MODE (x);
4745e701
JG
5208
5209 gcc_checking_assert (code == MULT);
5210
5211 op0 = XEXP (x, 0);
5212 op1 = XEXP (x, 1);
5213
5214 if (VECTOR_MODE_P (mode))
5215 mode = GET_MODE_INNER (mode);
5216
5217 /* Integer multiply/fma. */
5218 if (GET_MODE_CLASS (mode) == MODE_INT)
5219 {
5220 /* The multiply will be canonicalized as a shift, cost it as such. */
0a78ebe4
KT
5221 if (aarch64_shift_p (GET_CODE (x))
5222 || (CONST_INT_P (op1)
5223 && exact_log2 (INTVAL (op1)) > 0))
4745e701 5224 {
0a78ebe4
KT
5225 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
5226 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
5227 if (speed)
5228 {
0a78ebe4
KT
5229 if (compound_p)
5230 {
5231 if (REG_P (op1))
5232 /* ARITH + shift-by-register. */
5233 cost += extra_cost->alu.arith_shift_reg;
5234 else if (is_extend)
5235 /* ARITH + extended register. We don't have a cost field
5236 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
5237 cost += extra_cost->alu.extend_arith;
5238 else
5239 /* ARITH + shift-by-immediate. */
5240 cost += extra_cost->alu.arith_shift;
5241 }
4745e701
JG
5242 else
5243 /* LSL (immediate). */
0a78ebe4
KT
5244 cost += extra_cost->alu.shift;
5245
4745e701 5246 }
0a78ebe4
KT
5247 /* Strip extends as we will have costed them in the case above. */
5248 if (is_extend)
5249 op0 = aarch64_strip_extend (op0);
4745e701
JG
5250
5251 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
5252
5253 return cost;
5254 }
5255
d2ac256b
KT
5256 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
5257 compound and let the below cases handle it. After all, MNEG is a
5258 special-case alias of MSUB. */
5259 if (GET_CODE (op0) == NEG)
5260 {
5261 op0 = XEXP (op0, 0);
5262 compound_p = true;
5263 }
5264
4745e701
JG
5265 /* Integer multiplies or FMAs have zero/sign extending variants. */
5266 if ((GET_CODE (op0) == ZERO_EXTEND
5267 && GET_CODE (op1) == ZERO_EXTEND)
5268 || (GET_CODE (op0) == SIGN_EXTEND
5269 && GET_CODE (op1) == SIGN_EXTEND))
5270 {
5271 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
5272 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
5273
5274 if (speed)
5275 {
0a78ebe4 5276 if (compound_p)
d2ac256b 5277 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
5278 cost += extra_cost->mult[0].extend_add;
5279 else
5280 /* MUL/SMULL/UMULL. */
5281 cost += extra_cost->mult[0].extend;
5282 }
5283
5284 return cost;
5285 }
5286
d2ac256b 5287 /* This is either an integer multiply or a MADD. In both cases
4745e701
JG
5288 we want to recurse and cost the operands. */
5289 cost += rtx_cost (op0, MULT, 0, speed)
5290 + rtx_cost (op1, MULT, 1, speed);
5291
5292 if (speed)
5293 {
0a78ebe4 5294 if (compound_p)
d2ac256b 5295 /* MADD/MSUB. */
4745e701
JG
5296 cost += extra_cost->mult[mode == DImode].add;
5297 else
5298 /* MUL. */
5299 cost += extra_cost->mult[mode == DImode].simple;
5300 }
5301
5302 return cost;
5303 }
5304 else
5305 {
5306 if (speed)
5307 {
3d840f7d 5308 /* Floating-point FMA/FMUL can also support negations of the
4745e701
JG
5309 operands. */
5310 if (GET_CODE (op0) == NEG)
3d840f7d 5311 op0 = XEXP (op0, 0);
4745e701 5312 if (GET_CODE (op1) == NEG)
3d840f7d 5313 op1 = XEXP (op1, 0);
4745e701 5314
0a78ebe4 5315 if (compound_p)
4745e701
JG
5316 /* FMADD/FNMADD/FNMSUB/FMSUB. */
5317 cost += extra_cost->fp[mode == DFmode].fma;
5318 else
3d840f7d 5319 /* FMUL/FNMUL. */
4745e701
JG
5320 cost += extra_cost->fp[mode == DFmode].mult;
5321 }
5322
5323 cost += rtx_cost (op0, MULT, 0, speed)
5324 + rtx_cost (op1, MULT, 1, speed);
5325 return cost;
5326 }
43e9d192
IB
5327}
5328
67747367
JG
5329static int
5330aarch64_address_cost (rtx x,
ef4bddc2 5331 machine_mode mode,
67747367
JG
5332 addr_space_t as ATTRIBUTE_UNUSED,
5333 bool speed)
5334{
5335 enum rtx_code c = GET_CODE (x);
5336 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
5337 struct aarch64_address_info info;
5338 int cost = 0;
5339 info.shift = 0;
5340
5341 if (!aarch64_classify_address (&info, x, mode, c, false))
5342 {
5343 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
5344 {
5345 /* This is a CONST or SYMBOL ref which will be split
5346 in a different way depending on the code model in use.
5347 Cost it through the generic infrastructure. */
5348 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
5349 /* Divide through by the cost of one instruction to
5350 bring it to the same units as the address costs. */
5351 cost_symbol_ref /= COSTS_N_INSNS (1);
5352 /* The cost is then the cost of preparing the address,
5353 followed by an immediate (possibly 0) offset. */
5354 return cost_symbol_ref + addr_cost->imm_offset;
5355 }
5356 else
5357 {
5358 /* This is most likely a jump table from a case
5359 statement. */
5360 return addr_cost->register_offset;
5361 }
5362 }
5363
5364 switch (info.type)
5365 {
5366 case ADDRESS_LO_SUM:
5367 case ADDRESS_SYMBOLIC:
5368 case ADDRESS_REG_IMM:
5369 cost += addr_cost->imm_offset;
5370 break;
5371
5372 case ADDRESS_REG_WB:
5373 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
5374 cost += addr_cost->pre_modify;
5375 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
5376 cost += addr_cost->post_modify;
5377 else
5378 gcc_unreachable ();
5379
5380 break;
5381
5382 case ADDRESS_REG_REG:
5383 cost += addr_cost->register_offset;
5384 break;
5385
5386 case ADDRESS_REG_UXTW:
5387 case ADDRESS_REG_SXTW:
5388 cost += addr_cost->register_extend;
5389 break;
5390
5391 default:
5392 gcc_unreachable ();
5393 }
5394
5395
5396 if (info.shift > 0)
5397 {
5398 /* For the sake of calculating the cost of the shifted register
5399 component, we can treat same sized modes in the same way. */
5400 switch (GET_MODE_BITSIZE (mode))
5401 {
5402 case 16:
5403 cost += addr_cost->addr_scale_costs.hi;
5404 break;
5405
5406 case 32:
5407 cost += addr_cost->addr_scale_costs.si;
5408 break;
5409
5410 case 64:
5411 cost += addr_cost->addr_scale_costs.di;
5412 break;
5413
5414 /* We can't tell, or this is a 128-bit vector. */
5415 default:
5416 cost += addr_cost->addr_scale_costs.ti;
5417 break;
5418 }
5419 }
5420
5421 return cost;
5422}
5423
b9066f5a
MW
5424/* Return the cost of a branch. If SPEED_P is true then the compiler is
5425 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
5426 to be taken. */
5427
5428int
5429aarch64_branch_cost (bool speed_p, bool predictable_p)
5430{
5431 /* When optimizing for speed, use the cost of unpredictable branches. */
5432 const struct cpu_branch_cost *branch_costs =
5433 aarch64_tune_params->branch_costs;
5434
5435 if (!speed_p || predictable_p)
5436 return branch_costs->predictable;
5437 else
5438 return branch_costs->unpredictable;
5439}
5440
7cc2145f
JG
5441/* Return true if the RTX X in mode MODE is a zero or sign extract
5442 usable in an ADD or SUB (extended register) instruction. */
5443static bool
ef4bddc2 5444aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
7cc2145f
JG
5445{
5446 /* Catch add with a sign extract.
5447 This is add_<optab><mode>_multp2. */
5448 if (GET_CODE (x) == SIGN_EXTRACT
5449 || GET_CODE (x) == ZERO_EXTRACT)
5450 {
5451 rtx op0 = XEXP (x, 0);
5452 rtx op1 = XEXP (x, 1);
5453 rtx op2 = XEXP (x, 2);
5454
5455 if (GET_CODE (op0) == MULT
5456 && CONST_INT_P (op1)
5457 && op2 == const0_rtx
5458 && CONST_INT_P (XEXP (op0, 1))
5459 && aarch64_is_extend_from_extract (mode,
5460 XEXP (op0, 1),
5461 op1))
5462 {
5463 return true;
5464 }
5465 }
5466
5467 return false;
5468}
5469
61263118
KT
5470static bool
5471aarch64_frint_unspec_p (unsigned int u)
5472{
5473 switch (u)
5474 {
5475 case UNSPEC_FRINTZ:
5476 case UNSPEC_FRINTP:
5477 case UNSPEC_FRINTM:
5478 case UNSPEC_FRINTA:
5479 case UNSPEC_FRINTN:
5480 case UNSPEC_FRINTX:
5481 case UNSPEC_FRINTI:
5482 return true;
5483
5484 default:
5485 return false;
5486 }
5487}
5488
fb0cb7fa
KT
5489/* Return true iff X is an rtx that will match an extr instruction
5490 i.e. as described in the *extr<mode>5_insn family of patterns.
5491 OP0 and OP1 will be set to the operands of the shifts involved
5492 on success and will be NULL_RTX otherwise. */
5493
5494static bool
5495aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
5496{
5497 rtx op0, op1;
5498 machine_mode mode = GET_MODE (x);
5499
5500 *res_op0 = NULL_RTX;
5501 *res_op1 = NULL_RTX;
5502
5503 if (GET_CODE (x) != IOR)
5504 return false;
5505
5506 op0 = XEXP (x, 0);
5507 op1 = XEXP (x, 1);
5508
5509 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
5510 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
5511 {
5512 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
5513 if (GET_CODE (op1) == ASHIFT)
5514 std::swap (op0, op1);
5515
5516 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
5517 return false;
5518
5519 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
5520 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
5521
5522 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
5523 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
5524 {
5525 *res_op0 = XEXP (op0, 0);
5526 *res_op1 = XEXP (op1, 0);
5527 return true;
5528 }
5529 }
5530
5531 return false;
5532}
5533
2d5ffe46
AP
5534/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5535 storing it in *COST. Result is true if the total cost of the operation
5536 has now been calculated. */
5537static bool
5538aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
5539{
b9e3afe9
AP
5540 rtx inner;
5541 rtx comparator;
5542 enum rtx_code cmpcode;
5543
5544 if (COMPARISON_P (op0))
5545 {
5546 inner = XEXP (op0, 0);
5547 comparator = XEXP (op0, 1);
5548 cmpcode = GET_CODE (op0);
5549 }
5550 else
5551 {
5552 inner = op0;
5553 comparator = const0_rtx;
5554 cmpcode = NE;
5555 }
5556
2d5ffe46
AP
5557 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5558 {
5559 /* Conditional branch. */
b9e3afe9 5560 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
5561 return true;
5562 else
5563 {
b9e3afe9 5564 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 5565 {
2d5ffe46
AP
5566 if (comparator == const0_rtx)
5567 {
5568 /* TBZ/TBNZ/CBZ/CBNZ. */
5569 if (GET_CODE (inner) == ZERO_EXTRACT)
5570 /* TBZ/TBNZ. */
5571 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
5572 0, speed);
5573 else
5574 /* CBZ/CBNZ. */
b9e3afe9 5575 *cost += rtx_cost (inner, cmpcode, 0, speed);
2d5ffe46
AP
5576
5577 return true;
5578 }
5579 }
b9e3afe9 5580 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 5581 {
2d5ffe46
AP
5582 /* TBZ/TBNZ. */
5583 if (comparator == const0_rtx)
5584 return true;
5585 }
5586 }
5587 }
b9e3afe9 5588 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
5589 {
5590 /* It's a conditional operation based on the status flags,
5591 so it must be some flavor of CSEL. */
5592
5593 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5594 if (GET_CODE (op1) == NEG
5595 || GET_CODE (op1) == NOT
5596 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5597 op1 = XEXP (op1, 0);
5598
5599 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
5600 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
5601 return true;
5602 }
5603
 5604 /* We don't know what this is; cost all operands. */
5605 return false;
5606}
5607
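/* Illustrative examples for the branch costing above: a comparison
   against zero such as
     (if_then_else (ne (reg:SI x) (const_int 0)) (label_ref ...) (pc))
   is costed as a CBZ/CBNZ (only the register operand is costed), while
   an LT/GE comparison against zero is treated as a TBZ/TBNZ on the
   sign bit and adds no extra cost.  */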
43e9d192
IB
5608/* Calculate the cost of calculating X, storing it in *COST. Result
5609 is true if the total cost of the operation has now been calculated. */
5610static bool
5611aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
5612 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5613{
a8eecd00 5614 rtx op0, op1, op2;
73250c4c 5615 const struct cpu_cost_table *extra_cost
43e9d192 5616 = aarch64_tune_params->insn_extra_cost;
ef4bddc2 5617 machine_mode mode = GET_MODE (x);
43e9d192 5618
7fc5ef02
JG
5619 /* By default, assume that everything has equivalent cost to the
5620 cheapest instruction. Any additional costs are applied as a delta
5621 above this default. */
5622 *cost = COSTS_N_INSNS (1);
5623
5624 /* TODO: The cost infrastructure currently does not handle
5625 vector operations. Assume that all vector operations
5626 are equally expensive. */
5627 if (VECTOR_MODE_P (mode))
5628 {
5629 if (speed)
5630 *cost += extra_cost->vect.alu;
5631 return true;
5632 }
5633
43e9d192
IB
5634 switch (code)
5635 {
5636 case SET:
ba123b0d
JG
5637 /* The cost depends entirely on the operands to SET. */
5638 *cost = 0;
43e9d192
IB
5639 op0 = SET_DEST (x);
5640 op1 = SET_SRC (x);
5641
5642 switch (GET_CODE (op0))
5643 {
5644 case MEM:
5645 if (speed)
2961177e
JG
5646 {
5647 rtx address = XEXP (op0, 0);
5648 if (GET_MODE_CLASS (mode) == MODE_INT)
5649 *cost += extra_cost->ldst.store;
5650 else if (mode == SFmode)
5651 *cost += extra_cost->ldst.storef;
5652 else if (mode == DFmode)
5653 *cost += extra_cost->ldst.stored;
5654
5655 *cost +=
5656 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5657 0, speed));
5658 }
43e9d192 5659
ba123b0d 5660 *cost += rtx_cost (op1, SET, 1, speed);
43e9d192
IB
5661 return true;
5662
5663 case SUBREG:
5664 if (! REG_P (SUBREG_REG (op0)))
5665 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
ba123b0d 5666
43e9d192
IB
5667 /* Fall through. */
5668 case REG:
ba123b0d
JG
5669 /* const0_rtx is in general free, but we will use an
5670 instruction to set a register to 0. */
5671 if (REG_P (op1) || op1 == const0_rtx)
5672 {
5673 /* The cost is 1 per register copied. */
5674 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5675 / UNITS_PER_WORD;
5676 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5677 }
5678 else
5679 /* Cost is just the cost of the RHS of the set. */
5680 *cost += rtx_cost (op1, SET, 1, speed);
43e9d192
IB
5681 return true;
5682
ba123b0d 5683 case ZERO_EXTRACT:
43e9d192 5684 case SIGN_EXTRACT:
ba123b0d
JG
5685 /* Bit-field insertion. Strip any redundant widening of
5686 the RHS to meet the width of the target. */
43e9d192
IB
5687 if (GET_CODE (op1) == SUBREG)
5688 op1 = SUBREG_REG (op1);
5689 if ((GET_CODE (op1) == ZERO_EXTEND
5690 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 5691 && CONST_INT_P (XEXP (op0, 1))
43e9d192
IB
5692 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5693 >= INTVAL (XEXP (op0, 1))))
5694 op1 = XEXP (op1, 0);
ba123b0d
JG
5695
5696 if (CONST_INT_P (op1))
5697 {
5698 /* MOV immediate is assumed to always be cheap. */
5699 *cost = COSTS_N_INSNS (1);
5700 }
5701 else
5702 {
5703 /* BFM. */
5704 if (speed)
5705 *cost += extra_cost->alu.bfi;
5706 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5707 }
5708
43e9d192
IB
5709 return true;
5710
5711 default:
ba123b0d
JG
 5712 /* We can't make sense of this; assume the default cost. */
5713 *cost = COSTS_N_INSNS (1);
61263118 5714 return false;
43e9d192
IB
5715 }
5716 return false;
5717
9dfc162c
JG
5718 case CONST_INT:
5719 /* If an instruction can incorporate a constant within the
5720 instruction, the instruction's expression avoids calling
5721 rtx_cost() on the constant. If rtx_cost() is called on a
5722 constant, then it is usually because the constant must be
5723 moved into a register by one or more instructions.
5724
5725 The exception is constant 0, which can be expressed
5726 as XZR/WZR and is therefore free. The exception to this is
5727 if we have (set (reg) (const0_rtx)) in which case we must cost
5728 the move. However, we can catch that when we cost the SET, so
5729 we don't need to consider that here. */
5730 if (x == const0_rtx)
5731 *cost = 0;
5732 else
5733 {
 5734 /* To an approximation, the cost of building any other constant
 5735 is proportional to the number of instructions required to
 5736 build that constant. This is true whether we are compiling
 5737 for SPEED or otherwise. */
82614948
RR
5738 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
5739 (NULL_RTX, x, false, mode));
9dfc162c
JG
5740 }
5741 return true;
5742
5743 case CONST_DOUBLE:
5744 if (speed)
5745 {
5746 /* mov[df,sf]_aarch64. */
5747 if (aarch64_float_const_representable_p (x))
5748 /* FMOV (scalar immediate). */
5749 *cost += extra_cost->fp[mode == DFmode].fpconst;
5750 else if (!aarch64_float_const_zero_rtx_p (x))
5751 {
5752 /* This will be a load from memory. */
5753 if (mode == DFmode)
5754 *cost += extra_cost->ldst.loadd;
5755 else
5756 *cost += extra_cost->ldst.loadf;
5757 }
5758 else
5759 /* Otherwise this is +0.0. We get this using MOVI d0, #0
 5760 or MOV v0.s[0], wzr - neither of which is modeled by the
5761 cost tables. Just use the default cost. */
5762 {
5763 }
5764 }
5765
5766 return true;
5767
43e9d192
IB
5768 case MEM:
5769 if (speed)
2961177e
JG
5770 {
5771 /* For loads we want the base cost of a load, plus an
5772 approximation for the additional cost of the addressing
5773 mode. */
5774 rtx address = XEXP (x, 0);
5775 if (GET_MODE_CLASS (mode) == MODE_INT)
5776 *cost += extra_cost->ldst.load;
5777 else if (mode == SFmode)
5778 *cost += extra_cost->ldst.loadf;
5779 else if (mode == DFmode)
5780 *cost += extra_cost->ldst.loadd;
5781
5782 *cost +=
5783 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5784 0, speed));
5785 }
43e9d192
IB
5786
5787 return true;
5788
5789 case NEG:
4745e701
JG
5790 op0 = XEXP (x, 0);
5791
5792 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5793 {
5794 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5795 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5796 {
5797 /* CSETM. */
5798 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5799 return true;
5800 }
5801
5802 /* Cost this as SUB wzr, X. */
5803 op0 = CONST0_RTX (GET_MODE (x));
5804 op1 = XEXP (x, 0);
5805 goto cost_minus;
5806 }
5807
5808 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5809 {
5810 /* Support (neg(fma...)) as a single instruction only if
5811 sign of zeros is unimportant. This matches the decision
5812 making in aarch64.md. */
5813 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5814 {
5815 /* FNMADD. */
5816 *cost = rtx_cost (op0, NEG, 0, speed);
5817 return true;
5818 }
5819 if (speed)
5820 /* FNEG. */
5821 *cost += extra_cost->fp[mode == DFmode].neg;
5822 return false;
5823 }
5824
5825 return false;
43e9d192 5826
781aeb73
KT
5827 case CLRSB:
5828 case CLZ:
5829 if (speed)
5830 *cost += extra_cost->alu.clz;
5831
5832 return false;
5833
43e9d192
IB
5834 case COMPARE:
5835 op0 = XEXP (x, 0);
5836 op1 = XEXP (x, 1);
5837
5838 if (op1 == const0_rtx
5839 && GET_CODE (op0) == AND)
5840 {
5841 x = op0;
5842 goto cost_logic;
5843 }
5844
a8eecd00
JG
5845 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5846 {
5847 /* TODO: A write to the CC flags possibly costs extra, this
5848 needs encoding in the cost tables. */
5849
5850 /* CC_ZESWPmode supports zero extend for free. */
5851 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5852 op0 = XEXP (op0, 0);
5853
5854 /* ANDS. */
5855 if (GET_CODE (op0) == AND)
5856 {
5857 x = op0;
5858 goto cost_logic;
5859 }
5860
5861 if (GET_CODE (op0) == PLUS)
5862 {
5863 /* ADDS (and CMN alias). */
5864 x = op0;
5865 goto cost_plus;
5866 }
5867
5868 if (GET_CODE (op0) == MINUS)
5869 {
5870 /* SUBS. */
5871 x = op0;
5872 goto cost_minus;
5873 }
5874
5875 if (GET_CODE (op1) == NEG)
5876 {
5877 /* CMN. */
5878 if (speed)
5879 *cost += extra_cost->alu.arith;
5880
5881 *cost += rtx_cost (op0, COMPARE, 0, speed);
5882 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5883 return true;
5884 }
5885
5886 /* CMP.
5887
5888 Compare can freely swap the order of operands, and
5889 canonicalization puts the more complex operation first.
5890 But the integer MINUS logic expects the shift/extend
5891 operation in op1. */
5892 if (! (REG_P (op0)
5893 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5894 {
5895 op0 = XEXP (x, 1);
5896 op1 = XEXP (x, 0);
5897 }
5898 goto cost_minus;
5899 }
5900
5901 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5902 {
5903 /* FCMP. */
5904 if (speed)
5905 *cost += extra_cost->fp[mode == DFmode].compare;
5906
5907 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5908 {
ad88bedb 5909 *cost += rtx_cost (op0, COMPARE, 0, speed);
a8eecd00
JG
5910 /* FCMP supports constant 0.0 for no extra cost. */
5911 return true;
5912 }
5913 return false;
5914 }
5915
5916 return false;
43e9d192
IB
5917
5918 case MINUS:
4745e701
JG
5919 {
5920 op0 = XEXP (x, 0);
5921 op1 = XEXP (x, 1);
5922
5923cost_minus:
23cb6618
WD
5924 *cost += rtx_cost (op0, MINUS, 0, speed);
5925
4745e701
JG
5926 /* Detect valid immediates. */
5927 if ((GET_MODE_CLASS (mode) == MODE_INT
5928 || (GET_MODE_CLASS (mode) == MODE_CC
5929 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5930 && CONST_INT_P (op1)
5931 && aarch64_uimm12_shift (INTVAL (op1)))
5932 {
4745e701
JG
5933 if (speed)
5934 /* SUB(S) (immediate). */
5935 *cost += extra_cost->alu.arith;
5936 return true;
4745e701
JG
5937 }
5938
7cc2145f
JG
5939 /* Look for SUB (extended register). */
5940 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5941 {
5942 if (speed)
2533c820 5943 *cost += extra_cost->alu.extend_arith;
7cc2145f
JG
5944
5945 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5946 (enum rtx_code) GET_CODE (op1),
5947 0, speed);
5948 return true;
5949 }
5950
4745e701
JG
5951 rtx new_op1 = aarch64_strip_extend (op1);
5952
5953 /* Cost this as an FMA-alike operation. */
5954 if ((GET_CODE (new_op1) == MULT
0a78ebe4 5955 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
5956 && code != COMPARE)
5957 {
5958 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5959 (enum rtx_code) code,
5960 speed);
4745e701
JG
5961 return true;
5962 }
43e9d192 5963
4745e701 5964 *cost += rtx_cost (new_op1, MINUS, 1, speed);
43e9d192 5965
4745e701
JG
5966 if (speed)
5967 {
5968 if (GET_MODE_CLASS (mode) == MODE_INT)
5969 /* SUB(S). */
5970 *cost += extra_cost->alu.arith;
5971 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5972 /* FSUB. */
5973 *cost += extra_cost->fp[mode == DFmode].addsub;
5974 }
5975 return true;
5976 }
43e9d192
IB
5977
5978 case PLUS:
4745e701
JG
5979 {
5980 rtx new_op0;
43e9d192 5981
4745e701
JG
5982 op0 = XEXP (x, 0);
5983 op1 = XEXP (x, 1);
43e9d192 5984
a8eecd00 5985cost_plus:
4745e701
JG
5986 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5987 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5988 {
5989 /* CSINC. */
5990 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5991 *cost += rtx_cost (op1, PLUS, 1, speed);
5992 return true;
5993 }
43e9d192 5994
4745e701
JG
5995 if (GET_MODE_CLASS (mode) == MODE_INT
5996 && CONST_INT_P (op1)
5997 && aarch64_uimm12_shift (INTVAL (op1)))
5998 {
5999 *cost += rtx_cost (op0, PLUS, 0, speed);
43e9d192 6000
4745e701
JG
6001 if (speed)
6002 /* ADD (immediate). */
6003 *cost += extra_cost->alu.arith;
6004 return true;
6005 }
6006
23cb6618
WD
6007 *cost += rtx_cost (op1, PLUS, 1, speed);
6008
7cc2145f
JG
6009 /* Look for ADD (extended register). */
6010 if (aarch64_rtx_arith_op_extract_p (op0, mode))
6011 {
6012 if (speed)
2533c820 6013 *cost += extra_cost->alu.extend_arith;
7cc2145f
JG
6014
6015 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
6016 (enum rtx_code) GET_CODE (op0),
6017 0, speed);
6018 return true;
6019 }
6020
4745e701
JG
6021 /* Strip any extend, leave shifts behind as we will
6022 cost them through mult_cost. */
6023 new_op0 = aarch64_strip_extend (op0);
6024
6025 if (GET_CODE (new_op0) == MULT
0a78ebe4 6026 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
6027 {
6028 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
6029 speed);
4745e701
JG
6030 return true;
6031 }
6032
23cb6618 6033 *cost += rtx_cost (new_op0, PLUS, 0, speed);
4745e701
JG
6034
6035 if (speed)
6036 {
6037 if (GET_MODE_CLASS (mode) == MODE_INT)
6038 /* ADD. */
6039 *cost += extra_cost->alu.arith;
6040 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6041 /* FADD. */
6042 *cost += extra_cost->fp[mode == DFmode].addsub;
6043 }
6044 return true;
6045 }
43e9d192 6046
18b42b2a
KT
6047 case BSWAP:
6048 *cost = COSTS_N_INSNS (1);
6049
6050 if (speed)
6051 *cost += extra_cost->alu.rev;
6052
6053 return false;
6054
43e9d192 6055 case IOR:
f7d5cf8d
KT
6056 if (aarch_rev16_p (x))
6057 {
6058 *cost = COSTS_N_INSNS (1);
6059
6060 if (speed)
6061 *cost += extra_cost->alu.rev;
6062
6063 return true;
6064 }
fb0cb7fa
KT
6065
6066 if (aarch64_extr_rtx_p (x, &op0, &op1))
6067 {
6068 *cost += rtx_cost (op0, IOR, 0, speed)
6069 + rtx_cost (op1, IOR, 1, speed);
6070 if (speed)
6071 *cost += extra_cost->alu.shift;
6072
6073 return true;
6074 }
f7d5cf8d 6075 /* Fall through. */
43e9d192
IB
6076 case XOR:
6077 case AND:
6078 cost_logic:
6079 op0 = XEXP (x, 0);
6080 op1 = XEXP (x, 1);
6081
268c3b47
JG
6082 if (code == AND
6083 && GET_CODE (op0) == MULT
6084 && CONST_INT_P (XEXP (op0, 1))
6085 && CONST_INT_P (op1)
6086 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
6087 INTVAL (op1)) != 0)
6088 {
6089 /* This is a UBFM/SBFM. */
6090 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
6091 if (speed)
6092 *cost += extra_cost->alu.bfx;
6093 return true;
6094 }
6095
43e9d192
IB
6096 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6097 {
268c3b47
JG
 6098 /* We possibly get the immediate for free; this is not
 6099 modelled. */
43e9d192
IB
6100 if (CONST_INT_P (op1)
6101 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
6102 {
268c3b47
JG
6103 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
6104
6105 if (speed)
6106 *cost += extra_cost->alu.logical;
6107
6108 return true;
43e9d192
IB
6109 }
6110 else
6111 {
268c3b47
JG
6112 rtx new_op0 = op0;
6113
6114 /* Handle ORN, EON, or BIC. */
43e9d192
IB
6115 if (GET_CODE (op0) == NOT)
6116 op0 = XEXP (op0, 0);
268c3b47
JG
6117
6118 new_op0 = aarch64_strip_shift (op0);
6119
6120 /* If we had a shift on op0 then this is a logical-shift-
6121 by-register/immediate operation. Otherwise, this is just
6122 a logical operation. */
6123 if (speed)
6124 {
6125 if (new_op0 != op0)
6126 {
6127 /* Shift by immediate. */
6128 if (CONST_INT_P (XEXP (op0, 1)))
6129 *cost += extra_cost->alu.log_shift;
6130 else
6131 *cost += extra_cost->alu.log_shift_reg;
6132 }
6133 else
6134 *cost += extra_cost->alu.logical;
6135 }
6136
6137 /* In both cases we want to cost both operands. */
6138 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
6139 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
6140
6141 return true;
43e9d192 6142 }
43e9d192
IB
6143 }
6144 return false;
6145
268c3b47 6146 case NOT:
6365da9e
KT
6147 x = XEXP (x, 0);
6148 op0 = aarch64_strip_shift (x);
6149
6150 /* MVN-shifted-reg. */
6151 if (op0 != x)
6152 {
6153 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
6154
6155 if (speed)
6156 *cost += extra_cost->alu.log_shift;
6157
6158 return true;
6159 }
6160 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
6161 Handle the second form here taking care that 'a' in the above can
6162 be a shift. */
6163 else if (GET_CODE (op0) == XOR)
6164 {
6165 rtx newop0 = XEXP (op0, 0);
6166 rtx newop1 = XEXP (op0, 1);
6167 rtx op0_stripped = aarch64_strip_shift (newop0);
6168
6169 *cost += rtx_cost (newop1, (enum rtx_code) code, 1, speed)
6170 + rtx_cost (op0_stripped, XOR, 0, speed);
6171
6172 if (speed)
6173 {
6174 if (op0_stripped != newop0)
6175 *cost += extra_cost->alu.log_shift;
6176 else
6177 *cost += extra_cost->alu.logical;
6178 }
6179
6180 return true;
6181 }
268c3b47
JG
6182 /* MVN. */
6183 if (speed)
6184 *cost += extra_cost->alu.logical;
6185
268c3b47
JG
6186 return false;
6187
43e9d192 6188 case ZERO_EXTEND:
b1685e62
JG
6189
6190 op0 = XEXP (x, 0);
6191 /* If a value is written in SI mode, then zero extended to DI
 6192 mode, the operation will in general be free, as a write to
6193 a 'w' register implicitly zeroes the upper bits of an 'x'
6194 register. However, if this is
6195
6196 (set (reg) (zero_extend (reg)))
6197
6198 we must cost the explicit register move. */
6199 if (mode == DImode
6200 && GET_MODE (op0) == SImode
6201 && outer == SET)
6202 {
6203 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
6204
6205 if (!op_cost && speed)
6206 /* MOV. */
6207 *cost += extra_cost->alu.extend;
6208 else
6209 /* Free, the cost is that of the SI mode operation. */
6210 *cost = op_cost;
6211
6212 return true;
6213 }
6214 else if (MEM_P (XEXP (x, 0)))
43e9d192 6215 {
b1685e62
JG
6216 /* All loads can zero extend to any size for free. */
6217 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
43e9d192
IB
6218 return true;
6219 }
b1685e62
JG
6220
6221 /* UXTB/UXTH. */
6222 if (speed)
6223 *cost += extra_cost->alu.extend;
6224
43e9d192
IB
6225 return false;
6226
6227 case SIGN_EXTEND:
b1685e62 6228 if (MEM_P (XEXP (x, 0)))
43e9d192 6229 {
b1685e62
JG
6230 /* LDRSH. */
6231 if (speed)
6232 {
6233 rtx address = XEXP (XEXP (x, 0), 0);
6234 *cost += extra_cost->ldst.load_sign_extend;
6235
6236 *cost +=
6237 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6238 0, speed));
6239 }
43e9d192
IB
6240 return true;
6241 }
b1685e62
JG
6242
6243 if (speed)
6244 *cost += extra_cost->alu.extend;
43e9d192
IB
6245 return false;
6246
ba0cfa17
JG
6247 case ASHIFT:
6248 op0 = XEXP (x, 0);
6249 op1 = XEXP (x, 1);
6250
6251 if (CONST_INT_P (op1))
6252 {
 6253 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
6254 aliases. */
6255 if (speed)
6256 *cost += extra_cost->alu.shift;
6257
6258 /* We can incorporate zero/sign extend for free. */
6259 if (GET_CODE (op0) == ZERO_EXTEND
6260 || GET_CODE (op0) == SIGN_EXTEND)
6261 op0 = XEXP (op0, 0);
6262
6263 *cost += rtx_cost (op0, ASHIFT, 0, speed);
6264 return true;
6265 }
6266 else
6267 {
6268 /* LSLV. */
6269 if (speed)
6270 *cost += extra_cost->alu.shift_reg;
6271
6272 return false; /* All arguments need to be in registers. */
6273 }
6274
43e9d192 6275 case ROTATE:
43e9d192
IB
6276 case ROTATERT:
6277 case LSHIFTRT:
43e9d192 6278 case ASHIFTRT:
ba0cfa17
JG
6279 op0 = XEXP (x, 0);
6280 op1 = XEXP (x, 1);
43e9d192 6281
ba0cfa17
JG
6282 if (CONST_INT_P (op1))
6283 {
6284 /* ASR (immediate) and friends. */
6285 if (speed)
6286 *cost += extra_cost->alu.shift;
43e9d192 6287
ba0cfa17
JG
6288 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
6289 return true;
6290 }
6291 else
6292 {
6293
6294 /* ASR (register) and friends. */
6295 if (speed)
6296 *cost += extra_cost->alu.shift_reg;
6297
6298 return false; /* All arguments need to be in registers. */
6299 }
43e9d192 6300
909734be
JG
6301 case SYMBOL_REF:
6302
6303 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6304 {
6305 /* LDR. */
6306 if (speed)
6307 *cost += extra_cost->ldst.load;
6308 }
6309 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
6310 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
6311 {
6312 /* ADRP, followed by ADD. */
6313 *cost += COSTS_N_INSNS (1);
6314 if (speed)
6315 *cost += 2 * extra_cost->alu.arith;
6316 }
6317 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
6318 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
6319 {
6320 /* ADR. */
6321 if (speed)
6322 *cost += extra_cost->alu.arith;
6323 }
6324
6325 if (flag_pic)
6326 {
6327 /* One extra load instruction, after accessing the GOT. */
6328 *cost += COSTS_N_INSNS (1);
6329 if (speed)
6330 *cost += extra_cost->ldst.load;
6331 }
43e9d192
IB
6332 return true;
6333
909734be 6334 case HIGH:
43e9d192 6335 case LO_SUM:
909734be
JG
6336 /* ADRP/ADD (immediate). */
6337 if (speed)
6338 *cost += extra_cost->alu.arith;
43e9d192
IB
6339 return true;
6340
6341 case ZERO_EXTRACT:
6342 case SIGN_EXTRACT:
7cc2145f
JG
6343 /* UBFX/SBFX. */
6344 if (speed)
6345 *cost += extra_cost->alu.bfx;
6346
6347 /* We can trust that the immediates used will be correct (there
6348 are no by-register forms), so we need only cost op0. */
6349 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
43e9d192
IB
6350 return true;
6351
6352 case MULT:
4745e701
JG
6353 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
6354 /* aarch64_rtx_mult_cost always handles recursion to its
6355 operands. */
6356 return true;
43e9d192
IB
6357
6358 case MOD:
6359 case UMOD:
43e9d192
IB
6360 if (speed)
6361 {
6362 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
73250c4c
KT
6363 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
6364 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
43e9d192 6365 else if (GET_MODE (x) == DFmode)
73250c4c
KT
6366 *cost += (extra_cost->fp[1].mult
6367 + extra_cost->fp[1].div);
43e9d192 6368 else if (GET_MODE (x) == SFmode)
73250c4c
KT
6369 *cost += (extra_cost->fp[0].mult
6370 + extra_cost->fp[0].div);
43e9d192
IB
6371 }
6372 return false; /* All arguments need to be in registers. */
6373
6374 case DIV:
6375 case UDIV:
4105fe38 6376 case SQRT:
43e9d192
IB
6377 if (speed)
6378 {
4105fe38
JG
6379 if (GET_MODE_CLASS (mode) == MODE_INT)
6380 /* There is no integer SQRT, so only DIV and UDIV can get
6381 here. */
6382 *cost += extra_cost->mult[mode == DImode].idiv;
6383 else
6384 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
6385 }
6386 return false; /* All arguments need to be in registers. */
6387
a8eecd00 6388 case IF_THEN_ELSE:
2d5ffe46
AP
6389 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
6390 XEXP (x, 2), cost, speed);
a8eecd00
JG
6391
6392 case EQ:
6393 case NE:
6394 case GT:
6395 case GTU:
6396 case LT:
6397 case LTU:
6398 case GE:
6399 case GEU:
6400 case LE:
6401 case LEU:
6402
6403 return false; /* All arguments must be in registers. */
6404
b292109f
JG
6405 case FMA:
6406 op0 = XEXP (x, 0);
6407 op1 = XEXP (x, 1);
6408 op2 = XEXP (x, 2);
6409
6410 if (speed)
6411 *cost += extra_cost->fp[mode == DFmode].fma;
6412
6413 /* FMSUB, FNMADD, and FNMSUB are free. */
6414 if (GET_CODE (op0) == NEG)
6415 op0 = XEXP (op0, 0);
6416
6417 if (GET_CODE (op2) == NEG)
6418 op2 = XEXP (op2, 0);
6419
6420 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
6421 and the by-element operand as operand 0. */
6422 if (GET_CODE (op1) == NEG)
6423 op1 = XEXP (op1, 0);
6424
6425 /* Catch vector-by-element operations. The by-element operand can
6426 either be (vec_duplicate (vec_select (x))) or just
6427 (vec_select (x)), depending on whether we are multiplying by
6428 a vector or a scalar.
6429
 6430 Canonicalization is not very good in these cases: FMA4 will put the
 6431 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
6432 if (GET_CODE (op0) == VEC_DUPLICATE)
6433 op0 = XEXP (op0, 0);
6434 else if (GET_CODE (op1) == VEC_DUPLICATE)
6435 op1 = XEXP (op1, 0);
6436
6437 if (GET_CODE (op0) == VEC_SELECT)
6438 op0 = XEXP (op0, 0);
6439 else if (GET_CODE (op1) == VEC_SELECT)
6440 op1 = XEXP (op1, 0);
6441
6442 /* If the remaining parameters are not registers,
6443 get the cost to put them into registers. */
6444 *cost += rtx_cost (op0, FMA, 0, speed);
6445 *cost += rtx_cost (op1, FMA, 1, speed);
6446 *cost += rtx_cost (op2, FMA, 2, speed);
6447 return true;
6448
5e2a765b
KT
6449 case FLOAT:
6450 case UNSIGNED_FLOAT:
6451 if (speed)
6452 *cost += extra_cost->fp[mode == DFmode].fromint;
6453 return false;
6454
b292109f
JG
6455 case FLOAT_EXTEND:
6456 if (speed)
6457 *cost += extra_cost->fp[mode == DFmode].widen;
6458 return false;
6459
6460 case FLOAT_TRUNCATE:
6461 if (speed)
6462 *cost += extra_cost->fp[mode == DFmode].narrow;
6463 return false;
6464
61263118
KT
6465 case FIX:
6466 case UNSIGNED_FIX:
6467 x = XEXP (x, 0);
6468 /* Strip the rounding part. They will all be implemented
6469 by the fcvt* family of instructions anyway. */
6470 if (GET_CODE (x) == UNSPEC)
6471 {
6472 unsigned int uns_code = XINT (x, 1);
6473
6474 if (uns_code == UNSPEC_FRINTA
6475 || uns_code == UNSPEC_FRINTM
6476 || uns_code == UNSPEC_FRINTN
6477 || uns_code == UNSPEC_FRINTP
6478 || uns_code == UNSPEC_FRINTZ)
6479 x = XVECEXP (x, 0, 0);
6480 }
6481
6482 if (speed)
6483 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
6484
6485 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
6486 return true;
6487
b292109f
JG
6488 case ABS:
6489 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6490 {
19261b99
KT
6491 op0 = XEXP (x, 0);
6492
6493 /* FABD, which is analogous to FADD. */
6494 if (GET_CODE (op0) == MINUS)
6495 {
 6496 *cost += rtx_cost (XEXP (op0, 0), MINUS, 0, speed)
 6497 + rtx_cost (XEXP (op0, 1), MINUS, 1, speed);
6498 if (speed)
6499 *cost += extra_cost->fp[mode == DFmode].addsub;
6500
6501 return true;
6502 }
6503 /* Simple FABS is analogous to FNEG. */
b292109f
JG
6504 if (speed)
6505 *cost += extra_cost->fp[mode == DFmode].neg;
6506 }
6507 else
6508 {
6509 /* Integer ABS will either be split to
6510 two arithmetic instructions, or will be an ABS
6511 (scalar), which we don't model. */
6512 *cost = COSTS_N_INSNS (2);
6513 if (speed)
6514 *cost += 2 * extra_cost->alu.arith;
6515 }
6516 return false;
6517
6518 case SMAX:
6519 case SMIN:
6520 if (speed)
6521 {
6522 /* FMAXNM/FMINNM/FMAX/FMIN.
6523 TODO: This may not be accurate for all implementations, but
6524 we do not model this in the cost tables. */
6525 *cost += extra_cost->fp[mode == DFmode].addsub;
6526 }
6527 return false;
6528
61263118
KT
6529 case UNSPEC:
6530 /* The floating point round to integer frint* instructions. */
6531 if (aarch64_frint_unspec_p (XINT (x, 1)))
6532 {
6533 if (speed)
6534 *cost += extra_cost->fp[mode == DFmode].roundint;
6535
6536 return false;
6537 }
781aeb73
KT
6538
6539 if (XINT (x, 1) == UNSPEC_RBIT)
6540 {
6541 if (speed)
6542 *cost += extra_cost->alu.rev;
6543
6544 return false;
6545 }
61263118
KT
6546 break;
6547
fb620c4a
JG
6548 case TRUNCATE:
6549
6550 /* Decompose <su>muldi3_highpart. */
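      /* For illustration, the full RTL shape being matched below is,
	 for the signed case:
	   (truncate:DI
	     (lshiftrt:TI
	       (mult:TI (sign_extend:TI (reg:DI x))
			(sign_extend:TI (reg:DI y)))
	       (const_int 64)))
	 with ZERO_EXTEND in place of SIGN_EXTEND for the unsigned
	 variant.  */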
6551 if (/* (truncate:DI */
6552 mode == DImode
6553 /* (lshiftrt:TI */
6554 && GET_MODE (XEXP (x, 0)) == TImode
6555 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6556 /* (mult:TI */
6557 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6558 /* (ANY_EXTEND:TI (reg:DI))
6559 (ANY_EXTEND:TI (reg:DI))) */
6560 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6561 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
6562 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
6563 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
6564 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
6565 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
6566 /* (const_int 64) */
6567 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6568 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
6569 {
6570 /* UMULH/SMULH. */
6571 if (speed)
6572 *cost += extra_cost->mult[mode == DImode].extend;
6573 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
6574 MULT, 0, speed);
6575 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
6576 MULT, 1, speed);
6577 return true;
6578 }
6579
6580 /* Fall through. */
43e9d192 6581 default:
61263118 6582 break;
43e9d192 6583 }
61263118
KT
6584
6585 if (dump_file && (dump_flags & TDF_DETAILS))
6586 fprintf (dump_file,
6587 "\nFailed to cost RTX. Assuming default cost.\n");
6588
6589 return true;
43e9d192
IB
6590}
6591
0ee859b5
JG
6592/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
6593 calculated for X. This cost is stored in *COST. Returns true
6594 if the total cost of X was calculated. */
6595static bool
6596aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
6597 int param, int *cost, bool speed)
6598{
6599 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
6600
6601 if (dump_file && (dump_flags & TDF_DETAILS))
6602 {
6603 print_rtl_single (dump_file, x);
6604 fprintf (dump_file, "\n%s cost: %d (%s)\n",
6605 speed ? "Hot" : "Cold",
6606 *cost, result ? "final" : "partial");
6607 }
6608
6609 return result;
6610}
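/* For illustration: with detailed dumps enabled the wrapper above
   prints the insn followed by a line in the form
     Hot cost: 4 (final)
   where "Hot"/"Cold" reflects the SPEED flag and "final"/"partial"
   reflects whether aarch64_rtx_costs fully costed the expression (the
   numeric value here is only an example).  */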
6611
43e9d192 6612static int
ef4bddc2 6613aarch64_register_move_cost (machine_mode mode,
8a3a7e67 6614 reg_class_t from_i, reg_class_t to_i)
43e9d192 6615{
8a3a7e67
RH
6616 enum reg_class from = (enum reg_class) from_i;
6617 enum reg_class to = (enum reg_class) to_i;
43e9d192
IB
6618 const struct cpu_regmove_cost *regmove_cost
6619 = aarch64_tune_params->regmove_cost;
6620
3be07662
WD
6621 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
6622 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
6623 to = GENERAL_REGS;
6624
6625 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
6626 from = GENERAL_REGS;
6627
6ee70f81
AP
6628 /* Moving between GPR and stack cost is the same as GP2GP. */
6629 if ((from == GENERAL_REGS && to == STACK_REG)
6630 || (to == GENERAL_REGS && from == STACK_REG))
6631 return regmove_cost->GP2GP;
6632
6633 /* To/From the stack register, we move via the gprs. */
6634 if (to == STACK_REG || from == STACK_REG)
6635 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
6636 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
6637
8919453c
WD
6638 if (GET_MODE_SIZE (mode) == 16)
6639 {
6640 /* 128-bit operations on general registers require 2 instructions. */
6641 if (from == GENERAL_REGS && to == GENERAL_REGS)
6642 return regmove_cost->GP2GP * 2;
6643 else if (from == GENERAL_REGS)
6644 return regmove_cost->GP2FP * 2;
6645 else if (to == GENERAL_REGS)
6646 return regmove_cost->FP2GP * 2;
6647
6648 /* When AdvSIMD instructions are disabled it is not possible to move
6649 a 128-bit value directly between Q registers. This is handled in
6650 secondary reload. A general register is used as a scratch to move
6651 the upper DI value and the lower DI value is moved directly,
6652 hence the cost is the sum of three moves. */
6653 if (! TARGET_SIMD)
6654 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6655
6656 return regmove_cost->FP2FP;
6657 }
6658
43e9d192
IB
6659 if (from == GENERAL_REGS && to == GENERAL_REGS)
6660 return regmove_cost->GP2GP;
6661 else if (from == GENERAL_REGS)
6662 return regmove_cost->GP2FP;
6663 else if (to == GENERAL_REGS)
6664 return regmove_cost->FP2GP;
6665
43e9d192
IB
6666 return regmove_cost->FP2FP;
6667}
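/* Worked example of the costing above: a 16-byte (e.g. TImode) move
   from FP_REGS to GENERAL_REGS costs FP2GP * 2, since a 128-bit value
   needs two transfers into general registers, and any move involving
   STACK_REG is costed recursively as a move to GENERAL_REGS plus a
   move from GENERAL_REGS.  */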
6668
6669static int
ef4bddc2 6670aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
6671 reg_class_t rclass ATTRIBUTE_UNUSED,
6672 bool in ATTRIBUTE_UNUSED)
6673{
6674 return aarch64_tune_params->memmov_cost;
6675}
6676
d126a4ae
AP
6677/* Return the number of instructions that can be issued per cycle. */
6678static int
6679aarch64_sched_issue_rate (void)
6680{
6681 return aarch64_tune_params->issue_rate;
6682}
6683
d03f7e44
MK
6684static int
6685aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
6686{
6687 int issue_rate = aarch64_sched_issue_rate ();
6688
6689 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
6690}
6691
8990e73a
TB
6692/* Vectorizer cost model target hooks. */
6693
6694/* Implement targetm.vectorize.builtin_vectorization_cost. */
6695static int
6696aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6697 tree vectype,
6698 int misalign ATTRIBUTE_UNUSED)
6699{
6700 unsigned elements;
6701
6702 switch (type_of_cost)
6703 {
6704 case scalar_stmt:
6705 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
6706
6707 case scalar_load:
6708 return aarch64_tune_params->vec_costs->scalar_load_cost;
6709
6710 case scalar_store:
6711 return aarch64_tune_params->vec_costs->scalar_store_cost;
6712
6713 case vector_stmt:
6714 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6715
6716 case vector_load:
6717 return aarch64_tune_params->vec_costs->vec_align_load_cost;
6718
6719 case vector_store:
6720 return aarch64_tune_params->vec_costs->vec_store_cost;
6721
6722 case vec_to_scalar:
6723 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6724
6725 case scalar_to_vec:
6726 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6727
6728 case unaligned_load:
6729 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6730
6731 case unaligned_store:
6732 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6733
6734 case cond_branch_taken:
6735 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6736
6737 case cond_branch_not_taken:
6738 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6739
6740 case vec_perm:
6741 case vec_promote_demote:
6742 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6743
6744 case vec_construct:
6745 elements = TYPE_VECTOR_SUBPARTS (vectype);
6746 return elements / 2 + 1;
6747
6748 default:
6749 gcc_unreachable ();
6750 }
6751}
6752
6753/* Implement targetm.vectorize.add_stmt_cost. */
6754static unsigned
6755aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6756 struct _stmt_vec_info *stmt_info, int misalign,
6757 enum vect_cost_model_location where)
6758{
6759 unsigned *cost = (unsigned *) data;
6760 unsigned retval = 0;
6761
6762 if (flag_vect_cost_model)
6763 {
6764 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6765 int stmt_cost =
6766 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6767
6768 /* Statements in an inner loop relative to the loop being
6769 vectorized are weighted more heavily. The value here is
6770 a function (linear for now) of the loop nest level. */
6771 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6772 {
6773 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6774 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
6775 unsigned nest_level = loop_depth (loop);
6776
6777 count *= nest_level;
6778 }
6779
6780 retval = (unsigned) (count * stmt_cost);
6781 cost[where] += retval;
6782 }
6783
6784 return retval;
6785}
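/* Worked example of the weighting above (with the vector cost model
   enabled): a statement with COUNT == 2 and a per-statement cost of 3
   that lies in an inner loop while the vectorized loop sits at nest
   depth 2 contributes (2 * 2) * 3 == 12 to the vect_body bucket.  */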
6786
43e9d192
IB
6787static void initialize_aarch64_code_model (void);
6788
6789/* Parse the architecture extension string. */
6790
6791static void
6792aarch64_parse_extension (char *str)
6793{
6794 /* The extension string is parsed left to right. */
6795 const struct aarch64_option_extension *opt = NULL;
6796
6797 /* Flag to say whether we are adding or removing an extension. */
6798 int adding_ext = -1;
6799
6800 while (str != NULL && *str != 0)
6801 {
6802 char *ext;
6803 size_t len;
6804
6805 str++;
6806 ext = strchr (str, '+');
6807
6808 if (ext != NULL)
6809 len = ext - str;
6810 else
6811 len = strlen (str);
6812
6813 if (len >= 2 && strncmp (str, "no", 2) == 0)
6814 {
6815 adding_ext = 0;
6816 len -= 2;
6817 str += 2;
6818 }
6819 else if (len > 0)
6820 adding_ext = 1;
6821
6822 if (len == 0)
6823 {
217d0904
KT
6824 error ("missing feature modifier after %qs", adding_ext ? "+"
6825 : "+no");
43e9d192
IB
6826 return;
6827 }
6828
6829 /* Scan over the extensions table trying to find an exact match. */
6830 for (opt = all_extensions; opt->name != NULL; opt++)
6831 {
6832 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6833 {
6834 /* Add or remove the extension. */
6835 if (adding_ext)
6836 aarch64_isa_flags |= opt->flags_on;
6837 else
6838 aarch64_isa_flags &= ~(opt->flags_off);
6839 break;
6840 }
6841 }
6842
6843 if (opt->name == NULL)
6844 {
6845 /* Extension not found in list. */
6846 error ("unknown feature modifier %qs", str);
6847 return;
6848 }
6849
6850 str = ext;
6851 };
6852
6853 return;
6854}
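/* For illustration: given the modifier string "+fp+nosimd", the loop
   above first matches "fp" and ORs in its flags_on bits, then sees the
   "no" prefix on the second modifier, strips it, matches "simd" and
   clears that extension's flags_off bits from aarch64_isa_flags.  A
   bare "+" with nothing following it is rejected with the "missing
   feature modifier" error.  */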
6855
6856/* Parse the ARCH string. */
6857
6858static void
6859aarch64_parse_arch (void)
6860{
6861 char *ext;
6862 const struct processor *arch;
6863 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6864 size_t len;
6865
6866 strcpy (str, aarch64_arch_string);
6867
6868 ext = strchr (str, '+');
6869
6870 if (ext != NULL)
6871 len = ext - str;
6872 else
6873 len = strlen (str);
6874
6875 if (len == 0)
6876 {
6877 error ("missing arch name in -march=%qs", str);
6878 return;
6879 }
6880
6881 /* Loop through the list of supported ARCHs to find a match. */
6882 for (arch = all_architectures; arch->name != NULL; arch++)
6883 {
6884 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6885 {
6886 selected_arch = arch;
6887 aarch64_isa_flags = selected_arch->flags;
ffee7aa9
JG
6888
6889 if (!selected_cpu)
6890 selected_cpu = &all_cores[selected_arch->core];
43e9d192
IB
6891
6892 if (ext != NULL)
6893 {
6894 /* ARCH string contains at least one extension. */
6895 aarch64_parse_extension (ext);
6896 }
6897
ffee7aa9
JG
6898 if (strcmp (selected_arch->arch, selected_cpu->arch))
6899 {
6900 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6901 selected_cpu->name, selected_arch->name);
6902 }
6903
43e9d192
IB
6904 return;
6905 }
6906 }
6907
6908 /* ARCH name not found in list. */
6909 error ("unknown value %qs for -march", str);
6910 return;
6911}
6912
6913/* Parse the CPU string. */
6914
6915static void
6916aarch64_parse_cpu (void)
6917{
6918 char *ext;
6919 const struct processor *cpu;
6920 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6921 size_t len;
6922
6923 strcpy (str, aarch64_cpu_string);
6924
6925 ext = strchr (str, '+');
6926
6927 if (ext != NULL)
6928 len = ext - str;
6929 else
6930 len = strlen (str);
6931
6932 if (len == 0)
6933 {
6934 error ("missing cpu name in -mcpu=%qs", str);
6935 return;
6936 }
6937
6938 /* Loop through the list of supported CPUs to find a match. */
6939 for (cpu = all_cores; cpu->name != NULL; cpu++)
6940 {
6941 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6942 {
6943 selected_cpu = cpu;
6944 aarch64_isa_flags = selected_cpu->flags;
6945
6946 if (ext != NULL)
6947 {
6948 /* CPU string contains at least one extension. */
6949 aarch64_parse_extension (ext);
6950 }
6951
6952 return;
6953 }
6954 }
6955
6956 /* CPU name not found in list. */
6957 error ("unknown value %qs for -mcpu", str);
6958 return;
6959}
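/* For illustration: with -mcpu=cortex-a57+crypto the string is split
   at the first '+', "cortex-a57" is looked up in all_cores to set
   selected_cpu and aarch64_isa_flags, and the remaining "+crypto" is
   handed to aarch64_parse_extension.  */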
6960
6961/* Parse the TUNE string. */
6962
6963static void
6964aarch64_parse_tune (void)
6965{
6966 const struct processor *cpu;
6967 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6968 strcpy (str, aarch64_tune_string);
6969
6970 /* Loop through the list of supported CPUs to find a match. */
6971 for (cpu = all_cores; cpu->name != NULL; cpu++)
6972 {
6973 if (strcmp (cpu->name, str) == 0)
6974 {
6975 selected_tune = cpu;
6976 return;
6977 }
6978 }
6979
6980 /* CPU name not found in list. */
6981 error ("unknown value %qs for -mtune", str);
6982 return;
6983}
6984
6985
6986/* Implement TARGET_OPTION_OVERRIDE. */
6987
6988static void
6989aarch64_override_options (void)
6990{
ffee7aa9
JG
6991 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6992 If either of -march or -mtune is given, they override their
6993 respective component of -mcpu.
43e9d192 6994
ffee7aa9
JG
 6995 So, first parse AARCH64_CPU_STRING, then the others; be careful
 6996 with -march because, if -mcpu is not present on the command line,
 6997 -march must set a sensible default CPU. */
6998 if (aarch64_cpu_string)
43e9d192 6999 {
ffee7aa9 7000 aarch64_parse_cpu ();
43e9d192
IB
7001 }
7002
ffee7aa9 7003 if (aarch64_arch_string)
43e9d192 7004 {
ffee7aa9 7005 aarch64_parse_arch ();
43e9d192
IB
7006 }
7007
7008 if (aarch64_tune_string)
7009 {
7010 aarch64_parse_tune ();
7011 }
7012
63892fa2
KV
7013#ifndef HAVE_AS_MABI_OPTION
7014 /* The compiler may have been configured with 2.23.* binutils, which does
7015 not have support for ILP32. */
7016 if (TARGET_ILP32)
7017 error ("Assembler does not support -mabi=ilp32");
7018#endif
7019
43e9d192
IB
7020 initialize_aarch64_code_model ();
7021
7022 aarch64_build_bitmask_table ();
7023
7024 /* This target defaults to strict volatile bitfields. */
7025 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
7026 flag_strict_volatile_bitfields = 1;
7027
7028 /* If the user did not specify a processor, choose the default
7029 one for them. This will be the CPU set during configuration using
a3cd0246 7030 --with-cpu, otherwise it is "generic". */
43e9d192
IB
7031 if (!selected_cpu)
7032 {
7033 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
7034 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
7035 }
7036
7037 gcc_assert (selected_cpu);
7038
43e9d192 7039 if (!selected_tune)
3edaf26d 7040 selected_tune = selected_cpu;
43e9d192
IB
7041
7042 aarch64_tune_flags = selected_tune->flags;
7043 aarch64_tune = selected_tune->core;
7044 aarch64_tune_params = selected_tune->tune;
0c6caaf8 7045 aarch64_architecture_version = selected_cpu->architecture_version;
43e9d192 7046
5e396da6
KT
7047 if (aarch64_fix_a53_err835769 == 2)
7048 {
7049#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
7050 aarch64_fix_a53_err835769 = 1;
7051#else
7052 aarch64_fix_a53_err835769 = 0;
7053#endif
7054 }
7055
fde9b31b
TP
7056 if (AARCH64_TUNE_FMA_STEERING)
7057 aarch64_register_fma_steering ();
7058
43e9d192
IB
7059 aarch64_override_options_after_change ();
7060}
7061
7062/* Implement targetm.override_options_after_change. */
7063
7064static void
7065aarch64_override_options_after_change (void)
7066{
0b7f8166
MS
7067 if (flag_omit_frame_pointer)
7068 flag_omit_leaf_frame_pointer = false;
7069 else if (flag_omit_leaf_frame_pointer)
7070 flag_omit_frame_pointer = true;
487edc87
CB
7071
7072 /* If not optimizing for size, set the default
 7073 alignment to what the target wants. */
7074 if (!optimize_size)
7075 {
7076 if (align_loops <= 0)
7077 align_loops = aarch64_tune_params->loop_align;
7078 if (align_jumps <= 0)
7079 align_jumps = aarch64_tune_params->jump_align;
7080 if (align_functions <= 0)
7081 align_functions = aarch64_tune_params->function_align;
7082 }
43e9d192
IB
7083}
7084
7085static struct machine_function *
7086aarch64_init_machine_status (void)
7087{
7088 struct machine_function *machine;
766090c2 7089 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
7090 return machine;
7091}
7092
7093void
7094aarch64_init_expanders (void)
7095{
7096 init_machine_status = aarch64_init_machine_status;
7097}
7098
7099/* Select and validate the code model, deriving the PIC variant when compiling with -fpic/-fPIC. */
7100static void
7101initialize_aarch64_code_model (void)
7102{
7103 if (flag_pic)
7104 {
7105 switch (aarch64_cmodel_var)
7106 {
7107 case AARCH64_CMODEL_TINY:
7108 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
7109 break;
7110 case AARCH64_CMODEL_SMALL:
7111 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
7112 break;
7113 case AARCH64_CMODEL_LARGE:
7114 sorry ("code model %qs with -f%s", "large",
 7115 flag_pic > 1 ? "PIC" : "pic");
 break;
7116 default:
7117 gcc_unreachable ();
7118 }
7119 }
7120 else
7121 aarch64_cmodel = aarch64_cmodel_var;
7122}
7123
7124/* Return true if SYMBOL_REF X binds locally. */
7125
7126static bool
7127aarch64_symbol_binds_local_p (const_rtx x)
7128{
7129 return (SYMBOL_REF_DECL (x)
7130 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
7131 : SYMBOL_REF_LOCAL_P (x));
7132}
7133
7134/* Return true if SYMBOL_REF X is thread local. */
7135static bool
7136aarch64_tls_symbol_p (rtx x)
7137{
7138 if (! TARGET_HAVE_TLS)
7139 return false;
7140
7141 if (GET_CODE (x) != SYMBOL_REF)
7142 return false;
7143
7144 return SYMBOL_REF_TLS_MODEL (x) != 0;
7145}
7146
7147/* Classify a TLS symbol into one of the TLS kinds. */
7148enum aarch64_symbol_type
7149aarch64_classify_tls_symbol (rtx x)
7150{
7151 enum tls_model tls_kind = tls_symbolic_operand_type (x);
7152
7153 switch (tls_kind)
7154 {
7155 case TLS_MODEL_GLOBAL_DYNAMIC:
7156 case TLS_MODEL_LOCAL_DYNAMIC:
7157 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
7158
7159 case TLS_MODEL_INITIAL_EXEC:
7160 return SYMBOL_SMALL_GOTTPREL;
7161
7162 case TLS_MODEL_LOCAL_EXEC:
7163 return SYMBOL_SMALL_TPREL;
7164
7165 case TLS_MODEL_EMULATED:
7166 case TLS_MODEL_NONE:
7167 return SYMBOL_FORCE_TO_MEM;
7168
7169 default:
7170 gcc_unreachable ();
7171 }
7172}
7173
7174/* Return the method that should be used to access SYMBOL_REF or
7175 LABEL_REF X in context CONTEXT. */
17f4d4bf 7176
43e9d192 7177enum aarch64_symbol_type
f8b756b7 7178aarch64_classify_symbol (rtx x, rtx offset,
43e9d192
IB
7179 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
7180{
7181 if (GET_CODE (x) == LABEL_REF)
7182 {
7183 switch (aarch64_cmodel)
7184 {
7185 case AARCH64_CMODEL_LARGE:
7186 return SYMBOL_FORCE_TO_MEM;
7187
7188 case AARCH64_CMODEL_TINY_PIC:
7189 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
7190 return SYMBOL_TINY_ABSOLUTE;
7191
43e9d192
IB
7192 case AARCH64_CMODEL_SMALL_PIC:
7193 case AARCH64_CMODEL_SMALL:
7194 return SYMBOL_SMALL_ABSOLUTE;
7195
7196 default:
7197 gcc_unreachable ();
7198 }
7199 }
7200
17f4d4bf 7201 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 7202 {
4a985a37
MS
7203 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
7204 return SYMBOL_FORCE_TO_MEM;
43e9d192
IB
7205
7206 if (aarch64_tls_symbol_p (x))
7207 return aarch64_classify_tls_symbol (x);
7208
17f4d4bf
CSS
7209 switch (aarch64_cmodel)
7210 {
7211 case AARCH64_CMODEL_TINY:
f8b756b7
TB
 7212 /* When we retrieve a symbol + offset address, we have to make sure
 7213 the offset does not cause overflow of the final address. But
 7214 we have no way of knowing the address of the symbol at compile time
 7215 so we can't accurately say if the distance between the PC and
 7216 symbol + offset is outside the addressable range of +/-1M in the
 7217 TINY code model. So we rely on images not being greater than
 7218 1M and cap the offset at 1M; anything beyond 1M will have to
 7219 be loaded using an alternative mechanism. */
7220 if (SYMBOL_REF_WEAK (x)
7221 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
a5350ddc
CSS
7222 return SYMBOL_FORCE_TO_MEM;
7223 return SYMBOL_TINY_ABSOLUTE;
7224
17f4d4bf 7225 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
7226 /* Same reasoning as the tiny code model, but the offset cap here is
7227 4G. */
7228 if (SYMBOL_REF_WEAK (x)
3ff5d1f0
TB
7229 || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
7230 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
7231 return SYMBOL_FORCE_TO_MEM;
7232 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 7233
17f4d4bf 7234 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 7235 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 7236 return SYMBOL_TINY_GOT;
38e6c9a6
MS
7237 return SYMBOL_TINY_ABSOLUTE;
7238
17f4d4bf
CSS
7239 case AARCH64_CMODEL_SMALL_PIC:
7240 if (!aarch64_symbol_binds_local_p (x))
7241 return SYMBOL_SMALL_GOT;
7242 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 7243
17f4d4bf
CSS
7244 default:
7245 gcc_unreachable ();
7246 }
43e9d192 7247 }
17f4d4bf 7248
43e9d192
IB
7249 /* By default push everything into the constant pool. */
7250 return SYMBOL_FORCE_TO_MEM;
7251}
7252
43e9d192
IB
7253bool
7254aarch64_constant_address_p (rtx x)
7255{
7256 return (CONSTANT_P (x) && memory_address_p (DImode, x));
7257}
7258
7259bool
7260aarch64_legitimate_pic_operand_p (rtx x)
7261{
7262 if (GET_CODE (x) == SYMBOL_REF
7263 || (GET_CODE (x) == CONST
7264 && GET_CODE (XEXP (x, 0)) == PLUS
7265 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7266 return false;
7267
7268 return true;
7269}
7270
3520f7cc
JG
7271/* Return true if X holds either a quarter-precision or
7272 floating-point +0.0 constant. */
7273static bool
ef4bddc2 7274aarch64_valid_floating_const (machine_mode mode, rtx x)
3520f7cc
JG
7275{
7276 if (!CONST_DOUBLE_P (x))
7277 return false;
7278
7279 /* TODO: We could handle moving 0.0 to a TFmode register,
7280 but first we would like to refactor the movtf_aarch64
7281 to be more amicable to split moves properly and
7282 correctly gate on TARGET_SIMD. For now - reject all
7283 constants which are not to SFmode or DFmode registers. */
7284 if (!(mode == SFmode || mode == DFmode))
7285 return false;
7286
7287 if (aarch64_float_const_zero_rtx_p (x))
7288 return true;
7289 return aarch64_float_const_representable_p (x);
7290}
7291
43e9d192 7292static bool
ef4bddc2 7293aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192
IB
7294{
7295 /* Do not allow vector struct mode constants. We could support
7296 0 and -1 easily, but they need support in aarch64-simd.md. */
7297 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
7298 return false;
7299
7300 /* This could probably go away because
7301 we now decompose CONST_INTs according to expand_mov_immediate. */
7302 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 7303 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
7304 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
7305 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
7306
7307 if (GET_CODE (x) == HIGH
7308 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7309 return true;
7310
7311 return aarch64_constant_address_p (x);
7312}
7313
a5bc806c 7314rtx
43e9d192
IB
7315aarch64_load_tp (rtx target)
7316{
7317 if (!target
7318 || GET_MODE (target) != Pmode
7319 || !register_operand (target, Pmode))
7320 target = gen_reg_rtx (Pmode);
7321
7322 /* Can return in any reg. */
7323 emit_insn (gen_aarch64_load_tp_hard (target));
7324 return target;
7325}
7326
43e9d192
IB
7327/* On AAPCS systems, this is the "struct __va_list". */
7328static GTY(()) tree va_list_type;
7329
7330/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
7331 Return the type to use as __builtin_va_list.
7332
7333 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
7334
7335 struct __va_list
7336 {
7337 void *__stack;
7338 void *__gr_top;
7339 void *__vr_top;
7340 int __gr_offs;
7341 int __vr_offs;
7342 }; */
7343
7344static tree
7345aarch64_build_builtin_va_list (void)
7346{
7347 tree va_list_name;
7348 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7349
7350 /* Create the type. */
7351 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
7352 /* Give it the required name. */
7353 va_list_name = build_decl (BUILTINS_LOCATION,
7354 TYPE_DECL,
7355 get_identifier ("__va_list"),
7356 va_list_type);
7357 DECL_ARTIFICIAL (va_list_name) = 1;
7358 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 7359 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
7360
7361 /* Create the fields. */
7362 f_stack = build_decl (BUILTINS_LOCATION,
7363 FIELD_DECL, get_identifier ("__stack"),
7364 ptr_type_node);
7365 f_grtop = build_decl (BUILTINS_LOCATION,
7366 FIELD_DECL, get_identifier ("__gr_top"),
7367 ptr_type_node);
7368 f_vrtop = build_decl (BUILTINS_LOCATION,
7369 FIELD_DECL, get_identifier ("__vr_top"),
7370 ptr_type_node);
7371 f_groff = build_decl (BUILTINS_LOCATION,
7372 FIELD_DECL, get_identifier ("__gr_offs"),
7373 integer_type_node);
7374 f_vroff = build_decl (BUILTINS_LOCATION,
7375 FIELD_DECL, get_identifier ("__vr_offs"),
7376 integer_type_node);
7377
7378 DECL_ARTIFICIAL (f_stack) = 1;
7379 DECL_ARTIFICIAL (f_grtop) = 1;
7380 DECL_ARTIFICIAL (f_vrtop) = 1;
7381 DECL_ARTIFICIAL (f_groff) = 1;
7382 DECL_ARTIFICIAL (f_vroff) = 1;
7383
7384 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
7385 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
7386 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
7387 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
7388 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
7389
7390 TYPE_FIELDS (va_list_type) = f_stack;
7391 DECL_CHAIN (f_stack) = f_grtop;
7392 DECL_CHAIN (f_grtop) = f_vrtop;
7393 DECL_CHAIN (f_vrtop) = f_groff;
7394 DECL_CHAIN (f_groff) = f_vroff;
7395
7396 /* Compute its layout. */
7397 layout_type (va_list_type);
7398
7399 return va_list_type;
7400}
7401
7402/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
7403static void
7404aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
7405{
7406 const CUMULATIVE_ARGS *cum;
7407 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7408 tree stack, grtop, vrtop, groff, vroff;
7409 tree t;
7410 int gr_save_area_size;
7411 int vr_save_area_size;
7412 int vr_offset;
7413
7414 cum = &crtl->args.info;
7415 gr_save_area_size
7416 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
7417 vr_save_area_size
7418 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
7419
7420 if (TARGET_GENERAL_REGS_ONLY)
7421 {
7422 if (cum->aapcs_nvrn > 0)
7423 sorry ("%qs and floating point or vector arguments",
7424 "-mgeneral-regs-only");
7425 vr_save_area_size = 0;
7426 }
7427
7428 f_stack = TYPE_FIELDS (va_list_type_node);
7429 f_grtop = DECL_CHAIN (f_stack);
7430 f_vrtop = DECL_CHAIN (f_grtop);
7431 f_groff = DECL_CHAIN (f_vrtop);
7432 f_vroff = DECL_CHAIN (f_groff);
7433
7434 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
7435 NULL_TREE);
7436 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
7437 NULL_TREE);
7438 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
7439 NULL_TREE);
7440 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
7441 NULL_TREE);
7442 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
7443 NULL_TREE);
7444
7445 /* Emit code to initialize STACK, which points to the next varargs stack
7446 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
7447 by named arguments. STACK is 8-byte aligned. */
7448 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
7449 if (cum->aapcs_stack_size > 0)
7450 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
7451 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
7452 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7453
7454 /* Emit code to initialize GRTOP, the top of the GR save area.
7455 virtual_incoming_args_rtx should have been 16 byte aligned. */
7456 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
7457 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
7458 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7459
7460 /* Emit code to initialize VRTOP, the top of the VR save area.
7461 This address is gr_save_area_bytes below GRTOP, rounded
7462 down to the next 16-byte boundary. */
7463 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
7464 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
7465 STACK_BOUNDARY / BITS_PER_UNIT);
7466
7467 if (vr_offset)
7468 t = fold_build_pointer_plus_hwi (t, -vr_offset);
7469 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
7470 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7471
7472 /* Emit code to initialize GROFF, the offset from GRTOP of the
7473 next GPR argument. */
7474 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
7475 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
7476 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7477
7478 /* Likewise emit code to initialize VROFF, the offset from FTOP
7479 of the next VR argument. */
7480 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
7481 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
7482 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7483}
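/* Worked example of the va_list layout set up above, assuming the
   usual AArch64 parameters (8 argument GPRs of 8 bytes, 8 argument
   vector registers of 16 bytes): if the named arguments consumed 6
   GPRs and 1 vector register, then gr_save_area_size == 16 and
   vr_save_area_size == 112, __gr_top points at the incoming argument
   area, __vr_top sits 16 bytes (the GR save area rounded up to 16)
   below it, and __gr_offs/__vr_offs start at -16 and -112.  */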
7484
7485/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
7486
7487static tree
7488aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7489 gimple_seq *post_p ATTRIBUTE_UNUSED)
7490{
7491 tree addr;
7492 bool indirect_p;
7493 bool is_ha; /* is HFA or HVA. */
7494 bool dw_align; /* double-word align. */
ef4bddc2 7495 machine_mode ag_mode = VOIDmode;
43e9d192 7496 int nregs;
ef4bddc2 7497 machine_mode mode;
43e9d192
IB
7498
7499 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7500 tree stack, f_top, f_off, off, arg, roundup, on_stack;
7501 HOST_WIDE_INT size, rsize, adjust, align;
7502 tree t, u, cond1, cond2;
7503
7504 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7505 if (indirect_p)
7506 type = build_pointer_type (type);
7507
7508 mode = TYPE_MODE (type);
7509
7510 f_stack = TYPE_FIELDS (va_list_type_node);
7511 f_grtop = DECL_CHAIN (f_stack);
7512 f_vrtop = DECL_CHAIN (f_grtop);
7513 f_groff = DECL_CHAIN (f_vrtop);
7514 f_vroff = DECL_CHAIN (f_groff);
7515
7516 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
7517 f_stack, NULL_TREE);
7518 size = int_size_in_bytes (type);
7519 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
7520
7521 dw_align = false;
7522 adjust = 0;
7523 if (aarch64_vfp_is_call_or_return_candidate (mode,
7524 type,
7525 &ag_mode,
7526 &nregs,
7527 &is_ha))
7528 {
7529 /* TYPE passed in fp/simd registers. */
7530 if (TARGET_GENERAL_REGS_ONLY)
7531 sorry ("%qs and floating point or vector arguments",
7532 "-mgeneral-regs-only");
7533
7534 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
7535 unshare_expr (valist), f_vrtop, NULL_TREE);
7536 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
7537 unshare_expr (valist), f_vroff, NULL_TREE);
7538
7539 rsize = nregs * UNITS_PER_VREG;
7540
7541 if (is_ha)
7542 {
7543 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
7544 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
7545 }
7546 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
7547 && size < UNITS_PER_VREG)
7548 {
7549 adjust = UNITS_PER_VREG - size;
7550 }
7551 }
7552 else
7553 {
7554 /* TYPE passed in general registers. */
7555 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
7556 unshare_expr (valist), f_grtop, NULL_TREE);
7557 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
7558 unshare_expr (valist), f_groff, NULL_TREE);
7559 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7560 nregs = rsize / UNITS_PER_WORD;
7561
7562 if (align > 8)
7563 dw_align = true;
7564
7565 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7566 && size < UNITS_PER_WORD)
7567 {
7568 adjust = UNITS_PER_WORD - size;
7569 }
7570 }
7571
7572 /* Get a local temporary for the field value. */
7573 off = get_initialized_tmp_var (f_off, pre_p, NULL);
7574
7575 /* Emit code to branch if off >= 0. */
7576 t = build2 (GE_EXPR, boolean_type_node, off,
7577 build_int_cst (TREE_TYPE (off), 0));
7578 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
7579
7580 if (dw_align)
7581 {
7582 /* Emit: offs = (offs + 15) & -16. */
7583 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7584 build_int_cst (TREE_TYPE (off), 15));
7585 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
7586 build_int_cst (TREE_TYPE (off), -16));
7587 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
7588 }
7589 else
7590 roundup = NULL;
7591
7592 /* Update ap.__[g|v]r_offs */
7593 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7594 build_int_cst (TREE_TYPE (off), rsize));
7595 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
7596
7597 /* String up. */
7598 if (roundup)
7599 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7600
7601 /* [cond2] if (ap.__[g|v]r_offs > 0) */
7602 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
7603 build_int_cst (TREE_TYPE (f_off), 0));
7604 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
7605
7606 /* String up: make sure the assignment happens before the use. */
7607 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
7608 COND_EXPR_ELSE (cond1) = t;
7609
7610 /* Prepare the trees handling the argument that is passed on the stack;
7611 the top-level node will be stored in ON_STACK. */
7612 arg = get_initialized_tmp_var (stack, pre_p, NULL);
7613 if (align > 8)
7614 {
7615 /* if (alignof(type) > 8) arg = (arg + 15) & -16; */
7616 t = fold_convert (intDI_type_node, arg);
7617 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7618 build_int_cst (TREE_TYPE (t), 15));
7619 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7620 build_int_cst (TREE_TYPE (t), -16));
7621 t = fold_convert (TREE_TYPE (arg), t);
7622 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
7623 }
7624 else
7625 roundup = NULL;
7626 /* Advance ap.__stack */
7627 t = fold_convert (intDI_type_node, arg);
7628 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7629 build_int_cst (TREE_TYPE (t), size + 7));
7630 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7631 build_int_cst (TREE_TYPE (t), -8));
7632 t = fold_convert (TREE_TYPE (arg), t);
7633 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
7634 /* String up roundup and advance. */
7635 if (roundup)
7636 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7637 /* String up with arg */
7638 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
7639 /* Big-endianness related address adjustment. */
7640 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7641 && size < UNITS_PER_WORD)
7642 {
7643 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
7644 size_int (UNITS_PER_WORD - size));
7645 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
7646 }
7647
7648 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
7649 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
7650
7651 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
7652 t = off;
7653 if (adjust)
7654 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
7655 build_int_cst (TREE_TYPE (off), adjust));
7656
7657 t = fold_convert (sizetype, t);
7658 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
7659
7660 if (is_ha)
7661 {
7662 /* type ha; // treat as "struct {ftype field[n];}"
7663 ... [computing offs]
7664 for (i = 0; i < nregs; ++i, offs += 16)
7665 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
7666 return ha; */
7667 int i;
7668 tree tmp_ha, field_t, field_ptr_t;
7669
7670 /* Declare a local variable. */
7671 tmp_ha = create_tmp_var_raw (type, "ha");
7672 gimple_add_tmp_var (tmp_ha);
7673
7674 /* Establish the base type. */
7675 switch (ag_mode)
7676 {
7677 case SFmode:
7678 field_t = float_type_node;
7679 field_ptr_t = float_ptr_type_node;
7680 break;
7681 case DFmode:
7682 field_t = double_type_node;
7683 field_ptr_t = double_ptr_type_node;
7684 break;
7685 case TFmode:
7686 field_t = long_double_type_node;
7687 field_ptr_t = long_double_ptr_type_node;
7688 break;
7689/* Half precision and quad precision are not fully supported yet. Enable
7690 the following code after the support is complete. Need to find the correct
7691 type node for __fp16 *. */
7692#if 0
7693 case HFmode:
7694 field_t = float_type_node;
7695 field_ptr_t = float_ptr_type_node;
7696 break;
7697#endif
7698 case V2SImode:
7699 case V4SImode:
7700 {
7701 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
7702 field_t = build_vector_type_for_mode (innertype, ag_mode);
7703 field_ptr_t = build_pointer_type (field_t);
7704 }
7705 break;
7706 default:
7707 gcc_assert (0);
7708 }
7709
7710 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area).  */
7711 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
7712 addr = t;
7713 t = fold_convert (field_ptr_t, addr);
7714 t = build2 (MODIFY_EXPR, field_t,
7715 build1 (INDIRECT_REF, field_t, tmp_ha),
7716 build1 (INDIRECT_REF, field_t, t));
7717
7718 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
7719 for (i = 1; i < nregs; ++i)
7720 {
7721 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
7722 u = fold_convert (field_ptr_t, addr);
7723 u = build2 (MODIFY_EXPR, field_t,
7724 build2 (MEM_REF, field_t, tmp_ha,
7725 build_int_cst (field_ptr_t,
7726 (i *
7727 int_size_in_bytes (field_t)))),
7728 build1 (INDIRECT_REF, field_t, u));
7729 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
7730 }
7731
7732 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
7733 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
7734 }
7735
7736 COND_EXPR_ELSE (cond2) = t;
7737 addr = fold_convert (build_pointer_type (type), cond1);
7738 addr = build_va_arg_indirect_ref (addr);
7739
7740 if (indirect_p)
7741 addr = build_va_arg_indirect_ref (addr);
7742
7743 return addr;
7744}
7745
7746/* Implement TARGET_SETUP_INCOMING_VARARGS. */
7747
7748static void
ef4bddc2 7749aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
7750 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7751 int no_rtl)
7752{
7753 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7754 CUMULATIVE_ARGS local_cum;
7755 int gr_saved, vr_saved;
7756
7757 /* The caller has advanced CUM up to, but not beyond, the last named
7758 argument. Advance a local copy of CUM past the last "real" named
7759 argument, to find out how many registers are left over. */
7760 local_cum = *cum;
7761 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
7762
7763 /* Find out how many registers we need to save. */
7764 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7765 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7766
7767 if (TARGET_GENERAL_REGS_ONLY)
7768 {
7769 if (local_cum.aapcs_nvrn > 0)
7770 sorry ("%qs and floating point or vector arguments",
7771 "-mgeneral-regs-only");
7772 vr_saved = 0;
7773 }
7774
7775 if (!no_rtl)
7776 {
7777 if (gr_saved > 0)
7778 {
7779 rtx ptr, mem;
7780
7781 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7782 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7783 - gr_saved * UNITS_PER_WORD);
7784 mem = gen_frame_mem (BLKmode, ptr);
7785 set_mem_alias_set (mem, get_varargs_alias_set ());
7786
7787 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7788 mem, gr_saved);
7789 }
7790 if (vr_saved > 0)
7791 {
7792 /* We can't use move_block_from_reg, because it will use
7793 the wrong mode, storing D regs only. */
ef4bddc2 7794 machine_mode mode = TImode;
43e9d192
IB
7795 int off, i;
7796
7797 /* Set OFF to the offset from virtual_incoming_args_rtx of
7798 the first vector register. The VR save area lies below
7799 the GR one, and is aligned to 16 bytes. */
7800 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7801 STACK_BOUNDARY / BITS_PER_UNIT);
7802 off -= vr_saved * UNITS_PER_VREG;
7803
7804 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7805 {
7806 rtx ptr, mem;
7807
7808 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7809 mem = gen_frame_mem (mode, ptr);
7810 set_mem_alias_set (mem, get_varargs_alias_set ());
7811 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7812 off += UNITS_PER_VREG;
7813 }
7814 }
7815 }
7816
7817 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7818 any complication of having crtl->args.pretend_args_size changed. */
8799637a 7819 cfun->machine->frame.saved_varargs_size
43e9d192
IB
7820 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7821 STACK_BOUNDARY / BITS_PER_UNIT)
7822 + vr_saved * UNITS_PER_VREG);
7823}
7824
7825static void
7826aarch64_conditional_register_usage (void)
7827{
7828 int i;
7829 if (!TARGET_FLOAT)
7830 {
7831 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7832 {
7833 fixed_regs[i] = 1;
7834 call_used_regs[i] = 1;
7835 }
7836 }
7837}
7838
7839/* Walk down the type tree of TYPE counting consecutive base elements.
7840 If *MODEP is VOIDmode, then set it to the first valid floating point
7841 type. If a non-floating point type is found, or if a floating point
7842 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7843 otherwise return the count in the sub-tree. */
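/* Illustrative examples (hypothetical user types, not checked in-tree):
   for "struct { float a; float b[2]; }" this returns 3 with *MODEP set
   to SFmode, while "struct { float a; double b; }" mixes element modes
   and yields -1.  */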
7844static int
ef4bddc2 7845aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 7846{
ef4bddc2 7847 machine_mode mode;
43e9d192
IB
7848 HOST_WIDE_INT size;
7849
7850 switch (TREE_CODE (type))
7851 {
7852 case REAL_TYPE:
7853 mode = TYPE_MODE (type);
7854 if (mode != DFmode && mode != SFmode && mode != TFmode)
7855 return -1;
7856
7857 if (*modep == VOIDmode)
7858 *modep = mode;
7859
7860 if (*modep == mode)
7861 return 1;
7862
7863 break;
7864
7865 case COMPLEX_TYPE:
7866 mode = TYPE_MODE (TREE_TYPE (type));
7867 if (mode != DFmode && mode != SFmode && mode != TFmode)
7868 return -1;
7869
7870 if (*modep == VOIDmode)
7871 *modep = mode;
7872
7873 if (*modep == mode)
7874 return 2;
7875
7876 break;
7877
7878 case VECTOR_TYPE:
7879 /* Use V2SImode and V4SImode as representatives of all 64-bit
7880 and 128-bit vector types. */
7881 size = int_size_in_bytes (type);
7882 switch (size)
7883 {
7884 case 8:
7885 mode = V2SImode;
7886 break;
7887 case 16:
7888 mode = V4SImode;
7889 break;
7890 default:
7891 return -1;
7892 }
7893
7894 if (*modep == VOIDmode)
7895 *modep = mode;
7896
7897 /* Vector modes are considered to be opaque: two vectors are
7898 equivalent for the purposes of being homogeneous aggregates
7899 if they are the same size. */
7900 if (*modep == mode)
7901 return 1;
7902
7903 break;
7904
7905 case ARRAY_TYPE:
7906 {
7907 int count;
7908 tree index = TYPE_DOMAIN (type);
7909
807e902e
KZ
7910 /* Can't handle incomplete types nor sizes that are not
7911 fixed. */
7912 if (!COMPLETE_TYPE_P (type)
7913 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7914 return -1;
7915
7916 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7917 if (count == -1
7918 || !index
7919 || !TYPE_MAX_VALUE (index)
cc269bb6 7920 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 7921 || !TYPE_MIN_VALUE (index)
cc269bb6 7922 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
7923 || count < 0)
7924 return -1;
7925
ae7e9ddd
RS
7926 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7927 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
7928
7929 /* There must be no padding. */
807e902e 7930 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7931 return -1;
7932
7933 return count;
7934 }
7935
7936 case RECORD_TYPE:
7937 {
7938 int count = 0;
7939 int sub_count;
7940 tree field;
7941
807e902e
KZ
7942 /* Can't handle incomplete types nor sizes that are not
7943 fixed. */
7944 if (!COMPLETE_TYPE_P (type)
7945 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7946 return -1;
7947
7948 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7949 {
7950 if (TREE_CODE (field) != FIELD_DECL)
7951 continue;
7952
7953 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7954 if (sub_count < 0)
7955 return -1;
7956 count += sub_count;
7957 }
7958
7959 /* There must be no padding. */
807e902e 7960 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7961 return -1;
7962
7963 return count;
7964 }
7965
7966 case UNION_TYPE:
7967 case QUAL_UNION_TYPE:
7968 {
7969 /* These aren't very interesting except in a degenerate case. */
7970 int count = 0;
7971 int sub_count;
7972 tree field;
7973
807e902e
KZ
7974 /* Can't handle incomplete types nor sizes that are not
7975 fixed. */
7976 if (!COMPLETE_TYPE_P (type)
7977 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7978 return -1;
7979
7980 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7981 {
7982 if (TREE_CODE (field) != FIELD_DECL)
7983 continue;
7984
7985 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7986 if (sub_count < 0)
7987 return -1;
7988 count = count > sub_count ? count : sub_count;
7989 }
7990
7991 /* There must be no padding. */
807e902e 7992 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7993 return -1;
7994
7995 return count;
7996 }
7997
7998 default:
7999 break;
8000 }
8001
8002 return -1;
8003}
8004
8005/* Return TRUE if the type, as described by TYPE and MODE, is a composite
8006 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
8007 array types. The C99 floating-point complex types are also considered
8008 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
8009 types, which are GCC extensions and out of the scope of AAPCS64, are
8010 treated as composite types here as well.
8011
8012 Note that MODE itself is not sufficient in determining whether a type
8013 is such a composite type or not. This is because
8014 stor-layout.c:compute_record_mode may have already changed the MODE
8015 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
8016 structure with only one field may have its MODE set to the mode of the
8017 field. Also an integer mode whose size matches the size of the
8018 RECORD_TYPE type may be used to substitute the original mode
8019 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
8020 solely relied on. */
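/* For example (illustrative): "struct S { float f; };" can end up with
   TYPE_MODE == SFmode after stor-layout, yet it is still a composite
   type under the AAPCS64, which is why TYPE is consulted before MODE.  */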
8021
8022static bool
8023aarch64_composite_type_p (const_tree type,
ef4bddc2 8024 machine_mode mode)
43e9d192
IB
8025{
8026 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
8027 return true;
8028
8029 if (mode == BLKmode
8030 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
8031 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
8032 return true;
8033
8034 return false;
8035}
8036
8037/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
8038 type as described in AAPCS64 \S 4.1.2.
8039
8040 See the comment above aarch64_composite_type_p for the notes on MODE. */
8041
8042static bool
8043aarch64_short_vector_p (const_tree type,
ef4bddc2 8044 machine_mode mode)
43e9d192
IB
8045{
8046 HOST_WIDE_INT size = -1;
8047
8048 if (type && TREE_CODE (type) == VECTOR_TYPE)
8049 size = int_size_in_bytes (type);
8050 else if (!aarch64_composite_type_p (type, mode)
8051 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8052 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
8053 size = GET_MODE_SIZE (mode);
8054
8055 return (size == 8 || size == 16) ? true : false;
8056}
8057
8058/* Return TRUE if an argument, whose type is described by TYPE and MODE,
8059 shall be passed or returned in simd/fp register(s) (providing these
8060 parameter passing registers are available).
8061
8062 Upon successful return, *COUNT returns the number of needed registers,
8063 *BASE_MODE returns the mode of the individual register and when IS_HA
8064 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
8065 floating-point aggregate or a homogeneous short-vector aggregate. */
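/* For instance (illustrative): "struct { double x; double y; }" is a
   homogeneous floating-point aggregate, so this returns true with
   *COUNT == 2 and *BASE_MODE == DFmode; an aggregate with more than
   HA_MAX_NUM_FLDS elements is rejected.  */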
8066
8067static bool
ef4bddc2 8068aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 8069 const_tree type,
ef4bddc2 8070 machine_mode *base_mode,
43e9d192
IB
8071 int *count,
8072 bool *is_ha)
8073{
ef4bddc2 8074 machine_mode new_mode = VOIDmode;
43e9d192
IB
8075 bool composite_p = aarch64_composite_type_p (type, mode);
8076
8077 if (is_ha != NULL) *is_ha = false;
8078
8079 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
8080 || aarch64_short_vector_p (type, mode))
8081 {
8082 *count = 1;
8083 new_mode = mode;
8084 }
8085 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
8086 {
8087 if (is_ha != NULL) *is_ha = true;
8088 *count = 2;
8089 new_mode = GET_MODE_INNER (mode);
8090 }
8091 else if (type && composite_p)
8092 {
8093 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
8094
8095 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
8096 {
8097 if (is_ha != NULL) *is_ha = true;
8098 *count = ag_count;
8099 }
8100 else
8101 return false;
8102 }
8103 else
8104 return false;
8105
8106 *base_mode = new_mode;
8107 return true;
8108}
8109
8110/* Implement TARGET_STRUCT_VALUE_RTX. */
8111
8112static rtx
8113aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
8114 int incoming ATTRIBUTE_UNUSED)
8115{
8116 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
8117}
8118
8119/* Implements target hook vector_mode_supported_p. */
8120static bool
ef4bddc2 8121aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192
IB
8122{
8123 if (TARGET_SIMD
8124 && (mode == V4SImode || mode == V8HImode
8125 || mode == V16QImode || mode == V2DImode
8126 || mode == V2SImode || mode == V4HImode
8127 || mode == V8QImode || mode == V2SFmode
ad7d90cc
AL
8128 || mode == V4SFmode || mode == V2DFmode
8129 || mode == V1DFmode))
43e9d192
IB
8130 return true;
8131
8132 return false;
8133}
8134
b7342d25
IB
8135/* Return appropriate SIMD container
8136 for MODE within a vector of WIDTH bits. */
ef4bddc2
RS
8137static machine_mode
8138aarch64_simd_container_mode (machine_mode mode, unsigned width)
43e9d192 8139{
b7342d25 8140 gcc_assert (width == 64 || width == 128);
43e9d192 8141 if (TARGET_SIMD)
b7342d25
IB
8142 {
8143 if (width == 128)
8144 switch (mode)
8145 {
8146 case DFmode:
8147 return V2DFmode;
8148 case SFmode:
8149 return V4SFmode;
8150 case SImode:
8151 return V4SImode;
8152 case HImode:
8153 return V8HImode;
8154 case QImode:
8155 return V16QImode;
8156 case DImode:
8157 return V2DImode;
8158 default:
8159 break;
8160 }
8161 else
8162 switch (mode)
8163 {
8164 case SFmode:
8165 return V2SFmode;
8166 case SImode:
8167 return V2SImode;
8168 case HImode:
8169 return V4HImode;
8170 case QImode:
8171 return V8QImode;
8172 default:
8173 break;
8174 }
8175 }
43e9d192
IB
8176 return word_mode;
8177}
8178
b7342d25 8179/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2
RS
8180static machine_mode
8181aarch64_preferred_simd_mode (machine_mode mode)
b7342d25
IB
8182{
8183 return aarch64_simd_container_mode (mode, 128);
8184}
8185
3b357264
JG
8186/* Return the bitmask of possible vector sizes for the vectorizer
8187 to iterate over. */
8188static unsigned int
8189aarch64_autovectorize_vector_sizes (void)
8190{
8191 return (16 | 8);
8192}
8193
ac2b960f
YZ
8194/* Implement TARGET_MANGLE_TYPE. */
8195
6f549691 8196static const char *
ac2b960f
YZ
8197aarch64_mangle_type (const_tree type)
8198{
8199 /* The AArch64 ABI documents say that "__va_list" has to be
8200 mangled as if it is in the "std" namespace. */
8201 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
8202 return "St9__va_list";
8203
f9d53c27
TB
8204 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
8205 builtin types. */
8206 if (TYPE_NAME (type) != NULL)
8207 return aarch64_mangle_builtin_type (type);
c6fc9e43 8208
ac2b960f
YZ
8209 /* Use the default mangling. */
8210 return NULL;
8211}
8212
8baff86e
KT
8213
8214/* Return true if the rtx_insn contains a MEM RTX somewhere
8215 in it. */
75cf1494
KT
8216
8217static bool
8baff86e 8218has_memory_op (rtx_insn *mem_insn)
75cf1494 8219{
8baff86e
KT
8220 subrtx_iterator::array_type array;
8221 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
8222 if (MEM_P (*iter))
8223 return true;
8224
8225 return false;
75cf1494
KT
8226}
8227
8228/* Find the first rtx_insn before insn that will generate an assembly
8229 instruction. */
8230
8231static rtx_insn *
8232aarch64_prev_real_insn (rtx_insn *insn)
8233{
8234 if (!insn)
8235 return NULL;
8236
8237 do
8238 {
8239 insn = prev_real_insn (insn);
8240 }
8241 while (insn && recog_memoized (insn) < 0);
8242
8243 return insn;
8244}
8245
8246static bool
8247is_madd_op (enum attr_type t1)
8248{
8249 unsigned int i;
8250 /* A number of these may be AArch32 only. */
8251 enum attr_type mlatypes[] = {
8252 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
8253 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
8254 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
8255 };
8256
8257 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
8258 {
8259 if (t1 == mlatypes[i])
8260 return true;
8261 }
8262
8263 return false;
8264}
8265
8266/* Check if there is a register dependency between a load and the insn
8267 for which we hold recog_data. */
8268
8269static bool
8270dep_between_memop_and_curr (rtx memop)
8271{
8272 rtx load_reg;
8273 int opno;
8274
8baff86e 8275 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
8276
8277 if (!REG_P (SET_DEST (memop)))
8278 return false;
8279
8280 load_reg = SET_DEST (memop);
8baff86e 8281 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
8282 {
8283 rtx operand = recog_data.operand[opno];
8284 if (REG_P (operand)
8285 && reg_overlap_mentioned_p (load_reg, operand))
8286 return true;
8287
8288 }
8289 return false;
8290}
8291
8baff86e
KT
8292
8293/* When working around the Cortex-A53 erratum 835769,
8294 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
8295 instruction and has a preceding memory instruction such that a NOP
8296 should be inserted between them. */
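/* A sketch of the sequence being guarded against (register choices are
   illustrative only):

     ldr  x1, [x2]          // 64-bit memory operation
     madd x3, x4, x5, x6    // 64-bit multiply-accumulate

   With -mfix-cortex-a53-835769, aarch64_madd_needs_nop returns true for
   the MADD and FINAL_PRESCAN_INSN emits a NOP between the two insns.  */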
8297
75cf1494
KT
8298bool
8299aarch64_madd_needs_nop (rtx_insn* insn)
8300{
8301 enum attr_type attr_type;
8302 rtx_insn *prev;
8303 rtx body;
8304
8305 if (!aarch64_fix_a53_err835769)
8306 return false;
8307
8308 if (recog_memoized (insn) < 0)
8309 return false;
8310
8311 attr_type = get_attr_type (insn);
8312 if (!is_madd_op (attr_type))
8313 return false;
8314
8315 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
8316 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
8317 Restore recog state to INSN to avoid state corruption. */
8318 extract_constrain_insn_cached (insn);
8319
8baff86e 8320 if (!prev || !has_memory_op (prev))
75cf1494
KT
8321 return false;
8322
8323 body = single_set (prev);
8324
8325 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
8326 it and the DImode madd, emit a NOP between them. If body is NULL then we
8327 have a complex memory operation, probably a load/store pair.
8328 Be conservative for now and emit a NOP. */
8329 if (GET_MODE (recog_data.operand[0]) == DImode
8330 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
8331 return true;
8332
8333 return false;
8334
8335}
8336
8baff86e
KT
8337
8338/* Implement FINAL_PRESCAN_INSN. */
8339
75cf1494
KT
8340void
8341aarch64_final_prescan_insn (rtx_insn *insn)
8342{
8343 if (aarch64_madd_needs_nop (insn))
8344 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
8345}
8346
8347
43e9d192 8348/* Return the equivalent letter for size. */
81c2dfb9 8349static char
43e9d192
IB
8350sizetochar (int size)
8351{
8352 switch (size)
8353 {
8354 case 64: return 'd';
8355 case 32: return 's';
8356 case 16: return 'h';
8357 case 8 : return 'b';
8358 default: gcc_unreachable ();
8359 }
8360}
8361
3520f7cc
JG
8362/* Return true iff x is a uniform vector of floating-point
8363 constants, and the constant can be represented in
8364 quarter-precision form. Note, as aarch64_float_const_representable_p
8365 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
8366static bool
8367aarch64_vect_float_const_representable_p (rtx x)
8368{
8369 int i = 0;
8370 REAL_VALUE_TYPE r0, ri;
8371 rtx x0, xi;
8372
8373 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
8374 return false;
8375
8376 x0 = CONST_VECTOR_ELT (x, 0);
8377 if (!CONST_DOUBLE_P (x0))
8378 return false;
8379
8380 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
8381
8382 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
8383 {
8384 xi = CONST_VECTOR_ELT (x, i);
8385 if (!CONST_DOUBLE_P (xi))
8386 return false;
8387
8388 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
8389 if (!REAL_VALUES_EQUAL (r0, ri))
8390 return false;
8391 }
8392
8393 return aarch64_float_const_representable_p (x0);
8394}
8395
d8edd899 8396/* Return true for valid and false for invalid. */
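/* An illustrative case (assuming the usual AdvSIMD MOVI encodings): a
   V4SImode vector whose elements are all 0x0000ab00 matches the
   "bytes[1] repeated, other bytes zero" pattern below, so it is valid
   with element width 32, shift 8 and mvn false, i.e. the value 0xab
   shifted left by 8 in each element.  */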
3ea63f60 8397bool
ef4bddc2 8398aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
48063b9d 8399 struct simd_immediate_info *info)
43e9d192
IB
8400{
8401#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
8402 matches = 1; \
8403 for (i = 0; i < idx; i += (STRIDE)) \
8404 if (!(TEST)) \
8405 matches = 0; \
8406 if (matches) \
8407 { \
8408 immtype = (CLASS); \
8409 elsize = (ELSIZE); \
43e9d192
IB
8410 eshift = (SHIFT); \
8411 emvn = (NEG); \
8412 break; \
8413 }
8414
8415 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8416 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8417 unsigned char bytes[16];
43e9d192
IB
8418 int immtype = -1, matches;
8419 unsigned int invmask = inverse ? 0xff : 0;
8420 int eshift, emvn;
8421
43e9d192 8422 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 8423 {
81c2dfb9
IB
8424 if (! (aarch64_simd_imm_zero_p (op, mode)
8425 || aarch64_vect_float_const_representable_p (op)))
d8edd899 8426 return false;
3520f7cc 8427
48063b9d
IB
8428 if (info)
8429 {
8430 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 8431 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
8432 info->mvn = false;
8433 info->shift = 0;
8434 }
3520f7cc 8435
d8edd899 8436 return true;
3520f7cc 8437 }
43e9d192
IB
8438
8439 /* Splat vector constant out into a byte vector. */
8440 for (i = 0; i < n_elts; i++)
8441 {
4b1e108c
AL
8442 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
8443 it must be laid out in the vector register in reverse order. */
8444 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192
IB
8445 unsigned HOST_WIDE_INT elpart;
8446 unsigned int part, parts;
8447
4aa81c2e 8448 if (CONST_INT_P (el))
43e9d192
IB
8449 {
8450 elpart = INTVAL (el);
8451 parts = 1;
8452 }
8453 else if (GET_CODE (el) == CONST_DOUBLE)
8454 {
8455 elpart = CONST_DOUBLE_LOW (el);
8456 parts = 2;
8457 }
8458 else
8459 gcc_unreachable ();
8460
8461 for (part = 0; part < parts; part++)
8462 {
8463 unsigned int byte;
8464 for (byte = 0; byte < innersize; byte++)
8465 {
8466 bytes[idx++] = (elpart & 0xff) ^ invmask;
8467 elpart >>= BITS_PER_UNIT;
8468 }
8469 if (GET_CODE (el) == CONST_DOUBLE)
8470 elpart = CONST_DOUBLE_HIGH (el);
8471 }
8472 }
8473
8474 /* Sanity check. */
8475 gcc_assert (idx == GET_MODE_SIZE (mode));
8476
8477 do
8478 {
8479 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8480 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
8481
8482 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8483 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
8484
8485 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8486 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
8487
8488 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8489 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
8490
8491 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
8492
8493 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
8494
8495 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8496 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
8497
8498 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8499 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
8500
8501 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8502 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
8503
8504 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8505 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
8506
8507 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
8508
8509 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
8510
8511 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 8512 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
8513
8514 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 8515 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
8516
8517 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 8518 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
8519
8520 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 8521 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
8522
8523 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
8524
8525 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8526 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
8527 }
8528 while (0);
8529
e4f0f84d 8530 if (immtype == -1)
d8edd899 8531 return false;
43e9d192 8532
48063b9d 8533 if (info)
43e9d192 8534 {
48063b9d 8535 info->element_width = elsize;
48063b9d
IB
8536 info->mvn = emvn != 0;
8537 info->shift = eshift;
8538
43e9d192
IB
8539 unsigned HOST_WIDE_INT imm = 0;
8540
e4f0f84d
TB
8541 if (immtype >= 12 && immtype <= 15)
8542 info->msl = true;
8543
43e9d192
IB
8544 /* Un-invert bytes of recognized vector, if necessary. */
8545 if (invmask != 0)
8546 for (i = 0; i < idx; i++)
8547 bytes[i] ^= invmask;
8548
8549 if (immtype == 17)
8550 {
8551 /* FIXME: Broken on 32-bit H_W_I hosts. */
8552 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8553
8554 for (i = 0; i < 8; i++)
8555 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8556 << (i * BITS_PER_UNIT);
8557
43e9d192 8558
48063b9d
IB
8559 info->value = GEN_INT (imm);
8560 }
8561 else
8562 {
8563 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8564 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
8565
8566 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
8567 generic constants. */
8568 if (info->mvn)
43e9d192 8569 imm = ~imm;
48063b9d
IB
8570 imm = (imm >> info->shift) & 0xff;
8571 info->value = GEN_INT (imm);
8572 }
43e9d192
IB
8573 }
8574
48063b9d 8575 return true;
43e9d192
IB
8576#undef CHECK
8577}
8578
43e9d192
IB
8579/* Check if immediate shift constants are within range. */
8580bool
ef4bddc2 8581aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
8582{
8583 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
8584 if (left)
ddeabd3e 8585 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 8586 else
ddeabd3e 8587 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
8588}
8589
3520f7cc
JG
8590/* Return true if X is a uniform vector where all elements
8591 are either the floating-point constant 0.0 or the
8592 integer constant 0. */
43e9d192 8593bool
ef4bddc2 8594aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
43e9d192 8595{
3520f7cc 8596 return x == CONST0_RTX (mode);
43e9d192
IB
8597}
8598
8599bool
ef4bddc2 8600aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
43e9d192
IB
8601{
8602 HOST_WIDE_INT imm = INTVAL (x);
8603 int i;
8604
8605 for (i = 0; i < 8; i++)
8606 {
8607 unsigned int byte = imm & 0xff;
8608 if (byte != 0xff && byte != 0)
8609 return false;
8610 imm >>= 8;
8611 }
8612
8613 return true;
8614}
8615
83f8c414
CSS
8616bool
8617aarch64_mov_operand_p (rtx x,
a5350ddc 8618 enum aarch64_symbol_context context,
ef4bddc2 8619 machine_mode mode)
83f8c414 8620{
83f8c414
CSS
8621 if (GET_CODE (x) == HIGH
8622 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
8623 return true;
8624
82614948 8625 if (CONST_INT_P (x))
83f8c414
CSS
8626 return true;
8627
8628 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
8629 return true;
8630
a5350ddc
CSS
8631 return aarch64_classify_symbolic_expression (x, context)
8632 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
8633}
8634
43e9d192
IB
8635/* Return a const_int vector of VAL. */
8636rtx
ef4bddc2 8637aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
43e9d192
IB
8638{
8639 int nunits = GET_MODE_NUNITS (mode);
8640 rtvec v = rtvec_alloc (nunits);
8641 int i;
8642
8643 for (i = 0; i < nunits; i++)
8644 RTVEC_ELT (v, i) = GEN_INT (val);
8645
8646 return gen_rtx_CONST_VECTOR (mode, v);
8647}
8648
051d0e2f
SN
8649/* Check OP is a legal scalar immediate for the MOVI instruction. */
8650
8651bool
ef4bddc2 8652aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
051d0e2f 8653{
ef4bddc2 8654 machine_mode vmode;
051d0e2f
SN
8655
8656 gcc_assert (!VECTOR_MODE_P (mode));
8657 vmode = aarch64_preferred_simd_mode (mode);
8658 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 8659 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
8660}
8661
988fa693
JG
8662/* Construct and return a PARALLEL RTX vector with elements numbering the
8663 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
8664 the vector - from the perspective of the architecture. This does not
8665 line up with GCC's perspective on lane numbers, so we end up with
8666 different masks depending on our target endian-ness. The diagram
8667 below may help. We must draw the distinction when building masks
8668 which select one half of the vector. An instruction selecting
8669 architectural low-lanes for a big-endian target must be described using
8670 a mask selecting GCC high-lanes.
8671
8672 Big-Endian Little-Endian
8673
8674GCC 0 1 2 3 3 2 1 0
8675 | x | x | x | x | | x | x | x | x |
8676Architecture 3 2 1 0 3 2 1 0
8677
8678Low Mask: { 2, 3 } { 0, 1 }
8679High Mask: { 0, 1 } { 2, 3 }
8680*/
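/* For example (V4SImode, illustrative): HIGH == false yields
   (parallel [0 1]) on little-endian but (parallel [2 3]) on big-endian;
   both name the architecturally low two lanes.  */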
8681
43e9d192 8682rtx
ef4bddc2 8683aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
43e9d192
IB
8684{
8685 int nunits = GET_MODE_NUNITS (mode);
8686 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
8687 int high_base = nunits / 2;
8688 int low_base = 0;
8689 int base;
43e9d192
IB
8690 rtx t1;
8691 int i;
8692
988fa693
JG
8693 if (BYTES_BIG_ENDIAN)
8694 base = high ? low_base : high_base;
8695 else
8696 base = high ? high_base : low_base;
8697
8698 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
8699 RTVEC_ELT (v, i) = GEN_INT (base + i);
8700
8701 t1 = gen_rtx_PARALLEL (mode, v);
8702 return t1;
8703}
8704
988fa693
JG
8705/* Check OP for validity as a PARALLEL RTX vector with elements
8706 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
8707 from the perspective of the architecture. See the diagram above
8708 aarch64_simd_vect_par_cnst_half for more details. */
8709
8710bool
ef4bddc2 8711aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
8712 bool high)
8713{
8714 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
8715 HOST_WIDE_INT count_op = XVECLEN (op, 0);
8716 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
8717 int i = 0;
8718
8719 if (!VECTOR_MODE_P (mode))
8720 return false;
8721
8722 if (count_op != count_ideal)
8723 return false;
8724
8725 for (i = 0; i < count_ideal; i++)
8726 {
8727 rtx elt_op = XVECEXP (op, 0, i);
8728 rtx elt_ideal = XVECEXP (ideal, 0, i);
8729
4aa81c2e 8730 if (!CONST_INT_P (elt_op)
988fa693
JG
8731 || INTVAL (elt_ideal) != INTVAL (elt_op))
8732 return false;
8733 }
8734 return true;
8735}
8736
43e9d192
IB
8737/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
8738 HIGH (exclusive). */
8739void
46ed6024
CB
8740aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8741 const_tree exp)
43e9d192
IB
8742{
8743 HOST_WIDE_INT lane;
4aa81c2e 8744 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
8745 lane = INTVAL (operand);
8746
8747 if (lane < low || lane >= high)
46ed6024
CB
8748 {
8749 if (exp)
8750 error ("%Klane %ld out of range %ld - %ld", exp, lane, low, high - 1);
8751 else
8752 error ("lane %ld out of range %ld - %ld", lane, low, high - 1);
8753 }
43e9d192
IB
8754}
8755
43e9d192
IB
8756/* Return TRUE if OP is a valid vector addressing mode. */
8757bool
8758aarch64_simd_mem_operand_p (rtx op)
8759{
8760 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 8761 || REG_P (XEXP (op, 0)));
43e9d192
IB
8762}
8763
2d8c6dc1
AH
8764/* Emit a register copy from operand to operand, taking care not to
8765 early-clobber source registers in the process.
43e9d192 8766
2d8c6dc1
AH
8767 COUNT is the number of components into which the copy needs to be
8768 decomposed. */
43e9d192 8769void
2d8c6dc1
AH
8770aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
8771 unsigned int count)
43e9d192
IB
8772{
8773 unsigned int i;
2d8c6dc1
AH
8774 int rdest = REGNO (operands[0]);
8775 int rsrc = REGNO (operands[1]);
43e9d192
IB
8776
8777 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
8778 || rdest < rsrc)
8779 for (i = 0; i < count; i++)
8780 emit_move_insn (gen_rtx_REG (mode, rdest + i),
8781 gen_rtx_REG (mode, rsrc + i));
43e9d192 8782 else
2d8c6dc1
AH
8783 for (i = 0; i < count; i++)
8784 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
8785 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
8786}
8787
8788/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8789 one of VSTRUCT modes: OI, CI or XI. */
8790int
647d790d 8791aarch64_simd_attr_length_move (rtx_insn *insn)
43e9d192 8792{
ef4bddc2 8793 machine_mode mode;
43e9d192
IB
8794
8795 extract_insn_cached (insn);
8796
8797 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
8798 {
8799 mode = GET_MODE (recog_data.operand[0]);
8800 switch (mode)
8801 {
8802 case OImode:
8803 return 8;
8804 case CImode:
8805 return 12;
8806 case XImode:
8807 return 16;
8808 default:
8809 gcc_unreachable ();
8810 }
8811 }
8812 return 4;
8813}
8814
668046d1
DS
8815/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
8816 one of VSTRUCT modes: OI, CI, EI, or XI. */
8817int
8818aarch64_simd_attr_length_rglist (enum machine_mode mode)
8819{
8820 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
8821}
8822
db0253a4
TB
8823/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8824 alignment of a vector to 128 bits. */
8825static HOST_WIDE_INT
8826aarch64_simd_vector_alignment (const_tree type)
8827{
9439e9a1 8828 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
8829 return MIN (align, 128);
8830}
8831
8832/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8833static bool
8834aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8835{
8836 if (is_packed)
8837 return false;
8838
8839 /* We guarantee alignment for vectors up to 128-bits. */
8840 if (tree_int_cst_compare (TYPE_SIZE (type),
8841 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8842 return false;
8843
8844 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8845 return true;
8846}
8847
4369c11e
TB
8848/* If VALS is a vector constant that can be loaded into a register
8849 using DUP, generate instructions to do so and return an RTX to
8850 assign to the register. Otherwise return NULL_RTX. */
8851static rtx
8852aarch64_simd_dup_constant (rtx vals)
8853{
ef4bddc2
RS
8854 machine_mode mode = GET_MODE (vals);
8855 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e
TB
8856 int n_elts = GET_MODE_NUNITS (mode);
8857 bool all_same = true;
8858 rtx x;
8859 int i;
8860
8861 if (GET_CODE (vals) != CONST_VECTOR)
8862 return NULL_RTX;
8863
8864 for (i = 1; i < n_elts; ++i)
8865 {
8866 x = CONST_VECTOR_ELT (vals, i);
8867 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8868 all_same = false;
8869 }
8870
8871 if (!all_same)
8872 return NULL_RTX;
8873
8874 /* We can load this constant by using DUP and a constant in a
8875 single ARM register. This will be cheaper than a vector
8876 load. */
8877 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8878 return gen_rtx_VEC_DUPLICATE (mode, x);
8879}
8880
8881
8882/* Generate code to load VALS, which is a PARALLEL containing only
8883 constants (for vec_init) or CONST_VECTOR, efficiently into a
8884 register. Returns an RTX to copy into the register, or NULL_RTX
8885 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 8886static rtx
4369c11e
TB
8887aarch64_simd_make_constant (rtx vals)
8888{
ef4bddc2 8889 machine_mode mode = GET_MODE (vals);
4369c11e
TB
8890 rtx const_dup;
8891 rtx const_vec = NULL_RTX;
8892 int n_elts = GET_MODE_NUNITS (mode);
8893 int n_const = 0;
8894 int i;
8895
8896 if (GET_CODE (vals) == CONST_VECTOR)
8897 const_vec = vals;
8898 else if (GET_CODE (vals) == PARALLEL)
8899 {
8900 /* A CONST_VECTOR must contain only CONST_INTs and
8901 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8902 Only store valid constants in a CONST_VECTOR. */
8903 for (i = 0; i < n_elts; ++i)
8904 {
8905 rtx x = XVECEXP (vals, 0, i);
8906 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8907 n_const++;
8908 }
8909 if (n_const == n_elts)
8910 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8911 }
8912 else
8913 gcc_unreachable ();
8914
8915 if (const_vec != NULL_RTX
48063b9d 8916 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
8917 /* Load using MOVI/MVNI. */
8918 return const_vec;
8919 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8920 /* Loaded using DUP. */
8921 return const_dup;
8922 else if (const_vec != NULL_RTX)
8923 /* Load from constant pool. We can not take advantage of single-cycle
8924 LD1 because we need a PC-relative addressing mode. */
8925 return const_vec;
8926 else
8927 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8928 We can not construct an initializer. */
8929 return NULL_RTX;
8930}
8931
8932void
8933aarch64_expand_vector_init (rtx target, rtx vals)
8934{
ef4bddc2
RS
8935 machine_mode mode = GET_MODE (target);
8936 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 8937 int n_elts = GET_MODE_NUNITS (mode);
8b66a2d4
AL
8938 int n_var = 0;
8939 rtx any_const = NULL_RTX;
4369c11e 8940 bool all_same = true;
4369c11e 8941
8b66a2d4 8942 for (int i = 0; i < n_elts; ++i)
4369c11e 8943 {
8b66a2d4 8944 rtx x = XVECEXP (vals, 0, i);
4369c11e 8945 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8b66a2d4
AL
8946 ++n_var;
8947 else
8948 any_const = x;
4369c11e 8949
8b66a2d4 8950 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
4369c11e
TB
8951 all_same = false;
8952 }
8953
8954 if (n_var == 0)
8955 {
8956 rtx constant = aarch64_simd_make_constant (vals);
8957 if (constant != NULL_RTX)
8958 {
8959 emit_move_insn (target, constant);
8960 return;
8961 }
8962 }
8963
8964 /* Splat a single non-constant element if we can. */
8965 if (all_same)
8966 {
8b66a2d4 8967 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
4369c11e
TB
8968 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8969 return;
8970 }
8971
8b66a2d4
AL
8972 /* Half the fields (or fewer) are non-constant. Load constant then overwrite
8973 varying fields. Hope that this is more efficient than using the stack. */
8974 if (n_var <= n_elts/2)
4369c11e
TB
8975 {
8976 rtx copy = copy_rtx (vals);
4369c11e 8977
8b66a2d4
AL
8978 /* Load constant part of vector. We really don't care what goes into the
8979 parts we will overwrite, but we're more likely to be able to load the
8980 constant efficiently if it has fewer, larger, repeating parts
8981 (see aarch64_simd_valid_immediate). */
8982 for (int i = 0; i < n_elts; i++)
8983 {
8984 rtx x = XVECEXP (vals, 0, i);
8985 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8986 continue;
8987 rtx subst = any_const;
8988 for (int bit = n_elts / 2; bit > 0; bit /= 2)
8989 {
8990 /* Look in the copied vector, as more elements are const. */
8991 rtx test = XVECEXP (copy, 0, i ^ bit);
8992 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
8993 {
8994 subst = test;
8995 break;
8996 }
8997 }
8998 XVECEXP (copy, 0, i) = subst;
8999 }
4369c11e
TB
9000 aarch64_expand_vector_init (target, copy);
9001
8b66a2d4
AL
9002 /* Insert variables. */
9003 enum insn_code icode = optab_handler (vec_set_optab, mode);
4369c11e 9004 gcc_assert (icode != CODE_FOR_nothing);
8b66a2d4
AL
9005
9006 for (int i = 0; i < n_elts; i++)
9007 {
9008 rtx x = XVECEXP (vals, 0, i);
9009 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
9010 continue;
9011 x = copy_to_mode_reg (inner_mode, x);
9012 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
9013 }
4369c11e
TB
9014 return;
9015 }
9016
9017 /* Construct the vector in memory one field at a time
9018 and load the whole vector. */
8b66a2d4
AL
9019 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
9020 for (int i = 0; i < n_elts; i++)
4369c11e
TB
9021 emit_move_insn (adjust_address_nv (mem, inner_mode,
9022 i * GET_MODE_SIZE (inner_mode)),
9023 XVECEXP (vals, 0, i));
9024 emit_move_insn (target, mem);
9025
9026}
9027
43e9d192 9028static unsigned HOST_WIDE_INT
ef4bddc2 9029aarch64_shift_truncation_mask (machine_mode mode)
43e9d192
IB
9030{
9031 return
9032 (aarch64_vector_mode_supported_p (mode)
9033 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
9034}
9035
9036#ifndef TLS_SECTION_ASM_FLAG
9037#define TLS_SECTION_ASM_FLAG 'T'
9038#endif
9039
9040void
9041aarch64_elf_asm_named_section (const char *name, unsigned int flags,
9042 tree decl ATTRIBUTE_UNUSED)
9043{
9044 char flagchars[10], *f = flagchars;
9045
9046 /* If we have already declared this section, we can use an
9047 abbreviated form to switch back to it -- unless this section is
9048 part of a COMDAT group, in which case GAS requires the full
9049 declaration every time. */
9050 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
9051 && (flags & SECTION_DECLARED))
9052 {
9053 fprintf (asm_out_file, "\t.section\t%s\n", name);
9054 return;
9055 }
9056
9057 if (!(flags & SECTION_DEBUG))
9058 *f++ = 'a';
9059 if (flags & SECTION_WRITE)
9060 *f++ = 'w';
9061 if (flags & SECTION_CODE)
9062 *f++ = 'x';
9063 if (flags & SECTION_SMALL)
9064 *f++ = 's';
9065 if (flags & SECTION_MERGE)
9066 *f++ = 'M';
9067 if (flags & SECTION_STRINGS)
9068 *f++ = 'S';
9069 if (flags & SECTION_TLS)
9070 *f++ = TLS_SECTION_ASM_FLAG;
9071 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
9072 *f++ = 'G';
9073 *f = '\0';
9074
9075 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
9076
9077 if (!(flags & SECTION_NOTYPE))
9078 {
9079 const char *type;
9080 const char *format;
9081
9082 if (flags & SECTION_BSS)
9083 type = "nobits";
9084 else
9085 type = "progbits";
9086
9087#ifdef TYPE_OPERAND_FMT
9088 format = "," TYPE_OPERAND_FMT;
9089#else
9090 format = ",@%s";
9091#endif
9092
9093 fprintf (asm_out_file, format, type);
9094
9095 if (flags & SECTION_ENTSIZE)
9096 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
9097 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
9098 {
9099 if (TREE_CODE (decl) == IDENTIFIER_NODE)
9100 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
9101 else
9102 fprintf (asm_out_file, ",%s,comdat",
9103 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
9104 }
9105 }
9106
9107 putc ('\n', asm_out_file);
9108}
9109
9110/* Select a format to encode pointers in exception handling data. */
9111int
9112aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
9113{
9114 int type;
9115 switch (aarch64_cmodel)
9116 {
9117 case AARCH64_CMODEL_TINY:
9118 case AARCH64_CMODEL_TINY_PIC:
9119 case AARCH64_CMODEL_SMALL:
9120 case AARCH64_CMODEL_SMALL_PIC:
9121 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
9122 for everything. */
9123 type = DW_EH_PE_sdata4;
9124 break;
9125 default:
9126 /* No assumptions here. 8-byte relocs required. */
9127 type = DW_EH_PE_sdata8;
9128 break;
9129 }
9130 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
9131}
9132
0462169c
SN
9133/* Emit load exclusive. */
9134
9135static void
ef4bddc2 9136aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
9137 rtx mem, rtx model_rtx)
9138{
9139 rtx (*gen) (rtx, rtx, rtx);
9140
9141 switch (mode)
9142 {
9143 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
9144 case HImode: gen = gen_aarch64_load_exclusivehi; break;
9145 case SImode: gen = gen_aarch64_load_exclusivesi; break;
9146 case DImode: gen = gen_aarch64_load_exclusivedi; break;
9147 default:
9148 gcc_unreachable ();
9149 }
9150
9151 emit_insn (gen (rval, mem, model_rtx));
9152}
9153
9154/* Emit store exclusive. */
9155
9156static void
ef4bddc2 9157aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
9158 rtx rval, rtx mem, rtx model_rtx)
9159{
9160 rtx (*gen) (rtx, rtx, rtx, rtx);
9161
9162 switch (mode)
9163 {
9164 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
9165 case HImode: gen = gen_aarch64_store_exclusivehi; break;
9166 case SImode: gen = gen_aarch64_store_exclusivesi; break;
9167 case DImode: gen = gen_aarch64_store_exclusivedi; break;
9168 default:
9169 gcc_unreachable ();
9170 }
9171
9172 emit_insn (gen (bval, rval, mem, model_rtx));
9173}
9174
9175/* Mark the previous jump instruction as unlikely. */
9176
9177static void
9178aarch64_emit_unlikely_jump (rtx insn)
9179{
e5af9ddd 9180 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c
SN
9181
9182 insn = emit_jump_insn (insn);
e5af9ddd 9183 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
0462169c
SN
9184}
9185
9186/* Expand a compare and swap pattern. */
9187
9188void
9189aarch64_expand_compare_and_swap (rtx operands[])
9190{
9191 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
ef4bddc2 9192 machine_mode mode, cmp_mode;
0462169c
SN
9193 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
9194
9195 bval = operands[0];
9196 rval = operands[1];
9197 mem = operands[2];
9198 oldval = operands[3];
9199 newval = operands[4];
9200 is_weak = operands[5];
9201 mod_s = operands[6];
9202 mod_f = operands[7];
9203 mode = GET_MODE (mem);
9204 cmp_mode = mode;
9205
9206 /* Normally the succ memory model must be stronger than fail, but in the
9207 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
9208 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
9209
9210 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
9211 && INTVAL (mod_s) == MEMMODEL_RELEASE)
9212 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
9213
9214 switch (mode)
9215 {
9216 case QImode:
9217 case HImode:
9218 /* For short modes, we're going to perform the comparison in SImode,
9219 so do the zero-extension now. */
9220 cmp_mode = SImode;
9221 rval = gen_reg_rtx (SImode);
9222 oldval = convert_modes (SImode, mode, oldval, true);
9223 /* Fall through. */
9224
9225 case SImode:
9226 case DImode:
9227 /* Force the value into a register if needed. */
9228 if (!aarch64_plus_operand (oldval, mode))
9229 oldval = force_reg (cmp_mode, oldval);
9230 break;
9231
9232 default:
9233 gcc_unreachable ();
9234 }
9235
9236 switch (mode)
9237 {
9238 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
9239 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
9240 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
9241 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
9242 default:
9243 gcc_unreachable ();
9244 }
9245
9246 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
9247
9248 if (mode == QImode || mode == HImode)
9249 emit_move_insn (operands[1], gen_lowpart (mode, rval));
9250
9251 x = gen_rtx_REG (CCmode, CC_REGNUM);
9252 x = gen_rtx_EQ (SImode, x, const0_rtx);
9253 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
9254}
9255
9256/* Split a compare and swap pattern. */
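/* A sketch of the strong-form loop emitted by this split (illustrative;
   the exact exclusive load/store mnemonics depend on the memory model):

   .Lretry:
     ldxr  x0, [x1]        // load exclusive of the current value
     cmp   x0, x2          // compare with OLDVAL
     b.ne  .Ldone          // mismatch: fail
     stxr  w3, x4, [x1]    // attempt to store NEWVAL
     cbnz  w3, .Lretry     // reservation lost: retry
   .Ldone:                                                          */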
9257
9258void
9259aarch64_split_compare_and_swap (rtx operands[])
9260{
9261 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 9262 machine_mode mode;
0462169c 9263 bool is_weak;
5d8a22a5
DM
9264 rtx_code_label *label1, *label2;
9265 rtx x, cond;
0462169c
SN
9266
9267 rval = operands[0];
9268 mem = operands[1];
9269 oldval = operands[2];
9270 newval = operands[3];
9271 is_weak = (operands[4] != const0_rtx);
0462169c
SN
9272 scratch = operands[7];
9273 mode = GET_MODE (mem);
9274
5d8a22a5 9275 label1 = NULL;
0462169c
SN
9276 if (!is_weak)
9277 {
9278 label1 = gen_label_rtx ();
9279 emit_label (label1);
9280 }
9281 label2 = gen_label_rtx ();
9282
9283 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
9284
9285 cond = aarch64_gen_compare_reg (NE, rval, oldval);
9286 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9287 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9288 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
9289 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9290
9291 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
9292
9293 if (!is_weak)
9294 {
9295 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
9296 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9297 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
9298 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9299 }
9300 else
9301 {
9302 cond = gen_rtx_REG (CCmode, CC_REGNUM);
9303 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
9304 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
9305 }
9306
9307 emit_label (label2);
9308}
9309
9310/* Split an atomic operation. */
9311
9312void
9313aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9314 rtx value, rtx model_rtx, rtx cond)
9315{
ef4bddc2
RS
9316 machine_mode mode = GET_MODE (mem);
9317 machine_mode wmode = (mode == DImode ? DImode : SImode);
5d8a22a5
DM
9318 rtx_code_label *label;
9319 rtx x;
0462169c
SN
9320
9321 label = gen_label_rtx ();
9322 emit_label (label);
9323
9324 if (new_out)
9325 new_out = gen_lowpart (wmode, new_out);
9326 if (old_out)
9327 old_out = gen_lowpart (wmode, old_out);
9328 else
9329 old_out = new_out;
9330 value = simplify_gen_subreg (wmode, value, mode, 0);
9331
9332 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
9333
9334 switch (code)
9335 {
9336 case SET:
9337 new_out = value;
9338 break;
9339
9340 case NOT:
9341 x = gen_rtx_AND (wmode, old_out, value);
9342 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9343 x = gen_rtx_NOT (wmode, new_out);
9344 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9345 break;
9346
9347 case MINUS:
9348 if (CONST_INT_P (value))
9349 {
9350 value = GEN_INT (-INTVAL (value));
9351 code = PLUS;
9352 }
9353 /* Fall through. */
9354
9355 default:
9356 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
9357 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9358 break;
9359 }
9360
9361 aarch64_emit_store_exclusive (mode, cond, mem,
9362 gen_lowpart (mode, new_out), model_rtx);
9363
9364 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9365 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9366 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
9367 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9368}
9369
95ca411e
YZ
9370static void
9371aarch64_print_extension (void)
9372{
9373 const struct aarch64_option_extension *opt = NULL;
9374
9375 for (opt = all_extensions; opt->name != NULL; opt++)
9376 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
9377 asm_fprintf (asm_out_file, "+%s", opt->name);
9378
9379 asm_fprintf (asm_out_file, "\n");
9380}
9381
43e9d192
IB
9382static void
9383aarch64_start_file (void)
9384{
9385 if (selected_arch)
95ca411e
YZ
9386 {
9387 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
9388 aarch64_print_extension ();
9389 }
43e9d192 9390 else if (selected_cpu)
95ca411e 9391 {
682287fb
JG
9392 const char *truncated_name
9393 = aarch64_rewrite_selected_cpu (selected_cpu->name);
9394 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
95ca411e
YZ
9395 aarch64_print_extension ();
9396 }
43e9d192
IB
9397 default_file_start();
9398}
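/* The directives emitted above take a form along the lines of
   ".arch armv8-a+crc" or ".cpu cortex-a57+fp+simd": the selected
   architecture or CPU name followed by one "+ext" suffix for each
   enabled extension printed by aarch64_print_extension.  */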
9399
9400/* Target hook for c_mode_for_suffix. */
ef4bddc2 9401static machine_mode
43e9d192
IB
9402aarch64_c_mode_for_suffix (char suffix)
9403{
9404 if (suffix == 'q')
9405 return TFmode;
9406
9407 return VOIDmode;
9408}
9409
3520f7cc
JG
9410/* We can only represent floating point constants which will fit in
9411 "quarter-precision" values. These values are characterised by
9412 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
9413 by:
9414
9415 (-1)^s * (n/16) * 2^r
9416
9417 Where:
9418 's' is the sign bit.
9419 'n' is an integer in the range 16 <= n <= 31.
9420 'r' is an integer in the range -3 <= r <= 4. */
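/* For example, 1.0 = (16/16) * 2^0, 2.5 = (20/16) * 2^1 and
   0.125 = (16/16) * 2^-3 all fit this form, while the largest
   representable value is (31/16) * 2^4 = 31.0.  A value such as 0.1
   cannot be encoded exactly and so is not representable.  */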
9421
9422/* Return true iff X can be represented as a quarter-precision
9423 floating point immediate operand. Note, we cannot represent 0.0. */
9424bool
9425aarch64_float_const_representable_p (rtx x)
9426{
9427 /* This represents our current view of how many bits
9428 make up the mantissa. */
9429 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 9430 int exponent;
3520f7cc 9431 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 9432 REAL_VALUE_TYPE r, m;
807e902e 9433 bool fail;
3520f7cc
JG
9434
9435 if (!CONST_DOUBLE_P (x))
9436 return false;
9437
94bfa2da
TV
9438 if (GET_MODE (x) == VOIDmode)
9439 return false;
9440
3520f7cc
JG
9441 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9442
9443 /* We cannot represent infinities, NaNs or +/-zero. We won't
9444 know if we have +zero until we analyse the mantissa, but we
9445 can reject the other invalid values. */
9446 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
9447 || REAL_VALUE_MINUS_ZERO (r))
9448 return false;
9449
ba96cdfb 9450 /* Extract exponent. */
3520f7cc
JG
9451 r = real_value_abs (&r);
9452 exponent = REAL_EXP (&r);
9453
9454 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9455 highest (sign) bit, with a fixed binary point at bit point_pos.
9456 m1 holds the low part of the mantissa, m2 the high part.
9457 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
9458 bits for the mantissa, this can fail (low bits will be lost). */
9459 real_ldexp (&m, &r, point_pos - exponent);
807e902e 9460 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
9461
9462 /* If the low part of the mantissa has bits set we cannot represent
9463 the value. */
807e902e 9464 if (w.elt (0) != 0)
3520f7cc
JG
9465 return false;
9466 /* We have rejected the lower HOST_WIDE_INT, so update our
9467 understanding of how many bits lie in the mantissa and
9468 look only at the high HOST_WIDE_INT. */
807e902e 9469 mantissa = w.elt (1);
3520f7cc
JG
9470 point_pos -= HOST_BITS_PER_WIDE_INT;
9471
9472 /* We can only represent values with a mantissa of the form 1.xxxx. */
9473 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
9474 if ((mantissa & mask) != 0)
9475 return false;
9476
9477 /* Having filtered unrepresentable values, we may now remove all
9478 but the highest 5 bits. */
9479 mantissa >>= point_pos - 5;
9480
9481 /* We cannot represent the value 0.0, so reject it. This is handled
9482 elsewhere. */
9483 if (mantissa == 0)
9484 return false;
9485
9486 /* Then, as bit 4 is always set, we can mask it off, leaving
9487 the mantissa in the range [0, 15]. */
9488 mantissa &= ~(1 << 4);
9489 gcc_assert (mantissa <= 15);
9490
9491 /* GCC internally does not use IEEE754-like encoding (where normalized
9492 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
9493 Our mantissa values are shifted 4 places to the left relative to
9494 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
9495 by 5 places to correct for GCC's representation. */
9496 exponent = 5 - exponent;
9497
9498 return (exponent >= 0 && exponent <= 7);
9499}
9500
9501char*
81c2dfb9 9502aarch64_output_simd_mov_immediate (rtx const_vector,
ef4bddc2 9503 machine_mode mode,
3520f7cc
JG
9504 unsigned width)
9505{
3ea63f60 9506 bool is_valid;
3520f7cc 9507 static char templ[40];
3520f7cc 9508 const char *mnemonic;
e4f0f84d 9509 const char *shift_op;
3520f7cc 9510 unsigned int lane_count = 0;
81c2dfb9 9511 char element_char;
3520f7cc 9512
e4f0f84d 9513 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
9514
9515 /* This will return true to show const_vector is legal for use as either
9516 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
9517 also update INFO to show how the immediate should be generated. */
81c2dfb9 9518 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
9519 gcc_assert (is_valid);
9520
81c2dfb9 9521 element_char = sizetochar (info.element_width);
48063b9d
IB
9522 lane_count = width / info.element_width;
9523
3520f7cc
JG
9524 mode = GET_MODE_INNER (mode);
9525 if (mode == SFmode || mode == DFmode)
9526 {
48063b9d
IB
9527 gcc_assert (info.shift == 0 && ! info.mvn);
9528 if (aarch64_float_const_zero_rtx_p (info.value))
9529 info.value = GEN_INT (0);
9530 else
9531 {
9532#define buf_size 20
9533 REAL_VALUE_TYPE r;
9534 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
9535 char float_buf[buf_size] = {'\0'};
9536 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
9537#undef buf_size
9538
9539 if (lane_count == 1)
9540 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
9541 else
9542 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 9543 lane_count, element_char, float_buf);
48063b9d
IB
9544 return templ;
9545 }
3520f7cc 9546 }
3520f7cc 9547
48063b9d 9548 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 9549 shift_op = info.msl ? "msl" : "lsl";
3520f7cc
JG
9550
9551 if (lane_count == 1)
48063b9d
IB
9552 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
9553 mnemonic, UINTVAL (info.value));
9554 else if (info.shift)
9555 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
9556 ", %s %d", mnemonic, lane_count, element_char,
9557 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 9558 else
48063b9d 9559 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 9560 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
9561 return templ;
9562}
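/* The templates built above produce assembly along the lines of
   "movi	v0.4s, 0x1, lsl 8" for a shifted integer immediate or
   "mvni	v0.8h, 0x2" for an inverted one; floating-point immediates
   are instead printed as an "fmov" of the decimal value.  */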
9563
b7342d25
IB
9564char*
9565aarch64_output_scalar_simd_mov_immediate (rtx immediate,
ef4bddc2 9566 machine_mode mode)
b7342d25 9567{
ef4bddc2 9568 machine_mode vmode;
b7342d25
IB
9569
9570 gcc_assert (!VECTOR_MODE_P (mode));
9571 vmode = aarch64_simd_container_mode (mode, 64);
9572 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
9573 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
9574}
9575
88b08073
JG
9576/* Split operands into moves from op[1] + op[2] into op[0]. */
9577
9578void
9579aarch64_split_combinev16qi (rtx operands[3])
9580{
9581 unsigned int dest = REGNO (operands[0]);
9582 unsigned int src1 = REGNO (operands[1]);
9583 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 9584 machine_mode halfmode = GET_MODE (operands[1]);
88b08073
JG
9585 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
9586 rtx destlo, desthi;
9587
9588 gcc_assert (halfmode == V16QImode);
9589
9590 if (src1 == dest && src2 == dest + halfregs)
9591 {
9592 /* No-op move. Can't split to nothing; emit something. */
9593 emit_note (NOTE_INSN_DELETED);
9594 return;
9595 }
9596
9597 /* Preserve register attributes for variable tracking. */
9598 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
9599 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
9600 GET_MODE_SIZE (halfmode));
9601
9602 /* Special case of reversed high/low parts. */
9603 if (reg_overlap_mentioned_p (operands[2], destlo)
9604 && reg_overlap_mentioned_p (operands[1], desthi))
9605 {
9606 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9607 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
9608 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9609 }
9610 else if (!reg_overlap_mentioned_p (operands[2], destlo))
9611 {
9612 /* Try to avoid unnecessary moves if part of the result
9613 is in the right place already. */
9614 if (src1 != dest)
9615 emit_move_insn (destlo, operands[1]);
9616 if (src2 != dest + halfregs)
9617 emit_move_insn (desthi, operands[2]);
9618 }
9619 else
9620 {
9621 if (src2 != dest + halfregs)
9622 emit_move_insn (desthi, operands[2]);
9623 if (src1 != dest)
9624 emit_move_insn (destlo, operands[1]);
9625 }
9626}
9627
9628/* vec_perm support. */
9629
9630#define MAX_VECT_LEN 16
9631
9632struct expand_vec_perm_d
9633{
9634 rtx target, op0, op1;
9635 unsigned char perm[MAX_VECT_LEN];
ef4bddc2 9636 machine_mode vmode;
88b08073
JG
9637 unsigned char nelt;
9638 bool one_vector_p;
9639 bool testing_p;
9640};
9641
9642/* Generate a variable permutation. */
9643
9644static void
9645aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
9646{
ef4bddc2 9647 machine_mode vmode = GET_MODE (target);
88b08073
JG
9648 bool one_vector_p = rtx_equal_p (op0, op1);
9649
9650 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
9651 gcc_checking_assert (GET_MODE (op0) == vmode);
9652 gcc_checking_assert (GET_MODE (op1) == vmode);
9653 gcc_checking_assert (GET_MODE (sel) == vmode);
9654 gcc_checking_assert (TARGET_SIMD);
9655
9656 if (one_vector_p)
9657 {
9658 if (vmode == V8QImode)
9659 {
9660 /* Expand the argument to a V16QI mode by duplicating it. */
9661 rtx pair = gen_reg_rtx (V16QImode);
9662 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
9663 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9664 }
9665 else
9666 {
9667 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
9668 }
9669 }
9670 else
9671 {
9672 rtx pair;
9673
9674 if (vmode == V8QImode)
9675 {
9676 pair = gen_reg_rtx (V16QImode);
9677 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
9678 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9679 }
9680 else
9681 {
9682 pair = gen_reg_rtx (OImode);
9683 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
9684 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
9685 }
9686 }
9687}
9688
9689void
9690aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
9691{
ef4bddc2 9692 machine_mode vmode = GET_MODE (target);
c9d1a16a 9693 unsigned int nelt = GET_MODE_NUNITS (vmode);
88b08073 9694 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 9695 rtx mask;
88b08073
JG
9696
9697 /* The TBL instruction does not use a modulo index, so we must take care
9698 of that ourselves. */
f7c4e5b8
AL
9699 mask = aarch64_simd_gen_const_vector_dup (vmode,
9700 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
9701 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
9702
f7c4e5b8
AL
9703 /* For big-endian, we also need to reverse the index within the vector
9704 (but not which vector). */
9705 if (BYTES_BIG_ENDIAN)
9706 {
9707 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
9708 if (!one_vector_p)
9709 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
9710 sel = expand_simple_binop (vmode, XOR, sel, mask,
9711 NULL, 0, OPTAB_LIB_WIDEN);
9712 }
88b08073
JG
9713 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
9714}
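/* For example, for a two-vector V16QImode permute the selector is ANDed
   with a vector of 31 so that each index is taken modulo 2 * nelt; on
   big-endian targets each index is additionally XORed with nelt - 1 to
   reverse the lane numbering within each input vector.  */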
9715
cc4d934f
JG
9716/* Recognize patterns suitable for the TRN instructions. */
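/* For instance, on V4SImode the selector {0, 4, 2, 6} is matched as TRN1
   and {1, 5, 3, 7} as TRN2 (modulo the big-endian adjustment below).  */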
9717static bool
9718aarch64_evpc_trn (struct expand_vec_perm_d *d)
9719{
9720 unsigned int i, odd, mask, nelt = d->nelt;
9721 rtx out, in0, in1, x;
9722 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 9723 machine_mode vmode = d->vmode;
cc4d934f
JG
9724
9725 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9726 return false;
9727
9728 /* Note that these are little-endian tests.
9729 We correct for big-endian later. */
9730 if (d->perm[0] == 0)
9731 odd = 0;
9732 else if (d->perm[0] == 1)
9733 odd = 1;
9734 else
9735 return false;
9736 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9737
9738 for (i = 0; i < nelt; i += 2)
9739 {
9740 if (d->perm[i] != i + odd)
9741 return false;
9742 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
9743 return false;
9744 }
9745
9746 /* Success! */
9747 if (d->testing_p)
9748 return true;
9749
9750 in0 = d->op0;
9751 in1 = d->op1;
9752 if (BYTES_BIG_ENDIAN)
9753 {
9754 x = in0, in0 = in1, in1 = x;
9755 odd = !odd;
9756 }
9757 out = d->target;
9758
9759 if (odd)
9760 {
9761 switch (vmode)
9762 {
9763 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
9764 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
9765 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
9766 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
9767 case V4SImode: gen = gen_aarch64_trn2v4si; break;
9768 case V2SImode: gen = gen_aarch64_trn2v2si; break;
9769 case V2DImode: gen = gen_aarch64_trn2v2di; break;
9770 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
9771 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
9772 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
9773 default:
9774 return false;
9775 }
9776 }
9777 else
9778 {
9779 switch (vmode)
9780 {
9781 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
9782 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
9783 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
9784 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
9785 case V4SImode: gen = gen_aarch64_trn1v4si; break;
9786 case V2SImode: gen = gen_aarch64_trn1v2si; break;
9787 case V2DImode: gen = gen_aarch64_trn1v2di; break;
9788 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
9789 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
9790 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
9791 default:
9792 return false;
9793 }
9794 }
9795
9796 emit_insn (gen (out, in0, in1));
9797 return true;
9798}
9799
9800/* Recognize patterns suitable for the UZP instructions. */
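/* For instance, on V4SImode the selector {0, 2, 4, 6} is matched as UZP1
   and {1, 3, 5, 7} as UZP2 (modulo the big-endian adjustment below).  */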
9801static bool
9802aarch64_evpc_uzp (struct expand_vec_perm_d *d)
9803{
9804 unsigned int i, odd, mask, nelt = d->nelt;
9805 rtx out, in0, in1, x;
9806 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 9807 machine_mode vmode = d->vmode;
cc4d934f
JG
9808
9809 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9810 return false;
9811
9812 /* Note that these are little-endian tests.
9813 We correct for big-endian later. */
9814 if (d->perm[0] == 0)
9815 odd = 0;
9816 else if (d->perm[0] == 1)
9817 odd = 1;
9818 else
9819 return false;
9820 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9821
9822 for (i = 0; i < nelt; i++)
9823 {
9824 unsigned elt = (i * 2 + odd) & mask;
9825 if (d->perm[i] != elt)
9826 return false;
9827 }
9828
9829 /* Success! */
9830 if (d->testing_p)
9831 return true;
9832
9833 in0 = d->op0;
9834 in1 = d->op1;
9835 if (BYTES_BIG_ENDIAN)
9836 {
9837 x = in0, in0 = in1, in1 = x;
9838 odd = !odd;
9839 }
9840 out = d->target;
9841
9842 if (odd)
9843 {
9844 switch (vmode)
9845 {
9846 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9847 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9848 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9849 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9850 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9851 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9852 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9853 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9854 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9855 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9856 default:
9857 return false;
9858 }
9859 }
9860 else
9861 {
9862 switch (vmode)
9863 {
9864 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9865 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9866 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9867 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9868 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9869 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9870 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9871 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9872 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9873 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9874 default:
9875 return false;
9876 }
9877 }
9878
9879 emit_insn (gen (out, in0, in1));
9880 return true;
9881}
9882
9883/* Recognize patterns suitable for the ZIP instructions. */
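/* For instance, on V4SImode the selector {0, 4, 1, 5} is matched as ZIP1
   and {2, 6, 3, 7} as ZIP2 (modulo the big-endian adjustment below).  */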
9884static bool
9885aarch64_evpc_zip (struct expand_vec_perm_d *d)
9886{
9887 unsigned int i, high, mask, nelt = d->nelt;
9888 rtx out, in0, in1, x;
9889 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 9890 machine_mode vmode = d->vmode;
cc4d934f
JG
9891
9892 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9893 return false;
9894
9895 /* Note that these are little-endian tests.
9896 We correct for big-endian later. */
9897 high = nelt / 2;
9898 if (d->perm[0] == high)
9899 /* Do Nothing. */
9900 ;
9901 else if (d->perm[0] == 0)
9902 high = 0;
9903 else
9904 return false;
9905 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9906
9907 for (i = 0; i < nelt / 2; i++)
9908 {
9909 unsigned elt = (i + high) & mask;
9910 if (d->perm[i * 2] != elt)
9911 return false;
9912 elt = (elt + nelt) & mask;
9913 if (d->perm[i * 2 + 1] != elt)
9914 return false;
9915 }
9916
9917 /* Success! */
9918 if (d->testing_p)
9919 return true;
9920
9921 in0 = d->op0;
9922 in1 = d->op1;
9923 if (BYTES_BIG_ENDIAN)
9924 {
9925 x = in0, in0 = in1, in1 = x;
9926 high = !high;
9927 }
9928 out = d->target;
9929
9930 if (high)
9931 {
9932 switch (vmode)
9933 {
9934 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9935 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9936 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9937 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9938 case V4SImode: gen = gen_aarch64_zip2v4si; break;
9939 case V2SImode: gen = gen_aarch64_zip2v2si; break;
9940 case V2DImode: gen = gen_aarch64_zip2v2di; break;
9941 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9942 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9943 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9944 default:
9945 return false;
9946 }
9947 }
9948 else
9949 {
9950 switch (vmode)
9951 {
9952 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9953 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9954 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9955 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9956 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9957 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9958 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9959 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9960 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9961 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9962 default:
9963 return false;
9964 }
9965 }
9966
9967 emit_insn (gen (out, in0, in1));
9968 return true;
9969}
9970
ae0533da
AL
9971/* Recognize patterns for the EXT insn. */
9972
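/* For instance, on V4SImode a two-vector selector of {1, 2, 3, 4}
   (indices increasing by one from d->perm[0] == 1) is matched as an EXT
   with an element offset of 1.  */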
9973static bool
9974aarch64_evpc_ext (struct expand_vec_perm_d *d)
9975{
9976 unsigned int i, nelt = d->nelt;
9977 rtx (*gen) (rtx, rtx, rtx, rtx);
9978 rtx offset;
9979
9980 unsigned int location = d->perm[0]; /* Always < nelt. */
9981
9982 /* Check if the extracted indices are increasing by one. */
9983 for (i = 1; i < nelt; i++)
9984 {
9985 unsigned int required = location + i;
9986 if (d->one_vector_p)
9987 {
9988 /* We'll pass the same vector in twice, so allow indices to wrap. */
9989 required &= (nelt - 1);
9990 }
9991 if (d->perm[i] != required)
9992 return false;
9993 }
9994
ae0533da
AL
9995 switch (d->vmode)
9996 {
9997 case V16QImode: gen = gen_aarch64_extv16qi; break;
9998 case V8QImode: gen = gen_aarch64_extv8qi; break;
9999 case V4HImode: gen = gen_aarch64_extv4hi; break;
10000 case V8HImode: gen = gen_aarch64_extv8hi; break;
10001 case V2SImode: gen = gen_aarch64_extv2si; break;
10002 case V4SImode: gen = gen_aarch64_extv4si; break;
10003 case V2SFmode: gen = gen_aarch64_extv2sf; break;
10004 case V4SFmode: gen = gen_aarch64_extv4sf; break;
10005 case V2DImode: gen = gen_aarch64_extv2di; break;
10006 case V2DFmode: gen = gen_aarch64_extv2df; break;
10007 default:
10008 return false;
10009 }
10010
10011 /* Success! */
10012 if (d->testing_p)
10013 return true;
10014
b31e65bb
AL
10015 /* The case where (location == 0) is a no-op for both big- and little-endian,
10016 and is removed by the mid-end at optimization levels -O1 and higher. */
10017
10018 if (BYTES_BIG_ENDIAN && (location != 0))
ae0533da
AL
10019 {
10020 /* After setup, we want the high elements of the first vector (stored
10021 at the LSB end of the register), and the low elements of the second
10022 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 10023 std::swap (d->op0, d->op1);
ae0533da
AL
10024 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
10025 location = nelt - location;
10026 }
10027
10028 offset = GEN_INT (location);
10029 emit_insn (gen (d->target, d->op0, d->op1, offset));
10030 return true;
10031}
10032
923fcec3
AL
10033/* Recognize patterns for the REV insns. */
10034
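/* For instance, on V8HImode the selector {1, 0, 3, 2, 5, 4, 7, 6}
   (diff == 1) is matched as REV32, swapping adjacent 16-bit elements
   within each 32-bit word.  */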
10035static bool
10036aarch64_evpc_rev (struct expand_vec_perm_d *d)
10037{
10038 unsigned int i, j, diff, nelt = d->nelt;
10039 rtx (*gen) (rtx, rtx);
10040
10041 if (!d->one_vector_p)
10042 return false;
10043
10044 diff = d->perm[0];
10045 switch (diff)
10046 {
10047 case 7:
10048 switch (d->vmode)
10049 {
10050 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
10051 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
10052 default:
10053 return false;
10054 }
10055 break;
10056 case 3:
10057 switch (d->vmode)
10058 {
10059 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
10060 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
10061 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
10062 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
10063 default:
10064 return false;
10065 }
10066 break;
10067 case 1:
10068 switch (d->vmode)
10069 {
10070 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
10071 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
10072 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
10073 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
10074 case V4SImode: gen = gen_aarch64_rev64v4si; break;
10075 case V2SImode: gen = gen_aarch64_rev64v2si; break;
10076 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
10077 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
10078 default:
10079 return false;
10080 }
10081 break;
10082 default:
10083 return false;
10084 }
10085
10086 for (i = 0; i < nelt ; i += diff + 1)
10087 for (j = 0; j <= diff; j += 1)
10088 {
10089 /* This is guaranteed to be true as the value of diff
10090 is 7, 3 or 1 and we should have enough elements in the
10091 queue to generate this. Getting a vector mask with a
10092 value of diff other than these values implies that
10093 something is wrong by the time we get here. */
10094 gcc_assert (i + j < nelt);
10095 if (d->perm[i + j] != i + diff - j)
10096 return false;
10097 }
10098
10099 /* Success! */
10100 if (d->testing_p)
10101 return true;
10102
10103 emit_insn (gen (d->target, d->op0));
10104 return true;
10105}
10106
91bd4114
JG
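/* Recognize patterns suitable for the DUP instruction; for example the
   V4SImode selector {2, 2, 2, 2} broadcasts lane 2 of the first operand.  */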
10107static bool
10108aarch64_evpc_dup (struct expand_vec_perm_d *d)
10109{
10110 rtx (*gen) (rtx, rtx, rtx);
10111 rtx out = d->target;
10112 rtx in0;
ef4bddc2 10113 machine_mode vmode = d->vmode;
91bd4114
JG
10114 unsigned int i, elt, nelt = d->nelt;
10115 rtx lane;
10116
91bd4114
JG
10117 elt = d->perm[0];
10118 for (i = 1; i < nelt; i++)
10119 {
10120 if (elt != d->perm[i])
10121 return false;
10122 }
10123
10124 /* The generic preparation in aarch64_expand_vec_perm_const_1
10125 swaps the operand order and the permute indices if it finds
10126 d->perm[0] to be in the second operand. Thus, we can always
10127 use d->op0 and need not do any extra arithmetic to get the
10128 correct lane number. */
10129 in0 = d->op0;
f901401e 10130 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114
JG
10131
10132 switch (vmode)
10133 {
10134 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
10135 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
10136 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
10137 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
10138 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
10139 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
10140 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
10141 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
10142 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
10143 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
10144 default:
10145 return false;
10146 }
10147
10148 emit_insn (gen (out, in0, lane));
10149 return true;
10150}
10151
88b08073
JG
10152static bool
10153aarch64_evpc_tbl (struct expand_vec_perm_d *d)
10154{
10155 rtx rperm[MAX_VECT_LEN], sel;
ef4bddc2 10156 machine_mode vmode = d->vmode;
88b08073
JG
10157 unsigned int i, nelt = d->nelt;
10158
88b08073
JG
10159 if (d->testing_p)
10160 return true;
10161
10162 /* Generic code will try constant permutation twice. Once with the
10163 original mode and again with the elements lowered to QImode.
10164 So wait and don't do the selector expansion ourselves. */
10165 if (vmode != V8QImode && vmode != V16QImode)
10166 return false;
10167
10168 for (i = 0; i < nelt; ++i)
bbcc9c00
TB
10169 {
10170 int nunits = GET_MODE_NUNITS (vmode);
10171
10172 /* If big-endian and two vectors we end up with a weird mixed-endian
10173 mode on NEON. Reverse the index within each word but not the word
10174 itself. */
10175 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
10176 : d->perm[i]);
10177 }
88b08073
JG
10178 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
10179 sel = force_reg (vmode, sel);
10180
10181 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
10182 return true;
10183}
10184
10185static bool
10186aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
10187{
10188 /* The pattern matching functions above are written to look for a small
10189 number to begin the sequence (0, 1, N/2). If we begin with an index
10190 from the second operand, we can swap the operands. */
10191 if (d->perm[0] >= d->nelt)
10192 {
10193 unsigned i, nelt = d->nelt;
88b08073 10194
0696116a 10195 gcc_assert (nelt == (nelt & -nelt));
88b08073 10196 for (i = 0; i < nelt; ++i)
0696116a 10197 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
88b08073 10198
cb5c6c29 10199 std::swap (d->op0, d->op1);
88b08073
JG
10200 }
10201
10202 if (TARGET_SIMD)
cc4d934f 10203 {
923fcec3
AL
10204 if (aarch64_evpc_rev (d))
10205 return true;
10206 else if (aarch64_evpc_ext (d))
ae0533da 10207 return true;
f901401e
AL
10208 else if (aarch64_evpc_dup (d))
10209 return true;
ae0533da 10210 else if (aarch64_evpc_zip (d))
cc4d934f
JG
10211 return true;
10212 else if (aarch64_evpc_uzp (d))
10213 return true;
10214 else if (aarch64_evpc_trn (d))
10215 return true;
10216 return aarch64_evpc_tbl (d);
10217 }
88b08073
JG
10218 return false;
10219}
10220
10221/* Expand a vec_perm_const pattern. */
10222
10223bool
10224aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
10225{
10226 struct expand_vec_perm_d d;
10227 int i, nelt, which;
10228
10229 d.target = target;
10230 d.op0 = op0;
10231 d.op1 = op1;
10232
10233 d.vmode = GET_MODE (target);
10234 gcc_assert (VECTOR_MODE_P (d.vmode));
10235 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
10236 d.testing_p = false;
10237
10238 for (i = which = 0; i < nelt; ++i)
10239 {
10240 rtx e = XVECEXP (sel, 0, i);
10241 int ei = INTVAL (e) & (2 * nelt - 1);
10242 which |= (ei < nelt ? 1 : 2);
10243 d.perm[i] = ei;
10244 }
10245
10246 switch (which)
10247 {
10248 default:
10249 gcc_unreachable ();
10250
10251 case 3:
10252 d.one_vector_p = false;
10253 if (!rtx_equal_p (op0, op1))
10254 break;
10255
10256 /* The elements of PERM do not suggest that only the first operand
10257 is used, but both operands are identical. Allow easier matching
10258 of the permutation by folding the permutation into the single
10259 input vector. */
10260 /* Fall Through. */
10261 case 2:
10262 for (i = 0; i < nelt; ++i)
10263 d.perm[i] &= nelt - 1;
10264 d.op0 = op1;
10265 d.one_vector_p = true;
10266 break;
10267
10268 case 1:
10269 d.op1 = op0;
10270 d.one_vector_p = true;
10271 break;
10272 }
10273
10274 return aarch64_expand_vec_perm_const_1 (&d);
10275}
10276
10277static bool
ef4bddc2 10278aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
88b08073
JG
10279 const unsigned char *sel)
10280{
10281 struct expand_vec_perm_d d;
10282 unsigned int i, nelt, which;
10283 bool ret;
10284
10285 d.vmode = vmode;
10286 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
10287 d.testing_p = true;
10288 memcpy (d.perm, sel, nelt);
10289
10290 /* Calculate whether all elements are in one vector. */
10291 for (i = which = 0; i < nelt; ++i)
10292 {
10293 unsigned char e = d.perm[i];
10294 gcc_assert (e < 2 * nelt);
10295 which |= (e < nelt ? 1 : 2);
10296 }
10297
10298 /* If all elements are from the second vector, reindex as if from the
10299 first vector. */
10300 if (which == 2)
10301 for (i = 0; i < nelt; ++i)
10302 d.perm[i] -= nelt;
10303
10304 /* Check whether the mask can be applied to a single vector. */
10305 d.one_vector_p = (which != 3);
10306
10307 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
10308 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
10309 if (!d.one_vector_p)
10310 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
10311
10312 start_sequence ();
10313 ret = aarch64_expand_vec_perm_const_1 (&d);
10314 end_sequence ();
10315
10316 return ret;
10317}
10318
668046d1
DS
10319rtx
10320aarch64_reverse_mask (enum machine_mode mode)
10321{
10322 /* We have to reverse each vector because we don't have
10323 a permuted load that can reverse-load according to ABI rules. */
10324 rtx mask;
10325 rtvec v = rtvec_alloc (16);
10326 int i, j;
10327 int nunits = GET_MODE_NUNITS (mode);
10328 int usize = GET_MODE_UNIT_SIZE (mode);
10329
10330 gcc_assert (BYTES_BIG_ENDIAN);
10331 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
10332
10333 for (i = 0; i < nunits; i++)
10334 for (j = 0; j < usize; j++)
10335 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
10336 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
10337 return force_reg (V16QImode, mask);
10338}
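/* For example, for V4SImode the mask built above is the byte selector
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, i.e. a byte
   reversal within each 32-bit element.  */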
10339
97e1ad78
JG
10340/* Implement MODES_TIEABLE_P. */
10341
10342bool
ef4bddc2 10343aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
10344{
10345 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
10346 return true;
10347
10348 /* We specifically want to allow elements of "structure" modes to
10349 be tieable to the structure. This more general condition allows
10350 other rarer situations too. */
10351 if (TARGET_SIMD
10352 && aarch64_vector_mode_p (mode1)
10353 && aarch64_vector_mode_p (mode2))
10354 return true;
10355
10356 return false;
10357}
10358
e2c75eea
JG
10359/* Return a new RTX holding the result of moving POINTER forward by
10360 AMOUNT bytes. */
10361
10362static rtx
10363aarch64_move_pointer (rtx pointer, int amount)
10364{
10365 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
10366
10367 return adjust_automodify_address (pointer, GET_MODE (pointer),
10368 next, amount);
10369}
10370
10371/* Return a new RTX holding the result of moving POINTER forward by the
10372 size of the mode it points to. */
10373
10374static rtx
10375aarch64_progress_pointer (rtx pointer)
10376{
10377 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
10378
10379 return aarch64_move_pointer (pointer, amount);
10380}
10381
10382/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
10383 MODE bytes. */
10384
10385static void
10386aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 10387 machine_mode mode)
e2c75eea
JG
10388{
10389 rtx reg = gen_reg_rtx (mode);
10390
10391 /* "Cast" the pointers to the correct mode. */
10392 *src = adjust_address (*src, mode, 0);
10393 *dst = adjust_address (*dst, mode, 0);
10394 /* Emit the memcpy. */
10395 emit_move_insn (reg, *src);
10396 emit_move_insn (*dst, reg);
10397 /* Move the pointers forward. */
10398 *src = aarch64_progress_pointer (*src);
10399 *dst = aarch64_progress_pointer (*dst);
10400}
10401
10402/* Expand movmem, as if from a __builtin_memcpy. Return true if
10403 we succeed, otherwise return false. */
10404
10405bool
10406aarch64_expand_movmem (rtx *operands)
10407{
10408 unsigned int n;
10409 rtx dst = operands[0];
10410 rtx src = operands[1];
10411 rtx base;
10412 bool speed_p = !optimize_function_for_size_p (cfun);
10413
10414 /* When optimizing for size, give a better estimate of the length of a
10415 memcpy call, but use the default otherwise. */
10416 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
10417
10418 /* We can't do anything smart if the amount to copy is not constant. */
10419 if (!CONST_INT_P (operands[2]))
10420 return false;
10421
10422 n = UINTVAL (operands[2]);
10423
10424 /* Try to keep the number of instructions low. For cases below 16 bytes we
10425 need to make at most two moves. For cases above 16 bytes it will be one
10426 move for each 16 byte chunk, then at most two additional moves. */
10427 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
10428 return false;
10429
10430 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10431 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
10432
10433 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
10434 src = adjust_automodify_address (src, VOIDmode, base, 0);
10435
10436 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
10437 1-byte chunk. */
10438 if (n < 4)
10439 {
10440 if (n >= 2)
10441 {
10442 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10443 n -= 2;
10444 }
10445
10446 if (n == 1)
10447 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10448
10449 return true;
10450 }
10451
10452 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
10453 4-byte chunk, partially overlapping with the previously copied chunk. */
10454 if (n < 8)
10455 {
10456 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10457 n -= 4;
10458 if (n > 0)
10459 {
10460 int move = n - 4;
10461
10462 src = aarch64_move_pointer (src, move);
10463 dst = aarch64_move_pointer (dst, move);
10464 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10465 }
10466 return true;
10467 }
10468
10469 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
10470 them, then (if applicable) an 8-byte chunk. */
10471 while (n >= 8)
10472 {
10473 if (n / 16)
10474 {
10475 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
10476 n -= 16;
10477 }
10478 else
10479 {
10480 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10481 n -= 8;
10482 }
10483 }
10484
10485 /* Finish the final bytes of the copy. We can always do this in one
10486 instruction. We either copy the exact amount we need, or partially
10487 overlap with the previous chunk we copied and copy 8-bytes. */
10488 if (n == 0)
10489 return true;
10490 else if (n == 1)
10491 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10492 else if (n == 2)
10493 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10494 else if (n == 4)
10495 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10496 else
10497 {
10498 if (n == 3)
10499 {
10500 src = aarch64_move_pointer (src, -1);
10501 dst = aarch64_move_pointer (dst, -1);
10502 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10503 }
10504 else
10505 {
10506 int move = n - 8;
10507
10508 src = aarch64_move_pointer (src, move);
10509 dst = aarch64_move_pointer (dst, move);
10510 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10511 }
10512 }
10513
10514 return true;
10515}
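/* As an illustration of the strategy above, a copy of 7 bytes becomes an
   SImode copy of bytes 0-3 followed by an overlapping SImode copy of
   bytes 3-6, and a copy of 35 bytes becomes two TImode copies of bytes
   0-31 followed by an overlapping SImode copy of bytes 31-34.  */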
10516
a3125fc2
CL
10517/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
10518
10519static unsigned HOST_WIDE_INT
10520aarch64_asan_shadow_offset (void)
10521{
10522 return (HOST_WIDE_INT_1 << 36);
10523}
10524
d3006da6 10525static bool
445d7826 10526aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
d3006da6
JG
10527 unsigned int align,
10528 enum by_pieces_operation op,
10529 bool speed_p)
10530{
10531 /* STORE_BY_PIECES can be used when copying a constant string, but
10532 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
10533 For now we always fail this and let the move_by_pieces code copy
10534 the string from read-only memory. */
10535 if (op == STORE_BY_PIECES)
10536 return false;
10537
10538 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
10539}
10540
5f3bc026
ZC
10541static enum machine_mode
10542aarch64_code_to_ccmode (enum rtx_code code)
10543{
10544 switch (code)
10545 {
10546 case NE:
10547 return CC_DNEmode;
10548
10549 case EQ:
10550 return CC_DEQmode;
10551
10552 case LE:
10553 return CC_DLEmode;
10554
10555 case LT:
10556 return CC_DLTmode;
10557
10558 case GE:
10559 return CC_DGEmode;
10560
10561 case GT:
10562 return CC_DGTmode;
10563
10564 case LEU:
10565 return CC_DLEUmode;
10566
10567 case LTU:
10568 return CC_DLTUmode;
10569
10570 case GEU:
10571 return CC_DGEUmode;
10572
10573 case GTU:
10574 return CC_DGTUmode;
10575
10576 default:
10577 return CCmode;
10578 }
10579}
10580
10581static rtx
10582aarch64_gen_ccmp_first (rtx *prep_seq, rtx *gen_seq,
10583 int code, tree treeop0, tree treeop1)
10584{
10585 enum machine_mode op_mode, cmp_mode, cc_mode;
10586 rtx op0, op1, cmp, target;
10587 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
10588 enum insn_code icode;
10589 struct expand_operand ops[4];
10590
10591 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) code);
10592 if (cc_mode == CCmode)
10593 return NULL_RTX;
10594
10595 start_sequence ();
10596 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
10597
10598 op_mode = GET_MODE (op0);
10599 if (op_mode == VOIDmode)
10600 op_mode = GET_MODE (op1);
10601
10602 switch (op_mode)
10603 {
10604 case QImode:
10605 case HImode:
10606 case SImode:
10607 cmp_mode = SImode;
10608 icode = CODE_FOR_cmpsi;
10609 break;
10610
10611 case DImode:
10612 cmp_mode = DImode;
10613 icode = CODE_FOR_cmpdi;
10614 break;
10615
10616 default:
10617 end_sequence ();
10618 return NULL_RTX;
10619 }
10620
10621 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
10622 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
10623 if (!op0 || !op1)
10624 {
10625 end_sequence ();
10626 return NULL_RTX;
10627 }
10628 *prep_seq = get_insns ();
10629 end_sequence ();
10630
10631 cmp = gen_rtx_fmt_ee ((enum rtx_code) code, cmp_mode, op0, op1);
10632 target = gen_rtx_REG (CCmode, CC_REGNUM);
10633
10634 create_output_operand (&ops[0], target, CCmode);
10635 create_fixed_operand (&ops[1], cmp);
10636 create_fixed_operand (&ops[2], op0);
10637 create_fixed_operand (&ops[3], op1);
10638
10639 start_sequence ();
10640 if (!maybe_expand_insn (icode, 4, ops))
10641 {
10642 end_sequence ();
10643 return NULL_RTX;
10644 }
10645 *gen_seq = get_insns ();
10646 end_sequence ();
10647
10648 return gen_rtx_REG (cc_mode, CC_REGNUM);
10649}
10650
10651static rtx
10652aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code,
10653 tree treeop0, tree treeop1, int bit_code)
10654{
10655 rtx op0, op1, cmp0, cmp1, target;
10656 enum machine_mode op_mode, cmp_mode, cc_mode;
10657 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
10658 enum insn_code icode = CODE_FOR_ccmp_andsi;
10659 struct expand_operand ops[6];
10660
10661 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) cmp_code);
10662 if (cc_mode == CCmode)
10663 return NULL_RTX;
10664
10665 push_to_sequence ((rtx_insn*) *prep_seq);
10666 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
10667
10668 op_mode = GET_MODE (op0);
10669 if (op_mode == VOIDmode)
10670 op_mode = GET_MODE (op1);
10671
10672 switch (op_mode)
10673 {
10674 case QImode:
10675 case HImode:
10676 case SImode:
10677 cmp_mode = SImode;
10678 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_andsi
10679 : CODE_FOR_ccmp_iorsi;
10680 break;
10681
10682 case DImode:
10683 cmp_mode = DImode;
10684 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_anddi
10685 : CODE_FOR_ccmp_iordi;
10686 break;
10687
10688 default:
10689 end_sequence ();
10690 return NULL_RTX;
10691 }
10692
10693 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
10694 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
10695 if (!op0 || !op1)
10696 {
10697 end_sequence ();
10698 return NULL_RTX;
10699 }
10700 *prep_seq = get_insns ();
10701 end_sequence ();
10702
10703 target = gen_rtx_REG (cc_mode, CC_REGNUM);
10704 cmp1 = gen_rtx_fmt_ee ((enum rtx_code) cmp_code, cmp_mode, op0, op1);
10705 cmp0 = gen_rtx_fmt_ee (NE, cmp_mode, prev, const0_rtx);
10706
10707 create_fixed_operand (&ops[0], prev);
10708 create_fixed_operand (&ops[1], target);
10709 create_fixed_operand (&ops[2], op0);
10710 create_fixed_operand (&ops[3], op1);
10711 create_fixed_operand (&ops[4], cmp0);
10712 create_fixed_operand (&ops[5], cmp1);
10713
10714 push_to_sequence ((rtx_insn*) *gen_seq);
10715 if (!maybe_expand_insn (icode, 6, ops))
10716 {
10717 end_sequence ();
10718 return NULL_RTX;
10719 }
10720
10721 *gen_seq = get_insns ();
10722 end_sequence ();
10723
10724 return target;
10725}
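/* The two hooks above let chained comparisons such as "a == 3 && b == 5"
   be expanded to a conditional-compare sequence along the lines of

	cmp	w0, 3
	ccmp	w1, 5, <nzcv>, eq

   where <nzcv> encodes the flag values to use when the first comparison
   already fails.  */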
10726
10727#undef TARGET_GEN_CCMP_FIRST
10728#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
10729
10730#undef TARGET_GEN_CCMP_NEXT
10731#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
10732
6a569cdd
KT
10733/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
10734 instruction fusion of some sort. */
10735
10736static bool
10737aarch64_macro_fusion_p (void)
10738{
10739 return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
10740}
10741
10742
10743/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
10744 should be kept together during scheduling. */
10745
10746static bool
10747aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
10748{
10749 rtx set_dest;
10750 rtx prev_set = single_set (prev);
10751 rtx curr_set = single_set (curr);
10752 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
10753 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
10754
10755 if (!aarch64_macro_fusion_p ())
10756 return false;
10757
10758 if (simple_sets_p
10759 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
10760 {
10761 /* We are trying to match:
10762 prev (mov) == (set (reg r0) (const_int imm16))
10763 curr (movk) == (set (zero_extract (reg r0)
10764 (const_int 16)
10765 (const_int 16))
10766 (const_int imm16_1)) */
10767
10768 set_dest = SET_DEST (curr_set);
10769
10770 if (GET_CODE (set_dest) == ZERO_EXTRACT
10771 && CONST_INT_P (SET_SRC (curr_set))
10772 && CONST_INT_P (SET_SRC (prev_set))
10773 && CONST_INT_P (XEXP (set_dest, 2))
10774 && INTVAL (XEXP (set_dest, 2)) == 16
10775 && REG_P (XEXP (set_dest, 0))
10776 && REG_P (SET_DEST (prev_set))
10777 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
10778 {
10779 return true;
10780 }
10781 }
10782
9bbe08fe
KT
10783 if (simple_sets_p
10784 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
10785 {
10786
10787 /* We're trying to match:
10788 prev (adrp) == (set (reg r1)
10789 (high (symbol_ref ("SYM"))))
10790 curr (add) == (set (reg r0)
10791 (lo_sum (reg r1)
10792 (symbol_ref ("SYM"))))
10793 Note that r0 need not necessarily be the same as r1, especially
10794 during pre-regalloc scheduling. */
10795
10796 if (satisfies_constraint_Ush (SET_SRC (prev_set))
10797 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10798 {
10799 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
10800 && REG_P (XEXP (SET_SRC (curr_set), 0))
10801 && REGNO (XEXP (SET_SRC (curr_set), 0))
10802 == REGNO (SET_DEST (prev_set))
10803 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
10804 XEXP (SET_SRC (curr_set), 1)))
10805 return true;
10806 }
10807 }
10808
cd0cb232
KT
10809 if (simple_sets_p
10810 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK))
10811 {
10812
10813 /* We're trying to match:
10814 prev (movk) == (set (zero_extract (reg r0)
10815 (const_int 16)
10816 (const_int 32))
10817 (const_int imm16_1))
10818 curr (movk) == (set (zero_extract (reg r0)
10819 (const_int 16)
10820 (const_int 48))
10821 (const_int imm16_2)) */
10822
10823 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
10824 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
10825 && REG_P (XEXP (SET_DEST (prev_set), 0))
10826 && REG_P (XEXP (SET_DEST (curr_set), 0))
10827 && REGNO (XEXP (SET_DEST (prev_set), 0))
10828 == REGNO (XEXP (SET_DEST (curr_set), 0))
10829 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
10830 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
10831 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
10832 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
10833 && CONST_INT_P (SET_SRC (prev_set))
10834 && CONST_INT_P (SET_SRC (curr_set)))
10835 return true;
10836
10837 }
d8354ad7
KT
10838 if (simple_sets_p
10839 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_LDR))
10840 {
10841 /* We're trying to match:
10842 prev (adrp) == (set (reg r0)
10843 (high (symbol_ref ("SYM"))))
10844 curr (ldr) == (set (reg r1)
10845 (mem (lo_sum (reg r0)
10846 (symbol_ref ("SYM")))))
10847 or
10848 curr (ldr) == (set (reg r1)
10849 (zero_extend (mem
10850 (lo_sum (reg r0)
10851 (symbol_ref ("SYM")))))) */
10852 if (satisfies_constraint_Ush (SET_SRC (prev_set))
10853 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10854 {
10855 rtx curr_src = SET_SRC (curr_set);
10856
10857 if (GET_CODE (curr_src) == ZERO_EXTEND)
10858 curr_src = XEXP (curr_src, 0);
10859
10860 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
10861 && REG_P (XEXP (XEXP (curr_src, 0), 0))
10862 && REGNO (XEXP (XEXP (curr_src, 0), 0))
10863 == REGNO (SET_DEST (prev_set))
10864 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
10865 XEXP (SET_SRC (prev_set), 0)))
10866 return true;
10867 }
10868 }
cd0cb232 10869
3759108f
AP
10870 if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
10871 && any_condjump_p (curr))
10872 {
10873 enum attr_type prev_type = get_attr_type (prev);
10874
10875 /* FIXME: this misses some instructions that are considered simple
10876 arithmetic for ThunderX. Simple shifts are missed here. */
10877 if (prev_type == TYPE_ALUS_SREG
10878 || prev_type == TYPE_ALUS_IMM
10879 || prev_type == TYPE_LOGICS_REG
10880 || prev_type == TYPE_LOGICS_IMM)
10881 return true;
10882 }
10883
6a569cdd
KT
10884 return false;
10885}
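/* In assembly terms the pairs matched above correspond to sequences such
   as "mov w0, 0x1234" followed by "movk w0, 0x5678, lsl 16" (MOV_MOVK),
   "adrp x0, sym" followed by "add x0, x0, :lo12:sym" (ADRP_ADD), two
   consecutive MOVKs writing bits 32-47 and 48-63 of the same register
   (MOVK_MOVK), and "adrp x0, sym" followed by "ldr w1, [x0, :lo12:sym]"
   (ADRP_LDR).  */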
10886
350013bc
BC
10887/* If MEM is in the form of [base+offset], extract the two parts
10888 of the address and store them in BASE and OFFSET, otherwise return false
10889 after clearing BASE and OFFSET. */
10890
10891bool
10892extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
10893{
10894 rtx addr;
10895
10896 gcc_assert (MEM_P (mem));
10897
10898 addr = XEXP (mem, 0);
10899
10900 if (REG_P (addr))
10901 {
10902 *base = addr;
10903 *offset = const0_rtx;
10904 return true;
10905 }
10906
10907 if (GET_CODE (addr) == PLUS
10908 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
10909 {
10910 *base = XEXP (addr, 0);
10911 *offset = XEXP (addr, 1);
10912 return true;
10913 }
10914
10915 *base = NULL_RTX;
10916 *offset = NULL_RTX;
10917
10918 return false;
10919}
10920
10921/* Types for scheduling fusion. */
10922enum sched_fusion_type
10923{
10924 SCHED_FUSION_NONE = 0,
10925 SCHED_FUSION_LD_SIGN_EXTEND,
10926 SCHED_FUSION_LD_ZERO_EXTEND,
10927 SCHED_FUSION_LD,
10928 SCHED_FUSION_ST,
10929 SCHED_FUSION_NUM
10930};
10931
10932/* If INSN is a load or store with an address in the form of [base+offset],
10933 extract the two parts and store them in BASE and OFFSET. Return the
10934 scheduling fusion type of this INSN. */
10935
10936static enum sched_fusion_type
10937fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
10938{
10939 rtx x, dest, src;
10940 enum sched_fusion_type fusion = SCHED_FUSION_LD;
10941
10942 gcc_assert (INSN_P (insn));
10943 x = PATTERN (insn);
10944 if (GET_CODE (x) != SET)
10945 return SCHED_FUSION_NONE;
10946
10947 src = SET_SRC (x);
10948 dest = SET_DEST (x);
10949
1f46bd52
AP
10950 if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
10951 && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
350013bc
BC
10952 return SCHED_FUSION_NONE;
10953
10954 if (GET_CODE (src) == SIGN_EXTEND)
10955 {
10956 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
10957 src = XEXP (src, 0);
10958 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10959 return SCHED_FUSION_NONE;
10960 }
10961 else if (GET_CODE (src) == ZERO_EXTEND)
10962 {
10963 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
10964 src = XEXP (src, 0);
10965 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10966 return SCHED_FUSION_NONE;
10967 }
10968
10969 if (GET_CODE (src) == MEM && REG_P (dest))
10970 extract_base_offset_in_addr (src, base, offset);
10971 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
10972 {
10973 fusion = SCHED_FUSION_ST;
10974 extract_base_offset_in_addr (dest, base, offset);
10975 }
10976 else
10977 return SCHED_FUSION_NONE;
10978
10979 if (*base == NULL_RTX || *offset == NULL_RTX)
10980 fusion = SCHED_FUSION_NONE;
10981
10982 return fusion;
10983}
10984
10985/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
10986
10987 Currently we only support fusing ldr and str instructions, so FUSION_PRI
10988 and PRI are only calculated for these instructions. For other instructions,
10989 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
10990 types of instruction fusion can be added by returning different priorities.
10991
10992 It's important that irrelevant instructions get the largest FUSION_PRI. */
10993
10994static void
10995aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
10996 int *fusion_pri, int *pri)
10997{
10998 int tmp, off_val;
10999 rtx base, offset;
11000 enum sched_fusion_type fusion;
11001
11002 gcc_assert (INSN_P (insn));
11003
11004 tmp = max_pri - 1;
11005 fusion = fusion_load_store (insn, &base, &offset);
11006 if (fusion == SCHED_FUSION_NONE)
11007 {
11008 *pri = tmp;
11009 *fusion_pri = tmp;
11010 return;
11011 }
11012
11013 /* Set FUSION_PRI according to fusion type and base register. */
11014 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
11015
11016 /* Calculate PRI. */
11017 tmp /= 2;
11018
11019 /* INSN with smaller offset goes first. */
11020 off_val = (int)(INTVAL (offset));
11021 if (off_val >= 0)
11022 tmp -= (off_val & 0xfffff);
11023 else
11024 tmp += ((- off_val) & 0xfffff);
11025
11026 *pri = tmp;
11027 return;
11028}
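/* For example, "ldr w1, [x2, 4]" and "ldr w3, [x2, 8]" receive the same
   FUSION_PRI (same fusion type and base register), while their PRI values
   order the smaller offset first, making them adjacent candidates for the
   ldp peepholes.  */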
11029
11030/* Given OPERANDS of consecutive load/store, check if we can merge
11031 them into ldp/stp. LOAD is true if they are load instructions.
11032 MODE is the mode of memory operands. */
11033
11034bool
11035aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
11036 enum machine_mode mode)
11037{
11038 HOST_WIDE_INT offval_1, offval_2, msize;
11039 enum reg_class rclass_1, rclass_2;
11040 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
11041
11042 if (load)
11043 {
11044 mem_1 = operands[1];
11045 mem_2 = operands[3];
11046 reg_1 = operands[0];
11047 reg_2 = operands[2];
11048 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
11049 if (REGNO (reg_1) == REGNO (reg_2))
11050 return false;
11051 }
11052 else
11053 {
11054 mem_1 = operands[0];
11055 mem_2 = operands[2];
11056 reg_1 = operands[1];
11057 reg_2 = operands[3];
11058 }
11059
bf84ac44
AP
11060 /* The mems cannot be volatile. */
11061 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
11062 return false;
11063
350013bc
BC
11064 /* Check if the addresses are in the form of [base+offset]. */
11065 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
11066 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
11067 return false;
11068 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
11069 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
11070 return false;
11071
11072 /* Check if the bases are the same. */
11073 if (!rtx_equal_p (base_1, base_2))
11074 return false;
11075
11076 offval_1 = INTVAL (offset_1);
11077 offval_2 = INTVAL (offset_2);
11078 msize = GET_MODE_SIZE (mode);
11079 /* Check if the offsets are consecutive. */
11080 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
11081 return false;
11082
11083 /* Check if the addresses are clobbered by load. */
11084 if (load)
11085 {
11086 if (reg_mentioned_p (reg_1, mem_1))
11087 return false;
11088
11089 /* In increasing order, the last load can clobber the address. */
11090 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
11091 return false;
11092 }
11093
11094 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
11095 rclass_1 = FP_REGS;
11096 else
11097 rclass_1 = GENERAL_REGS;
11098
11099 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
11100 rclass_2 = FP_REGS;
11101 else
11102 rclass_2 = GENERAL_REGS;
11103
11104 /* Check if the registers are of the same class. */
11105 if (rclass_1 != rclass_2)
11106 return false;
11107
11108 return true;
11109}
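/* For example, the consecutive loads "ldr w0, [x2]" and "ldr w1, [x2, 4]"
   satisfy the checks above and can be merged into "ldp w0, w1, [x2]".  */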
11110
11111/* Given OPERANDS of consecutive load/store, check if we can merge
11112 them into ldp/stp by adjusting the offset. LOAD is true if they
11113 are load instructions. MODE is the mode of memory operands.
11114
11115 Given below consecutive stores:
11116
11117 str w1, [xb, 0x100]
11118 str w1, [xb, 0x104]
11119 str w1, [xb, 0x108]
11120 str w1, [xb, 0x10c]
11121
11122 Though the offsets are out of the range supported by stp, we can
11123 still pair them after adjusting the offset, like:
11124
11125 add scratch, xb, 0x100
11126 stp w1, w1, [scratch]
11127 stp w1, w1, [scratch, 0x8]
11128
11129 The peephole patterns detecting this opportunity should guarantee
11130 the scratch register is available. */
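/* Working through the example above with SImode stores, the adjustment
   performed by aarch64_gen_adjusted_ldpstp below uses an stp offset
   limit of 4 * 0x40 = 0x100 bytes, so an offset of 0x100 is rebased with
   "add scratch, xb, 0x100", leaving residual offsets of 0 and 8 for the
   two resulting stp instructions.  */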
11131
11132bool
11133aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
11134 enum machine_mode mode)
11135{
11136 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
11137 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
11138 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
11139 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
11140
11141 if (load)
11142 {
11143 reg_1 = operands[0];
11144 mem_1 = operands[1];
11145 reg_2 = operands[2];
11146 mem_2 = operands[3];
11147 reg_3 = operands[4];
11148 mem_3 = operands[5];
11149 reg_4 = operands[6];
11150 mem_4 = operands[7];
11151 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
11152 && REG_P (reg_3) && REG_P (reg_4));
11153 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
11154 return false;
11155 }
11156 else
11157 {
11158 mem_1 = operands[0];
11159 reg_1 = operands[1];
11160 mem_2 = operands[2];
11161 reg_2 = operands[3];
11162 mem_3 = operands[4];
11163 reg_3 = operands[5];
11164 mem_4 = operands[6];
11165 reg_4 = operands[7];
11166 }
11167 /* Skip if the memory operand is by itself valid for ldp/stp. */
11168 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
11169 return false;
11170
bf84ac44
AP
11171 /* The mems cannot be volatile. */
11172 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
11173 || MEM_VOLATILE_P (mem_3) ||MEM_VOLATILE_P (mem_4))
11174 return false;
11175
350013bc
BC
11176 /* Check if the addresses are in the form of [base+offset]. */
11177 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
11178 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
11179 return false;
11180 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
11181 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
11182 return false;
11183 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
11184 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
11185 return false;
11186 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
11187 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
11188 return false;
11189
11190 /* Check if the bases are the same. */
11191 if (!rtx_equal_p (base_1, base_2)
11192 || !rtx_equal_p (base_2, base_3)
11193 || !rtx_equal_p (base_3, base_4))
11194 return false;
11195
11196 offval_1 = INTVAL (offset_1);
11197 offval_2 = INTVAL (offset_2);
11198 offval_3 = INTVAL (offset_3);
11199 offval_4 = INTVAL (offset_4);
11200 msize = GET_MODE_SIZE (mode);
11201 /* Check if the offsets are consecutive. */
11202 if ((offval_1 != (offval_2 + msize)
11203 || offval_1 != (offval_3 + msize * 2)
11204 || offval_1 != (offval_4 + msize * 3))
11205 && (offval_4 != (offval_3 + msize)
11206 || offval_4 != (offval_2 + msize * 2)
11207 || offval_4 != (offval_1 + msize * 3)))
11208 return false;
11209
11210 /* Check if the addresses are clobbered by load. */
11211 if (load)
11212 {
11213 if (reg_mentioned_p (reg_1, mem_1)
11214 || reg_mentioned_p (reg_2, mem_2)
11215 || reg_mentioned_p (reg_3, mem_3))
11216 return false;
11217
11218 /* In increasing order, the last load can clobber the address. */
11219 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
11220 return false;
11221 }
11222
11223 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
11224 rclass_1 = FP_REGS;
11225 else
11226 rclass_1 = GENERAL_REGS;
11227
11228 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
11229 rclass_2 = FP_REGS;
11230 else
11231 rclass_2 = GENERAL_REGS;
11232
11233 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
11234 rclass_3 = FP_REGS;
11235 else
11236 rclass_3 = GENERAL_REGS;
11237
11238 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
11239 rclass_4 = FP_REGS;
11240 else
11241 rclass_4 = GENERAL_REGS;
11242
11243 /* Check if the registers are of the same class (ldp/stp cannot mix general and FP registers). */
11244 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
11245 return false;
11246
11247 return true;
11248}
11249
11250/* Given OPERANDS of consecutive load/store, this function pairs them
11251 into ldp/stp after adjusting the offset. It depends on the fact
11252 that addresses of load/store instructions are in increasing order.
11253 MODE is the mode of memory operands. CODE is the rtl operator
11254 which should be applied to all memory operands; it is SIGN_EXTEND,
11255 ZERO_EXTEND or UNKNOWN. */
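/* OPERANDS[8] holds the scratch register supplied by the peephole
   pattern; it is set to the common base plus the adjusted-out part of
   the offset, and the new memory references are rebased on it.  */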
11256
11257bool
11258aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
11259 enum machine_mode mode, RTX_CODE code)
11260{
11261 rtx base, offset, t1, t2;
11262 rtx mem_1, mem_2, mem_3, mem_4;
11263 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
11264
11265 if (load)
11266 {
11267 mem_1 = operands[1];
11268 mem_2 = operands[3];
11269 mem_3 = operands[5];
11270 mem_4 = operands[7];
11271 }
11272 else
11273 {
11274 mem_1 = operands[0];
11275 mem_2 = operands[2];
11276 mem_3 = operands[4];
11277 mem_4 = operands[6];
11278 gcc_assert (code == UNKNOWN);
11279 }
11280
11281 extract_base_offset_in_addr (mem_1, &base, &offset);
11282 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
11283
11284 /* Adjust the offset so that it can fit in an ldp/stp instruction. */
11285 msize = GET_MODE_SIZE (mode);
11286 stp_off_limit = msize * 0x40;
11287 off_val = INTVAL (offset);
11288 abs_off = (off_val < 0) ? -off_val : off_val;
11289 new_off = abs_off % stp_off_limit;
11290 adj_off = abs_off - new_off;
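  /* For example, for SImode accesses (msize == 4) stp_off_limit is 0x100;
     an original offset of 0x104 is split into adj_off == 0x100, which is
     added to the base, and new_off == 4, which is kept in the paired
     accesses.  */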
11291
11292 /* Further adjust to make sure all offsets are OK. */
11293 if ((new_off + msize * 2) >= stp_off_limit)
11294 {
11295 adj_off += stp_off_limit;
11296 new_off -= stp_off_limit;
11297 }
11298
11299 /* Make sure the adjustment fits the unsigned 12-bit ADD/SUB immediate. */
11300 if (adj_off >= 0x1000)
11301 return false;
11302
11303 if (off_val < 0)
11304 {
11305 adj_off = -adj_off;
11306 new_off = -new_off;
11307 }
11308
11309 /* Create new memory references. */
11310 mem_1 = change_address (mem_1, VOIDmode,
11311 plus_constant (DImode, operands[8], new_off));
11312
11313 /* Check if the adjusted address is OK for ldp/stp. */
11314 if (!aarch64_mem_pair_operand (mem_1, mode))
11315 return false;
11316
11317 msize = GET_MODE_SIZE (mode);
11318 mem_2 = change_address (mem_2, VOIDmode,
11319 plus_constant (DImode,
11320 operands[8],
11321 new_off + msize));
11322 mem_3 = change_address (mem_3, VOIDmode,
11323 plus_constant (DImode,
11324 operands[8],
11325 new_off + msize * 2));
11326 mem_4 = change_address (mem_4, VOIDmode,
11327 plus_constant (DImode,
11328 operands[8],
11329 new_off + msize * 3));
11330
11331 if (code == ZERO_EXTEND)
11332 {
11333 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
11334 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
11335 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
11336 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
11337 }
11338 else if (code == SIGN_EXTEND)
11339 {
11340 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
11341 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
11342 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
11343 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
11344 }
11345
11346 if (load)
11347 {
11348 operands[1] = mem_1;
11349 operands[3] = mem_2;
11350 operands[5] = mem_3;
11351 operands[7] = mem_4;
11352 }
11353 else
11354 {
11355 operands[0] = mem_1;
11356 operands[2] = mem_2;
11357 operands[4] = mem_3;
11358 operands[6] = mem_4;
11359 }
11360
11361 /* Emit adjusting instruction. */
11362 emit_insn (gen_rtx_SET (VOIDmode, operands[8],
11363 plus_constant (DImode, base, adj_off)));
11364 /* Emit ldp/stp instructions. */
11365 t1 = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
11366 t2 = gen_rtx_SET (VOIDmode, operands[2], operands[3]);
11367 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
11368 t1 = gen_rtx_SET (VOIDmode, operands[4], operands[5]);
11369 t2 = gen_rtx_SET (VOIDmode, operands[6], operands[7]);
11370 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
11371 return true;
11372}
11373
11374#undef TARGET_ADDRESS_COST
11375#define TARGET_ADDRESS_COST aarch64_address_cost
11376
11377/* This hook determines whether unnamed bitfields affect the alignment
11378 of the containing structure. The hook returns true if the structure
11379 should inherit the alignment requirements of an unnamed bitfield's
11380 type. */
11381#undef TARGET_ALIGN_ANON_BITFIELD
11382#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
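/* For example, with this hook returning true a structure such as

     struct s { char c; int : 4; char d; };

   takes the 4-byte alignment of "int" from the unnamed bit-field,
   rather than the 1-byte alignment its named members alone would
   require.  */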
11383
11384#undef TARGET_ASM_ALIGNED_DI_OP
11385#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
11386
11387#undef TARGET_ASM_ALIGNED_HI_OP
11388#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
11389
11390#undef TARGET_ASM_ALIGNED_SI_OP
11391#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
11392
11393#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
11394#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
11395 hook_bool_const_tree_hwi_hwi_const_tree_true
11396
11397#undef TARGET_ASM_FILE_START
11398#define TARGET_ASM_FILE_START aarch64_start_file
11399
11400#undef TARGET_ASM_OUTPUT_MI_THUNK
11401#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
11402
11403#undef TARGET_ASM_SELECT_RTX_SECTION
11404#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
11405
11406#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
11407#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
11408
11409#undef TARGET_BUILD_BUILTIN_VA_LIST
11410#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
11411
11412#undef TARGET_CALLEE_COPIES
11413#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
11414
11415#undef TARGET_CAN_ELIMINATE
11416#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
11417
11418#undef TARGET_CANNOT_FORCE_CONST_MEM
11419#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
11420
11421#undef TARGET_CONDITIONAL_REGISTER_USAGE
11422#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
11423
11424/* Only the least significant bit is used for initialization guard
11425 variables. */
11426#undef TARGET_CXX_GUARD_MASK_BIT
11427#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
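/* As a rough sketch, the inline check emitted for a guarded static
   initialization then only tests bit 0 of the guard object, along the
   lines of:

     if ((guard & 1) == 0)
       __cxa_guard_acquire (&guard);
*/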
11428
11429#undef TARGET_C_MODE_FOR_SUFFIX
11430#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
11431
11432#ifdef TARGET_BIG_ENDIAN_DEFAULT
11433#undef TARGET_DEFAULT_TARGET_FLAGS
11434#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
11435#endif
11436
11437#undef TARGET_CLASS_MAX_NREGS
11438#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
11439
11440#undef TARGET_BUILTIN_DECL
11441#define TARGET_BUILTIN_DECL aarch64_builtin_decl
11442
11443#undef TARGET_EXPAND_BUILTIN
11444#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
11445
11446#undef TARGET_EXPAND_BUILTIN_VA_START
11447#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
11448
11449#undef TARGET_FOLD_BUILTIN
11450#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
11451
11452#undef TARGET_FUNCTION_ARG
11453#define TARGET_FUNCTION_ARG aarch64_function_arg
11454
11455#undef TARGET_FUNCTION_ARG_ADVANCE
11456#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
11457
11458#undef TARGET_FUNCTION_ARG_BOUNDARY
11459#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
11460
11461#undef TARGET_FUNCTION_OK_FOR_SIBCALL
11462#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
11463
11464#undef TARGET_FUNCTION_VALUE
11465#define TARGET_FUNCTION_VALUE aarch64_function_value
11466
11467#undef TARGET_FUNCTION_VALUE_REGNO_P
11468#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
11469
11470#undef TARGET_FRAME_POINTER_REQUIRED
11471#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
11472
11473#undef TARGET_GIMPLE_FOLD_BUILTIN
11474#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
11475
11476#undef TARGET_GIMPLIFY_VA_ARG_EXPR
11477#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
11478
11479#undef TARGET_INIT_BUILTINS
11480#define TARGET_INIT_BUILTINS aarch64_init_builtins
11481
11482#undef TARGET_LEGITIMATE_ADDRESS_P
11483#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
11484
11485#undef TARGET_LEGITIMATE_CONSTANT_P
11486#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
11487
11488#undef TARGET_LIBGCC_CMP_RETURN_MODE
11489#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
11490
11491#undef TARGET_LRA_P
11492#define TARGET_LRA_P hook_bool_void_true
11493
11494#undef TARGET_MANGLE_TYPE
11495#define TARGET_MANGLE_TYPE aarch64_mangle_type
11496
11497#undef TARGET_MEMORY_MOVE_COST
11498#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
11499
11500#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
11501#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
11502
11503#undef TARGET_MUST_PASS_IN_STACK
11504#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
11505
11506/* This target hook should return true if accesses to volatile bitfields
11507 should use the narrowest mode possible. It should return false if these
11508 accesses should use the bitfield container type. */
11509#undef TARGET_NARROW_VOLATILE_BITFIELD
11510#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
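/* For example, given

     struct s { volatile int flag : 1; } *p;

   a read of p->flag is performed with a 32-bit access to the whole
   "int" container rather than with a narrower single-byte load.  */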
11511
11512#undef TARGET_OPTION_OVERRIDE
11513#define TARGET_OPTION_OVERRIDE aarch64_override_options
11514
11515#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
11516#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
11517 aarch64_override_options_after_change
11518
11519#undef TARGET_PASS_BY_REFERENCE
11520#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
11521
11522#undef TARGET_PREFERRED_RELOAD_CLASS
11523#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
11524
11525#undef TARGET_SCHED_REASSOCIATION_WIDTH
11526#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
11527
11528#undef TARGET_SECONDARY_RELOAD
11529#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
11530
11531#undef TARGET_SHIFT_TRUNCATION_MASK
11532#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
11533
11534#undef TARGET_SETUP_INCOMING_VARARGS
11535#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
11536
11537#undef TARGET_STRUCT_VALUE_RTX
11538#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
11539
11540#undef TARGET_REGISTER_MOVE_COST
11541#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
11542
11543#undef TARGET_RETURN_IN_MEMORY
11544#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
11545
11546#undef TARGET_RETURN_IN_MSB
11547#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
11548
11549#undef TARGET_RTX_COSTS
11550#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
11551
11552#undef TARGET_SCHED_ISSUE_RATE
11553#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
11554
11555#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
11556#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
11557 aarch64_sched_first_cycle_multipass_dfa_lookahead
11558
11559#undef TARGET_TRAMPOLINE_INIT
11560#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
11561
11562#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
11563#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
11564
11565#undef TARGET_VECTOR_MODE_SUPPORTED_P
11566#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
11567
11568#undef TARGET_ARRAY_MODE_SUPPORTED_P
11569#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
11570
11571#undef TARGET_VECTORIZE_ADD_STMT_COST
11572#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
11573
11574#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
11575#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
11576 aarch64_builtin_vectorization_cost
11577
11578#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
11579#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
11580
11581#undef TARGET_VECTORIZE_BUILTINS
11582#define TARGET_VECTORIZE_BUILTINS
11583
11584#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
11585#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
11586 aarch64_builtin_vectorized_function
11587
11588#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
11589#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
11590 aarch64_autovectorize_vector_sizes
11591
11592#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
11593#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
11594 aarch64_atomic_assign_expand_fenv
11595
11596/* Section anchor support. */
11597
11598#undef TARGET_MIN_ANCHOR_OFFSET
11599#define TARGET_MIN_ANCHOR_OFFSET -256
11600
11601/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
11602 byte offset; we can do much more for larger data types, but have no way
11603 to determine the size of the access. We assume accesses are aligned. */
11604#undef TARGET_MAX_ANCHOR_OFFSET
11605#define TARGET_MAX_ANCHOR_OFFSET 4095
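/* For example, a byte access has an unsigned 12-bit scaled immediate, so
   "ldrb w0, [xN, 4095]" is the furthest positive anchored access that is
   always encodable, while the -256 minimum above matches the signed
   9-bit offset of the unscaled ldur/stur forms.  */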
11606
11607#undef TARGET_VECTOR_ALIGNMENT
11608#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
11609
11610#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
11611#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
11612 aarch64_simd_vector_alignment_reachable
11613
11614/* vec_perm support. */
11615
11616#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
11617#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
11618 aarch64_vectorize_vec_perm_const_ok
11619
11620
11621#undef TARGET_FIXED_CONDITION_CODE_REGS
11622#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
11623
11624#undef TARGET_FLAGS_REGNUM
11625#define TARGET_FLAGS_REGNUM CC_REGNUM
11626
11627#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
11628#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
11629
11630#undef TARGET_ASAN_SHADOW_OFFSET
11631#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
11632
11633#undef TARGET_LEGITIMIZE_ADDRESS
11634#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
11635
11636#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
11637#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
11638 aarch64_use_by_pieces_infrastructure_p
11639
11640#undef TARGET_CAN_USE_DOLOOP_P
11641#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
11642
11643#undef TARGET_SCHED_MACRO_FUSION_P
11644#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
11645
11646#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
11647#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
11648
11649#undef TARGET_SCHED_FUSION_PRIORITY
11650#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
11651
11652struct gcc_target targetm = TARGET_INITIALIZER;
11653
11654#include "gt-aarch64.h"