/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2015 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "rtl.h"
#include "df.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "regs.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "output.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "reload.h"
#include "toplev.h"
#include "target.h"
#include "targhooks.h"
#include "tm_p.h"
#include "recog.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "optabs.h"
#include "dwarf2.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "cortex-a57-fma-steering.h"

/* This file should be included last.  */
#include "target-def.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC:
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
                                                     const_tree,
                                                     machine_mode *, int *,
                                                     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                                 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

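/* Each AARCH64_FUSION_PAIR entry in aarch64-fusion-pairs.def (and each
   AARCH64_EXTRA_TUNING_OPTION entry in aarch64-tuning-flags.def below)
   expands to one { name, flag } row of the corresponding table.  */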
#define AARCH64_FUSION_PAIR(name, internal_name, y) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};
#undef AARCH64_FUSION_PAIR

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name, y) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};
#undef AARCH64_EXTRA_TUNING_OPTION

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
  {
    0, /* hi  */
    0, /* si  */
    0, /* di  */
    0, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_extend  */
  0 /* imm_offset  */
};

static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_extend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  1, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  1, /* register_extend  */
  0, /* imm_offset  */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP  */
  2, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  2 /* FP2FP  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_stmt_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Costs for vector insn classes for Cortex-A57.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_stmt_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Costs for vector insn classes for X-Gene 1.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_stmt_cost  */
  4, /* vec_to_scalar_cost  */
  4, /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2, /* vec_unalign_store_cost  */
  2, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  2, /* Predictable.  */
  2 /* Unpredictable.  */
};

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  8, /* function_align.  */
  8, /* jump_align.  */
  4, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  8, /* function_align.  */
  8, /* jump_align.  */
  4, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  16, /* function_align.  */
  8, /* jump_align.  */
  4, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags.  */
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  16, /* function_align.  */
  8, /* jump_align.  */
  4, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
  8, /* function_align.  */
  8, /* jump_align.  */
  8, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  16, /* function_align.  */
  8, /* jump_align.  */
  16, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { NULL, NULL }
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
  {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, NULL, 0, 0, NULL}
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, NULL, 0, 0, NULL}
};

/* Target specification.  These are populated as command-line arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};

/* Used to track the size of an address when generating a pre/post
   increment address.  */
static machine_mode aarch64_memory_reference_mode;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

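/* A bitmask immediate is a pattern built from a power-of-two-sized
   element (2 to 64 bits) containing a contiguous run of set bits,
   replicated across the register and possibly rotated; 5334 is the
   number of distinct 64-bit values of that form.  */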
#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

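/* The condition codes above come in positive/negative pairs (EQ/NE,
   CS/CC, ...), so a code is inverted by flipping its low bit.  */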
#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

void
aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
{
  const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
  if (TARGET_GENERAL_REGS_ONLY)
    error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
  else
    error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg);
}

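/* Implement TARGET_MIN_DIVISIONS_FOR_RECIP_MUL: the number of divisions
   by the same divisor that must appear before they are replaced by
   multiplications by the reciprocal.  */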
static unsigned int
aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}

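/* Implement TARGET_SCHED_REASSOCIATION_WIDTH: how many parallel
   computation chains reassociation should aim to expose for the given
   operation class, taken from the active tuning structure.  */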
static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
                             enum machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params.vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params.int_reassoc_width;
  if (FLOAT_MODE_P (mode))
    return aarch64_tune_params.fp_reassoc_width;
  return 1;
}

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
         || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
                                unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Implement HARD_REGNO_NREGS.  */
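/* For FP registers the count is in 128-bit V-registers (for example,
   OImode occupies two of them); everything else is counted in 64-bit
   X-registers.  */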

int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}

/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
        return
          (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
        return 1;
    }

  return 0;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
                                     machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
        return mode;
      else
        return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}

/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
                                rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

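  /* EXTRACT_IMM encodes the width of the extend in its high bits and the
     shift amount in its low three bits; for example, extract_val == 18
     with mult_val == 4 matches a 16-bit extend shifted left by 2, since
     18 == 16 + 2 and 4 == 1 << 2.  */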
  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* We allow LO_SUMs in our legitimate addresses so that combine can take
   care of combining addresses where necessary, but for generation
   purposes the address is generated as:

     RTL                                Absolute
     tmp = hi (symbol_ref);             adrp  x1, foo
     dest = lo_sum (tmp, symbol_ref);   add   dest, x1, :lo_12:foo
                                        nop

     PIC                                TLS
     adrp x1, :got:foo                  adrp  tmp, :tlsgd:foo
     ldr  x1, [:got_lo12:foo]           add   dest, tmp, :tlsgd_lo12:foo
                                        bl    __tls_get_addr
                                        nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
     adrp tmp, :tlsgd:imm
     add  dest, tmp, #:tlsgd_lo12:imm
     bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
     adrp dest, :tlsdesc:imm
     ldr  tmp, [dest, #:tlsdesc_lo12:imm]
     add  dest, dest, #:tlsdesc_lo12:imm
     blr  tmp
     mrs  tp, tpidr_el0
     add  dest, dest, tp

   Initial Exec:
     mrs  tp, tpidr_el0
     adrp tmp, :gottprel:imm
     ldr  dest, [tmp, #:gottprel_lo12:imm]
     add  dest, dest, tp

   Local Exec:
     mrs  tp, tpidr_el0
     add  t0, tp, #:tprel_hi12:imm, lsl #12
     add  t0, t0, #:tprel_lo12_nc:imm
*/

static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
                                   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
        /* In ILP32, the mode of dest can be either SImode or DImode.  */
        rtx tmp_reg = dest;
        machine_mode mode = GET_MODE (dest);

        gcc_assert (mode == Pmode || mode == ptr_mode);

        if (can_create_pseudo_p ())
          tmp_reg = gen_reg_rtx (mode);

        emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
        emit_insn (gen_add_losym (dest, tmp_reg, imm));
        return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (dest, imm));
      return;

    case SYMBOL_SMALL_GOT_28K:
      {
        machine_mode mode = GET_MODE (dest);
        rtx gp_rtx = pic_offset_table_rtx;

        /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
           here before RTL expansion.  Tree IVOPTs will generate RTL patterns
           to decide rtx costs, in which case pic_offset_table_rtx is not
           initialized.  In that case there is no need to generate the first
           adrp instruction, as the final cost for global variable access is
           one instruction.  */
        if (gp_rtx != NULL)
          {
            /* -fpic for -mcmodel=small allows a 32K GOT table size (but
               since we use the page base as the GOT base, the first page may
               be wasted; in the worst scenario there is only 28K of space
               for the GOT).

               The generated instruction sequence for accessing a global
               variable is:

                    ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]

               Only one instruction is needed.  But we must initialize
               pic_offset_table_rtx properly.  We generate an initialization
               insn for every global access, and allow CSE to remove all
               redundant copies.

               The final instruction sequence will look like the following
               for multiple global variable accesses:

                    adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_

                    ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
                    ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
                    ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
                    ...  */

            rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
            crtl->uses_pic_offset_table = 1;
            emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));

            if (mode != GET_MODE (gp_rtx))
              gp_rtx = simplify_gen_subreg (mode, gp_rtx, GET_MODE (gp_rtx), 0);
          }

        if (mode == ptr_mode)
          {
            if (mode == DImode)
              emit_insn (gen_ldr_got_small_28k_di (dest, gp_rtx, imm));
            else
              emit_insn (gen_ldr_got_small_28k_si (dest, gp_rtx, imm));
          }
        else
          {
            gcc_assert (mode == Pmode);
            emit_insn (gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm));
          }

        return;
      }

    case SYMBOL_SMALL_GOT_4G:
      {
        /* In ILP32, the mode of dest can be either SImode or DImode,
           while the got entry is always of SImode size.  The mode of
           dest depends on how dest is used: if dest is assigned to a
           pointer (e.g. in the memory), it has SImode; it may have
           DImode if dest is dereferenced to access the memory.
           This is why we have to handle three different ldr_got_small
           patterns here (two patterns for ILP32).  */
        rtx tmp_reg = dest;
        machine_mode mode = GET_MODE (dest);

        if (can_create_pseudo_p ())
          tmp_reg = gen_reg_rtx (mode);

        emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
        if (mode == ptr_mode)
          {
            if (mode == DImode)
              emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
            else
              emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
          }
        else
          {
            gcc_assert (mode == Pmode);
            emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
          }

        return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
        rtx_insn *insns;
        rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

        start_sequence ();
        aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
        insns = get_insns ();
        end_sequence ();

        RTL_CONST_CALL_P (insns) = 1;
        emit_libcall_block (insns, dest, result, imm);
        return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
        machine_mode mode = GET_MODE (dest);
        rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
        rtx tp;

        gcc_assert (mode == Pmode || mode == ptr_mode);

        /* In ILP32, the got entry is always of SImode size.  Unlike
           small GOT, the dest is fixed at reg 0.  */
        if (TARGET_ILP32)
          emit_insn (gen_tlsdesc_small_si (imm));
        else
          emit_insn (gen_tlsdesc_small_di (imm));
        tp = aarch64_load_tp (NULL);

        if (mode != Pmode)
          tp = gen_lowpart (mode, tp);

        emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
        /* In ILP32, the mode of dest can be either SImode or DImode,
           while the got entry is always of SImode size.  The mode of
           dest depends on how dest is used: if dest is assigned to a
           pointer (e.g. in the memory), it has SImode; it may have
           DImode if dest is dereferenced to access the memory.
           This is why we have to handle three different tlsie_small
           patterns here (two patterns for ILP32).  */
        machine_mode mode = GET_MODE (dest);
        rtx tmp_reg = gen_reg_rtx (mode);
        rtx tp = aarch64_load_tp (NULL);

        if (mode == ptr_mode)
          {
            if (mode == DImode)
              emit_insn (gen_tlsie_small_di (tmp_reg, imm));
            else
              {
                emit_insn (gen_tlsie_small_si (tmp_reg, imm));
                tp = gen_lowpart (mode, tp);
              }
          }
        else
          {
            gcc_assert (mode == Pmode);
            emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
          }

        emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_TLSLE:
      {
        rtx tp = aarch64_load_tp (NULL);

        if (GET_MODE (dest) != Pmode)
          tp = gen_lowpart (GET_MODE (dest), tp);

        emit_insn (gen_tlsle (dest, tp, imm));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    default:
      gcc_unreachable ();
    }
}

/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
          ? emit_move_insn (dest, src)
          : emit_move_insn_1 (dest, src));
}

/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
        {
          src_lo = gen_lowpart (word_mode, src);
          src_hi = gen_highpart (word_mode, src);

          if (mode == TImode)
            {
              emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
              emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
            }
          else
            {
              emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
              emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
            }
          return;
        }
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
        {
          dst_lo = gen_lowpart (word_mode, dst);
          dst_hi = gen_highpart (word_mode, dst);

          if (mode == TImode)
            {
              emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
              emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
            }
          else
            {
              emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
              emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
            }
          return;
        }
    }

  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
}

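/* Return true if a 128-bit move from SRC to DST needs to be split into
   64-bit halves; everything except an FP-register to FP-register copy
   does.  */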
bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
          || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}

/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  machine_mode src_mode = GET_MODE (src1);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
        {
        case V8QImode:
          gen = gen_aarch64_simd_combinev8qi;
          break;
        case V4HImode:
          gen = gen_aarch64_simd_combinev4hi;
          break;
        case V2SImode:
          gen = gen_aarch64_simd_combinev2si;
          break;
        case V2SFmode:
          gen = gen_aarch64_simd_combinev2sf;
          break;
        case DImode:
          gen = gen_aarch64_simd_combinedi;
          break;
        case DFmode:
          gen = gen_aarch64_simd_combinedf;
          break;
        default:
          gcc_unreachable ();
        }

      emit_insn (gen (dst, src1, src2));
      return;
    }
}

/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  machine_mode src_mode = GET_MODE (src);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
        {
        case V16QImode:
          gen = gen_aarch64_split_simd_movv16qi;
          break;
        case V8HImode:
          gen = gen_aarch64_split_simd_movv8hi;
          break;
        case V4SImode:
          gen = gen_aarch64_split_simd_movv4si;
          break;
        case V2DImode:
          gen = gen_aarch64_split_simd_movv2di;
          break;
        case V4SFmode:
          gen = gen_aarch64_split_simd_movv4sf;
          break;
        case V2DFmode:
          gen = gen_aarch64_split_simd_movv2df;
          break;
        default:
          gcc_unreachable ();
        }

      emit_insn (gen (dst, src));
      return;
    }
}

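/* Force VALUE into a register of MODE: a fresh pseudo when that is
   possible, otherwise the fixed temporary X.  */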
static rtx
aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}

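/* Return REG + OFFSET, materializing any part of OFFSET that is not
   directly encodable in an add instruction into TEMP first.  */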
static rtx
aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
         might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
                                     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}

static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
                                machine_mode mode)
{
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match, first_not_ffff_match;
  int num_insns = 0;

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      if (generate)
        emit_insn (gen_rtx_SET (dest, imm));
      num_insns++;
      return num_insns;
    }

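  /* A 32-bit immediate can always be built with a MOV of the low half
     followed by a MOVK of the high half; for example, 0x12345678 becomes
     mov w0, 0x5678; movk w0, 0x1234, lsl 16.  */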
  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
         in two; so don't mess around looking for sequences that don't buy
         us anything.  */
      if (generate)
        {
          emit_insn (gen_rtx_SET (dest, GEN_INT (INTVAL (imm) & 0xffff)));
          emit_insn (gen_insv_immsi (dest, GEN_INT (16),
                                     GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
        }
      num_insns += 2;
      return num_insns;
    }

  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;
  first_not_ffff_match = -1;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == mask)
        one_match++;
      else
        {
          if (first_not_ffff_match < 0)
            first_not_ffff_match = i;
          if ((val & mask) == 0)
            zero_match++;
        }
    }

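  /* one_match and zero_match now count how many 16-bit quarters of VAL
     are all-ones and all-zeros respectively; first_not_ffff_match is the
     bit position of the lowest quarter that is not all-ones.  */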
  if (one_match == 2)
    {
      /* Set one of the quarters and then insert back into result.  */
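      /* Forcing the first non-0xffff quarter to all-ones leaves at most
         one quarter differing from 0xffff, so VAL | MASK is reachable
         with a single MOVN; the MOVK emitted below then restores the
         original quarter.  */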
      mask = 0xffffll << first_not_ffff_match;
      if (generate)
        {
          emit_insn (gen_rtx_SET (dest, GEN_INT (val | mask)));
          emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
                                     GEN_INT ((val >> first_not_ffff_match)
                                              & 0xffff)));
        }
      num_insns += 2;
      return num_insns;
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
        {
          if (generate)
            {
              subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
              emit_insn (gen_rtx_SET (subtarget, GEN_INT (val & mask)));
              emit_insn (gen_adddi3 (dest, subtarget,
                                     GEN_INT (val - (val & mask))));
            }
          num_insns += 2;
          return num_insns;
        }
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
        {
          if (generate)
            {
              subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
              emit_insn (gen_rtx_SET (subtarget,
                                      GEN_INT ((val + comp) & mask)));
              emit_insn (gen_adddi3 (dest, subtarget,
                                     GEN_INT (val - ((val + comp) & mask))));
            }
          num_insns += 2;
          return num_insns;
        }
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
        {
          if (generate)
            {
              subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
              emit_insn (gen_rtx_SET (subtarget,
                                      GEN_INT ((val - comp) | ~mask)));
              emit_insn (gen_adddi3 (dest, subtarget,
                                     GEN_INT (val - ((val - comp) | ~mask))));
            }
          num_insns += 2;
          return num_insns;
        }
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
        {
          if (generate)
            {
              subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
              emit_insn (gen_rtx_SET (subtarget, GEN_INT (val | ~mask)));
              emit_insn (gen_adddi3 (dest, subtarget,
                                     GEN_INT (val - (val | ~mask))));
            }
          num_insns += 2;
          return num_insns;
        }
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
          || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
        {
          if (generate)
            {
              subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
              emit_insn (gen_rtx_SET (subtarget,
                                      GEN_INT (aarch64_bitmasks[i])));
              emit_insn (gen_adddi3 (dest, subtarget,
                                     GEN_INT (val - aarch64_bitmasks[i])));
            }
          num_insns += 2;
          return num_insns;
        }

      for (j = 0; j < 64; j += 16, mask <<= 16)
        {
          if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
            {
              if (generate)
                {
                  emit_insn (gen_rtx_SET (dest,
                                          GEN_INT (aarch64_bitmasks[i])));
                  emit_insn (gen_insv_immdi (dest, GEN_INT (j),
                                             GEN_INT ((val >> j) & 0xffff)));
                }
              num_insns += 2;
              return num_insns;
            }
        }
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
        {
          int j;

          for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
            if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
              {
                if (generate)
                  {
                    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
                    emit_insn (gen_rtx_SET (subtarget,
                                            GEN_INT (aarch64_bitmasks[i])));
                    emit_insn (gen_iordi3 (dest, subtarget,
                                           GEN_INT (aarch64_bitmasks[j])));
                  }
                num_insns += 2;
                return num_insns;
              }
        }
      else if ((val & aarch64_bitmasks[i]) == val)
        {
          int j;

          for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
            if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
              {
                if (generate)
                  {
                    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
                    emit_insn (gen_rtx_SET (subtarget,
                                            GEN_INT (aarch64_bitmasks[j])));
                    emit_insn (gen_anddi3 (dest, subtarget,
                                           GEN_INT (aarch64_bitmasks[i])));
                  }
                num_insns += 2;
                return num_insns;
              }
        }
    }

  if (one_match > zero_match)
    {
      /* Set either first three quarters or all but the third.  */
      mask = 0xffffll << (16 - first_not_ffff_match);
      if (generate)
        emit_insn (gen_rtx_SET (dest,
                                GEN_INT (val | mask | 0xffffffff00000000ull)));
      num_insns++;

      /* Now insert other two quarters.  */
      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
           i < 64; i += 16, mask <<= 16)
        {
          if ((val & mask) != mask)
            {
              if (generate)
                emit_insn (gen_insv_immdi (dest, GEN_INT (i),
                                           GEN_INT ((val >> i) & 0xffff)));
              num_insns++;
            }
        }
      return num_insns;
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
        {
          if (first)
            {
              if (generate)
                emit_insn (gen_rtx_SET (dest, GEN_INT (val & mask)));
              num_insns++;
              first = false;
            }
          else
            {
              if (generate)
                emit_insn (gen_insv_immdi (dest, GEN_INT (i),
                                           GEN_INT ((val >> i) & 0xffff)));
              num_insns++;
            }
        }
    }

  return num_insns;
}

void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  machine_mode mode = GET_MODE (dest);

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
         before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
      switch (sty)
        {
        case SYMBOL_FORCE_TO_MEM:
          if (offset != const0_rtx
              && targetm.cannot_force_const_mem (mode, imm))
            {
              gcc_assert (can_create_pseudo_p ());
              base = aarch64_force_temporary (mode, dest, base);
              base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
              aarch64_emit_move (dest, base);
              return;
            }
          mem = force_const_mem (ptr_mode, imm);
          gcc_assert (mem);
          if (mode != ptr_mode)
            mem = gen_rtx_ZERO_EXTEND (mode, mem);
          emit_insn (gen_rtx_SET (dest, mem));
          return;

        case SYMBOL_SMALL_TLSGD:
        case SYMBOL_SMALL_TLSDESC:
        case SYMBOL_SMALL_GOTTPREL:
        case SYMBOL_SMALL_GOT_28K:
        case SYMBOL_SMALL_GOT_4G:
        case SYMBOL_TINY_GOT:
          if (offset != const0_rtx)
            {
              gcc_assert (can_create_pseudo_p ());
              base = aarch64_force_temporary (mode, dest, base);
              base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
              aarch64_emit_move (dest, base);
              return;
            }
          /* FALLTHRU */

        case SYMBOL_SMALL_ABSOLUTE:
        case SYMBOL_TINY_ABSOLUTE:
        case SYMBOL_TLSLE:
          aarch64_load_symref_appropriately (dest, imm, sty);
          return;

        default:
          gcc_unreachable ();
        }
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
        emit_insn (gen_rtx_SET (dest, imm));
      else
        {
          rtx mem = force_const_mem (mode, imm);
          gcc_assert (mem);
          emit_insn (gen_rtx_SET (dest, mem));
        }

      return;
    }

  aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
}

static bool
aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
                                 tree exp ATTRIBUTE_UNUSED)
{
  /* Currently, always true.  */
  return true;
}

/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
                           machine_mode mode,
                           const_tree type,
                           bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
    ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  /* Aggregates are passed by reference based on their size.  */
  if (type && AGGREGATE_TYPE_P (type))
    {
      size = int_size_in_bytes (type);
    }

  /* Variable sized arguments are always passed by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
                                               &dummymode, &nregs,
                                               NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}

/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
                                               &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}

/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
                        bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp;
  int count;
  machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
                                               &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
        {
          gcc_assert (count == 1 && mode == ag_mode);
          return gen_rtx_REG (mode, V0_REGNUM);
        }
      else
        {
          int i;
          rtx par;

          par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
          for (i = 0; i < count; i++)
            {
              rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
              tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
              XVECEXP (par, 0, i) = tmp;
            }
          return par;
        }
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}

/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return TARGET_FLOAT;

  return false;
}

1792/* Implement TARGET_RETURN_IN_MEMORY.
1793
1794 If the type T of the result of a function is such that
1795 void func (T arg)
1796 would require that arg be passed as a value in a register (or set of
1797 registers) according to the parameter passing rules, then the result
1798 is returned in the same registers as would be used for such an
1799 argument. */
1800
1801static bool
1802aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1803{
1804 HOST_WIDE_INT size;
ef4bddc2 1805 machine_mode ag_mode;
43e9d192
IB
1806 int count;
1807
1808 if (!AGGREGATE_TYPE_P (type)
1809 && TREE_CODE (type) != COMPLEX_TYPE
1810 && TREE_CODE (type) != VECTOR_TYPE)
1811 /* Simple scalar types always returned in registers. */
1812 return false;
1813
1814 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1815 type,
1816 &ag_mode,
1817 &count,
1818 NULL))
1819 return false;
1820
1821 /* Types larger than 2 registers returned in memory. */
1822 size = int_size_in_bytes (type);
1823 return (size < 0 || size > 2 * UNITS_PER_WORD);
1824}
1825
1826static bool
ef4bddc2 1827aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
1828 const_tree type, int *nregs)
1829{
1830 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1831 return aarch64_vfp_is_call_or_return_candidate (mode,
1832 type,
1833 &pcum->aapcs_vfp_rmode,
1834 nregs,
1835 NULL);
1836}
1837
1838/* Given MODE and TYPE of a function argument, return the alignment in
1839 bits. The idea is to suppress any stronger alignment requested by
1840 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1841 This is a helper function for local use only. */
1842
1843static unsigned int
ef4bddc2 1844aarch64_function_arg_alignment (machine_mode mode, const_tree type)
1845{
1846 unsigned int alignment;
1847
1848 if (type)
1849 {
1850 if (!integer_zerop (TYPE_SIZE (type)))
1851 {
1852 if (TYPE_MODE (type) == mode)
1853 alignment = TYPE_ALIGN (type);
1854 else
1855 alignment = GET_MODE_ALIGNMENT (mode);
1856 }
1857 else
1858 alignment = 0;
1859 }
1860 else
1861 alignment = GET_MODE_ALIGNMENT (mode);
1862
1863 return alignment;
1864}
1865
1866/* Layout a function argument according to the AAPCS64 rules. The rule
1867 numbers refer to the rule numbers in the AAPCS64. */
1868
1869static void
ef4bddc2 1870aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
1871 const_tree type,
1872 bool named ATTRIBUTE_UNUSED)
1873{
1874 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1875 int ncrn, nvrn, nregs;
1876 bool allocate_ncrn, allocate_nvrn;
3abf17cf 1877 HOST_WIDE_INT size;
1878
1879 /* We need to do this once per argument. */
1880 if (pcum->aapcs_arg_processed)
1881 return;
1882
1883 pcum->aapcs_arg_processed = true;
1884
1885 /* Size in bytes, rounded up to the nearest multiple of 8 bytes. */
1886 size
1887 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1888 UNITS_PER_WORD);
1889
1890 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1891 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1892 mode,
1893 type,
1894 &nregs);
1895
1896 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1897 The following code thus handles passing by SIMD/FP registers first. */
1898
1899 nvrn = pcum->aapcs_nvrn;
1900
1901 /* C.1 - C.5 for floating point, homogeneous floating-point aggregates (HFA)
1902 and homogeneous short-vector aggregates (HVA). */
1903 if (allocate_nvrn)
1904 {
1905 if (!TARGET_FLOAT)
1906 aarch64_err_no_fpadvsimd (mode, "argument");
1907
1908 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1909 {
1910 pcum->aapcs_nextnvrn = nvrn + nregs;
1911 if (!aarch64_composite_type_p (type, mode))
1912 {
1913 gcc_assert (nregs == 1);
1914 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1915 }
1916 else
1917 {
1918 rtx par;
1919 int i;
1920 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1921 for (i = 0; i < nregs; i++)
1922 {
1923 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1924 V0_REGNUM + nvrn + i);
1925 tmp = gen_rtx_EXPR_LIST
1926 (VOIDmode, tmp,
1927 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1928 XVECEXP (par, 0, i) = tmp;
1929 }
1930 pcum->aapcs_reg = par;
1931 }
1932 return;
1933 }
1934 else
1935 {
1936 /* C.3 NSRN is set to 8. */
1937 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1938 goto on_stack;
1939 }
1940 }
1941
1942 ncrn = pcum->aapcs_ncrn;
3abf17cf 1943 nregs = size / UNITS_PER_WORD;
1944
1945 /* C.6 - C.9, though the sign and zero extension semantics are
1946 handled elsewhere. This is the case where the argument fits
1947 entirely in general registers. */
1948 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1949 {
1950 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1951
1952 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1953
1954 /* C.8 if the argument has an alignment of 16 then the NGRN is
1955 rounded up to the next even number. */
1956 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1957 {
1958 ++ncrn;
1959 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1960 }
1961 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1962 A reg is still generated for it, but the caller should be smart
1963 enough not to use it. */
1964 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1965 {
1966 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1967 }
1968 else
1969 {
1970 rtx par;
1971 int i;
1972
1973 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1974 for (i = 0; i < nregs; i++)
1975 {
1976 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1977 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1978 GEN_INT (i * UNITS_PER_WORD));
1979 XVECEXP (par, 0, i) = tmp;
1980 }
1981 pcum->aapcs_reg = par;
1982 }
1983
1984 pcum->aapcs_nextncrn = ncrn + nregs;
1985 return;
1986 }
1987
1988 /* C.11 */
1989 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1990
1991 /* The argument is passed on the stack; record the needed number of words
3abf17cf 1992 for this argument and align the total size if necessary. */
43e9d192 1993on_stack:
3abf17cf 1994 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1995 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1996 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
3abf17cf 1997 16 / UNITS_PER_WORD);
1998 return;
1999}
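/* Hypothetical example of rule C.8 (editorial, not from the original
   sources): a 16-byte-aligned composite skips an odd-numbered general
   register so that it starts on an even/odd register pair:

     struct __attribute__ ((aligned (16))) q { long a, b; };
     void f (int x, struct q y);   // x in w0; y in x2/x3, x1 left unused

   Here NGRN is 1 after X is allocated, so it is rounded up to 2 before
   Y's two registers are assigned.  */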
2000
2001/* Implement TARGET_FUNCTION_ARG. */
2002
2003static rtx
ef4bddc2 2004aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
2005 const_tree type, bool named)
2006{
2007 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2008 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
2009
2010 if (mode == VOIDmode)
2011 return NULL_RTX;
2012
2013 aarch64_layout_arg (pcum_v, mode, type, named);
2014 return pcum->aapcs_reg;
2015}
2016
2017void
2018aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
2019 const_tree fntype ATTRIBUTE_UNUSED,
2020 rtx libname ATTRIBUTE_UNUSED,
2021 const_tree fndecl ATTRIBUTE_UNUSED,
2022 unsigned n_named ATTRIBUTE_UNUSED)
2023{
2024 pcum->aapcs_ncrn = 0;
2025 pcum->aapcs_nvrn = 0;
2026 pcum->aapcs_nextncrn = 0;
2027 pcum->aapcs_nextnvrn = 0;
2028 pcum->pcs_variant = ARM_PCS_AAPCS64;
2029 pcum->aapcs_reg = NULL_RTX;
2030 pcum->aapcs_arg_processed = false;
2031 pcum->aapcs_stack_words = 0;
2032 pcum->aapcs_stack_size = 0;
2033
2034 if (!TARGET_FLOAT
2035 && fndecl && TREE_PUBLIC (fndecl)
2036 && fntype && fntype != error_mark_node)
2037 {
2038 const_tree type = TREE_TYPE (fntype);
2039 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
2040 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
2041 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
2042 &mode, &nregs, NULL))
2043 aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type");
2044 }
2045 return;
2046}
2047
2048static void
2049aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 2050 machine_mode mode,
2051 const_tree type,
2052 bool named)
2053{
2054 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2055 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
2056 {
2057 aarch64_layout_arg (pcum_v, mode, type, named);
2058 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
2059 != (pcum->aapcs_stack_words != 0));
2060 pcum->aapcs_arg_processed = false;
2061 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
2062 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
2063 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
2064 pcum->aapcs_stack_words = 0;
2065 pcum->aapcs_reg = NULL_RTX;
2066 }
2067}
2068
2069bool
2070aarch64_function_arg_regno_p (unsigned regno)
2071{
2072 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
2073 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
2074}
2075
2076/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
2077 PARM_BOUNDARY bits of alignment, but will be given anything up
2078 to STACK_BOUNDARY bits if the type requires it. This makes sure
2079 that both before and after the layout of each argument, the Next
2080 Stacked Argument Address (NSAA) will have a minimum alignment of
2081 8 bytes. */
2082
2083static unsigned int
ef4bddc2 2084aarch64_function_arg_boundary (machine_mode mode, const_tree type)
2085{
2086 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2087
2088 if (alignment < PARM_BOUNDARY)
2089 alignment = PARM_BOUNDARY;
2090 if (alignment > STACK_BOUNDARY)
2091 alignment = STACK_BOUNDARY;
2092 return alignment;
2093}
2094
2095/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
2096
2097 Return true if an argument passed on the stack should be padded upwards,
2098 i.e. if the least-significant byte of the stack slot has useful data.
2099
2100 Small aggregate types are placed at the lowest memory address.
2101
2102 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
2103
2104bool
ef4bddc2 2105aarch64_pad_arg_upward (machine_mode mode, const_tree type)
2106{
2107 /* On little-endian targets, the least significant byte of every stack
2108 argument is passed at the lowest byte address of the stack slot. */
2109 if (!BYTES_BIG_ENDIAN)
2110 return true;
2111
00edcfbe 2112 /* Otherwise, integral, floating-point and pointer types are padded downward:
2113 the least significant byte of a stack argument is passed at the highest
2114 byte address of the stack slot. */
2115 if (type
2116 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
2117 || POINTER_TYPE_P (type))
2118 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
2119 return false;
2120
2121 /* Everything else padded upward, i.e. data in first byte of stack slot. */
2122 return true;
2123}
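/* For example (editorial sketch, assuming a big-endian target): a
   one-byte struct passed on the stack sits in the lowest byte of its
   8-byte slot (padded upward), while a plain 'char' argument sits in
   the highest byte of the slot, since scalars are padded downward.  */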
2124
2125/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
2126
2127 It specifies padding for the last (and possibly the only)
2128 element of a block move between registers and memory. Assuming
2129 the block is in memory, padding upward means that the last
2130 element is padded after its most significant byte, while in
2131 downward padding the last element is padded on its least
2132 significant byte side.
2133
2134 Small aggregates and small complex types are always padded
2135 upwards.
2136
2137 We don't need to worry about homogeneous floating-point or
2138 short-vector aggregates; their move is not affected by the
2139 padding direction determined here. Regardless of endianness,
2140 each element of such an aggregate is put in the least
2141 significant bits of a fp/simd register.
2142
2143 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2144 register has useful data, and return the opposite if the most
2145 significant byte does. */
2146
2147bool
ef4bddc2 2148aarch64_pad_reg_upward (machine_mode mode, const_tree type,
2149 bool first ATTRIBUTE_UNUSED)
2150{
2151
2152 /* Small composite types are always padded upward. */
2153 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2154 {
2155 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2156 : GET_MODE_SIZE (mode));
2157 if (size < 2 * UNITS_PER_WORD)
2158 return true;
2159 }
2160
2161 /* Otherwise, use the default padding. */
2162 return !BYTES_BIG_ENDIAN;
2163}
2164
ef4bddc2 2165static machine_mode
2166aarch64_libgcc_cmp_return_mode (void)
2167{
2168 return SImode;
2169}
2170
2171static bool
2172aarch64_frame_pointer_required (void)
2173{
2174 /* In aarch64_override_options_after_change
2175 flag_omit_leaf_frame_pointer turns off the frame pointer by
2176 default. Turn it back on now if we've not got a leaf
2177 function. */
2178 if (flag_omit_leaf_frame_pointer
2179 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2180 return true;
43e9d192 2181
0b7f8166 2182 return false;
2183}
2184
2185/* Mark the registers that need to be saved by the callee and calculate
2186 the size of the callee-saved registers area and frame record (both FP
2187 and LR may be omitted). */
2188static void
2189aarch64_layout_frame (void)
2190{
2191 HOST_WIDE_INT offset = 0;
2192 int regno;
2193
2194 if (reload_completed && cfun->machine->frame.laid_out)
2195 return;
2196
2197#define SLOT_NOT_REQUIRED (-2)
2198#define SLOT_REQUIRED (-1)
2199
2200 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
2201 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
2202
2203 /* First mark all the registers that really need to be saved... */
2204 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2205 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
2206
2207 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2208 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
2209
2210 /* ... that includes the eh data registers (if needed)... */
2211 if (crtl->calls_eh_return)
2212 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
2213 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2214 = SLOT_REQUIRED;
2215
2216 /* ... and any callee saved register that dataflow says is live. */
2217 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2218 if (df_regs_ever_live_p (regno)
2219 && (regno == R30_REGNUM
2220 || !call_used_regs[regno]))
97826595 2221 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2222
2223 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2224 if (df_regs_ever_live_p (regno)
2225 && !call_used_regs[regno])
97826595 2226 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2227
2228 if (frame_pointer_needed)
2229 {
2e1cdae5 2230 /* FP and LR are placed in the linkage record. */
43e9d192 2231 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 2232 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 2233 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 2234 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
43e9d192 2235 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
2e1cdae5 2236 offset += 2 * UNITS_PER_WORD;
2237 }
2238
2239 /* Now assign stack slots for them. */
2e1cdae5 2240 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2241 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
2242 {
2243 cfun->machine->frame.reg_offset[regno] = offset;
2244 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2245 cfun->machine->frame.wb_candidate1 = regno;
2246 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
2247 cfun->machine->frame.wb_candidate2 = regno;
2248 offset += UNITS_PER_WORD;
2249 }
2250
2251 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2252 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
2253 {
2254 cfun->machine->frame.reg_offset[regno] = offset;
2255 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2256 cfun->machine->frame.wb_candidate1 = regno;
2257 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
2258 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2259 cfun->machine->frame.wb_candidate2 = regno;
2260 offset += UNITS_PER_WORD;
2261 }
2262
2263 cfun->machine->frame.padding0 =
2264 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
2265 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2266
2267 cfun->machine->frame.saved_regs_size = offset;
2268
2269 cfun->machine->frame.hard_fp_offset
2270 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
2271 + get_frame_size ()
2272 + cfun->machine->frame.saved_regs_size,
2273 STACK_BOUNDARY / BITS_PER_UNIT);
2274
2275 cfun->machine->frame.frame_size
2276 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
2277 + crtl->outgoing_args_size,
2278 STACK_BOUNDARY / BITS_PER_UNIT);
2279
2280 cfun->machine->frame.laid_out = true;
2281}
2282
2283static bool
2284aarch64_register_saved_on_entry (int regno)
2285{
97826595 2286 return cfun->machine->frame.reg_offset[regno] >= 0;
2287}
2288
2289static unsigned
2290aarch64_next_callee_save (unsigned regno, unsigned limit)
2291{
2292 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2293 regno ++;
2294 return regno;
2295}
43e9d192 2296
c5e1f66e 2297static void
ef4bddc2 2298aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
2299 HOST_WIDE_INT adjustment)
2300 {
2301 rtx base_rtx = stack_pointer_rtx;
2302 rtx insn, reg, mem;
2303
2304 reg = gen_rtx_REG (mode, regno);
2305 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2306 plus_constant (Pmode, base_rtx, -adjustment));
2307 mem = gen_rtx_MEM (mode, mem);
2308
2309 insn = emit_move_insn (mem, reg);
2310 RTX_FRAME_RELATED_P (insn) = 1;
2311}
2312
80c11907 2313static rtx
ef4bddc2 2314aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
2315 HOST_WIDE_INT adjustment)
2316{
2317 switch (mode)
2318 {
2319 case DImode:
2320 return gen_storewb_pairdi_di (base, base, reg, reg2,
2321 GEN_INT (-adjustment),
2322 GEN_INT (UNITS_PER_WORD - adjustment));
2323 case DFmode:
2324 return gen_storewb_pairdf_di (base, base, reg, reg2,
2325 GEN_INT (-adjustment),
2326 GEN_INT (UNITS_PER_WORD - adjustment));
2327 default:
2328 gcc_unreachable ();
2329 }
2330}
2331
2332static void
ef4bddc2 2333aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
2334 unsigned regno2, HOST_WIDE_INT adjustment)
2335{
5d8a22a5 2336 rtx_insn *insn;
2337 rtx reg1 = gen_rtx_REG (mode, regno1);
2338 rtx reg2 = gen_rtx_REG (mode, regno2);
2339
2340 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2341 reg2, adjustment));
2342 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2343 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2344 RTX_FRAME_RELATED_P (insn) = 1;
2345}
2346
159313d9 2347static rtx
ef4bddc2 2348aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
2349 HOST_WIDE_INT adjustment)
2350{
2351 switch (mode)
2352 {
2353 case DImode:
2354 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2355 GEN_INT (UNITS_PER_WORD));
2356 case DFmode:
2357 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2358 GEN_INT (UNITS_PER_WORD));
2359 default:
2360 gcc_unreachable ();
2361 }
2362}
2363
72df5c1f 2364static rtx
ef4bddc2 2365aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
2366 rtx reg2)
2367{
2368 switch (mode)
2369 {
2370 case DImode:
2371 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2372
2373 case DFmode:
2374 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2375
2376 default:
2377 gcc_unreachable ();
2378 }
2379}
2380
2381static rtx
ef4bddc2 2382aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
2383 rtx mem2)
2384{
2385 switch (mode)
2386 {
2387 case DImode:
2388 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2389
2390 case DFmode:
2391 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2392
2393 default:
2394 gcc_unreachable ();
2395 }
2396}
2397
43e9d192 2398
43e9d192 2399static void
ef4bddc2 2400aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
ae13fce3 2401 unsigned start, unsigned limit, bool skip_wb)
43e9d192 2402{
5d8a22a5 2403 rtx_insn *insn;
ef4bddc2 2404 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
a007a21c 2405 ? gen_frame_mem : gen_rtx_MEM);
2406 unsigned regno;
2407 unsigned regno2;
2408
0ec74a1e 2409 for (regno = aarch64_next_callee_save (start, limit);
2410 regno <= limit;
2411 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 2412 {
2413 rtx reg, mem;
2414 HOST_WIDE_INT offset;
64dedd72 2415
2416 if (skip_wb
2417 && (regno == cfun->machine->frame.wb_candidate1
2418 || regno == cfun->machine->frame.wb_candidate2))
2419 continue;
2420
2421 reg = gen_rtx_REG (mode, regno);
2422 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2423 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2424 offset));
2425
2426 regno2 = aarch64_next_callee_save (regno + 1, limit);
2427
2428 if (regno2 <= limit
2429 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2430 == cfun->machine->frame.reg_offset[regno2]))
2431
43e9d192 2432 {
0ec74a1e 2433 rtx reg2 = gen_rtx_REG (mode, regno2);
2434 rtx mem2;
2435
2436 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2437 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2438 offset));
2439 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2440 reg2));
0b4a9743 2441
2442 /* The first part of a frame-related parallel insn is
2443 always assumed to be relevant to the frame
2444 calculations; subsequent parts, are only
2445 frame-related if explicitly marked. */
2446 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2447 regno = regno2;
2448 }
2449 else
2450 insn = emit_move_insn (mem, reg);
2451
2452 RTX_FRAME_RELATED_P (insn) = 1;
2453 }
2454}
2455
2456static void
ef4bddc2 2457aarch64_restore_callee_saves (machine_mode mode,
8ed2fc62 2458 HOST_WIDE_INT start_offset, unsigned start,
dd991abb 2459 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 2460{
8ed2fc62 2461 rtx base_rtx = stack_pointer_rtx;
ef4bddc2 2462 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
2463 ? gen_frame_mem : gen_rtx_MEM);
2464 unsigned regno;
2465 unsigned regno2;
2466 HOST_WIDE_INT offset;
2467
2468 for (regno = aarch64_next_callee_save (start, limit);
2469 regno <= limit;
2470 regno = aarch64_next_callee_save (regno + 1, limit))
2471 {
ae13fce3 2472 rtx reg, mem;
8ed2fc62 2473
2474 if (skip_wb
2475 && (regno == cfun->machine->frame.wb_candidate1
2476 || regno == cfun->machine->frame.wb_candidate2))
2477 continue;
2478
2479 reg = gen_rtx_REG (mode, regno);
2480 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2481 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2482
2483 regno2 = aarch64_next_callee_save (regno + 1, limit);
2484
2485 if (regno2 <= limit
2486 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2487 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 2488 {
2489 rtx reg2 = gen_rtx_REG (mode, regno2);
2490 rtx mem2;
2491
2492 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2493 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 2494 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 2495
dd991abb 2496 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 2497 regno = regno2;
43e9d192 2498 }
8ed2fc62 2499 else
2500 emit_move_insn (reg, mem);
2501 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 2502 }
2503}
2504
2505/* AArch64 stack frames generated by this compiler look like:
2506
2507 +-------------------------------+
2508 | |
2509 | incoming stack arguments |
2510 | |
2511 +-------------------------------+
2512 | | <-- incoming stack pointer (aligned)
2513 | callee-allocated save area |
2514 | for register varargs |
2515 | |
2516 +-------------------------------+
2517 | local variables | <-- frame_pointer_rtx
2518 | |
2519 +-------------------------------+
2520 | padding0 | \
2521 +-------------------------------+ |
454fdba9 2522 | callee-saved registers | | frame.saved_regs_size
2523 +-------------------------------+ |
2524 | LR' | |
2525 +-------------------------------+ |
2526 | FP' | / <- hard_frame_pointer_rtx (aligned)
2527 +-------------------------------+
2528 | dynamic allocation |
2529 +-------------------------------+
2530 | padding |
2531 +-------------------------------+
2532 | outgoing stack arguments | <-- arg_pointer
2533 | |
2534 +-------------------------------+
2535 | | <-- stack_pointer_rtx (aligned)
43e9d192 2536
2537 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2538 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2539 unchanged. */
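/* A hypothetical worked example of the layout above (editorial sketch,
   assuming a frame pointer is required): a function with 16 bytes of
   locals that saves x29, x30 and x19 gets

     saved_regs_size = 16 (frame record) + 8 (x19), rounded up to 32
     hard_fp_offset  = AARCH64_ROUND_UP (0 + 16 + 32, 16) = 48
     frame_size      = AARCH64_ROUND_UP (48 + 0, 16) = 48

   so the prologue below can drop SP by 48 in a single adjustment.  */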
2540
2541/* Generate the prologue instructions for entry into a function.
2542 Establish the stack frame by decreasing the stack pointer with a
2543 properly calculated size and, if necessary, create a frame record
2544 filled with the values of LR and previous frame pointer. The
6991c977 2545 current FP is also set up if it is in use. */
2546
2547void
2548aarch64_expand_prologue (void)
2549{
2550 /* sub sp, sp, #<frame_size>
2551 stp {fp, lr}, [sp, #<frame_size> - 16]
2552 add fp, sp, #<frame_size> - hardfp_offset
2553 stp {cs_reg}, [fp, #-16] etc.
2554
2555 sub sp, sp, <final_adjustment_if_any>
2556 */
43e9d192 2557 HOST_WIDE_INT frame_size, offset;
1c960e02 2558 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
dd991abb 2559 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2560 rtx_insn *insn;
2561
2562 aarch64_layout_frame ();
43e9d192 2563
2564 offset = frame_size = cfun->machine->frame.frame_size;
2565 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2566 fp_offset = frame_size - hard_fp_offset;
43e9d192 2567
2568 if (flag_stack_usage_info)
2569 current_function_static_stack_size = frame_size;
43e9d192 2570
44c0e7b9 2571 /* Store pairs and load pairs have a range only -512 to 504. */
2572 if (offset >= 512)
2573 {
2574 /* When the frame has a large size, an initial decrease is done on
2575 the stack pointer to jump over the callee-allocated save area for
2576 register varargs, the local variable area and/or the callee-saved
2577 register area. This will allow the pre-index write-back
2578 store pair instructions to be used for setting up the stack frame
2579 efficiently. */
dd991abb 2580 offset = hard_fp_offset;
2581 if (offset >= 512)
2582 offset = cfun->machine->frame.saved_regs_size;
2583
2584 frame_size -= (offset + crtl->outgoing_args_size);
2585 fp_offset = 0;
2586
2587 if (frame_size >= 0x1000000)
2588 {
2589 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2590 emit_move_insn (op0, GEN_INT (-frame_size));
2591 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2592
2593 add_reg_note (insn, REG_CFA_ADJUST_CFA,
f7df4a84 2594 gen_rtx_SET (stack_pointer_rtx,
2595 plus_constant (Pmode, stack_pointer_rtx,
2596 -frame_size)));
2597 RTX_FRAME_RELATED_P (insn) = 1;
2598 }
2599 else if (frame_size > 0)
2600 {
2601 int hi_ofs = frame_size & 0xfff000;
2602 int lo_ofs = frame_size & 0x000fff;
2603
2604 if (hi_ofs)
2605 {
2606 insn = emit_insn (gen_add2_insn
dd991abb 2607 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
2608 RTX_FRAME_RELATED_P (insn) = 1;
2609 }
dd991abb 2610 if (lo_ofs)
2611 {
2612 insn = emit_insn (gen_add2_insn
dd991abb 2613 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
43e9d192
IB
2614 RTX_FRAME_RELATED_P (insn) = 1;
2615 }
2616 }
2617 }
2618 else
2619 frame_size = -1;
2620
2621 if (offset > 0)
2622 {
2623 bool skip_wb = false;
2624
43e9d192
IB
2625 if (frame_pointer_needed)
2626 {
c5e1f66e
JW
2627 skip_wb = true;
2628
43e9d192
IB
2629 if (fp_offset)
2630 {
2631 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2632 GEN_INT (-offset)));
2633 RTX_FRAME_RELATED_P (insn) = 1;
2634
2635 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
c5e1f66e 2636 R30_REGNUM, false);
2637 }
2638 else
80c11907 2639 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
43e9d192
IB
2640
2641 /* Set up frame pointer to point to the location of the
2642 previous frame pointer on the stack. */
2643 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2644 stack_pointer_rtx,
2645 GEN_INT (fp_offset)));
43e9d192 2646 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 2647 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
2648 }
2649 else
2650 {
2651 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2652 unsigned reg2 = cfun->machine->frame.wb_candidate2;
80c11907 2653
2654 if (fp_offset
2655 || reg1 == FIRST_PSEUDO_REGISTER
2656 || (reg2 == FIRST_PSEUDO_REGISTER
2657 && offset >= 256))
2658 {
2659 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2660 GEN_INT (-offset)));
2661 RTX_FRAME_RELATED_P (insn) = 1;
2662 }
2663 else
2664 {
ef4bddc2 2665 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2666
2667 skip_wb = true;
2668
2669 if (reg2 == FIRST_PSEUDO_REGISTER)
2670 aarch64_pushwb_single_reg (mode1, reg1, offset);
2671 else
2672 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2673 }
2674 }
2675
2676 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2677 skip_wb);
2678 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2679 skip_wb);
2680 }
2681
2682 /* when offset >= 512,
2683 sub sp, sp, #<outgoing_args_size> */
2684 if (frame_size > -1)
2685 {
2686 if (crtl->outgoing_args_size > 0)
2687 {
2688 insn = emit_insn (gen_add2_insn
2689 (stack_pointer_rtx,
2690 GEN_INT (- crtl->outgoing_args_size)));
2691 RTX_FRAME_RELATED_P (insn) = 1;
2692 }
2693 }
2694}
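/* As a concrete (editorial, hypothetical) instance of the
   pseudo-sequence in the comment above, the 48-byte frame from the
   earlier worked example might expand to:

     stp x29, x30, [sp, #-48]!   // allocate frame, store frame record
     add x29, sp, #0             // establish the new frame pointer
     str x19, [sp, #16]          // remaining callee-saved register

   The exact sequence depends on the frame size and on which write-back
   candidates aarch64_layout_frame picked.  */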
2695
2696/* Return TRUE if we can use a simple_return insn.
2697
2698 This function checks whether the callee-saved stack is empty, which
2699 means no restore actions are needed. The pro_and_epilogue pass will use
2700 this to check whether the shrink-wrapping optimization is feasible. */
2701
2702bool
2703aarch64_use_return_insn_p (void)
2704{
2705 if (!reload_completed)
2706 return false;
2707
2708 if (crtl->profile)
2709 return false;
2710
2711 aarch64_layout_frame ();
2712
2713 return cfun->machine->frame.frame_size == 0;
2714}
2715
2716/* Generate the epilogue instructions for returning from a function. */
2717void
2718aarch64_expand_epilogue (bool for_sibcall)
2719{
1c960e02 2720 HOST_WIDE_INT frame_size, offset;
43e9d192 2721 HOST_WIDE_INT fp_offset;
dd991abb 2722 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2723 rtx_insn *insn;
2724 /* We need to add a memory barrier to prevent reads from the deallocated stack. */
2725 bool need_barrier_p = (get_frame_size () != 0
2726 || cfun->machine->frame.saved_varargs_size);
2727
2728 aarch64_layout_frame ();
43e9d192 2729
1c960e02 2730 offset = frame_size = cfun->machine->frame.frame_size;
2731 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2732 fp_offset = frame_size - hard_fp_offset;
2733
2734 /* Store pairs and load pairs have a range only -512 to 504. */
2735 if (offset >= 512)
2736 {
dd991abb 2737 offset = hard_fp_offset;
2738 if (offset >= 512)
2739 offset = cfun->machine->frame.saved_regs_size;
2740
2741 frame_size -= (offset + crtl->outgoing_args_size);
2742 fp_offset = 0;
2743 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2744 {
2745 insn = emit_insn (gen_add2_insn
2746 (stack_pointer_rtx,
2747 GEN_INT (crtl->outgoing_args_size)));
2748 RTX_FRAME_RELATED_P (insn) = 1;
2749 }
2750 }
2751 else
2752 frame_size = -1;
2753
2754 /* If there were outgoing arguments or we've done dynamic stack
2755 allocation, then restore the stack pointer from the frame
2756 pointer. This is at most one insn and more efficient than using
2757 GCC's internal mechanism. */
2758 if (frame_pointer_needed
2759 && (crtl->outgoing_args_size || cfun->calls_alloca))
2760 {
2761 if (cfun->calls_alloca)
2762 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2763
2764 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2765 hard_frame_pointer_rtx,
2766 GEN_INT (0)));
2767 offset = offset - fp_offset;
2768 }
2769
2770 if (offset > 0)
2771 {
2772 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2773 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2774 bool skip_wb = true;
dd991abb 2775 rtx cfi_ops = NULL;
4b92caa1 2776
43e9d192 2777 if (frame_pointer_needed)
2778 fp_offset = 0;
2779 else if (fp_offset
2780 || reg1 == FIRST_PSEUDO_REGISTER
2781 || (reg2 == FIRST_PSEUDO_REGISTER
2782 && offset >= 256))
2783 skip_wb = false;
2784
2785 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
dd991abb 2786 skip_wb, &cfi_ops);
4b92caa1 2787 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
dd991abb 2788 skip_wb, &cfi_ops);
4b92caa1 2789
2790 if (need_barrier_p)
2791 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2792
4b92caa1 2793 if (skip_wb)
43e9d192 2794 {
ef4bddc2 2795 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
dd991abb 2796 rtx rreg1 = gen_rtx_REG (mode1, reg1);
4b92caa1 2797
dd991abb 2798 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
4b92caa1 2799 if (reg2 == FIRST_PSEUDO_REGISTER)
2800 {
2801 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2802 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2803 mem = gen_rtx_MEM (mode1, mem);
2804 insn = emit_move_insn (rreg1, mem);
2805 }
2806 else
2807 {
dd991abb 2808 rtx rreg2 = gen_rtx_REG (mode1, reg2);
4b92caa1 2809
2810 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2811 insn = emit_insn (aarch64_gen_loadwb_pair
2812 (mode1, stack_pointer_rtx, rreg1,
2813 rreg2, offset));
4b92caa1 2814 }
43e9d192 2815 }
2816 else
2817 {
2818 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2819 GEN_INT (offset)));
43e9d192 2820 }
43e9d192 2821
2822 /* Reset the CFA to be SP + FRAME_SIZE. */
2823 rtx new_cfa = stack_pointer_rtx;
2824 if (frame_size > 0)
2825 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2826 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2827 REG_NOTES (insn) = cfi_ops;
43e9d192 2828 RTX_FRAME_RELATED_P (insn) = 1;
2829 }
2830
dd991abb 2831 if (frame_size > 0)
43e9d192 2832 {
2833 if (need_barrier_p)
2834 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2835
2836 if (frame_size >= 0x1000000)
2837 {
2838 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2839 emit_move_insn (op0, GEN_INT (frame_size));
dd991abb 2840 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
43e9d192 2841 }
dd991abb 2842 else
43e9d192 2843 {
2844 int hi_ofs = frame_size & 0xfff000;
2845 int lo_ofs = frame_size & 0x000fff;
2846
2847 if (hi_ofs && lo_ofs)
2848 {
2849 insn = emit_insn (gen_add2_insn
dd991abb 2850 (stack_pointer_rtx, GEN_INT (hi_ofs)));
43e9d192 2851 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 2852 frame_size = lo_ofs;
43e9d192 2853 }
2854 insn = emit_insn (gen_add2_insn
2855 (stack_pointer_rtx, GEN_INT (frame_size)));
2856 }
2857
2858 /* Reset the CFA to be SP + 0. */
2859 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2860 RTX_FRAME_RELATED_P (insn) = 1;
2861 }
2862
2863 /* Stack adjustment for exception handler. */
2864 if (crtl->calls_eh_return)
2865 {
2866 /* We need to unwind the stack by the offset computed by
2867 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2868 to be SP; letting the CFA move during this adjustment
2869 is just as correct as retaining the CFA from the body
2870 of the function. Therefore, do nothing special. */
2871 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2872 }
2873
2874 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2875 if (!for_sibcall)
2876 emit_jump_insn (ret_rtx);
2877}
2878
2879/* Return the place to copy the exception unwinding return address to.
2880 This will probably be a stack slot, but could (in theory) be the
2881 return register. */
2882rtx
2883aarch64_final_eh_return_addr (void)
2884{
2885 HOST_WIDE_INT fp_offset;
2886
43e9d192 2887 aarch64_layout_frame ();
2888
2889 fp_offset = cfun->machine->frame.frame_size
2890 - cfun->machine->frame.hard_fp_offset;
2891
2892 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2893 return gen_rtx_REG (DImode, LR_REGNUM);
2894
2895 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2896 result in a store to save LR introduced by builtin_eh_return () being
2897 incorrectly deleted because the alias is not detected.
2898 So in the calculation of the address to copy the exception unwinding
2899 return address to, we note 2 cases.
2900 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2901 we return a SP-relative location since all the addresses are SP-relative
2902 in this case. This prevents the store from being optimized away.
2903 If the fp_offset is not 0, then the addresses will be FP-relative and
2904 therefore we return a FP-relative location. */
2905
2906 if (frame_pointer_needed)
2907 {
2908 if (fp_offset)
2909 return gen_frame_mem (DImode,
2910 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2911 else
2912 return gen_frame_mem (DImode,
2913 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2914 }
2915
2916 /* If FP is not needed, we calculate the location of LR, which would be
2917 at the top of the saved registers block. */
2918
2919 return gen_frame_mem (DImode,
2920 plus_constant (Pmode,
2921 stack_pointer_rtx,
2922 fp_offset
2923 + cfun->machine->frame.saved_regs_size
2924 - 2 * UNITS_PER_WORD));
2925}
2926
2927/* Possibly output code to build up a constant in a register. For
2928 the benefit of the costs infrastructure, returns the number of
2929 instructions which would be emitted. GENERATE inhibits or
2930 enables code generation. */
2931
2932static int
2933aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
43e9d192 2934{
2935 int insns = 0;
2936
43e9d192 2937 if (aarch64_bitmask_imm (val, DImode))
2938 {
2939 if (generate)
2940 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2941 insns = 1;
2942 }
2943 else
2944 {
2945 int i;
2946 int ncount = 0;
2947 int zcount = 0;
2948 HOST_WIDE_INT valp = val >> 16;
2949 HOST_WIDE_INT valm;
2950 HOST_WIDE_INT tval;
2951
2952 for (i = 16; i < 64; i += 16)
2953 {
2954 valm = (valp & 0xffff);
2955
2956 if (valm != 0)
2957 ++ zcount;
2958
2959 if (valm != 0xffff)
2960 ++ ncount;
2961
2962 valp >>= 16;
2963 }
2964
2965 /* zcount contains the number of additional MOVK instructions
2966 required if the constant is built up with an initial MOVZ instruction,
2967 while ncount is the number of MOVK instructions required if starting
2968 with a MOVN instruction. Choose the sequence that yields the fewest
2969 instructions, preferring MOVZ instructions when both counts are
2970 the same. */
2971 if (ncount < zcount)
2972 {
2973 if (generate)
2974 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2975 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
43e9d192 2976 tval = 0xffff;
9dfc162c 2977 insns++;
2978 }
2979 else
2980 {
2981 if (generate)
2982 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2983 GEN_INT (val & 0xffff));
43e9d192 2984 tval = 0;
9dfc162c 2985 insns++;
2986 }
2987
2988 val >>= 16;
2989
2990 for (i = 16; i < 64; i += 16)
2991 {
2992 if ((val & 0xffff) != tval)
2993 {
2994 if (generate)
2995 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2996 GEN_INT (i),
2997 GEN_INT (val & 0xffff)));
2998 insns++;
2999 }
3000 val >>= 16;
3001 }
3002 }
9dfc162c 3003 return insns;
3004}
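/* Worked example (editorial, hypothetical register choice): for
   val = 0xffffffff0000abcd the three high 16-bit chunks are 0x0000,
   0xffff, 0xffff, so ncount (1) beats zcount (2) and the MOVN-based
   sequence is chosen:

     movn x0, #0x5432            // x0 = 0xffffffffffffabcd
     movk x0, #0x0, lsl #16      // x0 = 0xffffffff0000abcd

   two instructions, where a MOVZ start would have needed three.  */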
3005
3006static void
d9600ae5 3007aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
3008{
3009 HOST_WIDE_INT mdelta = delta;
3010 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
3011 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
3012
3013 if (mdelta < 0)
3014 mdelta = -mdelta;
3015
3016 if (mdelta >= 4096 * 4096)
3017 {
9dfc162c 3018 (void) aarch64_build_constant (scratchreg, delta, true);
d9600ae5 3019 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
3020 }
3021 else if (mdelta > 0)
3022 {
43e9d192 3023 if (mdelta >= 4096)
d9600ae5 3024 {
f7df4a84 3025 emit_insn (gen_rtx_SET (scratch_rtx, GEN_INT (mdelta / 4096)));
3026 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
3027 if (delta < 0)
f7df4a84 3028 emit_insn (gen_rtx_SET (this_rtx,
3029 gen_rtx_MINUS (Pmode, this_rtx, shift)));
3030 else
f7df4a84 3031 emit_insn (gen_rtx_SET (this_rtx,
3032 gen_rtx_PLUS (Pmode, this_rtx, shift)));
3033 }
43e9d192 3034 if (mdelta % 4096 != 0)
3035 {
3036 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
f7df4a84 3037 emit_insn (gen_rtx_SET (this_rtx,
3038 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
3039 }
3040 }
3041}
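/* Editorial worked example: for delta = 0x12345 (so 4096 <= mdelta
   < 4096 * 4096) the code above emits roughly

     mov  x17, #0x12
     add  x0, x0, x17, lsl #12   // x0 += 0x12000
     add  x0, x0, #0x345         // x0 += 0x345

   assuming REGNUM is x0 and SCRATCHREG is x17 (IP1), as in the thunk
   code below.  */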
3042
3043/* Output code to add DELTA to the first argument, and then jump
3044 to FUNCTION. Used for C++ multiple inheritance. */
3045static void
3046aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
3047 HOST_WIDE_INT delta,
3048 HOST_WIDE_INT vcall_offset,
3049 tree function)
3050{
3051 /* The this pointer is always in x0. Note that this differs from
3052 Arm, where the this pointer may be bumped to r1 if r0 is required
3053 to return a pointer to an aggregate. On AArch64 a result value
3054 pointer will be in x8. */
3055 int this_regno = R0_REGNUM;
3056 rtx this_rtx, temp0, temp1, addr, funexp;
3057 rtx_insn *insn;
43e9d192 3058
3059 reload_completed = 1;
3060 emit_note (NOTE_INSN_PROLOGUE_END);
3061
3062 if (vcall_offset == 0)
d9600ae5 3063 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
3064 else
3065 {
28514dda 3066 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 3067
3068 this_rtx = gen_rtx_REG (Pmode, this_regno);
3069 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
3070 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 3071
3072 addr = this_rtx;
3073 if (delta != 0)
3074 {
3075 if (delta >= -256 && delta < 256)
3076 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
3077 plus_constant (Pmode, this_rtx, delta));
3078 else
d9600ae5 3079 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
3080 }
3081
3082 if (Pmode == ptr_mode)
3083 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
3084 else
3085 aarch64_emit_move (temp0,
3086 gen_rtx_ZERO_EXTEND (Pmode,
3087 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 3088
28514dda 3089 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 3090 addr = plus_constant (Pmode, temp0, vcall_offset);
3091 else
3092 {
9dfc162c 3093 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
75f1d6fc 3094 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
3095 }
3096
3097 if (Pmode == ptr_mode)
3098 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
3099 else
3100 aarch64_emit_move (temp1,
3101 gen_rtx_SIGN_EXTEND (Pmode,
3102 gen_rtx_MEM (ptr_mode, addr)));
3103
75f1d6fc 3104 emit_insn (gen_add2_insn (this_rtx, temp1));
3105 }
3106
3107 /* Generate a tail call to the target function. */
3108 if (!TREE_USED (function))
3109 {
3110 assemble_external (function);
3111 TREE_USED (function) = 1;
3112 }
3113 funexp = XEXP (DECL_RTL (function), 0);
3114 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
3115 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
3116 SIBLING_CALL_P (insn) = 1;
3117
3118 insn = get_insns ();
3119 shorten_branches (insn);
3120 final_start_function (insn, file, 1);
3121 final (insn, file, 1);
43e9d192 3122 final_end_function ();
3123
3124 /* Stop pretending to be a post-reload pass. */
3125 reload_completed = 0;
3126}
3127
3128static bool
3129aarch64_tls_referenced_p (rtx x)
3130{
3131 if (!TARGET_HAVE_TLS)
3132 return false;
3133 subrtx_iterator::array_type array;
3134 FOR_EACH_SUBRTX (iter, array, x, ALL)
3135 {
3136 const_rtx x = *iter;
3137 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
3138 return true;
3139 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3140 TLS offsets, not real symbol references. */
3141 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3142 iter.skip_subrtxes ();
3143 }
3144 return false;
3145}
3146
3147
3148static int
3149aarch64_bitmasks_cmp (const void *i1, const void *i2)
3150{
3151 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
3152 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
3153
3154 if (*imm1 < *imm2)
3155 return -1;
3156 if (*imm1 > *imm2)
3157 return +1;
3158 return 0;
3159}
3160
3161
3162static void
3163aarch64_build_bitmask_table (void)
3164{
3165 unsigned HOST_WIDE_INT mask, imm;
3166 unsigned int log_e, e, s, r;
3167 unsigned int nimms = 0;
3168
3169 for (log_e = 1; log_e <= 6; log_e++)
3170 {
3171 e = 1 << log_e;
3172 if (e == 64)
3173 mask = ~(HOST_WIDE_INT) 0;
3174 else
3175 mask = ((HOST_WIDE_INT) 1 << e) - 1;
3176 for (s = 1; s < e; s++)
3177 {
3178 for (r = 0; r < e; r++)
3179 {
3180 /* set s consecutive bits to 1 (s < 64) */
3181 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
3182 /* rotate right by r */
3183 if (r != 0)
3184 imm = ((imm >> r) | (imm << (e - r))) & mask;
3185 /* replicate the constant depending on SIMD size */
3186 switch (log_e) {
3187 case 1: imm |= (imm << 2);
3188 case 2: imm |= (imm << 4);
3189 case 3: imm |= (imm << 8);
3190 case 4: imm |= (imm << 16);
3191 case 5: imm |= (imm << 32);
3192 case 6:
3193 break;
3194 default:
3195 gcc_unreachable ();
3196 }
3197 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
3198 aarch64_bitmasks[nimms++] = imm;
3199 }
3200 }
3201 }
3202
3203 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
3204 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
3205 aarch64_bitmasks_cmp);
3206}
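/* Editorial worked example of one table entry: with e = 8, s = 3,
   r = 1 the element is 0b00000111 rotated right by one bit, i.e. 0x83,
   replicated across all eight bytes to give 0x8383838383838383.  That
   value is therefore a valid bitmask (logical) immediate, whereas
   0x8383838383838384 is not, because it breaks the replication
   pattern.  */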
3207
3208
3209/* Return true if val can be encoded as a 12-bit unsigned immediate with
3210 a left shift of 0 or 12 bits. */
3211bool
3212aarch64_uimm12_shift (HOST_WIDE_INT val)
3213{
3214 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3215 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3216 );
3217}
3218
3219
3220/* Return true if val is an immediate that can be loaded into a
3221 register by a MOVZ instruction. */
3222static bool
ef4bddc2 3223aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
3224{
3225 if (GET_MODE_SIZE (mode) > 4)
3226 {
3227 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3228 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3229 return 1;
3230 }
3231 else
3232 {
3233 /* Ignore sign extension. */
3234 val &= (HOST_WIDE_INT) 0xffffffff;
3235 }
3236 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3237 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3238}
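/* For example (editorial): 0x0000ffff00000000 is reachable with a
   single MOVZ (movz xN, #0xffff, lsl #32), and its complement
   0xffff0000ffffffff with a single MOVN, which is why aarch64_move_imm
   below also tries the inverted value.  */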
3239
3240
3241/* Return true if val is a valid bitmask immediate. */
3242bool
ef4bddc2 3243aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
3244{
3245 if (GET_MODE_SIZE (mode) < 8)
3246 {
3247 /* Replicate bit pattern. */
3248 val &= (HOST_WIDE_INT) 0xffffffff;
3249 val |= val << 32;
3250 }
3251 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
3252 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
3253}
3254
3255
3256/* Return true if val is an immediate that can be loaded into a
3257 register in a single instruction. */
3258bool
ef4bddc2 3259aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
3260{
3261 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
3262 return 1;
3263 return aarch64_bitmask_imm (val, mode);
3264}
3265
3266static bool
ef4bddc2 3267aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
3268{
3269 rtx base, offset;
7eda14e1 3270
3271 if (GET_CODE (x) == HIGH)
3272 return true;
3273
3274 split_const (x, &base, &offset);
3275 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 3276 {
f8b756b7 3277 if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
3278 != SYMBOL_FORCE_TO_MEM)
3279 return true;
3280 else
3281 /* Avoid generating a 64-bit relocation in ILP32; leave
3282 to aarch64_expand_mov_immediate to handle it properly. */
3283 return mode != ptr_mode;
3284 }
3285
3286 return aarch64_tls_referenced_p (x);
3287}
3288
3289/* Return true if register REGNO is a valid index register.
3290 STRICT_P is true if REG_OK_STRICT is in effect. */
3291
3292bool
3293aarch64_regno_ok_for_index_p (int regno, bool strict_p)
3294{
3295 if (!HARD_REGISTER_NUM_P (regno))
3296 {
3297 if (!strict_p)
3298 return true;
3299
3300 if (!reg_renumber)
3301 return false;
3302
3303 regno = reg_renumber[regno];
3304 }
3305 return GP_REGNUM_P (regno);
3306}
3307
3308/* Return true if register REGNO is a valid base register for mode MODE.
3309 STRICT_P is true if REG_OK_STRICT is in effect. */
3310
3311bool
3312aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3313{
3314 if (!HARD_REGISTER_NUM_P (regno))
3315 {
3316 if (!strict_p)
3317 return true;
3318
3319 if (!reg_renumber)
3320 return false;
3321
3322 regno = reg_renumber[regno];
3323 }
3324
3325 /* The fake registers will be eliminated to either the stack or
3326 hard frame pointer, both of which are usually valid base registers.
3327 Reload deals with the cases where the eliminated form isn't valid. */
3328 return (GP_REGNUM_P (regno)
3329 || regno == SP_REGNUM
3330 || regno == FRAME_POINTER_REGNUM
3331 || regno == ARG_POINTER_REGNUM);
3332}
3333
3334/* Return true if X is a valid base register for mode MODE.
3335 STRICT_P is true if REG_OK_STRICT is in effect. */
3336
3337static bool
3338aarch64_base_register_rtx_p (rtx x, bool strict_p)
3339{
3340 if (!strict_p && GET_CODE (x) == SUBREG)
3341 x = SUBREG_REG (x);
3342
3343 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3344}
3345
3346/* Return true if address offset is a valid index. If it is, fill in INFO
3347 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3348
3349static bool
3350aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 3351 machine_mode mode, bool strict_p)
3352{
3353 enum aarch64_address_type type;
3354 rtx index;
3355 int shift;
3356
3357 /* (reg:P) */
3358 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3359 && GET_MODE (x) == Pmode)
3360 {
3361 type = ADDRESS_REG_REG;
3362 index = x;
3363 shift = 0;
3364 }
3365 /* (sign_extend:DI (reg:SI)) */
3366 else if ((GET_CODE (x) == SIGN_EXTEND
3367 || GET_CODE (x) == ZERO_EXTEND)
3368 && GET_MODE (x) == DImode
3369 && GET_MODE (XEXP (x, 0)) == SImode)
3370 {
3371 type = (GET_CODE (x) == SIGN_EXTEND)
3372 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3373 index = XEXP (x, 0);
3374 shift = 0;
3375 }
3376 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3377 else if (GET_CODE (x) == MULT
3378 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3379 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3380 && GET_MODE (XEXP (x, 0)) == DImode
3381 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3382 && CONST_INT_P (XEXP (x, 1)))
3383 {
3384 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3385 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3386 index = XEXP (XEXP (x, 0), 0);
3387 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3388 }
3389 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3390 else if (GET_CODE (x) == ASHIFT
3391 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3392 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3393 && GET_MODE (XEXP (x, 0)) == DImode
3394 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3395 && CONST_INT_P (XEXP (x, 1)))
3396 {
3397 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3398 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3399 index = XEXP (XEXP (x, 0), 0);
3400 shift = INTVAL (XEXP (x, 1));
3401 }
3402 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3403 else if ((GET_CODE (x) == SIGN_EXTRACT
3404 || GET_CODE (x) == ZERO_EXTRACT)
3405 && GET_MODE (x) == DImode
3406 && GET_CODE (XEXP (x, 0)) == MULT
3407 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3408 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3409 {
3410 type = (GET_CODE (x) == SIGN_EXTRACT)
3411 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3412 index = XEXP (XEXP (x, 0), 0);
3413 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3414 if (INTVAL (XEXP (x, 1)) != 32 + shift
3415 || INTVAL (XEXP (x, 2)) != 0)
3416 shift = -1;
3417 }
3418 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3419 (const_int 0xffffffff<<shift)) */
3420 else if (GET_CODE (x) == AND
3421 && GET_MODE (x) == DImode
3422 && GET_CODE (XEXP (x, 0)) == MULT
3423 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3424 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3425 && CONST_INT_P (XEXP (x, 1)))
3426 {
3427 type = ADDRESS_REG_UXTW;
3428 index = XEXP (XEXP (x, 0), 0);
3429 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3430 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3431 shift = -1;
3432 }
3433 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3434 else if ((GET_CODE (x) == SIGN_EXTRACT
3435 || GET_CODE (x) == ZERO_EXTRACT)
3436 && GET_MODE (x) == DImode
3437 && GET_CODE (XEXP (x, 0)) == ASHIFT
3438 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3439 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3440 {
3441 type = (GET_CODE (x) == SIGN_EXTRACT)
3442 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3443 index = XEXP (XEXP (x, 0), 0);
3444 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3445 if (INTVAL (XEXP (x, 1)) != 32 + shift
3446 || INTVAL (XEXP (x, 2)) != 0)
3447 shift = -1;
3448 }
3449 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3450 (const_int 0xffffffff<<shift)) */
3451 else if (GET_CODE (x) == AND
3452 && GET_MODE (x) == DImode
3453 && GET_CODE (XEXP (x, 0)) == ASHIFT
3454 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3455 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3456 && CONST_INT_P (XEXP (x, 1)))
3457 {
3458 type = ADDRESS_REG_UXTW;
3459 index = XEXP (XEXP (x, 0), 0);
3460 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3461 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3462 shift = -1;
3463 }
3464 /* (mult:P (reg:P) (const_int scale)) */
3465 else if (GET_CODE (x) == MULT
3466 && GET_MODE (x) == Pmode
3467 && GET_MODE (XEXP (x, 0)) == Pmode
3468 && CONST_INT_P (XEXP (x, 1)))
3469 {
3470 type = ADDRESS_REG_REG;
3471 index = XEXP (x, 0);
3472 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3473 }
3474 /* (ashift:P (reg:P) (const_int shift)) */
3475 else if (GET_CODE (x) == ASHIFT
3476 && GET_MODE (x) == Pmode
3477 && GET_MODE (XEXP (x, 0)) == Pmode
3478 && CONST_INT_P (XEXP (x, 1)))
3479 {
3480 type = ADDRESS_REG_REG;
3481 index = XEXP (x, 0);
3482 shift = INTVAL (XEXP (x, 1));
3483 }
3484 else
3485 return false;
3486
3487 if (GET_CODE (index) == SUBREG)
3488 index = SUBREG_REG (index);
3489
3490 if ((shift == 0 ||
3491 (shift > 0 && shift <= 3
3492 && (1 << shift) == GET_MODE_SIZE (mode)))
3493 && REG_P (index)
3494 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3495 {
3496 info->type = type;
3497 info->offset = index;
3498 info->shift = shift;
3499 return true;
3500 }
3501
3502 return false;
3503}
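/* Editorial illustration (simplified RTL): an index expression such as

     (plus:DI (reg:DI x1)
              (mult:DI (sign_extend:DI (reg:SI w2)) (const_int 4)))

   classifies as ADDRESS_REG_SXTW with shift 2 for an SImode access,
   i.e. the assembly addressing form [x1, w2, sxtw #2].  */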
3504
44707478 3505bool
ef4bddc2 3506aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3507{
3508 return (offset >= -64 * GET_MODE_SIZE (mode)
3509 && offset < 64 * GET_MODE_SIZE (mode)
3510 && offset % GET_MODE_SIZE (mode) == 0);
3511}
3512
3513static inline bool
ef4bddc2 3514offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
3515 HOST_WIDE_INT offset)
3516{
3517 return offset >= -256 && offset < 256;
3518}
3519
3520static inline bool
ef4bddc2 3521offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3522{
3523 return (offset >= 0
3524 && offset < 4096 * GET_MODE_SIZE (mode)
3525 && offset % GET_MODE_SIZE (mode) == 0);
3526}
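/* Editorial summary of the three offset helpers for DImode (8-byte)
   accesses:

     7-bit signed, scaled (ldp/stp)  : -512 .. 504, multiples of 8
     9-bit signed, unscaled (ldur)   : -256 .. 255, any byte offset
     12-bit unsigned, scaled (ldr)   : 0 .. 32760, multiples of 8  */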
3527
3528/* Return true if X is a valid address for machine mode MODE. If it is,
3529 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3530 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3531
3532static bool
3533aarch64_classify_address (struct aarch64_address_info *info,
ef4bddc2 3534 rtx x, machine_mode mode,
3535 RTX_CODE outer_code, bool strict_p)
3536{
3537 enum rtx_code code = GET_CODE (x);
3538 rtx op0, op1;
3539
3540 /* On BE, we use load/store pair for all large int mode load/stores. */
3541 bool load_store_pair_p = (outer_code == PARALLEL
3542 || (BYTES_BIG_ENDIAN
3543 && aarch64_vect_struct_mode_p (mode)));
3544
43e9d192 3545 bool allow_reg_index_p =
3546 !load_store_pair_p
3547 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
3548 && !aarch64_vect_struct_mode_p (mode);
3549
3550 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
3551 REG addressing. */
3552 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
3553 && (code != POST_INC && code != REG))
3554 return false;
3555
3556 switch (code)
3557 {
3558 case REG:
3559 case SUBREG:
3560 info->type = ADDRESS_REG_IMM;
3561 info->base = x;
3562 info->offset = const0_rtx;
3563 return aarch64_base_register_rtx_p (x, strict_p);
3564
3565 case PLUS:
3566 op0 = XEXP (x, 0);
3567 op1 = XEXP (x, 1);
15c0c5c9
JW
3568
3569 if (! strict_p
4aa81c2e 3570 && REG_P (op0)
15c0c5c9
JW
3571 && (op0 == virtual_stack_vars_rtx
3572 || op0 == frame_pointer_rtx
3573 || op0 == arg_pointer_rtx)
4aa81c2e 3574 && CONST_INT_P (op1))
15c0c5c9
JW
3575 {
3576 info->type = ADDRESS_REG_IMM;
3577 info->base = op0;
3578 info->offset = op1;
3579
3580 return true;
3581 }
3582
43e9d192
IB
3583 if (GET_MODE_SIZE (mode) != 0
3584 && CONST_INT_P (op1)
3585 && aarch64_base_register_rtx_p (op0, strict_p))
3586 {
3587 HOST_WIDE_INT offset = INTVAL (op1);
3588
3589 info->type = ADDRESS_REG_IMM;
3590 info->base = op0;
3591 info->offset = op1;
3592
3593 /* TImode and TFmode values are allowed in both pairs of X
3594 registers and individual Q registers. The available
3595 address modes are:
3596 X,X: 7-bit signed scaled offset
3597 Q: 9-bit signed offset
3598 We conservatively require an offset representable in either mode.
3599 */
3600 if (mode == TImode || mode == TFmode)
44707478 3601 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3602 && offset_9bit_signed_unscaled_p (mode, offset));
3603
2d8c6dc1
AH
3604 /* A 7bit offset check because OImode will emit a ldp/stp
3605 instruction (only big endian will get here).
3606 For ldp/stp instructions, the offset is scaled for the size of a
3607 single element of the pair. */
3608 if (mode == OImode)
3609 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
3610
3611 /* Three 9/12 bit offsets checks because CImode will emit three
3612 ldr/str instructions (only big endian will get here). */
3613 if (mode == CImode)
3614 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3615 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
3616 || offset_12bit_unsigned_scaled_p (V16QImode,
3617 offset + 32)));
3618
3619 /* Two 7bit offsets checks because XImode will emit two ldp/stp
3620 instructions (only big endian will get here). */
3621 if (mode == XImode)
3622 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3623 && aarch64_offset_7bit_signed_scaled_p (TImode,
3624 offset + 32));
3625
3626 if (load_store_pair_p)
43e9d192 3627 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3628 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3629 else
3630 return (offset_9bit_signed_unscaled_p (mode, offset)
3631 || offset_12bit_unsigned_scaled_p (mode, offset));
3632 }
3633
3634 if (allow_reg_index_p)
3635 {
3636 /* Look for base + (scaled/extended) index register. */
3637 if (aarch64_base_register_rtx_p (op0, strict_p)
3638 && aarch64_classify_index (info, op1, mode, strict_p))
3639 {
3640 info->base = op0;
3641 return true;
3642 }
3643 if (aarch64_base_register_rtx_p (op1, strict_p)
3644 && aarch64_classify_index (info, op0, mode, strict_p))
3645 {
3646 info->base = op1;
3647 return true;
3648 }
3649 }
3650
3651 return false;
3652
3653 case POST_INC:
3654 case POST_DEC:
3655 case PRE_INC:
3656 case PRE_DEC:
3657 info->type = ADDRESS_REG_WB;
3658 info->base = XEXP (x, 0);
3659 info->offset = NULL_RTX;
3660 return aarch64_base_register_rtx_p (info->base, strict_p);
3661
3662 case POST_MODIFY:
3663 case PRE_MODIFY:
3664 info->type = ADDRESS_REG_WB;
3665 info->base = XEXP (x, 0);
3666 if (GET_CODE (XEXP (x, 1)) == PLUS
3667 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3668 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3669 && aarch64_base_register_rtx_p (info->base, strict_p))
3670 {
3671 HOST_WIDE_INT offset;
3672 info->offset = XEXP (XEXP (x, 1), 1);
3673 offset = INTVAL (info->offset);
3674
3675 /* TImode and TFmode values are allowed in both pairs of X
3676 registers and individual Q registers. The available
3677 address modes are:
3678 X,X: 7-bit signed scaled offset
3679 Q: 9-bit signed offset
3680 We conservatively require an offset representable in either mode.
3681 */
3682 if (mode == TImode || mode == TFmode)
44707478 3683 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3684 && offset_9bit_signed_unscaled_p (mode, offset));
3685
2d8c6dc1 3686 if (load_store_pair_p)
43e9d192 3687 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3688 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3689 else
3690 return offset_9bit_signed_unscaled_p (mode, offset);
3691 }
3692 return false;
3693
3694 case CONST:
3695 case SYMBOL_REF:
3696 case LABEL_REF:
79517551
SN
3697 /* load literal: pc-relative constant pool entry. Only supported
3698 for SI mode or larger. */
43e9d192 3699 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1
AH
3700
3701 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
3702 {
3703 rtx sym, addend;
3704
3705 split_const (x, &sym, &addend);
3706 return (GET_CODE (sym) == LABEL_REF
3707 || (GET_CODE (sym) == SYMBOL_REF
3708 && CONSTANT_POOL_ADDRESS_P (sym)));
3709 }
3710 return false;
3711
3712 case LO_SUM:
3713 info->type = ADDRESS_LO_SUM;
3714 info->base = XEXP (x, 0);
3715 info->offset = XEXP (x, 1);
3716 if (allow_reg_index_p
3717 && aarch64_base_register_rtx_p (info->base, strict_p))
3718 {
3719 rtx sym, offs;
3720 split_const (info->offset, &sym, &offs);
3721 if (GET_CODE (sym) == SYMBOL_REF
f8b756b7 3722 && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
43e9d192
IB
3723 == SYMBOL_SMALL_ABSOLUTE))
3724 {
3725 /* The symbol and offset must be aligned to the access size. */
3726 unsigned int align;
3727 unsigned int ref_size;
3728
3729 if (CONSTANT_POOL_ADDRESS_P (sym))
3730 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3731 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3732 {
3733 tree exp = SYMBOL_REF_DECL (sym);
3734 align = TYPE_ALIGN (TREE_TYPE (exp));
3735 align = CONSTANT_ALIGNMENT (exp, align);
3736 }
3737 else if (SYMBOL_REF_DECL (sym))
3738 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
3739 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3740 && SYMBOL_REF_BLOCK (sym) != NULL)
3741 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
3742 else
3743 align = BITS_PER_UNIT;
3744
3745 ref_size = GET_MODE_SIZE (mode);
3746 if (ref_size == 0)
3747 ref_size = GET_MODE_SIZE (DImode);
3748
3749 return ((INTVAL (offs) & (ref_size - 1)) == 0
3750 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3751 }
3752 }
3753 return false;
3754
3755 default:
3756 return false;
3757 }
3758}
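
/* Examples of RTXes accepted above (for a suitable MODE), with the
   classification they receive:
     (reg X0)				     -> ADDRESS_REG_IMM, offset 0
     (plus (reg X0) (const_int 16))	     -> ADDRESS_REG_IMM
     (post_inc (reg X0))		     -> ADDRESS_REG_WB
     (plus (reg X0) (mult (reg X1) (const_int 4)))
					     -> ADDRESS_REG_REG, via
						aarch64_classify_index.  */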

bool
aarch64_symbolic_address_p (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
}

/* Classify the base of symbolic expression X, given that X appears in
   context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbolic_expression (rtx x,
				      enum aarch64_symbol_context context)
{
  rtx offset;

  split_const (x, &x, &offset);
  return aarch64_classify_symbol (x, offset, context);
}


/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  */
static bool
aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
   pair operation.  */
bool
aarch64_legitimate_address_p (machine_mode mode, rtx x,
			      RTX_CODE outer_code, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
}

/* Return TRUE if rtx X is immediate constant 0.0  */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
  REAL_VALUE_TYPE r;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return !HONOR_SIGNED_ZEROS (GET_MODE (x));
  return REAL_VALUES_EQUAL (r, dconst0);
}
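
/* Note that +0.0 always satisfies this predicate, while -0.0 does so
   only when signed zeros may be ignored (e.g. under -fno-signed-zeros),
   since HONOR_SIGNED_ZEROS is false in that case.  */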

/* Return the fixed registers used for condition codes.  */

static bool
aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}

/* Emit call insn with PAT and do aarch64-specific handling.  */

void
aarch64_emit_call_insn (rtx pat)
{
  rtx insn = emit_call_insn (pat);

  rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
  clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
  clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
}

machine_mode
aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (code)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }

  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && y == const0_rtx
      && (code == EQ || code == NE || code == LT || code == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
	  || GET_CODE (x) == NEG))
    return CC_NZmode;

  /* A compare with a shifted operand.  Because of canonicalization,
     the comparison will have to be swapped when we emit the assembly
     code.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && (REG_P (y) || GET_CODE (y) == SUBREG)
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
    return CC_SWPmode;

  /* Similarly for a negated operand, but we can only do this for
     equalities.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && (REG_P (y) || GET_CODE (y) == SUBREG)
      && (code == EQ || code == NE)
      && GET_CODE (x) == NEG)
    return CC_Zmode;

  /* A compare of a mode narrower than SI mode against zero can be done
     by extending the value in the comparison.  */
  if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
      && y == const0_rtx)
    /* Only use sign-extension if we really need it.  */
    return ((code == GT || code == GE || code == LE || code == LT)
	    ? CC_SESWPmode : CC_ZESWPmode);

  /* For everything else, return CCmode.  */
  return CCmode;
}
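
/* For example, (compare (plus (reg) (reg)) (const_int 0)) with code EQ
   selects CC_NZmode, allowing the addition to be emitted as ADDS with
   the separate comparison folded away; a compare against a shifted
   register selects CC_SWPmode because the operands must be swapped when
   the assembly is emitted.  */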

static int
aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);

int
aarch64_get_condition_code (rtx x)
{
  machine_mode mode = GET_MODE (XEXP (x, 0));
  enum rtx_code comp_code = GET_CODE (x);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
  return aarch64_get_condition_code_1 (mode, comp_code);
}

static int
aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
{
  int ne = -1, eq = -1;
  switch (mode)
    {
    case CCFPmode:
    case CCFPEmode:
      switch (comp_code)
	{
	case GE: return AARCH64_GE;
	case GT: return AARCH64_GT;
	case LE: return AARCH64_LS;
	case LT: return AARCH64_MI;
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case ORDERED: return AARCH64_VC;
	case UNORDERED: return AARCH64_VS;
	case UNLT: return AARCH64_LT;
	case UNLE: return AARCH64_LE;
	case UNGT: return AARCH64_HI;
	case UNGE: return AARCH64_PL;
	default: return -1;
	}
      break;

    case CC_DNEmode:
      ne = AARCH64_NE;
      eq = AARCH64_EQ;
      break;

    case CC_DEQmode:
      ne = AARCH64_EQ;
      eq = AARCH64_NE;
      break;

    case CC_DGEmode:
      ne = AARCH64_GE;
      eq = AARCH64_LT;
      break;

    case CC_DLTmode:
      ne = AARCH64_LT;
      eq = AARCH64_GE;
      break;

    case CC_DGTmode:
      ne = AARCH64_GT;
      eq = AARCH64_LE;
      break;

    case CC_DLEmode:
      ne = AARCH64_LE;
      eq = AARCH64_GT;
      break;

    case CC_DGEUmode:
      ne = AARCH64_CS;
      eq = AARCH64_CC;
      break;

    case CC_DLTUmode:
      ne = AARCH64_CC;
      eq = AARCH64_CS;
      break;

    case CC_DGTUmode:
      ne = AARCH64_HI;
      eq = AARCH64_LS;
      break;

    case CC_DLEUmode:
      ne = AARCH64_LS;
      eq = AARCH64_HI;
      break;

    case CCmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_GE;
	case GT: return AARCH64_GT;
	case LE: return AARCH64_LE;
	case LT: return AARCH64_LT;
	case GEU: return AARCH64_CS;
	case GTU: return AARCH64_HI;
	case LEU: return AARCH64_LS;
	case LTU: return AARCH64_CC;
	default: return -1;
	}
      break;

    case CC_SWPmode:
    case CC_ZESWPmode:
    case CC_SESWPmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_LE;
	case GT: return AARCH64_LT;
	case LE: return AARCH64_GE;
	case LT: return AARCH64_GT;
	case GEU: return AARCH64_LS;
	case GTU: return AARCH64_CC;
	case LEU: return AARCH64_CS;
	case LTU: return AARCH64_HI;
	default: return -1;
	}
      break;

    case CC_NZmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_PL;
	case LT: return AARCH64_MI;
	default: return -1;
	}
      break;

    case CC_Zmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	default: return -1;
	}
      break;

    default:
      return -1;
      break;
    }

  if (comp_code == NE)
    return ne;

  if (comp_code == EQ)
    return eq;

  return -1;
}

bool
aarch64_const_vec_all_same_in_range_p (rtx x,
				       HOST_WIDE_INT minval,
				       HOST_WIDE_INT maxval)
{
  HOST_WIDE_INT firstval;
  int count, i;

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (firstval < minval || firstval > maxval)
    return false;

  count = CONST_VECTOR_NUNITS (x);
  for (i = 1; i < count; i++)
    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
      return false;

  return true;
}

bool
aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
{
  return aarch64_const_vec_all_same_in_range_p (x, val, val);
}
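
/* Count the number of bits set in VALUE.  Each iteration of the loop
   below clears the lowest set bit (value &= value - 1, Kernighan's
   method), so e.g. 0x29 (binary 101001) takes three iterations.  */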
static unsigned
bit_count (unsigned HOST_WIDE_INT value)
{
  unsigned count = 0;

  while (value)
    {
      count++;
      value &= value - 1;
    }

  return count;
}

/* N Z C V.  */
#define AARCH64_CC_V 1
#define AARCH64_CC_C (1 << 1)
#define AARCH64_CC_Z (1 << 2)
#define AARCH64_CC_N (1 << 3)

/* N Z C V flags for ccmp.  The first code is for AND op and the other
   is for IOR op.  Indexed by AARCH64_COND_CODE.  */
static const int aarch64_nzcv_codes[][2] =
{
  {AARCH64_CC_Z, 0}, /* EQ, Z == 1.  */
  {0, AARCH64_CC_Z}, /* NE, Z == 0.  */
  {AARCH64_CC_C, 0}, /* CS, C == 1.  */
  {0, AARCH64_CC_C}, /* CC, C == 0.  */
  {AARCH64_CC_N, 0}, /* MI, N == 1.  */
  {0, AARCH64_CC_N}, /* PL, N == 0.  */
  {AARCH64_CC_V, 0}, /* VS, V == 1.  */
  {0, AARCH64_CC_V}, /* VC, V == 0.  */
  {AARCH64_CC_C, 0}, /* HI, C == 1 && Z == 0.  */
  {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0).  */
  {0, AARCH64_CC_V}, /* GE, N == V.  */
  {AARCH64_CC_V, 0}, /* LT, N != V.  */
  {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V.  */
  {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V).  */
  {0, 0}, /* AL, Any.  */
  {0, 0}, /* NV, Any.  */
};

int
aarch64_ccmp_mode_to_code (enum machine_mode mode)
{
  switch (mode)
    {
    case CC_DNEmode:
      return NE;

    case CC_DEQmode:
      return EQ;

    case CC_DLEmode:
      return LE;

    case CC_DGTmode:
      return GT;

    case CC_DLTmode:
      return LT;

    case CC_DGEmode:
      return GE;

    case CC_DLEUmode:
      return LEU;

    case CC_DGTUmode:
      return GTU;

    case CC_DLTUmode:
      return LTU;

    case CC_DGEUmode:
      return GEU;

    default:
      gcc_unreachable ();
    }
}


void
aarch64_print_operand (FILE *f, rtx x, char code)
{
  switch (code)
    {
    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (f, x);
	  break;

	case CONST:
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	    {
	      output_addr_const (f, x);
	      break;
	    }
	  /* Fall through.  */

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      break;

    case 'e':
      /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
      {
	int n;

	if (!CONST_INT_P (x)
	    || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	switch (n)
	  {
	  case 3:
	    fputc ('b', f);
	    break;
	  case 4:
	    fputc ('h', f);
	    break;
	  case 5:
	    fputc ('w', f);
	    break;
	  default:
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }
      }
      break;

    case 'p':
      {
	int n;

	/* Print N such that 2^N == X.  */
	if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	asm_fprintf (f, "%d", n);
      }
      break;

    case 'P':
      /* Print the number of non-zero bits in X (a const_int).  */
      if (!CONST_INT_P (x))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      asm_fprintf (f, "%u", bit_count (INTVAL (x)));
      break;

    case 'H':
      /* Print the higher numbered register of a pair (TImode) of regs.  */
      if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
      break;

    case 'm':
      {
	int cond_code;
	/* Print a condition (eq, ne, etc).  */

	/* CONST_TRUE_RTX means always -- that's the default.  */
	if (x == const_true_rtx)
	  return;

	if (!COMPARISON_P (x))
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	cond_code = aarch64_get_condition_code (x);
	gcc_assert (cond_code >= 0);
	fputs (aarch64_condition_codes[cond_code], f);
      }
      break;

    case 'M':
      {
	int cond_code;
	/* Print the inverse of a condition (eq <-> ne, etc).  */

	/* CONST_TRUE_RTX means never -- that's the default.  */
	if (x == const_true_rtx)
	  {
	    fputs ("nv", f);
	    return;
	  }

	if (!COMPARISON_P (x))
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }
	cond_code = aarch64_get_condition_code (x);
	gcc_assert (cond_code >= 0);
	fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
				       (cond_code)], f);
      }
      break;

    case 'b':
    case 'h':
    case 's':
    case 'd':
    case 'q':
      /* Print a scalar FP/SIMD register name.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
	{
	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
      break;

    case 'S':
    case 'T':
    case 'U':
    case 'V':
      /* Print the first FP/SIMD register name in a list.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
	{
	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
      break;

    case 'R':
      /* Print a scalar FP/SIMD register name + 1.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
	{
	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
      break;

    case 'X':
      /* Print bottom 16 bits of integer constant in hex.  */
      if (!CONST_INT_P (x))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
      break;

    case 'w':
    case 'x':
      /* Print a general register name or the zero register (32-bit or
	 64-bit).  */
      if (x == const0_rtx
	  || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
	{
	  asm_fprintf (f, "%czr", code);
	  break;
	}

      if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
	{
	  asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
	  break;
	}

      if (REG_P (x) && REGNO (x) == SP_REGNUM)
	{
	  asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
	  break;
	}

      /* Fall through */

    case 0:
      /* Print a normal operand, if it's a general register, then we
	 assume DImode.  */
      if (x == NULL)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (f, "%s", reg_names [REGNO (x)]);
	  break;

	case MEM:
	  aarch64_memory_reference_mode = GET_MODE (x);
	  output_address (XEXP (x, 0));
	  break;

	case LABEL_REF:
	case SYMBOL_REF:
	  output_addr_const (asm_out_file, x);
	  break;

	case CONST_INT:
	  asm_fprintf (f, "%wd", INTVAL (x));
	  break;

	case CONST_VECTOR:
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
	    {
	      gcc_assert (
		aarch64_const_vec_all_same_in_range_p (x,
						       HOST_WIDE_INT_MIN,
						       HOST_WIDE_INT_MAX));
	      asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
	    }
	  else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
	    {
	      fputc ('0', f);
	    }
	  else
	    gcc_unreachable ();
	  break;

	case CONST_DOUBLE:
	  /* CONST_DOUBLE can represent a double-width integer.
	     In this case, the mode of x is VOIDmode.  */
	  if (GET_MODE (x) == VOIDmode)
	    ; /* Do Nothing.  */
	  else if (aarch64_float_const_zero_rtx_p (x))
	    {
	      fputc ('0', f);
	      break;
	    }
	  else if (aarch64_float_const_representable_p (x))
	    {
#define buf_size 20
	      char float_buf[buf_size] = {'\0'};
	      REAL_VALUE_TYPE r;
	      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
	      real_to_decimal_for_mode (float_buf, &r,
					buf_size, buf_size,
					1, GET_MODE (x));
	      asm_fprintf (asm_out_file, "%s", float_buf);
	      break;
#undef buf_size
	    }
	  output_operand_lossage ("invalid constant");
	  return;
	default:
	  output_operand_lossage ("invalid operand");
	  return;
	}
      break;

    case 'A':
      if (GET_CODE (x) == HIGH)
	x = XEXP (x, 0);

      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_GOT_4G:
	  asm_fprintf (asm_out_file, ":got:");
	  break;

	case SYMBOL_SMALL_TLSGD:
	  asm_fprintf (asm_out_file, ":tlsgd:");
	  break;

	case SYMBOL_SMALL_TLSDESC:
	  asm_fprintf (asm_out_file, ":tlsdesc:");
	  break;

	case SYMBOL_SMALL_GOTTPREL:
	  asm_fprintf (asm_out_file, ":gottprel:");
	  break;

	case SYMBOL_TLSLE:
	  asm_fprintf (asm_out_file, ":tprel:");
	  break;

	case SYMBOL_TINY_GOT:
	  gcc_unreachable ();
	  break;

	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    case 'L':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_GOT_4G:
	  asm_fprintf (asm_out_file, ":lo12:");
	  break;

	case SYMBOL_SMALL_TLSGD:
	  asm_fprintf (asm_out_file, ":tlsgd_lo12:");
	  break;

	case SYMBOL_SMALL_TLSDESC:
	  asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
	  break;

	case SYMBOL_SMALL_GOTTPREL:
	  asm_fprintf (asm_out_file, ":gottprel_lo12:");
	  break;

	case SYMBOL_TLSLE:
	  asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
	  break;

	case SYMBOL_TINY_GOT:
	  asm_fprintf (asm_out_file, ":got:");
	  break;

	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    case 'G':

      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_TLSLE:
	  asm_fprintf (asm_out_file, ":tprel_hi12:");
	  break;
	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    case 'K':
      {
	int cond_code;
	/* Print nzcv.  */

	if (!COMPARISON_P (x))
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
	gcc_assert (cond_code >= 0);
	asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
      }
      break;

    case 'k':
      {
	int cond_code;
	/* Print nzcv.  */

	if (!COMPARISON_P (x))
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
	gcc_assert (cond_code >= 0);
	asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
      }
      break;

    default:
      output_operand_lossage ("invalid operand prefix '%%%c'", code);
      return;
    }
}
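
/* As an example of the modifiers handled above: for an operand held in
   register X1, "%w0" prints "w1" and "%x0" prints "x1"; for a comparison
   operand, "%m0" and "%M0" print the condition and its inverse (e.g.
   "eq" and "ne").  */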

void
aarch64_print_operand_address (FILE *f, rtx x)
{
  struct aarch64_address_info addr;

  if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
				MEM, true))
    switch (addr.type)
      {
      case ADDRESS_REG_IMM:
	if (addr.offset == const0_rtx)
	  asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
	else
	  asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
		       INTVAL (addr.offset));
	return;

      case ADDRESS_REG_REG:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
		       reg_names [REGNO (addr.offset)]);
	else
	  asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
		       reg_names [REGNO (addr.offset)], addr.shift);
	return;

      case ADDRESS_REG_UXTW:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM);
	else
	  asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
	return;

      case ADDRESS_REG_SXTW:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM);
	else
	  asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
	return;

      case ADDRESS_REG_WB:
	switch (GET_CODE (x))
	  {
	  case PRE_INC:
	    asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case POST_INC:
	    asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case PRE_DEC:
	    asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case POST_DEC:
	    asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case PRE_MODIFY:
	    asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
			 INTVAL (addr.offset));
	    return;
	  case POST_MODIFY:
	    asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
			 INTVAL (addr.offset));
	    return;
	  default:
	    break;
	  }
	break;

      case ADDRESS_LO_SUM:
	asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
	output_addr_const (f, addr.offset);
	asm_fprintf (f, "]");
	return;

      case ADDRESS_SYMBOLIC:
	break;
      }

  output_addr_const (f, x);
}
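
/* The classifications above correspond to assembly syntax such as:
     ADDRESS_REG_IMM	[x0] or [x0, 16]
     ADDRESS_REG_REG	[x0, x1, lsl 2]
     ADDRESS_REG_SXTW	[x0, w1, sxtw 2]
     ADDRESS_REG_WB	[x0, 16]! or [x0], 16
     ADDRESS_LO_SUM	[x0, #:lo12:symbol]  */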

bool
aarch64_label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return true;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
     referencing instruction, but they are constant offsets, not
     symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return false;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}

/* Implement REGNO_REG_CLASS.  */

enum reg_class
aarch64_regno_regclass (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return GENERAL_REGS;

  if (regno == SP_REGNUM)
    return STACK_REG;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return POINTER_REGS;

  if (FP_REGNUM_P (regno))
    return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;

  return NO_REGS;
}
static rtx
aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
{
  /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
     where mask is selected by alignment and size of the offset.
     We try to pick as large a range for the offset as possible to
     maximize the chance of a CSE.  However, for aligned addresses
     we limit the range to 4k so that structures with different sized
     elements are likely to use the same base.  */

  if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
    {
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT base_offset;

      /* Does it look like we'll need a load/store-pair operation?  */
      if (GET_MODE_SIZE (mode) > 16
	  || mode == TImode)
	base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
		       & ~((128 * GET_MODE_SIZE (mode)) - 1));
      /* For offsets that aren't a multiple of the access size, the limit is
	 -256...255.  */
      else if (offset & (GET_MODE_SIZE (mode) - 1))
	base_offset = (offset + 0x100) & ~0x1ff;
      else
	base_offset = offset & ~0xfff;

      if (base_offset == 0)
	return x;

      offset -= base_offset;
      rtx base_reg = gen_reg_rtx (Pmode);
      rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
			       NULL_RTX);
      emit_move_insn (base_reg, val);
      x = plus_constant (Pmode, base_reg, offset);
    }

  return x;
}
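
/* For example, an SImode access at x0 + 0x3204 is aligned, so the code
   above rewrites it as tmp = x0 + 0x3000; mem (tmp + 0x204), letting
   nearby accesses CSE the shared base register.  */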

/* Try a machine-dependent way of reloading an illegitimate address
   operand.  If we find one, push the reload and return the new rtx.  */

rtx
aarch64_legitimize_reload_address (rtx *x_p,
				   machine_mode mode,
				   int opnum, int type,
				   int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  /* Do not allow mem (plus (reg, const)) if vector struct mode.  */
  if (aarch64_vect_struct_mode_p (mode)
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    {
      rtx orig_rtx = x;
      x = copy_rtx (x);
      push_reload (orig_rtx, NULL_RTX, x_p, NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && CONST_INT_P (XEXP (x, 1)))
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  /* We wish to handle large displacements off a base register by splitting
     the addend across an add and the mem insn.  This can cut the number of
     extra insns needed from 3 to 1.  It is only useful for load/store of a
     single register with 12 bit offset field.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && HARD_REGISTER_P (XEXP (x, 0))
      && mode != TImode
      && mode != TFmode
      && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = val & 0xfff;
      HOST_WIDE_INT high = val - low;
      HOST_WIDE_INT offs;
      rtx cst;
      machine_mode xmode = GET_MODE (x);

      /* In ILP32, xmode can be either DImode or SImode.  */
      gcc_assert (xmode == DImode || xmode == SImode);

      /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
	 BLKmode alignment.  */
      if (GET_MODE_SIZE (mode) == 0)
	return NULL_RTX;

      offs = low % GET_MODE_SIZE (mode);

      /* Align misaligned offset by adjusting high part to compensate.  */
      if (offs != 0)
	{
	  if (aarch64_uimm12_shift (high + offs))
	    {
	      /* Align down.  */
	      low = low - offs;
	      high = high + offs;
	    }
	  else
	    {
	      /* Align up.  */
	      offs = GET_MODE_SIZE (mode) - offs;
	      low = low + offs;
	      high = high + (low & 0x1000) - offs;
	      low &= 0xfff;
	    }
	}

      /* Check for overflow.  */
      if (high + low != val)
	return NULL_RTX;

      cst = GEN_INT (high);
      if (!aarch64_uimm12_shift (high))
	cst = force_const_mem (xmode, cst);

      /* Reload high part into base reg, leaving the low part
	 in the mem instruction.
	 Note that replacing this gen_rtx_PLUS with plus_constant is
	 wrong in this case because we rely on the
	 (plus (plus reg c1) c2) structure being preserved so that
	 XEXP (*p, 0) in push_reload below uses the correct term.  */
      x = gen_rtx_PLUS (xmode,
			gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
			GEN_INT (low));

      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  return NULL_RTX;
}
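
/* Worked example of the split above: a DImode access at x0 + 0x13008 has
   low = 0x008 and high = 0x13000.  high is a valid shifted 12-bit
   immediate (0x13 << 12), so the reload becomes a single ADD producing
   x0 + 0x13000, with the residual offset 8 left in the memory
   reference.  */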


static reg_class_t
aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
			  reg_class_t rclass,
			  machine_mode mode,
			  secondary_reload_info *sri)
{
  /* Without the TARGET_SIMD instructions we cannot move a Q register
     to a Q register directly.  We need a scratch.  */
  if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
      && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
      && reg_class_subset_p (rclass, FP_REGS))
    {
      if (mode == TFmode)
	sri->icode = CODE_FOR_aarch64_reload_movtf;
      else if (mode == TImode)
	sri->icode = CODE_FOR_aarch64_reload_movti;
      return NO_REGS;
    }

  /* A TFmode or TImode memory access should be handled via FP_REGS
     because AArch64 has richer addressing modes for LDR/STR instructions
     than LDP/STP instructions.  */
  if (TARGET_FLOAT && rclass == GENERAL_REGS
      && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
    return FP_REGS;

  if (rclass == FP_REGS && (mode == TImode || mode == TFmode)
      && CONSTANT_P (x))
    return GENERAL_REGS;

  return NO_REGS;
}

static bool
aarch64_can_eliminate (const int from, const int to)
{
  /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
     HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */

  if (frame_pointer_needed)
    {
      if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
	return true;
      if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
	return false;
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  && !cfun->calls_alloca)
	return true;
      if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
	return true;

      return false;
    }
  else
    {
      /* If we decided that we didn't need a leaf frame pointer but then used
	 LR in the function, then we'll want a frame pointer after all, so
	 prevent this elimination to ensure a frame pointer is used.  */
      if (to == STACK_POINTER_REGNUM
	  && flag_omit_leaf_frame_pointer
	  && df_regs_ever_live_p (LR_REGNUM))
	return false;
    }

  return true;
}
HOST_WIDE_INT
aarch64_initial_elimination_offset (unsigned from, unsigned to)
{
  aarch64_layout_frame ();

  if (to == HARD_FRAME_POINTER_REGNUM)
    {
      if (from == ARG_POINTER_REGNUM)
	return cfun->machine->frame.frame_size - crtl->outgoing_args_size;

      if (from == FRAME_POINTER_REGNUM)
	return (cfun->machine->frame.hard_fp_offset
		- cfun->machine->frame.saved_varargs_size);
    }

  if (to == STACK_POINTER_REGNUM)
    {
      if (from == FRAME_POINTER_REGNUM)
	return (cfun->machine->frame.frame_size
		- cfun->machine->frame.saved_varargs_size);
    }

  return cfun->machine->frame.frame_size;
}

/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */

rtx
aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}

static void
aarch64_asm_trampoline_template (FILE *f)
{
  if (TARGET_ILP32)
    {
      asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
      asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
    }
  else
    {
      asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
      asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
    }
  asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
  assemble_aligned_integer (4, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
}

static void
aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;
  const int tramp_code_sz = 16;

  /* Don't need to copy the trailing D-words, we fill those in below.  */
  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  if (GET_MODE (fnaddr) != ptr_mode)
    fnaddr = convert_memory_address (ptr_mode, fnaddr);
  emit_move_insn (mem, fnaddr);

  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
  emit_move_insn (mem, chain_value);

  /* XXX We should really define a "clear_cache" pattern and use
     gen_clear_cache().  */
  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
		     plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
		     ptr_mode);
}

static unsigned char
aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
{
  switch (regclass)
    {
    case CALLER_SAVE_REGS:
    case POINTER_REGS:
    case GENERAL_REGS:
    case ALL_REGS:
    case FP_REGS:
    case FP_LO_REGS:
      return
	aarch64_vector_mode_p (mode)
	  ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
	  : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    case STACK_REG:
      return 1;

    case NO_REGS:
      return 0;

    default:
      break;
    }
  gcc_unreachable ();
}
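
/* For instance, TImode (16 bytes) is counted as two registers here,
   while a 16-byte vector mode such as V4SImode is counted per 16-byte
   vector register and so needs only one.  */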

static reg_class_t
aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
{
  if (regclass == POINTER_REGS)
    return GENERAL_REGS;

  if (regclass == STACK_REG)
    {
      if (REG_P (x)
	  && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
	return regclass;

      return NO_REGS;
    }

  /* If it's an integer immediate that MOVI can't handle, then
     FP_REGS is not an option, so we return NO_REGS instead.  */
  if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
      && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
    return NO_REGS;

  /* Register elimination can result in a request for
     SP+constant->FP_REGS.  We cannot support such operations which
     use SP as source and an FP_REG as destination, so reject this
     outright now.  */
  if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
    {
      rtx lhs = XEXP (x, 0);

      /* Look through a possible SUBREG introduced by ILP32.  */
      if (GET_CODE (lhs) == SUBREG)
	lhs = SUBREG_REG (lhs);

      gcc_assert (REG_P (lhs));
      gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
				      POINTER_REGS));
      return NO_REGS;
    }

  return regclass;
}

void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}

static void
aarch64_elf_asm_constructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_ctor_section_asm_out_constructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];
      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}

static void
aarch64_elf_asm_destructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_dtor_section_asm_out_destructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];
      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}

const char*
aarch64_output_casesi (rtx *operands)
{
  char buf[100];
  char label[100];
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
  int index;
  static const char *const patterns[4][2] =
  {
    {
      "ldrb\t%w3, [%0,%w1,uxtw]",
      "add\t%3, %4, %w3, sxtb #2"
    },
    {
      "ldrh\t%w3, [%0,%w1,uxtw #1]",
      "add\t%3, %4, %w3, sxth #2"
    },
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    },
    /* We assume that DImode is only generated when not optimizing and
       that we don't really need 64-bit address offsets.  That would
       imply an object file with 8GB of code in a single function!  */
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    }
  };

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));

  gcc_assert (index >= 0 && index <= 3);

  /* Need to implement table size reduction, by changing the code below.  */
  output_asm_insn (patterns[index][0], operands);
  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
  snprintf (buf, sizeof (buf),
	    "adr\t%%4, %s", targetm.strip_name_encoding (label));
  output_asm_insn (buf, operands);
  output_asm_insn (patterns[index][1], operands);
  output_asm_insn ("br\t%3", operands);
  assemble_label (asm_out_file, label);
  return "";
}


/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */

int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
	{
	  HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
	  if (mask == bits << shift)
	    return size;
	}
    }
  return 0;
}
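
/* E.g. aarch64_uxt_size (1, 0x1fe) returns 8, since 0x1fe == 0xff << 1:
   the operand matches a UXTB extend combined with a left shift of one.  */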

static bool
aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
				   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
			    rtx x ATTRIBUTE_UNUSED,
			    unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}


/* Costs.  */

/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
     we can convert both to ROR during final output.  */
  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT
       || GET_CODE (op) == ROTATERT
       || GET_CODE (op) == ROTATE)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}
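
/* E.g. both (ashift (reg) (const_int 3)) and the canonicalized
   (mult (reg) (const_int 8)) strip to (reg).  */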

/* Helper function for rtx cost calculation.  Strip an extend
   expression from X.  Returns the inner operand if successful, or the
   original expression on failure.  We deal with a number of possible
   canonicalization variations here.  */
static rtx
aarch64_strip_extend (rtx x)
{
  rtx op = x;

  /* Zero and sign extraction of a widened value.  */
  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
      && XEXP (op, 2) == const0_rtx
      && GET_CODE (XEXP (op, 0)) == MULT
      && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
					 XEXP (op, 1)))
    return XEXP (XEXP (op, 0), 0);

  /* It can also be represented (for zero-extend) as an AND with an
     immediate.  */
  if (GET_CODE (op) == AND
      && GET_CODE (XEXP (op, 0)) == MULT
      && CONST_INT_P (XEXP (XEXP (op, 0), 1))
      && CONST_INT_P (XEXP (op, 1))
      && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
			   INTVAL (XEXP (op, 1))) != 0)
    return XEXP (XEXP (op, 0), 0);

  /* Now handle extended register, as this may also have an optional
     left shift by 1..4.  */
  if (GET_CODE (op) == ASHIFT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
    op = XEXP (op, 0);

  if (GET_CODE (op) == ZERO_EXTEND
      || GET_CODE (op) == SIGN_EXTEND)
    op = XEXP (op, 0);

  if (op != x)
    return op;

  return x;
}

/* Return true iff CODE is a shift supported in combination
   with arithmetic instructions.  */

static bool
aarch64_shift_p (enum rtx_code code)
{
  return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
}

/* Helper function for rtx cost calculation.  Calculate the cost of
   a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
   Return the calculated cost of the expression, recursing manually in to
   operands where needed.  */

static int
aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
{
  rtx op0, op1;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params.insn_extra_cost;
  int cost = 0;
  bool compound_p = (outer == PLUS || outer == MINUS);
  machine_mode mode = GET_MODE (x);

  gcc_checking_assert (code == MULT);

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* Integer multiply/fma.  */
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      /* The multiply will be canonicalized as a shift, cost it as such.  */
      if (aarch64_shift_p (GET_CODE (x))
	  || (CONST_INT_P (op1)
	      && exact_log2 (INTVAL (op1)) > 0))
	{
	  bool is_extend = GET_CODE (op0) == ZERO_EXTEND
			   || GET_CODE (op0) == SIGN_EXTEND;
	  if (speed)
	    {
	      if (compound_p)
		{
		  if (REG_P (op1))
		    /* ARITH + shift-by-register.  */
		    cost += extra_cost->alu.arith_shift_reg;
		  else if (is_extend)
		    /* ARITH + extended register.  We don't have a cost field
		       for ARITH+EXTEND+SHIFT, so use extend_arith here.  */
		    cost += extra_cost->alu.extend_arith;
		  else
		    /* ARITH + shift-by-immediate.  */
		    cost += extra_cost->alu.arith_shift;
		}
	      else
		/* LSL (immediate).  */
		cost += extra_cost->alu.shift;

	    }
	  /* Strip extends as we will have costed them in the case above.  */
	  if (is_extend)
	    op0 = aarch64_strip_extend (op0);

	  cost += rtx_cost (op0, VOIDmode, code, 0, speed);

	  return cost;
	}

      /* MNEG or [US]MNEGL.  Extract the NEG operand and indicate that it's a
	 compound and let the below cases handle it.  After all, MNEG is a
	 special-case alias of MSUB.  */
      if (GET_CODE (op0) == NEG)
	{
	  op0 = XEXP (op0, 0);
	  compound_p = true;
	}

      /* Integer multiplies or FMAs have zero/sign extending variants.  */
      if ((GET_CODE (op0) == ZERO_EXTEND
	   && GET_CODE (op1) == ZERO_EXTEND)
	  || (GET_CODE (op0) == SIGN_EXTEND
	      && GET_CODE (op1) == SIGN_EXTEND))
	{
	  cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
	  cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);

	  if (speed)
	    {
	      if (compound_p)
		/* SMADDL/UMADDL/UMSUBL/SMSUBL.  */
		cost += extra_cost->mult[0].extend_add;
	      else
		/* MUL/SMULL/UMULL.  */
		cost += extra_cost->mult[0].extend;
	    }

	  return cost;
	}

      /* This is either an integer multiply or a MADD.  In both cases
	 we want to recurse and cost the operands.  */
      cost += rtx_cost (op0, mode, MULT, 0, speed);
      cost += rtx_cost (op1, mode, MULT, 1, speed);

      if (speed)
	{
	  if (compound_p)
	    /* MADD/MSUB.  */
	    cost += extra_cost->mult[mode == DImode].add;
	  else
	    /* MUL.  */
	    cost += extra_cost->mult[mode == DImode].simple;
	}

      return cost;
    }
  else
    {
      if (speed)
	{
	  /* Floating-point FMA/FMUL can also support negations of the
	     operands.  */
	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);
	  if (GET_CODE (op1) == NEG)
	    op1 = XEXP (op1, 0);

	  if (compound_p)
	    /* FMADD/FNMADD/FNMSUB/FMSUB.  */
	    cost += extra_cost->fp[mode == DFmode].fma;
	  else
	    /* FMUL/FNMUL.  */
	    cost += extra_cost->fp[mode == DFmode].mult;
	}

      cost += rtx_cost (op0, mode, MULT, 0, speed);
      cost += rtx_cost (op1, mode, MULT, 1, speed);
      return cost;
    }
}
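
/* As an illustration: a bare (mult (reg) (const_int 8)) is costed as an
   LSL-by-immediate; the same multiply under a PLUS is costed as an
   ADD-with-shift; and (mult (sign_extend ...) (sign_extend ...)) picks
   up the SMULL/SMADDL extend costs.  */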
5457
67747367
JG
5458static int
5459aarch64_address_cost (rtx x,
ef4bddc2 5460 machine_mode mode,
67747367
JG
5461 addr_space_t as ATTRIBUTE_UNUSED,
5462 bool speed)
5463{
5464 enum rtx_code c = GET_CODE (x);
b175b679 5465 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
5466 struct aarch64_address_info info;
5467 int cost = 0;
5468 info.shift = 0;
5469
5470 if (!aarch64_classify_address (&info, x, mode, c, false))
5471 {
5472 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
5473 {
5474 /* This is a CONST or SYMBOL ref which will be split
5475 in a different way depending on the code model in use.
5476 Cost it through the generic infrastructure. */
e548c9df 5477 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
5478 /* Divide through by the cost of one instruction to
5479 bring it to the same units as the address costs. */
5480 cost_symbol_ref /= COSTS_N_INSNS (1);
5481 /* The cost is then the cost of preparing the address,
5482 followed by an immediate (possibly 0) offset. */
5483 return cost_symbol_ref + addr_cost->imm_offset;
5484 }
5485 else
5486 {
5487 /* This is most likely a jump table from a case
5488 statement. */
5489 return addr_cost->register_offset;
5490 }
5491 }
5492
5493 switch (info.type)
5494 {
5495 case ADDRESS_LO_SUM:
5496 case ADDRESS_SYMBOLIC:
5497 case ADDRESS_REG_IMM:
5498 cost += addr_cost->imm_offset;
5499 break;
5500
5501 case ADDRESS_REG_WB:
5502 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
5503 cost += addr_cost->pre_modify;
5504 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
5505 cost += addr_cost->post_modify;
5506 else
5507 gcc_unreachable ();
5508
5509 break;
5510
5511 case ADDRESS_REG_REG:
5512 cost += addr_cost->register_offset;
5513 break;
5514
5515 case ADDRESS_REG_UXTW:
5516 case ADDRESS_REG_SXTW:
5517 cost += addr_cost->register_extend;
5518 break;
5519
5520 default:
5521 gcc_unreachable ();
5522 }
5523
5524
5525 if (info.shift > 0)
5526 {
5527 /* For the sake of calculating the cost of the shifted register
5528	 component, we can treat same-sized modes in the same way.  */
5529 switch (GET_MODE_BITSIZE (mode))
5530 {
5531 case 16:
5532 cost += addr_cost->addr_scale_costs.hi;
5533 break;
5534
5535 case 32:
5536 cost += addr_cost->addr_scale_costs.si;
5537 break;
5538
5539 case 64:
5540 cost += addr_cost->addr_scale_costs.di;
5541 break;
5542
5543 /* We can't tell, or this is a 128-bit vector. */
5544 default:
5545 cost += addr_cost->addr_scale_costs.ti;
5546 break;
5547 }
5548 }
5549
5550 return cost;
5551}
5552
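/* A worked example under assumed cost-table values: a load such as
   ldr w0, [x1, w2, sxtw #2] classifies as ADDRESS_REG_SXTW with
   info.shift == 2 in SImode, so the cost computed above is
   addr_cost->register_extend + addr_cost->addr_scale_costs.si; if
   both entries were 1, the address would cost 2 units.  */
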
b9066f5a
MW
5553/* Return the cost of a branch. If SPEED_P is true then the compiler is
5554   optimizing for speed.  If PREDICTABLE_P is true then the branch is
5555   expected to be well predicted.  */
5556
5557int
5558aarch64_branch_cost (bool speed_p, bool predictable_p)
5559{
5560 /* When optimizing for speed, use the cost of unpredictable branches. */
5561 const struct cpu_branch_cost *branch_costs =
b175b679 5562 aarch64_tune_params.branch_costs;
b9066f5a
MW
5563
5564 if (!speed_p || predictable_p)
5565 return branch_costs->predictable;
5566 else
5567 return branch_costs->unpredictable;
5568}
5569
7cc2145f
JG
5570/* Return true if the RTX X in mode MODE is a zero or sign extract
5571 usable in an ADD or SUB (extended register) instruction. */
5572static bool
ef4bddc2 5573aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
7cc2145f
JG
5574{
5575 /* Catch add with a sign extract.
5576 This is add_<optab><mode>_multp2. */
5577 if (GET_CODE (x) == SIGN_EXTRACT
5578 || GET_CODE (x) == ZERO_EXTRACT)
5579 {
5580 rtx op0 = XEXP (x, 0);
5581 rtx op1 = XEXP (x, 1);
5582 rtx op2 = XEXP (x, 2);
5583
5584 if (GET_CODE (op0) == MULT
5585 && CONST_INT_P (op1)
5586 && op2 == const0_rtx
5587 && CONST_INT_P (XEXP (op0, 1))
5588 && aarch64_is_extend_from_extract (mode,
5589 XEXP (op0, 1),
5590 op1))
5591 {
5592 return true;
5593 }
5594 }
5595
5596 return false;
5597}
5598
61263118
KT
5599static bool
5600aarch64_frint_unspec_p (unsigned int u)
5601{
5602 switch (u)
5603 {
5604 case UNSPEC_FRINTZ:
5605 case UNSPEC_FRINTP:
5606 case UNSPEC_FRINTM:
5607 case UNSPEC_FRINTA:
5608 case UNSPEC_FRINTN:
5609 case UNSPEC_FRINTX:
5610 case UNSPEC_FRINTI:
5611 return true;
5612
5613 default:
5614 return false;
5615 }
5616}
5617
fb0cb7fa
KT
5618/* Return true iff X is an rtx that will match an extr instruction
5619 i.e. as described in the *extr<mode>5_insn family of patterns.
5620 OP0 and OP1 will be set to the operands of the shifts involved
5621 on success and will be NULL_RTX otherwise. */
5622
5623static bool
5624aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
5625{
5626 rtx op0, op1;
5627 machine_mode mode = GET_MODE (x);
5628
5629 *res_op0 = NULL_RTX;
5630 *res_op1 = NULL_RTX;
5631
5632 if (GET_CODE (x) != IOR)
5633 return false;
5634
5635 op0 = XEXP (x, 0);
5636 op1 = XEXP (x, 1);
5637
5638 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
5639 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
5640 {
5641 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
5642 if (GET_CODE (op1) == ASHIFT)
5643 std::swap (op0, op1);
5644
5645 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
5646 return false;
5647
5648 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
5649 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
5650
5651 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
5652 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
5653 {
5654 *res_op0 = XEXP (op0, 0);
5655 *res_op1 = XEXP (op1, 0);
5656 return true;
5657 }
5658 }
5659
5660 return false;
5661}
5662
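/* A standalone sketch (not backend code) of the source idiom that
   aarch64_extr_rtx_p recognises: an IOR of opposite shifts whose
   amounts sum to the mode's bitsize, which maps onto a single EXTR.  */

static unsigned long long
demo_extr_idiom (unsigned long long hi, unsigned long long lo)
{
  /* 16 + 48 == 64, so combine can emit one EXTR (lsb 48) here.  */
  return (hi << 16) | (lo >> 48);
}
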
2d5ffe46
AP
5663/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5664 storing it in *COST. Result is true if the total cost of the operation
5665 has now been calculated. */
5666static bool
5667aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
5668{
b9e3afe9
AP
5669 rtx inner;
5670 rtx comparator;
5671 enum rtx_code cmpcode;
5672
5673 if (COMPARISON_P (op0))
5674 {
5675 inner = XEXP (op0, 0);
5676 comparator = XEXP (op0, 1);
5677 cmpcode = GET_CODE (op0);
5678 }
5679 else
5680 {
5681 inner = op0;
5682 comparator = const0_rtx;
5683 cmpcode = NE;
5684 }
5685
2d5ffe46
AP
5686 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5687 {
5688 /* Conditional branch. */
b9e3afe9 5689 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
5690 return true;
5691 else
5692 {
b9e3afe9 5693 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 5694 {
2d5ffe46
AP
5695 if (comparator == const0_rtx)
5696 {
5697 /* TBZ/TBNZ/CBZ/CBNZ. */
5698 if (GET_CODE (inner) == ZERO_EXTRACT)
5699 /* TBZ/TBNZ. */
e548c9df
AM
5700 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
5701 ZERO_EXTRACT, 0, speed);
5702 else
5703 /* CBZ/CBNZ. */
5704 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
5705
5706 return true;
5707 }
5708 }
b9e3afe9 5709 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 5710 {
2d5ffe46
AP
5711 /* TBZ/TBNZ. */
5712 if (comparator == const0_rtx)
5713 return true;
5714 }
5715 }
5716 }
b9e3afe9 5717 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
5718 {
5719 /* It's a conditional operation based on the status flags,
5720 so it must be some flavor of CSEL. */
5721
5722 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5723 if (GET_CODE (op1) == NEG
5724 || GET_CODE (op1) == NOT
5725 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5726 op1 = XEXP (op1, 0);
5727
e548c9df
AM
5728 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
5729 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
5730 return true;
5731 }
5732
5733 /* We don't know what this is, cost all operands. */
5734 return false;
5735}
5736
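/* Illustrative source shapes (plain C) for the conditionals costed above:

     if (x)                 CBZ/CBNZ  (EQ/NE against zero)
     if (x & (1 << 5))      TBZ/TBNZ  (ZERO_EXTRACT against zero)
     if (x < 0)             TBNZ      (LT/GE against zero: sign-bit test)
     r = c ? a : b;         CSEL
     r = c ? -a : b;        CSNEG     (the NEG is folded for free)
     r = c ? ~a : b;        CSINV     (the NOT is folded for free)
     r = c ? a + 1 : b;     CSINC     (the PLUS const1 is folded for free)  */
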
43e9d192
IB
5737/* Calculate the cost of calculating X, storing it in *COST. Result
5738 is true if the total cost of the operation has now been calculated. */
5739static bool
e548c9df 5740aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
5741 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5742{
a8eecd00 5743 rtx op0, op1, op2;
73250c4c 5744 const struct cpu_cost_table *extra_cost
b175b679 5745 = aarch64_tune_params.insn_extra_cost;
e548c9df 5746 int code = GET_CODE (x);
43e9d192 5747
7fc5ef02
JG
5748 /* By default, assume that everything has equivalent cost to the
5749 cheapest instruction. Any additional costs are applied as a delta
5750 above this default. */
5751 *cost = COSTS_N_INSNS (1);
5752
43e9d192
IB
5753 switch (code)
5754 {
5755 case SET:
ba123b0d
JG
5756 /* The cost depends entirely on the operands to SET. */
5757 *cost = 0;
43e9d192
IB
5758 op0 = SET_DEST (x);
5759 op1 = SET_SRC (x);
5760
5761 switch (GET_CODE (op0))
5762 {
5763 case MEM:
5764 if (speed)
2961177e
JG
5765 {
5766 rtx address = XEXP (op0, 0);
b6875aac
KV
5767 if (VECTOR_MODE_P (mode))
5768 *cost += extra_cost->ldst.storev;
5769 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
5770 *cost += extra_cost->ldst.store;
5771 else if (mode == SFmode)
5772 *cost += extra_cost->ldst.storef;
5773 else if (mode == DFmode)
5774 *cost += extra_cost->ldst.stored;
5775
5776 *cost +=
5777 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5778 0, speed));
5779 }
43e9d192 5780
e548c9df 5781 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
5782 return true;
5783
5784 case SUBREG:
5785 if (! REG_P (SUBREG_REG (op0)))
e548c9df 5786 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 5787
43e9d192
IB
5788 /* Fall through. */
5789 case REG:
b6875aac
KV
5790 /* The cost is one per vector-register copied. */
5791 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
5792 {
5793 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5794 / GET_MODE_SIZE (V4SImode);
5795 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5796 }
ba123b0d
JG
5797 /* const0_rtx is in general free, but we will use an
5798 instruction to set a register to 0. */
b6875aac
KV
5799 else if (REG_P (op1) || op1 == const0_rtx)
5800 {
5801 /* The cost is 1 per register copied. */
5802 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
ba123b0d 5803 / UNITS_PER_WORD;
b6875aac
KV
5804 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5805 }
ba123b0d
JG
5806 else
5807 /* Cost is just the cost of the RHS of the set. */
e548c9df 5808 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
5809 return true;
5810
ba123b0d 5811 case ZERO_EXTRACT:
43e9d192 5812 case SIGN_EXTRACT:
ba123b0d
JG
5813 /* Bit-field insertion. Strip any redundant widening of
5814 the RHS to meet the width of the target. */
43e9d192
IB
5815 if (GET_CODE (op1) == SUBREG)
5816 op1 = SUBREG_REG (op1);
5817 if ((GET_CODE (op1) == ZERO_EXTEND
5818 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 5819 && CONST_INT_P (XEXP (op0, 1))
43e9d192
IB
5820 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5821 >= INTVAL (XEXP (op0, 1))))
5822 op1 = XEXP (op1, 0);
ba123b0d
JG
5823
5824 if (CONST_INT_P (op1))
5825 {
5826 /* MOV immediate is assumed to always be cheap. */
5827 *cost = COSTS_N_INSNS (1);
5828 }
5829 else
5830 {
5831 /* BFM. */
5832 if (speed)
5833 *cost += extra_cost->alu.bfi;
e548c9df 5834 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
5835 }
5836
43e9d192
IB
5837 return true;
5838
5839 default:
ba123b0d
JG
5840 /* We can't make sense of this, assume default cost. */
5841 *cost = COSTS_N_INSNS (1);
61263118 5842 return false;
43e9d192
IB
5843 }
5844 return false;
5845
9dfc162c
JG
5846 case CONST_INT:
5847	      /* If an instruction can incorporate a constant within the
5848	 instruction itself, the expression avoids calling
5849	 rtx_cost () on the constant.  If rtx_cost () is called on a
5850	 constant, it is usually because the constant must be
5851 moved into a register by one or more instructions.
5852
5853 The exception is constant 0, which can be expressed
5854 as XZR/WZR and is therefore free. The exception to this is
5855 if we have (set (reg) (const0_rtx)) in which case we must cost
5856 the move. However, we can catch that when we cost the SET, so
5857 we don't need to consider that here. */
5858 if (x == const0_rtx)
5859 *cost = 0;
5860 else
5861 {
5862 /* To an approximation, building any other constant is
5863 proportionally expensive to the number of instructions
5864 required to build that constant. This is true whether we
5865 are compiling for SPEED or otherwise. */
82614948
RR
5866 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
5867 (NULL_RTX, x, false, mode));
9dfc162c
JG
5868 }
5869 return true;
5870
5871 case CONST_DOUBLE:
5872 if (speed)
5873 {
5874 /* mov[df,sf]_aarch64. */
5875 if (aarch64_float_const_representable_p (x))
5876 /* FMOV (scalar immediate). */
5877 *cost += extra_cost->fp[mode == DFmode].fpconst;
5878 else if (!aarch64_float_const_zero_rtx_p (x))
5879 {
5880 /* This will be a load from memory. */
5881 if (mode == DFmode)
5882 *cost += extra_cost->ldst.loadd;
5883 else
5884 *cost += extra_cost->ldst.loadf;
5885 }
5886 else
5887 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5888 or MOV v0.s[0], wzr - neither of which are modeled by the
5889 cost tables. Just use the default cost. */
5890 {
5891 }
5892 }
5893
5894 return true;
5895
43e9d192
IB
5896 case MEM:
5897 if (speed)
2961177e
JG
5898 {
5899 /* For loads we want the base cost of a load, plus an
5900 approximation for the additional cost of the addressing
5901 mode. */
5902 rtx address = XEXP (x, 0);
b6875aac
KV
5903 if (VECTOR_MODE_P (mode))
5904 *cost += extra_cost->ldst.loadv;
5905 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
5906 *cost += extra_cost->ldst.load;
5907 else if (mode == SFmode)
5908 *cost += extra_cost->ldst.loadf;
5909 else if (mode == DFmode)
5910 *cost += extra_cost->ldst.loadd;
5911
5912 *cost +=
5913 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5914 0, speed));
5915 }
43e9d192
IB
5916
5917 return true;
5918
5919 case NEG:
4745e701
JG
5920 op0 = XEXP (x, 0);
5921
b6875aac
KV
5922 if (VECTOR_MODE_P (mode))
5923 {
5924 if (speed)
5925 {
5926 /* FNEG. */
5927 *cost += extra_cost->vect.alu;
5928 }
5929 return false;
5930 }
5931
e548c9df
AM
5932 if (GET_MODE_CLASS (mode) == MODE_INT)
5933 {
4745e701
JG
5934 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5935 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5936 {
5937 /* CSETM. */
e548c9df 5938 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
5939 return true;
5940 }
5941
5942 /* Cost this as SUB wzr, X. */
e548c9df 5943 op0 = CONST0_RTX (mode);
4745e701
JG
5944 op1 = XEXP (x, 0);
5945 goto cost_minus;
5946 }
5947
e548c9df 5948 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
5949 {
5950 /* Support (neg(fma...)) as a single instruction only if
5951 sign of zeros is unimportant. This matches the decision
5952 making in aarch64.md. */
5953 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5954 {
5955 /* FNMADD. */
e548c9df 5956 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
5957 return true;
5958 }
5959 if (speed)
5960 /* FNEG. */
5961 *cost += extra_cost->fp[mode == DFmode].neg;
5962 return false;
5963 }
5964
5965 return false;
43e9d192 5966
781aeb73
KT
5967 case CLRSB:
5968 case CLZ:
5969 if (speed)
b6875aac
KV
5970 {
5971 if (VECTOR_MODE_P (mode))
5972 *cost += extra_cost->vect.alu;
5973 else
5974 *cost += extra_cost->alu.clz;
5975 }
781aeb73
KT
5976
5977 return false;
5978
43e9d192
IB
5979 case COMPARE:
5980 op0 = XEXP (x, 0);
5981 op1 = XEXP (x, 1);
5982
5983 if (op1 == const0_rtx
5984 && GET_CODE (op0) == AND)
5985 {
5986 x = op0;
e548c9df 5987 mode = GET_MODE (op0);
43e9d192
IB
5988 goto cost_logic;
5989 }
5990
a8eecd00
JG
5991 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5992 {
5993	    /* TODO: A write to the CC flags possibly costs extra; this
5994 needs encoding in the cost tables. */
5995
5996 /* CC_ZESWPmode supports zero extend for free. */
e548c9df 5997 if (mode == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
a8eecd00
JG
5998 op0 = XEXP (op0, 0);
5999
e548c9df 6000 mode = GET_MODE (op0);
a8eecd00
JG
6001 /* ANDS. */
6002 if (GET_CODE (op0) == AND)
6003 {
6004 x = op0;
6005 goto cost_logic;
6006 }
6007
6008 if (GET_CODE (op0) == PLUS)
6009 {
6010 /* ADDS (and CMN alias). */
6011 x = op0;
6012 goto cost_plus;
6013 }
6014
6015 if (GET_CODE (op0) == MINUS)
6016 {
6017 /* SUBS. */
6018 x = op0;
6019 goto cost_minus;
6020 }
6021
6022 if (GET_CODE (op1) == NEG)
6023 {
6024 /* CMN. */
6025 if (speed)
6026 *cost += extra_cost->alu.arith;
6027
e548c9df
AM
6028 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
6029 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
6030 return true;
6031 }
6032
6033 /* CMP.
6034
6035 Compare can freely swap the order of operands, and
6036 canonicalization puts the more complex operation first.
6037 But the integer MINUS logic expects the shift/extend
6038 operation in op1. */
6039 if (! (REG_P (op0)
6040 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
6041 {
6042 op0 = XEXP (x, 1);
6043 op1 = XEXP (x, 0);
6044 }
6045 goto cost_minus;
6046 }
6047
6048 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6049 {
6050 /* FCMP. */
6051 if (speed)
6052 *cost += extra_cost->fp[mode == DFmode].compare;
6053
6054 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
6055 {
e548c9df 6056 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
6057 /* FCMP supports constant 0.0 for no extra cost. */
6058 return true;
6059 }
6060 return false;
6061 }
6062
b6875aac
KV
6063 if (VECTOR_MODE_P (mode))
6064 {
6065 /* Vector compare. */
6066 if (speed)
6067 *cost += extra_cost->vect.alu;
6068
6069 if (aarch64_float_const_zero_rtx_p (op1))
6070 {
6071 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
6072 cost. */
6073 return true;
6074 }
6075 return false;
6076 }
a8eecd00 6077 return false;
43e9d192
IB
6078
6079 case MINUS:
4745e701
JG
6080 {
6081 op0 = XEXP (x, 0);
6082 op1 = XEXP (x, 1);
6083
6084cost_minus:
e548c9df 6085 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 6086
4745e701
JG
6087 /* Detect valid immediates. */
6088 if ((GET_MODE_CLASS (mode) == MODE_INT
6089 || (GET_MODE_CLASS (mode) == MODE_CC
6090 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
6091 && CONST_INT_P (op1)
6092 && aarch64_uimm12_shift (INTVAL (op1)))
6093 {
4745e701
JG
6094 if (speed)
6095 /* SUB(S) (immediate). */
6096 *cost += extra_cost->alu.arith;
6097 return true;
4745e701
JG
6098 }
6099
7cc2145f
JG
6100 /* Look for SUB (extended register). */
6101 if (aarch64_rtx_arith_op_extract_p (op1, mode))
6102 {
6103 if (speed)
2533c820 6104 *cost += extra_cost->alu.extend_arith;
7cc2145f 6105
e548c9df
AM
6106 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0), VOIDmode,
6107 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
6108 return true;
6109 }
6110
4745e701
JG
6111 rtx new_op1 = aarch64_strip_extend (op1);
6112
6113 /* Cost this as an FMA-alike operation. */
6114 if ((GET_CODE (new_op1) == MULT
0a78ebe4 6115 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
6116 && code != COMPARE)
6117 {
6118 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
6119 (enum rtx_code) code,
6120 speed);
4745e701
JG
6121 return true;
6122 }
43e9d192 6123
e548c9df 6124 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 6125
4745e701
JG
6126 if (speed)
6127 {
b6875aac
KV
6128 if (VECTOR_MODE_P (mode))
6129 {
6130 /* Vector SUB. */
6131 *cost += extra_cost->vect.alu;
6132 }
6133 else if (GET_MODE_CLASS (mode) == MODE_INT)
6134 {
6135 /* SUB(S). */
6136 *cost += extra_cost->alu.arith;
6137 }
4745e701 6138 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
6139 {
6140 /* FSUB. */
6141 *cost += extra_cost->fp[mode == DFmode].addsub;
6142 }
4745e701
JG
6143 }
6144 return true;
6145 }
43e9d192
IB
6146
6147 case PLUS:
4745e701
JG
6148 {
6149 rtx new_op0;
43e9d192 6150
4745e701
JG
6151 op0 = XEXP (x, 0);
6152 op1 = XEXP (x, 1);
43e9d192 6153
a8eecd00 6154cost_plus:
4745e701
JG
6155 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
6156 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
6157 {
6158 /* CSINC. */
e548c9df
AM
6159 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
6160 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
6161 return true;
6162 }
43e9d192 6163
4745e701
JG
6164 if (GET_MODE_CLASS (mode) == MODE_INT
6165 && CONST_INT_P (op1)
6166 && aarch64_uimm12_shift (INTVAL (op1)))
6167 {
e548c9df 6168 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 6169
4745e701
JG
6170 if (speed)
6171 /* ADD (immediate). */
6172 *cost += extra_cost->alu.arith;
6173 return true;
6174 }
6175
e548c9df 6176 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 6177
7cc2145f
JG
6178 /* Look for ADD (extended register). */
6179 if (aarch64_rtx_arith_op_extract_p (op0, mode))
6180 {
6181 if (speed)
2533c820 6182 *cost += extra_cost->alu.extend_arith;
7cc2145f 6183
e548c9df
AM
6184 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0), VOIDmode,
6185 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
6186 return true;
6187 }
6188
4745e701
JG
6189 /* Strip any extend, leave shifts behind as we will
6190 cost them through mult_cost. */
6191 new_op0 = aarch64_strip_extend (op0);
6192
6193 if (GET_CODE (new_op0) == MULT
0a78ebe4 6194 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
6195 {
6196 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
6197 speed);
4745e701
JG
6198 return true;
6199 }
6200
e548c9df 6201 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
6202
6203 if (speed)
6204 {
b6875aac
KV
6205 if (VECTOR_MODE_P (mode))
6206 {
6207 /* Vector ADD. */
6208 *cost += extra_cost->vect.alu;
6209 }
6210 else if (GET_MODE_CLASS (mode) == MODE_INT)
6211 {
6212 /* ADD. */
6213 *cost += extra_cost->alu.arith;
6214 }
4745e701 6215 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
6216 {
6217 /* FADD. */
6218 *cost += extra_cost->fp[mode == DFmode].addsub;
6219 }
4745e701
JG
6220 }
6221 return true;
6222 }
43e9d192 6223
18b42b2a
KT
6224 case BSWAP:
6225 *cost = COSTS_N_INSNS (1);
6226
6227 if (speed)
b6875aac
KV
6228 {
6229 if (VECTOR_MODE_P (mode))
6230 *cost += extra_cost->vect.alu;
6231 else
6232 *cost += extra_cost->alu.rev;
6233 }
18b42b2a
KT
6234 return false;
6235
43e9d192 6236 case IOR:
f7d5cf8d
KT
6237 if (aarch_rev16_p (x))
6238 {
6239 *cost = COSTS_N_INSNS (1);
6240
b6875aac
KV
6241 if (speed)
6242 {
6243 if (VECTOR_MODE_P (mode))
6244 *cost += extra_cost->vect.alu;
6245 else
6246 *cost += extra_cost->alu.rev;
6247 }
6248 return true;
f7d5cf8d 6249 }
fb0cb7fa
KT
6250
6251 if (aarch64_extr_rtx_p (x, &op0, &op1))
6252 {
e548c9df
AM
6253 *cost += rtx_cost (op0, mode, IOR, 0, speed);
6254 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
6255 if (speed)
6256 *cost += extra_cost->alu.shift;
6257
6258 return true;
6259 }
f7d5cf8d 6260 /* Fall through. */
43e9d192
IB
6261 case XOR:
6262 case AND:
6263 cost_logic:
6264 op0 = XEXP (x, 0);
6265 op1 = XEXP (x, 1);
6266
b6875aac
KV
6267 if (VECTOR_MODE_P (mode))
6268 {
6269 if (speed)
6270 *cost += extra_cost->vect.alu;
6271 return true;
6272 }
6273
268c3b47
JG
6274 if (code == AND
6275 && GET_CODE (op0) == MULT
6276 && CONST_INT_P (XEXP (op0, 1))
6277 && CONST_INT_P (op1)
6278 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
6279 INTVAL (op1)) != 0)
6280 {
6281 /* This is a UBFM/SBFM. */
e548c9df 6282 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
6283 if (speed)
6284 *cost += extra_cost->alu.bfx;
6285 return true;
6286 }
6287
e548c9df 6288 if (GET_MODE_CLASS (mode) == MODE_INT)
43e9d192 6289 {
268c3b47
JG
6290 /* We possibly get the immediate for free, this is not
6291 modelled. */
43e9d192 6292 if (CONST_INT_P (op1)
e548c9df 6293 && aarch64_bitmask_imm (INTVAL (op1), mode))
43e9d192 6294 {
e548c9df 6295 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
268c3b47
JG
6296
6297 if (speed)
6298 *cost += extra_cost->alu.logical;
6299
6300 return true;
43e9d192
IB
6301 }
6302 else
6303 {
268c3b47
JG
6304 rtx new_op0 = op0;
6305
6306 /* Handle ORN, EON, or BIC. */
43e9d192
IB
6307 if (GET_CODE (op0) == NOT)
6308 op0 = XEXP (op0, 0);
268c3b47
JG
6309
6310 new_op0 = aarch64_strip_shift (op0);
6311
6312 /* If we had a shift on op0 then this is a logical-shift-
6313 by-register/immediate operation. Otherwise, this is just
6314 a logical operation. */
6315 if (speed)
6316 {
6317 if (new_op0 != op0)
6318 {
6319 /* Shift by immediate. */
6320 if (CONST_INT_P (XEXP (op0, 1)))
6321 *cost += extra_cost->alu.log_shift;
6322 else
6323 *cost += extra_cost->alu.log_shift_reg;
6324 }
6325 else
6326 *cost += extra_cost->alu.logical;
6327 }
6328
6329 /* In both cases we want to cost both operands. */
e548c9df
AM
6330 *cost += rtx_cost (new_op0, mode, (enum rtx_code) code, 0, speed);
6331 *cost += rtx_cost (op1, mode, (enum rtx_code) code, 1, speed);
268c3b47
JG
6332
6333 return true;
43e9d192 6334 }
43e9d192
IB
6335 }
6336 return false;
6337
268c3b47 6338 case NOT:
6365da9e
KT
6339 x = XEXP (x, 0);
6340 op0 = aarch64_strip_shift (x);
6341
b6875aac
KV
6342 if (VECTOR_MODE_P (mode))
6343 {
6344 /* Vector NOT. */
6345 *cost += extra_cost->vect.alu;
6346 return false;
6347 }
6348
6365da9e
KT
6349 /* MVN-shifted-reg. */
6350 if (op0 != x)
6351 {
e548c9df 6352 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
6353
6354 if (speed)
6355 *cost += extra_cost->alu.log_shift;
6356
6357 return true;
6358 }
6359 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
6360	 Handle the second form here, taking care that 'a' in the above can
6361 be a shift. */
6362 else if (GET_CODE (op0) == XOR)
6363 {
6364 rtx newop0 = XEXP (op0, 0);
6365 rtx newop1 = XEXP (op0, 1);
6366 rtx op0_stripped = aarch64_strip_shift (newop0);
6367
e548c9df
AM
6368 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
6369 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
6370
6371 if (speed)
6372 {
6373 if (op0_stripped != newop0)
6374 *cost += extra_cost->alu.log_shift;
6375 else
6376 *cost += extra_cost->alu.logical;
6377 }
6378
6379 return true;
6380 }
268c3b47
JG
6381 /* MVN. */
6382 if (speed)
6383 *cost += extra_cost->alu.logical;
6384
268c3b47
JG
6385 return false;
6386
43e9d192 6387 case ZERO_EXTEND:
b1685e62
JG
6388
6389 op0 = XEXP (x, 0);
6390 /* If a value is written in SI mode, then zero extended to DI
6391 mode, the operation will in general be free as a write to
6392 a 'w' register implicitly zeroes the upper bits of an 'x'
6393 register. However, if this is
6394
6395 (set (reg) (zero_extend (reg)))
6396
6397 we must cost the explicit register move. */
6398 if (mode == DImode
6399 && GET_MODE (op0) == SImode
6400 && outer == SET)
6401 {
e548c9df 6402 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62
JG
6403
6404 if (!op_cost && speed)
6405 /* MOV. */
6406 *cost += extra_cost->alu.extend;
6407 else
6408 /* Free, the cost is that of the SI mode operation. */
6409 *cost = op_cost;
6410
6411 return true;
6412 }
e548c9df 6413 else if (MEM_P (op0))
43e9d192 6414 {
b1685e62 6415 /* All loads can zero extend to any size for free. */
e548c9df 6416 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
6417 return true;
6418 }
b1685e62 6419
b1685e62 6420 if (speed)
b6875aac
KV
6421 {
6422 if (VECTOR_MODE_P (mode))
6423 {
6424 /* UMOV. */
6425 *cost += extra_cost->vect.alu;
6426 }
6427 else
6428 {
6429 /* UXTB/UXTH. */
6430 *cost += extra_cost->alu.extend;
6431 }
6432 }
43e9d192
IB
6433 return false;
6434
6435 case SIGN_EXTEND:
b1685e62 6436 if (MEM_P (XEXP (x, 0)))
43e9d192 6437 {
b1685e62
JG
6438 /* LDRSH. */
6439 if (speed)
6440 {
6441 rtx address = XEXP (XEXP (x, 0), 0);
6442 *cost += extra_cost->ldst.load_sign_extend;
6443
6444 *cost +=
6445 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6446 0, speed));
6447 }
43e9d192
IB
6448 return true;
6449 }
b1685e62
JG
6450
6451 if (speed)
b6875aac
KV
6452 {
6453 if (VECTOR_MODE_P (mode))
6454 *cost += extra_cost->vect.alu;
6455 else
6456 *cost += extra_cost->alu.extend;
6457 }
43e9d192
IB
6458 return false;
6459
ba0cfa17
JG
6460 case ASHIFT:
6461 op0 = XEXP (x, 0);
6462 op1 = XEXP (x, 1);
6463
6464 if (CONST_INT_P (op1))
6465 {
ba0cfa17 6466 if (speed)
b6875aac
KV
6467 {
6468 if (VECTOR_MODE_P (mode))
6469 {
6470 /* Vector shift (immediate). */
6471 *cost += extra_cost->vect.alu;
6472 }
6473 else
6474 {
6475	    /* LSL (immediate), UBFM, UBFIZ and friends.  These are all
6476 aliases. */
6477 *cost += extra_cost->alu.shift;
6478 }
6479 }
ba0cfa17
JG
6480
6481 /* We can incorporate zero/sign extend for free. */
6482 if (GET_CODE (op0) == ZERO_EXTEND
6483 || GET_CODE (op0) == SIGN_EXTEND)
6484 op0 = XEXP (op0, 0);
6485
e548c9df 6486 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
6487 return true;
6488 }
6489 else
6490 {
ba0cfa17 6491 if (speed)
b6875aac
KV
6492 {
6493 if (VECTOR_MODE_P (mode))
6494 {
6495 /* Vector shift (register). */
6496 *cost += extra_cost->vect.alu;
6497 }
6498 else
6499 {
6500 /* LSLV. */
6501 *cost += extra_cost->alu.shift_reg;
6502 }
6503 }
ba0cfa17
JG
6504 return false; /* All arguments need to be in registers. */
6505 }
6506
43e9d192 6507 case ROTATE:
43e9d192
IB
6508 case ROTATERT:
6509 case LSHIFTRT:
43e9d192 6510 case ASHIFTRT:
ba0cfa17
JG
6511 op0 = XEXP (x, 0);
6512 op1 = XEXP (x, 1);
43e9d192 6513
ba0cfa17
JG
6514 if (CONST_INT_P (op1))
6515 {
6516 /* ASR (immediate) and friends. */
6517 if (speed)
b6875aac
KV
6518 {
6519 if (VECTOR_MODE_P (mode))
6520 *cost += extra_cost->vect.alu;
6521 else
6522 *cost += extra_cost->alu.shift;
6523 }
43e9d192 6524
e548c9df 6525 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
6526 return true;
6527 }
6528 else
6529 {
6530
6531 /* ASR (register) and friends. */
6532 if (speed)
b6875aac
KV
6533 {
6534 if (VECTOR_MODE_P (mode))
6535 *cost += extra_cost->vect.alu;
6536 else
6537 *cost += extra_cost->alu.shift_reg;
6538 }
ba0cfa17
JG
6539 return false; /* All arguments need to be in registers. */
6540 }
43e9d192 6541
909734be
JG
6542 case SYMBOL_REF:
6543
1b1e81f8
JW
6544 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
6545 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
6546 {
6547 /* LDR. */
6548 if (speed)
6549 *cost += extra_cost->ldst.load;
6550 }
6551 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
6552 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
6553 {
6554 /* ADRP, followed by ADD. */
6555 *cost += COSTS_N_INSNS (1);
6556 if (speed)
6557 *cost += 2 * extra_cost->alu.arith;
6558 }
6559 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
6560 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
6561 {
6562 /* ADR. */
6563 if (speed)
6564 *cost += extra_cost->alu.arith;
6565 }
6566
6567 if (flag_pic)
6568 {
6569 /* One extra load instruction, after accessing the GOT. */
6570 *cost += COSTS_N_INSNS (1);
6571 if (speed)
6572 *cost += extra_cost->ldst.load;
6573 }
43e9d192
IB
6574 return true;
6575
909734be 6576 case HIGH:
43e9d192 6577 case LO_SUM:
909734be
JG
6578 /* ADRP/ADD (immediate). */
6579 if (speed)
6580 *cost += extra_cost->alu.arith;
43e9d192
IB
6581 return true;
6582
6583 case ZERO_EXTRACT:
6584 case SIGN_EXTRACT:
7cc2145f
JG
6585 /* UBFX/SBFX. */
6586 if (speed)
b6875aac
KV
6587 {
6588 if (VECTOR_MODE_P (mode))
6589 *cost += extra_cost->vect.alu;
6590 else
6591 *cost += extra_cost->alu.bfx;
6592 }
7cc2145f
JG
6593
6594 /* We can trust that the immediates used will be correct (there
6595 are no by-register forms), so we need only cost op0. */
e548c9df 6596 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
6597 return true;
6598
6599 case MULT:
4745e701
JG
6600 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
6601 /* aarch64_rtx_mult_cost always handles recursion to its
6602 operands. */
6603 return true;
43e9d192
IB
6604
6605 case MOD:
6606 case UMOD:
43e9d192
IB
6607 if (speed)
6608 {
b6875aac
KV
6609 if (VECTOR_MODE_P (mode))
6610 *cost += extra_cost->vect.alu;
e548c9df
AM
6611 else if (GET_MODE_CLASS (mode) == MODE_INT)
6612 *cost += (extra_cost->mult[mode == DImode].add
6613 + extra_cost->mult[mode == DImode].idiv);
6614 else if (mode == DFmode)
73250c4c
KT
6615 *cost += (extra_cost->fp[1].mult
6616 + extra_cost->fp[1].div);
e548c9df 6617 else if (mode == SFmode)
73250c4c
KT
6618 *cost += (extra_cost->fp[0].mult
6619 + extra_cost->fp[0].div);
43e9d192
IB
6620 }
6621 return false; /* All arguments need to be in registers. */
6622
6623 case DIV:
6624 case UDIV:
4105fe38 6625 case SQRT:
43e9d192
IB
6626 if (speed)
6627 {
b6875aac
KV
6628 if (VECTOR_MODE_P (mode))
6629 *cost += extra_cost->vect.alu;
6630 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
6631 /* There is no integer SQRT, so only DIV and UDIV can get
6632 here. */
6633 *cost += extra_cost->mult[mode == DImode].idiv;
6634 else
6635 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
6636 }
6637 return false; /* All arguments need to be in registers. */
6638
a8eecd00 6639 case IF_THEN_ELSE:
2d5ffe46
AP
6640 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
6641 XEXP (x, 2), cost, speed);
a8eecd00
JG
6642
6643 case EQ:
6644 case NE:
6645 case GT:
6646 case GTU:
6647 case LT:
6648 case LTU:
6649 case GE:
6650 case GEU:
6651 case LE:
6652 case LEU:
6653
6654 return false; /* All arguments must be in registers. */
6655
b292109f
JG
6656 case FMA:
6657 op0 = XEXP (x, 0);
6658 op1 = XEXP (x, 1);
6659 op2 = XEXP (x, 2);
6660
6661 if (speed)
b6875aac
KV
6662 {
6663 if (VECTOR_MODE_P (mode))
6664 *cost += extra_cost->vect.alu;
6665 else
6666 *cost += extra_cost->fp[mode == DFmode].fma;
6667 }
b292109f
JG
6668
6669 /* FMSUB, FNMADD, and FNMSUB are free. */
6670 if (GET_CODE (op0) == NEG)
6671 op0 = XEXP (op0, 0);
6672
6673 if (GET_CODE (op2) == NEG)
6674 op2 = XEXP (op2, 0);
6675
6676 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
6677 and the by-element operand as operand 0. */
6678 if (GET_CODE (op1) == NEG)
6679 op1 = XEXP (op1, 0);
6680
6681 /* Catch vector-by-element operations. The by-element operand can
6682 either be (vec_duplicate (vec_select (x))) or just
6683 (vec_select (x)), depending on whether we are multiplying by
6684 a vector or a scalar.
6685
6686	 Canonicalization is not very good in these cases: FMA4 will put the
6687 by-element operand as operand 0, FNMA4 will have it as operand 1. */
6688 if (GET_CODE (op0) == VEC_DUPLICATE)
6689 op0 = XEXP (op0, 0);
6690 else if (GET_CODE (op1) == VEC_DUPLICATE)
6691 op1 = XEXP (op1, 0);
6692
6693 if (GET_CODE (op0) == VEC_SELECT)
6694 op0 = XEXP (op0, 0);
6695 else if (GET_CODE (op1) == VEC_SELECT)
6696 op1 = XEXP (op1, 0);
6697
6698 /* If the remaining parameters are not registers,
6699 get the cost to put them into registers. */
e548c9df
AM
6700 *cost += rtx_cost (op0, mode, FMA, 0, speed);
6701 *cost += rtx_cost (op1, mode, FMA, 1, speed);
6702 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
6703 return true;
6704
5e2a765b
KT
6705 case FLOAT:
6706 case UNSIGNED_FLOAT:
6707 if (speed)
6708 *cost += extra_cost->fp[mode == DFmode].fromint;
6709 return false;
6710
b292109f
JG
6711 case FLOAT_EXTEND:
6712 if (speed)
b6875aac
KV
6713 {
6714 if (VECTOR_MODE_P (mode))
6715 {
6716	      /* Vector widen.  */
6717 *cost += extra_cost->vect.alu;
6718 }
6719 else
6720 *cost += extra_cost->fp[mode == DFmode].widen;
6721 }
b292109f
JG
6722 return false;
6723
6724 case FLOAT_TRUNCATE:
6725 if (speed)
b6875aac
KV
6726 {
6727 if (VECTOR_MODE_P (mode))
6728 {
6729	      /* Vector narrow.  */
6730 *cost += extra_cost->vect.alu;
6731 }
6732 else
6733 *cost += extra_cost->fp[mode == DFmode].narrow;
6734 }
b292109f
JG
6735 return false;
6736
61263118
KT
6737 case FIX:
6738 case UNSIGNED_FIX:
6739 x = XEXP (x, 0);
6740	      /* Strip the rounding part.  The rounding variants will all be
6741	 implemented by the fcvt* family of instructions anyway.  */
6742 if (GET_CODE (x) == UNSPEC)
6743 {
6744 unsigned int uns_code = XINT (x, 1);
6745
6746 if (uns_code == UNSPEC_FRINTA
6747 || uns_code == UNSPEC_FRINTM
6748 || uns_code == UNSPEC_FRINTN
6749 || uns_code == UNSPEC_FRINTP
6750 || uns_code == UNSPEC_FRINTZ)
6751 x = XVECEXP (x, 0, 0);
6752 }
6753
6754 if (speed)
b6875aac
KV
6755 {
6756 if (VECTOR_MODE_P (mode))
6757 *cost += extra_cost->vect.alu;
6758 else
6759 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
6760 }
e548c9df 6761 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
6762 return true;
6763
b292109f 6764 case ABS:
b6875aac
KV
6765 if (VECTOR_MODE_P (mode))
6766 {
6767 /* ABS (vector). */
6768 if (speed)
6769 *cost += extra_cost->vect.alu;
6770 }
6771 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 6772 {
19261b99
KT
6773 op0 = XEXP (x, 0);
6774
6775 /* FABD, which is analogous to FADD. */
6776 if (GET_CODE (op0) == MINUS)
6777 {
e548c9df
AM
6778 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
6779 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
6780 if (speed)
6781 *cost += extra_cost->fp[mode == DFmode].addsub;
6782
6783 return true;
6784 }
6785 /* Simple FABS is analogous to FNEG. */
b292109f
JG
6786 if (speed)
6787 *cost += extra_cost->fp[mode == DFmode].neg;
6788 }
6789 else
6790 {
6791	  /* Integer ABS will either be split into
6792 two arithmetic instructions, or will be an ABS
6793 (scalar), which we don't model. */
6794 *cost = COSTS_N_INSNS (2);
6795 if (speed)
6796 *cost += 2 * extra_cost->alu.arith;
6797 }
6798 return false;
6799
6800 case SMAX:
6801 case SMIN:
6802 if (speed)
6803 {
b6875aac
KV
6804 if (VECTOR_MODE_P (mode))
6805 *cost += extra_cost->vect.alu;
6806 else
6807 {
6808 /* FMAXNM/FMINNM/FMAX/FMIN.
6809 TODO: This may not be accurate for all implementations, but
6810 we do not model this in the cost tables. */
6811 *cost += extra_cost->fp[mode == DFmode].addsub;
6812 }
b292109f
JG
6813 }
6814 return false;
6815
61263118
KT
6816 case UNSPEC:
6817 /* The floating point round to integer frint* instructions. */
6818 if (aarch64_frint_unspec_p (XINT (x, 1)))
6819 {
6820 if (speed)
6821 *cost += extra_cost->fp[mode == DFmode].roundint;
6822
6823 return false;
6824 }
781aeb73
KT
6825
6826 if (XINT (x, 1) == UNSPEC_RBIT)
6827 {
6828 if (speed)
6829 *cost += extra_cost->alu.rev;
6830
6831 return false;
6832 }
61263118
KT
6833 break;
6834
fb620c4a
JG
6835 case TRUNCATE:
6836
6837 /* Decompose <su>muldi3_highpart. */
6838 if (/* (truncate:DI */
6839 mode == DImode
6840 /* (lshiftrt:TI */
6841 && GET_MODE (XEXP (x, 0)) == TImode
6842 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6843 /* (mult:TI */
6844 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6845 /* (ANY_EXTEND:TI (reg:DI))
6846 (ANY_EXTEND:TI (reg:DI))) */
6847 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6848 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
6849 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
6850 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
6851 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
6852 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
6853 /* (const_int 64) */
6854 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6855 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
6856 {
6857 /* UMULH/SMULH. */
6858 if (speed)
6859 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
6860 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
6861 mode, MULT, 0, speed);
6862 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
6863 mode, MULT, 1, speed);
fb620c4a
JG
6864 return true;
6865 }
6866
6867 /* Fall through. */
43e9d192 6868 default:
61263118 6869 break;
43e9d192 6870 }
61263118
KT
6871
6872 if (dump_file && (dump_flags & TDF_DETAILS))
6873 fprintf (dump_file,
6874 "\nFailed to cost RTX. Assuming default cost.\n");
6875
6876 return true;
43e9d192
IB
6877}
6878
0ee859b5
JG
6879/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
6880 calculated for X. This cost is stored in *COST. Returns true
6881 if the total cost of X was calculated. */
6882static bool
e548c9df 6883aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
6884 int param, int *cost, bool speed)
6885{
e548c9df 6886 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5
JG
6887
6888 if (dump_file && (dump_flags & TDF_DETAILS))
6889 {
6890 print_rtl_single (dump_file, x);
6891 fprintf (dump_file, "\n%s cost: %d (%s)\n",
6892 speed ? "Hot" : "Cold",
6893 *cost, result ? "final" : "partial");
6894 }
6895
6896 return result;
6897}
6898
43e9d192 6899static int
ef4bddc2 6900aarch64_register_move_cost (machine_mode mode,
8a3a7e67 6901 reg_class_t from_i, reg_class_t to_i)
43e9d192 6902{
8a3a7e67
RH
6903 enum reg_class from = (enum reg_class) from_i;
6904 enum reg_class to = (enum reg_class) to_i;
43e9d192 6905 const struct cpu_regmove_cost *regmove_cost
b175b679 6906 = aarch64_tune_params.regmove_cost;
43e9d192 6907
3be07662
WD
6908 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
6909 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
6910 to = GENERAL_REGS;
6911
6912 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
6913 from = GENERAL_REGS;
6914
6ee70f81
AP
6915 /* Moving between GPR and stack cost is the same as GP2GP. */
6916 if ((from == GENERAL_REGS && to == STACK_REG)
6917 || (to == GENERAL_REGS && from == STACK_REG))
6918 return regmove_cost->GP2GP;
6919
6920 /* To/From the stack register, we move via the gprs. */
6921 if (to == STACK_REG || from == STACK_REG)
6922 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
6923 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
6924
8919453c
WD
6925 if (GET_MODE_SIZE (mode) == 16)
6926 {
6927 /* 128-bit operations on general registers require 2 instructions. */
6928 if (from == GENERAL_REGS && to == GENERAL_REGS)
6929 return regmove_cost->GP2GP * 2;
6930 else if (from == GENERAL_REGS)
6931 return regmove_cost->GP2FP * 2;
6932 else if (to == GENERAL_REGS)
6933 return regmove_cost->FP2GP * 2;
6934
6935 /* When AdvSIMD instructions are disabled it is not possible to move
6936 a 128-bit value directly between Q registers. This is handled in
6937 secondary reload. A general register is used as a scratch to move
6938 the upper DI value and the lower DI value is moved directly,
6939 hence the cost is the sum of three moves. */
6940 if (! TARGET_SIMD)
6941 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6942
6943 return regmove_cost->FP2FP;
6944 }
6945
43e9d192
IB
6946 if (from == GENERAL_REGS && to == GENERAL_REGS)
6947 return regmove_cost->GP2GP;
6948 else if (from == GENERAL_REGS)
6949 return regmove_cost->GP2FP;
6950 else if (to == GENERAL_REGS)
6951 return regmove_cost->FP2GP;
6952
43e9d192
IB
6953 return regmove_cost->FP2FP;
6954}
6955
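/* A minimal standalone sketch (demo structure, not a real tuning table)
   of the 128-bit composition rules above: GP<->GP needs two X-register
   moves, and without SIMD a Q<->Q copy bounces the upper half through
   a general register.  */

struct demo_regmove_cost { int GP2GP, GP2FP, FP2GP, FP2FP; };

static int
demo_q_reg_move_cost (const struct demo_regmove_cost *c,
		      int from_general, int to_general, int have_simd)
{
  if (from_general && to_general)
    return c->GP2GP * 2;
  if (from_general)
    return c->GP2FP * 2;
  if (to_general)
    return c->FP2GP * 2;
  return have_simd ? c->FP2FP : c->GP2FP + c->FP2GP + c->FP2FP;
}
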
6956static int
ef4bddc2 6957aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
6958 reg_class_t rclass ATTRIBUTE_UNUSED,
6959 bool in ATTRIBUTE_UNUSED)
6960{
b175b679 6961 return aarch64_tune_params.memmov_cost;
43e9d192
IB
6962}
6963
d126a4ae
AP
6964/* Return the number of instructions that can be issued per cycle. */
6965static int
6966aarch64_sched_issue_rate (void)
6967{
b175b679 6968 return aarch64_tune_params.issue_rate;
d126a4ae
AP
6969}
6970
d03f7e44
MK
6971static int
6972aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
6973{
6974 int issue_rate = aarch64_sched_issue_rate ();
6975
6976 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
6977}
6978
8990e73a
TB
6979/* Vectorizer cost model target hooks. */
6980
6981/* Implement targetm.vectorize.builtin_vectorization_cost. */
6982static int
6983aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6984 tree vectype,
6985 int misalign ATTRIBUTE_UNUSED)
6986{
6987 unsigned elements;
6988
6989 switch (type_of_cost)
6990 {
6991 case scalar_stmt:
b175b679 6992 return aarch64_tune_params.vec_costs->scalar_stmt_cost;
8990e73a
TB
6993
6994 case scalar_load:
b175b679 6995 return aarch64_tune_params.vec_costs->scalar_load_cost;
8990e73a
TB
6996
6997 case scalar_store:
b175b679 6998 return aarch64_tune_params.vec_costs->scalar_store_cost;
8990e73a
TB
6999
7000 case vector_stmt:
b175b679 7001 return aarch64_tune_params.vec_costs->vec_stmt_cost;
8990e73a
TB
7002
7003 case vector_load:
b175b679 7004 return aarch64_tune_params.vec_costs->vec_align_load_cost;
8990e73a
TB
7005
7006 case vector_store:
b175b679 7007 return aarch64_tune_params.vec_costs->vec_store_cost;
8990e73a
TB
7008
7009 case vec_to_scalar:
b175b679 7010 return aarch64_tune_params.vec_costs->vec_to_scalar_cost;
8990e73a
TB
7011
7012 case scalar_to_vec:
b175b679 7013 return aarch64_tune_params.vec_costs->scalar_to_vec_cost;
8990e73a
TB
7014
7015 case unaligned_load:
b175b679 7016 return aarch64_tune_params.vec_costs->vec_unalign_load_cost;
8990e73a
TB
7017
7018 case unaligned_store:
b175b679 7019 return aarch64_tune_params.vec_costs->vec_unalign_store_cost;
8990e73a
TB
7020
7021 case cond_branch_taken:
b175b679 7022 return aarch64_tune_params.vec_costs->cond_taken_branch_cost;
8990e73a
TB
7023
7024 case cond_branch_not_taken:
b175b679 7025 return aarch64_tune_params.vec_costs->cond_not_taken_branch_cost;
8990e73a
TB
7026
7027 case vec_perm:
7028 case vec_promote_demote:
b175b679 7029 return aarch64_tune_params.vec_costs->vec_stmt_cost;
8990e73a
TB
7030
7031 case vec_construct:
7032 elements = TYPE_VECTOR_SUBPARTS (vectype);
7033 return elements / 2 + 1;
7034
7035 default:
7036 gcc_unreachable ();
7037 }
7038}
7039
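/* Worked reading of the vec_construct case above: a V4SI built from
   four scalars has elements == 4 and so costs 4/2 + 1 == 3, while a
   V2DI costs 2/2 + 1 == 2, roughly tracking the number of insert
   operations needed.  (An illustration of the formula, not an extra
   rule.)  */
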
7040/* Implement targetm.vectorize.add_stmt_cost. */
7041static unsigned
7042aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
7043 struct _stmt_vec_info *stmt_info, int misalign,
7044 enum vect_cost_model_location where)
7045{
7046 unsigned *cost = (unsigned *) data;
7047 unsigned retval = 0;
7048
7049 if (flag_vect_cost_model)
7050 {
7051 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
7052 int stmt_cost =
7053 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
7054
7055 /* Statements in an inner loop relative to the loop being
7056 vectorized are weighted more heavily. The value here is
7057 a function (linear for now) of the loop nest level. */
7058 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
7059 {
7060 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7061 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
7062 unsigned nest_level = loop_depth (loop);
7063
7064 count *= nest_level;
7065 }
7066
7067 retval = (unsigned) (count * stmt_cost);
7068 cost[where] += retval;
7069 }
7070
7071 return retval;
7072}
7073
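/* Worked example with assumed numbers: a vector_stmt of cost 1,
   counted twice in the body of a loop nested two deep, is scaled by
   nest_level == 2 and so contributes 2 * 2 * 1 == 4 to
   cost[vect_body].  */
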
43e9d192
IB
7074static void initialize_aarch64_code_model (void);
7075
7076/* Parse the architecture extension string. */
7077
7078static void
7079aarch64_parse_extension (char *str)
7080{
7081 /* The extension string is parsed left to right. */
7082 const struct aarch64_option_extension *opt = NULL;
7083
7084 /* Flag to say whether we are adding or removing an extension. */
7085 int adding_ext = -1;
7086
7087 while (str != NULL && *str != 0)
7088 {
7089 char *ext;
7090 size_t len;
7091
7092 str++;
7093 ext = strchr (str, '+');
7094
7095 if (ext != NULL)
7096 len = ext - str;
7097 else
7098 len = strlen (str);
7099
7100 if (len >= 2 && strncmp (str, "no", 2) == 0)
7101 {
7102 adding_ext = 0;
7103 len -= 2;
7104 str += 2;
7105 }
7106 else if (len > 0)
7107 adding_ext = 1;
7108
7109 if (len == 0)
7110 {
217d0904
KT
7111 error ("missing feature modifier after %qs", adding_ext ? "+"
7112 : "+no");
43e9d192
IB
7113 return;
7114 }
7115
7116 /* Scan over the extensions table trying to find an exact match. */
7117 for (opt = all_extensions; opt->name != NULL; opt++)
7118 {
7119 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
7120 {
7121 /* Add or remove the extension. */
7122 if (adding_ext)
7123 aarch64_isa_flags |= opt->flags_on;
7124 else
7125 aarch64_isa_flags &= ~(opt->flags_off);
7126 break;
7127 }
7128 }
7129
7130 if (opt->name == NULL)
7131 {
7132 /* Extension not found in list. */
7133 error ("unknown feature modifier %qs", str);
7134 return;
7135 }
7136
7137 str = ext;
7138	    }
7139
7140 return;
7141}
7142
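/* A standalone sketch (plain C, compiled on its own; printf stands in
   for the real extension table and error handling) of the
   "+feature"/"+nofeature" grammar accepted above, e.g. for the suffix
   of -mcpu=cortex-a57+crypto+nofp:

     "+crypto+nofp" -> enable "crypto", then disable "fp".  */

#include <stdio.h>
#include <string.h>

static void
demo_parse_extension (const char *str)
{
  while (str != NULL && *str != 0)
    {
      str++;				/* Step over the leading '+'.  */
      const char *end = strchr (str, '+');
      size_t len = end ? (size_t) (end - str) : strlen (str);
      int adding = 1;

      if (len >= 2 && strncmp (str, "no", 2) == 0)
	{
	  adding = 0;
	  str += 2;
	  len -= 2;
	}

      printf ("%s %.*s\n", adding ? "enable" : "disable", (int) len, str);
      str = end;
    }
}
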
7143/* Parse the ARCH string. */
7144
7145static void
7146aarch64_parse_arch (void)
7147{
7148 char *ext;
7149 const struct processor *arch;
7150 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
7151 size_t len;
7152
7153 strcpy (str, aarch64_arch_string);
7154
7155 ext = strchr (str, '+');
7156
7157 if (ext != NULL)
7158 len = ext - str;
7159 else
7160 len = strlen (str);
7161
7162 if (len == 0)
7163 {
7164 error ("missing arch name in -march=%qs", str);
7165 return;
7166 }
7167
7168 /* Loop through the list of supported ARCHs to find a match. */
7169 for (arch = all_architectures; arch->name != NULL; arch++)
7170 {
7171 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
7172 {
7173 selected_arch = arch;
7174 aarch64_isa_flags = selected_arch->flags;
ffee7aa9
JG
7175
7176 if (!selected_cpu)
7177 selected_cpu = &all_cores[selected_arch->core];
43e9d192
IB
7178
7179 if (ext != NULL)
7180 {
7181 /* ARCH string contains at least one extension. */
7182 aarch64_parse_extension (ext);
7183 }
7184
ffee7aa9
JG
7185 if (strcmp (selected_arch->arch, selected_cpu->arch))
7186 {
7187 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
7188 selected_cpu->name, selected_arch->name);
7189 }
7190
43e9d192
IB
7191 return;
7192 }
7193 }
7194
7195 /* ARCH name not found in list. */
7196 error ("unknown value %qs for -march", str);
7197 return;
7198}
7199
7200/* Parse the CPU string. */
7201
7202static void
7203aarch64_parse_cpu (void)
7204{
7205 char *ext;
7206 const struct processor *cpu;
7207 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
7208 size_t len;
7209
7210 strcpy (str, aarch64_cpu_string);
7211
7212 ext = strchr (str, '+');
7213
7214 if (ext != NULL)
7215 len = ext - str;
7216 else
7217 len = strlen (str);
7218
7219 if (len == 0)
7220 {
7221 error ("missing cpu name in -mcpu=%qs", str);
7222 return;
7223 }
7224
7225 /* Loop through the list of supported CPUs to find a match. */
7226 for (cpu = all_cores; cpu->name != NULL; cpu++)
7227 {
7228 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
7229 {
7230 selected_cpu = cpu;
7231 aarch64_isa_flags = selected_cpu->flags;
7232
7233 if (ext != NULL)
7234 {
7235 /* CPU string contains at least one extension. */
7236 aarch64_parse_extension (ext);
7237 }
7238
7239 return;
7240 }
7241 }
7242
7243 /* CPU name not found in list. */
7244 error ("unknown value %qs for -mcpu", str);
7245 return;
7246}
7247
7248/* Parse the TUNE string. */
7249
7250static void
7251aarch64_parse_tune (void)
7252{
7253 const struct processor *cpu;
7254 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
7255 strcpy (str, aarch64_tune_string);
7256
7257 /* Loop through the list of supported CPUs to find a match. */
7258 for (cpu = all_cores; cpu->name != NULL; cpu++)
7259 {
7260 if (strcmp (cpu->name, str) == 0)
7261 {
7262 selected_tune = cpu;
7263 return;
7264 }
7265 }
7266
7267 /* CPU name not found in list. */
7268 error ("unknown value %qs for -mtune", str);
7269 return;
7270}
7271
8dec06f2
JG
7272/* Parse TOKEN, which has length LENGTH to see if it is an option
7273 described in FLAG. If it is, return the index bit for that fusion type.
7274 If not, error (printing OPTION_NAME) and return zero. */
7275
7276static unsigned int
7277aarch64_parse_one_option_token (const char *token,
7278 size_t length,
7279 const struct aarch64_flag_desc *flag,
7280 const char *option_name)
7281{
7282 for (; flag->name != NULL; flag++)
7283 {
7284 if (length == strlen (flag->name)
7285 && !strncmp (flag->name, token, length))
7286 return flag->flag;
7287 }
7288
7289 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
7290 return 0;
7291}
7292
7293/* Parse OPTION which is a comma-separated list of flags to enable.
7294 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
7295 default state we inherit from the CPU tuning structures. OPTION_NAME
7296 gives the top-level option we are parsing in the -moverride string,
7297 for use in error messages. */
7298
7299static unsigned int
7300aarch64_parse_boolean_options (const char *option,
7301 const struct aarch64_flag_desc *flags,
7302 unsigned int initial_state,
7303 const char *option_name)
7304{
7305 const char separator = '.';
7306 const char* specs = option;
7307 const char* ntoken = option;
7308 unsigned int found_flags = initial_state;
7309
7310 while ((ntoken = strchr (specs, separator)))
7311 {
7312 size_t token_length = ntoken - specs;
7313 unsigned token_ops = aarch64_parse_one_option_token (specs,
7314 token_length,
7315 flags,
7316 option_name);
7317 /* If we find "none" (or, for simplicity's sake, an error) anywhere
7318 in the token stream, reset the supported operations. So:
7319
7320 adrp+add.cmp+branch.none.adrp+add
7321
7322 would have the result of turning on only adrp+add fusion. */
7323 if (!token_ops)
7324 found_flags = 0;
7325
7326 found_flags |= token_ops;
7327 specs = ++ntoken;
7328 }
7329
7330	  /* The string ended with the separator, so the last token is empty.  */
7331 if (!(*specs))
7332 {
7333	      error ("%s string ill-formed", option_name);
7334 return 0;
7335 }
7336
7337 /* We still have one more token to parse. */
7338 size_t token_length = strlen (specs);
7339 unsigned token_ops = aarch64_parse_one_option_token (specs,
7340 token_length,
7341 flags,
7342 option_name);
7343 if (!token_ops)
7344 found_flags = 0;
7345
7346 found_flags |= token_ops;
7347 return found_flags;
7348}
7349
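/* Worked trace of the example in the comment above, starting from an
   empty initial state:

     "adrp+add.cmp+branch.none.adrp+add"

     adrp+add    -> flags = adrp+add
     cmp+branch  -> flags = adrp+add | cmp+branch
     none        -> flags = 0            (reset)
     adrp+add    -> flags = adrp+add

   so only adrp+add fusion is enabled at the end.  */
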
7350/* Support for overriding instruction fusion. */
7351
7352static void
7353aarch64_parse_fuse_string (const char *fuse_string,
7354 struct tune_params *tune)
7355{
7356 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
7357 aarch64_fusible_pairs,
7358 tune->fusible_ops,
7359 "fuse=");
7360}
7361
7362/* Support for overriding other tuning flags. */
7363
7364static void
7365aarch64_parse_tune_string (const char *tune_string,
7366 struct tune_params *tune)
7367{
7368 tune->extra_tuning_flags
7369 = aarch64_parse_boolean_options (tune_string,
7370 aarch64_tuning_flags,
7371 tune->extra_tuning_flags,
7372 "tune=");
7373}
7374
7375/* Parse TOKEN, which has length LENGTH to see if it is a tuning option
7376 we understand. If it is, extract the option string and handoff to
7377 the appropriate function. */
7378
7379void
7380aarch64_parse_one_override_token (const char* token,
7381 size_t length,
7382 struct tune_params *tune)
7383{
7384 const struct aarch64_tuning_override_function *fn
7385 = aarch64_tuning_override_functions;
7386
7387 const char *option_part = strchr (token, '=');
7388 if (!option_part)
7389 {
7390 error ("tuning string missing in option (%s)", token);
7391 return;
7392 }
7393
7394 /* Get the length of the option name. */
7395 length = option_part - token;
7396 /* Skip the '=' to get to the option string. */
7397 option_part++;
7398
7399 for (; fn->name != NULL; fn++)
7400 {
7401 if (!strncmp (fn->name, token, length))
7402 {
7403 fn->parse_override (option_part, tune);
7404 return;
7405 }
7406 }
7407
7408	  error ("unknown tuning option (%s)", token);
7409 return;
7410}
7411
7412/* Parse STRING looking for options in the format:
7413 string :: option:string
7414 option :: name=substring
7415 name :: {a-z}
7416 substring :: defined by option. */
7417
7418static void
7419aarch64_parse_override_string (const char* input_string,
7420 struct tune_params* tune)
7421{
7422 const char separator = ':';
7423 size_t string_length = strlen (input_string) + 1;
7424 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
7425 char *string = string_root;
7426 strncpy (string, input_string, string_length);
7427 string[string_length - 1] = '\0';
7428
7429 char* ntoken = string;
7430
7431 while ((ntoken = strchr (string, separator)))
7432 {
7433 size_t token_length = ntoken - string;
7434 /* Make this substring look like a string. */
7435 *ntoken = '\0';
7436 aarch64_parse_one_override_token (string, token_length, tune);
7437 string = ++ntoken;
7438 }
7439
7440 /* One last option to parse. */
7441 aarch64_parse_one_override_token (string, strlen (string), tune);
7442 free (string_root);
7443}
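/* Illustrative input for the grammar above ("fuse=" and "tune=" are
   registered earlier; the flag names here are placeholders):

     -moverride=fuse=adrp+add:tune=some_flag

   splits on ':' into "fuse=adrp+add" and "tune=some_flag", each
   handed to aarch64_parse_one_override_token.  */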
43e9d192
IB
7444
7445/* Implement TARGET_OPTION_OVERRIDE. */
7446
7447static void
7448aarch64_override_options (void)
7449{
ffee7aa9
JG
7450 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
7451 If either of -march or -mtune is given, they override their
7452 respective component of -mcpu.
43e9d192 7453
ffee7aa9
JG
7454	    So, first parse AARCH64_CPU_STRING, then the others; be careful
7455	    with -march because, if -mcpu is not present on the command line,
7456	    -march must set a sensible default CPU.  */
7457 if (aarch64_cpu_string)
43e9d192 7458 {
ffee7aa9 7459 aarch64_parse_cpu ();
43e9d192
IB
7460 }
7461
ffee7aa9 7462 if (aarch64_arch_string)
43e9d192 7463 {
ffee7aa9 7464 aarch64_parse_arch ();
43e9d192
IB
7465 }
7466
7467 if (aarch64_tune_string)
7468 {
7469 aarch64_parse_tune ();
7470 }
7471
63892fa2
KV
7472#ifndef HAVE_AS_MABI_OPTION
7473 /* The compiler may have been configured with 2.23.* binutils, which does
7474 not have support for ILP32. */
7475 if (TARGET_ILP32)
7476	    error ("assembler does not support -mabi=ilp32");
7477#endif
7478
43e9d192
IB
7479 initialize_aarch64_code_model ();
7480
7481 aarch64_build_bitmask_table ();
7482
7483 /* This target defaults to strict volatile bitfields. */
7484 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
7485 flag_strict_volatile_bitfields = 1;
7486
7487 /* If the user did not specify a processor, choose the default
7488 one for them. This will be the CPU set during configuration using
a3cd0246 7489 --with-cpu, otherwise it is "generic". */
43e9d192
IB
7490 if (!selected_cpu)
7491 {
7492 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
7493 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
7494 }
7495
7496 gcc_assert (selected_cpu);
7497
43e9d192 7498 if (!selected_tune)
3edaf26d 7499 selected_tune = selected_cpu;
43e9d192
IB
7500
7501 aarch64_tune_flags = selected_tune->flags;
7502 aarch64_tune = selected_tune->core;
b175b679
JG
7503 /* Make a copy of the tuning parameters attached to the core, which
7504 we may later overwrite. */
7505 aarch64_tune_params = *(selected_tune->tune);
0c6caaf8 7506 aarch64_architecture_version = selected_cpu->architecture_version;
43e9d192 7507
7508 if (aarch64_override_tune_string)
7509 aarch64_parse_override_string (aarch64_override_tune_string,
7510 &aarch64_tune_params);
7511
7512 if (aarch64_fix_a53_err835769 == 2)
7513 {
7514#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
7515 aarch64_fix_a53_err835769 = 1;
7516#else
7517 aarch64_fix_a53_err835769 = 0;
7518#endif
7519 }
7520
e2fc7193 7521 aarch64_register_fma_steering ();
fde9b31b 7522
7523 aarch64_override_options_after_change ();
7524}
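/* Worked examples of the precedence described above (illustrative only;
   the CPU names are placeholders for any supported core):

     -mcpu=cortex-a57                     arch and tuning both from the CPU
     -mcpu=cortex-a57 -mtune=cortex-a53   arch from -mcpu, tuning from -mtune
     -march=armv8-a                       armv8-a ISA, default tuning
     (no option given)                    the configure-time --with-cpu core,
                                          otherwise "generic".  */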
7525
7526/* Implement targetm.override_options_after_change. */
7527
7528static void
7529aarch64_override_options_after_change (void)
7530{
7531 if (flag_omit_frame_pointer)
7532 flag_omit_leaf_frame_pointer = false;
7533 else if (flag_omit_leaf_frame_pointer)
7534 flag_omit_frame_pointer = true;
7535
7536 /* If not optimizing for size, set the default
7537 alignment to what the target wants. */
7538 if (!optimize_size)
7539 {
7540 if (align_loops <= 0)
b175b679 7541 align_loops = aarch64_tune_params.loop_align;
487edc87 7542 if (align_jumps <= 0)
b175b679 7543 align_jumps = aarch64_tune_params.jump_align;
487edc87 7544 if (align_functions <= 0)
b175b679 7545 align_functions = aarch64_tune_params.function_align;
487edc87 7546 }
7547}
7548
7549static struct machine_function *
7550aarch64_init_machine_status (void)
7551{
7552 struct machine_function *machine;
766090c2 7553 machine = ggc_cleared_alloc<machine_function> ();
7554 return machine;
7555}
7556
7557void
7558aarch64_init_expanders (void)
7559{
7560 init_machine_status = aarch64_init_machine_status;
7561}
7562
7563/* A checking mechanism for the implementation of the various code models. */
7564static void
7565initialize_aarch64_code_model (void)
7566{
7567 if (flag_pic)
7568 {
7569 switch (aarch64_cmodel_var)
7570 {
7571 case AARCH64_CMODEL_TINY:
7572 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
7573 break;
7574 case AARCH64_CMODEL_SMALL:
34ecdb0f 7575#ifdef HAVE_AS_SMALL_PIC_RELOCS
7576 aarch64_cmodel = (flag_pic == 2
7577 ? AARCH64_CMODEL_SMALL_PIC
7578 : AARCH64_CMODEL_SMALL_SPIC);
7579#else
7580 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
7581#endif
7582 break;
7583 case AARCH64_CMODEL_LARGE:
7584 sorry ("code model %qs with -f%s", "large",
7585 flag_pic > 1 ? "PIC" : "pic");
 break;
7586 default:
7587 gcc_unreachable ();
7588 }
7589 }
7590 else
7591 aarch64_cmodel = aarch64_cmodel_var;
7592}
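/* For example: under -fpic (flag_pic == 1) with an assembler that supports
   the small PIC relocations, the small model becomes
   AARCH64_CMODEL_SMALL_SPIC; under -fPIC (flag_pic == 2) it becomes
   AARCH64_CMODEL_SMALL_PIC; without -fpic/-fPIC the user's choice is used
   unchanged.  */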
7593
7594/* Return true if SYMBOL_REF X binds locally. */
7595
7596static bool
7597aarch64_symbol_binds_local_p (const_rtx x)
7598{
7599 return (SYMBOL_REF_DECL (x)
7600 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
7601 : SYMBOL_REF_LOCAL_P (x));
7602}
7603
7604/* Return true if SYMBOL_REF X is thread-local. */
7605static bool
7606aarch64_tls_symbol_p (rtx x)
7607{
7608 if (! TARGET_HAVE_TLS)
7609 return false;
7610
7611 if (GET_CODE (x) != SYMBOL_REF)
7612 return false;
7613
7614 return SYMBOL_REF_TLS_MODEL (x) != 0;
7615}
7616
7617/* Classify a TLS symbol into one of the TLS kinds. */
7618enum aarch64_symbol_type
7619aarch64_classify_tls_symbol (rtx x)
7620{
7621 enum tls_model tls_kind = tls_symbolic_operand_type (x);
7622
7623 switch (tls_kind)
7624 {
7625 case TLS_MODEL_GLOBAL_DYNAMIC:
7626 case TLS_MODEL_LOCAL_DYNAMIC:
7627 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
7628
7629 case TLS_MODEL_INITIAL_EXEC:
7630 return SYMBOL_SMALL_GOTTPREL;
7631
7632 case TLS_MODEL_LOCAL_EXEC:
8fd17b98 7633 return SYMBOL_TLSLE;
7634
7635 case TLS_MODEL_EMULATED:
7636 case TLS_MODEL_NONE:
7637 return SYMBOL_FORCE_TO_MEM;
7638
7639 default:
7640 gcc_unreachable ();
7641 }
7642}
7643
7644/* Return the method that should be used to access SYMBOL_REF or
7645 LABEL_REF X in context CONTEXT. */
17f4d4bf 7646
43e9d192 7647enum aarch64_symbol_type
f8b756b7 7648aarch64_classify_symbol (rtx x, rtx offset,
7649 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
7650{
7651 if (GET_CODE (x) == LABEL_REF)
7652 {
7653 switch (aarch64_cmodel)
7654 {
7655 case AARCH64_CMODEL_LARGE:
7656 return SYMBOL_FORCE_TO_MEM;
7657
7658 case AARCH64_CMODEL_TINY_PIC:
7659 case AARCH64_CMODEL_TINY:
7660 return SYMBOL_TINY_ABSOLUTE;
7661
1b1e81f8 7662 case AARCH64_CMODEL_SMALL_SPIC:
7663 case AARCH64_CMODEL_SMALL_PIC:
7664 case AARCH64_CMODEL_SMALL:
7665 return SYMBOL_SMALL_ABSOLUTE;
7666
7667 default:
7668 gcc_unreachable ();
7669 }
7670 }
7671
17f4d4bf 7672 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 7673 {
7674 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
7675 return SYMBOL_FORCE_TO_MEM;
7676
7677 if (aarch64_tls_symbol_p (x))
7678 return aarch64_classify_tls_symbol (x);
7679
7680 switch (aarch64_cmodel)
7681 {
7682 case AARCH64_CMODEL_TINY:
7683 /* When we retrieve a symbol + offset address, we have to make sure
7684 the offset does not cause overflow of the final address. But
7685 we have no way of knowing the address of the symbol at compile time,
7686 so we can't accurately say if the distance between the PC and
7687 symbol + offset is outside the addressable range of +/-1M in the
7688 TINY code model. So we rely on images not being greater than
7689 1M, cap the offset at 1M, and anything beyond 1M will have to
7690 be loaded using an alternative mechanism. */
7691 if (SYMBOL_REF_WEAK (x)
7692 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
7693 return SYMBOL_FORCE_TO_MEM;
7694 return SYMBOL_TINY_ABSOLUTE;
7695
17f4d4bf 7696 case AARCH64_CMODEL_SMALL:
7697 /* Same reasoning as the tiny code model, but the offset cap here is
7698 4G. */
7699 if (SYMBOL_REF_WEAK (x)
7700 || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
7701 HOST_WIDE_INT_C (4294967264)))
7702 return SYMBOL_FORCE_TO_MEM;
7703 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 7704
17f4d4bf 7705 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 7706 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 7707 return SYMBOL_TINY_GOT;
7708 return SYMBOL_TINY_ABSOLUTE;
7709
1b1e81f8 7710 case AARCH64_CMODEL_SMALL_SPIC:
7711 case AARCH64_CMODEL_SMALL_PIC:
7712 if (!aarch64_symbol_binds_local_p (x))
7713 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
7714 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 7715 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 7716
7717 default:
7718 gcc_unreachable ();
7719 }
43e9d192 7720 }
17f4d4bf 7721
7722 /* By default push everything into the constant pool. */
7723 return SYMBOL_FORCE_TO_MEM;
7724}
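/* Worked example of the offset capping above (illustrative): in the TINY
   model, "sym + 0x80000" (512K) is still classified SYMBOL_TINY_ABSOLUTE,
   as it is assumed to lie within the +/-1M range, whereas "sym + 0x200000"
   (2M) exceeds the cap and is classified SYMBOL_FORCE_TO_MEM, so the
   address is loaded from the constant pool instead.  */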
7725
7726bool
7727aarch64_constant_address_p (rtx x)
7728{
7729 return (CONSTANT_P (x) && memory_address_p (DImode, x));
7730}
7731
7732bool
7733aarch64_legitimate_pic_operand_p (rtx x)
7734{
7735 if (GET_CODE (x) == SYMBOL_REF
7736 || (GET_CODE (x) == CONST
7737 && GET_CODE (XEXP (x, 0)) == PLUS
7738 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7739 return false;
7740
7741 return true;
7742}
7743
7744/* Return true if X holds either a quarter-precision floating-point
7745 constant or the floating-point constant +0.0. */
7746static bool
ef4bddc2 7747aarch64_valid_floating_const (machine_mode mode, rtx x)
7748{
7749 if (!CONST_DOUBLE_P (x))
7750 return false;
7751
7752 if (aarch64_float_const_zero_rtx_p (x))
7753 return true;
7754
7755 /* Other than 0.0 (handled above), only SFmode and DFmode constants
 are supported; for TFmode we only handle moving 0.0. */
7756 if (!(mode == SFmode || mode == DFmode))
7757 return false;
7758
7759 return aarch64_float_const_representable_p (x);
7760}
7761
43e9d192 7762static bool
ef4bddc2 7763aarch64_legitimate_constant_p (machine_mode mode, rtx x)
7764{
7765 /* Do not allow vector struct mode constants. We could support
7766 0 and -1 easily, but they need support in aarch64-simd.md. */
7767 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
7768 return false;
7769
7770 /* This could probably go away because
7771 we now decompose CONST_INTs according to expand_mov_immediate. */
7772 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 7773 && aarch64_simd_valid_immediate (x, mode, false, NULL))
7774 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
7775 return !targetm.cannot_force_const_mem (mode, x);
7776
7777 if (GET_CODE (x) == HIGH
7778 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7779 return true;
7780
7781 return aarch64_constant_address_p (x);
7782}
7783
a5bc806c 7784rtx
7785aarch64_load_tp (rtx target)
7786{
7787 if (!target
7788 || GET_MODE (target) != Pmode
7789 || !register_operand (target, Pmode))
7790 target = gen_reg_rtx (Pmode);
7791
7792 /* Can return in any reg. */
7793 emit_insn (gen_aarch64_load_tp_hard (target));
7794 return target;
7795}
7796
7797/* On AAPCS systems, this is the "struct __va_list". */
7798static GTY(()) tree va_list_type;
7799
7800/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
7801 Return the type to use as __builtin_va_list.
7802
7803 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
7804
7805 struct __va_list
7806 {
7807 void *__stack;
7808 void *__gr_top;
7809 void *__vr_top;
7810 int __gr_offs;
7811 int __vr_offs;
7812 }; */
7813
7814static tree
7815aarch64_build_builtin_va_list (void)
7816{
7817 tree va_list_name;
7818 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7819
7820 /* Create the type. */
7821 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
7822 /* Give it the required name. */
7823 va_list_name = build_decl (BUILTINS_LOCATION,
7824 TYPE_DECL,
7825 get_identifier ("__va_list"),
7826 va_list_type);
7827 DECL_ARTIFICIAL (va_list_name) = 1;
7828 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 7829 TYPE_STUB_DECL (va_list_type) = va_list_name;
7830
7831 /* Create the fields. */
7832 f_stack = build_decl (BUILTINS_LOCATION,
7833 FIELD_DECL, get_identifier ("__stack"),
7834 ptr_type_node);
7835 f_grtop = build_decl (BUILTINS_LOCATION,
7836 FIELD_DECL, get_identifier ("__gr_top"),
7837 ptr_type_node);
7838 f_vrtop = build_decl (BUILTINS_LOCATION,
7839 FIELD_DECL, get_identifier ("__vr_top"),
7840 ptr_type_node);
7841 f_groff = build_decl (BUILTINS_LOCATION,
7842 FIELD_DECL, get_identifier ("__gr_offs"),
7843 integer_type_node);
7844 f_vroff = build_decl (BUILTINS_LOCATION,
7845 FIELD_DECL, get_identifier ("__vr_offs"),
7846 integer_type_node);
7847
7848 DECL_ARTIFICIAL (f_stack) = 1;
7849 DECL_ARTIFICIAL (f_grtop) = 1;
7850 DECL_ARTIFICIAL (f_vrtop) = 1;
7851 DECL_ARTIFICIAL (f_groff) = 1;
7852 DECL_ARTIFICIAL (f_vroff) = 1;
7853
7854 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
7855 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
7856 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
7857 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
7858 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
7859
7860 TYPE_FIELDS (va_list_type) = f_stack;
7861 DECL_CHAIN (f_stack) = f_grtop;
7862 DECL_CHAIN (f_grtop) = f_vrtop;
7863 DECL_CHAIN (f_vrtop) = f_groff;
7864 DECL_CHAIN (f_groff) = f_vroff;
7865
7866 /* Compute its layout. */
7867 layout_type (va_list_type);
7868
7869 return va_list_type;
7870}
7871
7872/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
7873static void
7874aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
7875{
7876 const CUMULATIVE_ARGS *cum;
7877 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7878 tree stack, grtop, vrtop, groff, vroff;
7879 tree t;
7880 int gr_save_area_size;
7881 int vr_save_area_size;
7882 int vr_offset;
7883
7884 cum = &crtl->args.info;
7885 gr_save_area_size
7886 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
7887 vr_save_area_size
7888 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
7889
d5726973 7890 if (!TARGET_FLOAT)
43e9d192 7891 {
261fb553 7892 gcc_assert (cum->aapcs_nvrn == 0);
7893 vr_save_area_size = 0;
7894 }
7895
7896 f_stack = TYPE_FIELDS (va_list_type_node);
7897 f_grtop = DECL_CHAIN (f_stack);
7898 f_vrtop = DECL_CHAIN (f_grtop);
7899 f_groff = DECL_CHAIN (f_vrtop);
7900 f_vroff = DECL_CHAIN (f_groff);
7901
7902 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
7903 NULL_TREE);
7904 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
7905 NULL_TREE);
7906 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
7907 NULL_TREE);
7908 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
7909 NULL_TREE);
7910 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
7911 NULL_TREE);
7912
7913 /* Emit code to initialize STACK, which points to the next varargs stack
7914 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
7915 by named arguments. STACK is 8-byte aligned. */
7916 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
7917 if (cum->aapcs_stack_size > 0)
7918 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
7919 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
7920 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7921
7922 /* Emit code to initialize GRTOP, the top of the GR save area.
7923 virtual_incoming_args_rtx should have been 16 byte aligned. */
7924 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
7925 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
7926 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7927
7928 /* Emit code to initialize VRTOP, the top of the VR save area.
7929 This address is gr_save_area_bytes below GRTOP, rounded
7930 down to the next 16-byte boundary. */
7931 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
7932 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
7933 STACK_BOUNDARY / BITS_PER_UNIT);
7934
7935 if (vr_offset)
7936 t = fold_build_pointer_plus_hwi (t, -vr_offset);
7937 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
7938 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7939
7940 /* Emit code to initialize GROFF, the offset from GRTOP of the
7941 next GPR argument. */
7942 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
7943 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
7944 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7945
7946 /* Likewise emit code to initialize VROFF, the offset from VRTOP
7947 of the next VR argument. */
7948 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
7949 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
7950 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7951}
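/* A user-level sketch (illustrative, not part of GCC) of the machinery set
   up above: the anonymous double arguments below are fetched through the
   __vr_top/__vr_offs fields initialized by va_start, and from __stack once
   the VR save area is exhausted.  */
#if 0
#include <stdarg.h>
#include <stdio.h>

static double
sum_doubles (int count, ...)
{
  va_list ap;
  double total = 0.0;
  int i;

  va_start (ap, count);
  for (i = 0; i < count; i++)
    total += va_arg (ap, double);
  va_end (ap);
  return total;
}

int
main (void)
{
  printf ("%f\n", sum_doubles (3, 1.0, 2.0, 3.0)); /* prints 6.000000 */
  return 0;
}
#endif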
7952
7953/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
7954
7955static tree
7956aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7957 gimple_seq *post_p ATTRIBUTE_UNUSED)
7958{
7959 tree addr;
7960 bool indirect_p;
7961 bool is_ha; /* is HFA or HVA. */
7962 bool dw_align; /* double-word align. */
ef4bddc2 7963 machine_mode ag_mode = VOIDmode;
43e9d192 7964 int nregs;
ef4bddc2 7965 machine_mode mode;
7966
7967 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7968 tree stack, f_top, f_off, off, arg, roundup, on_stack;
7969 HOST_WIDE_INT size, rsize, adjust, align;
7970 tree t, u, cond1, cond2;
7971
7972 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7973 if (indirect_p)
7974 type = build_pointer_type (type);
7975
7976 mode = TYPE_MODE (type);
7977
7978 f_stack = TYPE_FIELDS (va_list_type_node);
7979 f_grtop = DECL_CHAIN (f_stack);
7980 f_vrtop = DECL_CHAIN (f_grtop);
7981 f_groff = DECL_CHAIN (f_vrtop);
7982 f_vroff = DECL_CHAIN (f_groff);
7983
7984 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
7985 f_stack, NULL_TREE);
7986 size = int_size_in_bytes (type);
7987 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
7988
7989 dw_align = false;
7990 adjust = 0;
7991 if (aarch64_vfp_is_call_or_return_candidate (mode,
7992 type,
7993 &ag_mode,
7994 &nregs,
7995 &is_ha))
7996 {
7997 /* TYPE passed in fp/simd registers. */
d5726973 7998 if (!TARGET_FLOAT)
261fb553 7999 aarch64_err_no_fpadvsimd (mode, "varargs");
8000
8001 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
8002 unshare_expr (valist), f_vrtop, NULL_TREE);
8003 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
8004 unshare_expr (valist), f_vroff, NULL_TREE);
8005
8006 rsize = nregs * UNITS_PER_VREG;
8007
8008 if (is_ha)
8009 {
8010 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
8011 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
8012 }
8013 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
8014 && size < UNITS_PER_VREG)
8015 {
8016 adjust = UNITS_PER_VREG - size;
8017 }
8018 }
8019 else
8020 {
8021 /* TYPE passed in general registers. */
8022 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
8023 unshare_expr (valist), f_grtop, NULL_TREE);
8024 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
8025 unshare_expr (valist), f_groff, NULL_TREE);
8026 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8027 nregs = rsize / UNITS_PER_WORD;
8028
8029 if (align > 8)
8030 dw_align = true;
8031
8032 if (BLOCK_REG_PADDING (mode, type, 1) == downward
8033 && size < UNITS_PER_WORD)
8034 {
8035 adjust = UNITS_PER_WORD - size;
8036 }
8037 }
8038
8039 /* Get a local temporary for the field value. */
8040 off = get_initialized_tmp_var (f_off, pre_p, NULL);
8041
8042 /* Emit code to branch if off >= 0. */
8043 t = build2 (GE_EXPR, boolean_type_node, off,
8044 build_int_cst (TREE_TYPE (off), 0));
8045 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
8046
8047 if (dw_align)
8048 {
8049 /* Emit: offs = (offs + 15) & -16. */
8050 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
8051 build_int_cst (TREE_TYPE (off), 15));
8052 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
8053 build_int_cst (TREE_TYPE (off), -16));
8054 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
8055 }
8056 else
8057 roundup = NULL;
8058
8059 /* Update ap.__[g|v]r_offs */
8060 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
8061 build_int_cst (TREE_TYPE (off), rsize));
8062 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
8063
8064 /* String up. */
8065 if (roundup)
8066 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
8067
8068 /* [cond2] if (ap.__[g|v]r_offs > 0) */
8069 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
8070 build_int_cst (TREE_TYPE (f_off), 0));
8071 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
8072
8073 /* String up: make sure the assignment happens before the use. */
8074 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
8075 COND_EXPR_ELSE (cond1) = t;
8076
8077 /* Prepare the trees handling the argument that is passed on the stack;
8078 the top level node will store in ON_STACK. */
8079 arg = get_initialized_tmp_var (stack, pre_p, NULL);
8080 if (align > 8)
8081 {
8082 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
8083 t = fold_convert (intDI_type_node, arg);
8084 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
8085 build_int_cst (TREE_TYPE (t), 15));
8086 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8087 build_int_cst (TREE_TYPE (t), -16));
8088 t = fold_convert (TREE_TYPE (arg), t);
8089 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
8090 }
8091 else
8092 roundup = NULL;
8093 /* Advance ap.__stack */
8094 t = fold_convert (intDI_type_node, arg);
8095 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
8096 build_int_cst (TREE_TYPE (t), size + 7));
8097 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8098 build_int_cst (TREE_TYPE (t), -8));
8099 t = fold_convert (TREE_TYPE (arg), t);
8100 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
8101 /* String up roundup and advance. */
8102 if (roundup)
8103 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
8104 /* String up with arg */
8105 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
8106 /* Big-endianness related address adjustment. */
8107 if (BLOCK_REG_PADDING (mode, type, 1) == downward
8108 && size < UNITS_PER_WORD)
8109 {
8110 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
8111 size_int (UNITS_PER_WORD - size));
8112 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
8113 }
8114
8115 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
8116 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
8117
8118 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
8119 t = off;
8120 if (adjust)
8121 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
8122 build_int_cst (TREE_TYPE (off), adjust));
8123
8124 t = fold_convert (sizetype, t);
8125 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
8126
8127 if (is_ha)
8128 {
8129 /* type ha; // treat as "struct {ftype field[n];}"
8130 ... [computing offs]
8131 for (i = 0; i <nregs; ++i, offs += 16)
8132 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
8133 return ha; */
8134 int i;
8135 tree tmp_ha, field_t, field_ptr_t;
8136
8137 /* Declare a local variable. */
8138 tmp_ha = create_tmp_var_raw (type, "ha");
8139 gimple_add_tmp_var (tmp_ha);
8140
8141 /* Establish the base type. */
8142 switch (ag_mode)
8143 {
8144 case SFmode:
8145 field_t = float_type_node;
8146 field_ptr_t = float_ptr_type_node;
8147 break;
8148 case DFmode:
8149 field_t = double_type_node;
8150 field_ptr_t = double_ptr_type_node;
8151 break;
8152 case TFmode:
8153 field_t = long_double_type_node;
8154 field_ptr_t = long_double_ptr_type_node;
8155 break;
8156/* The half precision and quad precision are not fully supported yet. Enable
8157 the following code after the support is complete. Need to find the correct
8158 type node for __fp16 *. */
8159#if 0
8160 case HFmode:
8161 field_t = float_type_node;
8162 field_ptr_t = float_ptr_type_node;
8163 break;
8164#endif
8165 case V2SImode:
8166 case V4SImode:
8167 {
8168 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
8169 field_t = build_vector_type_for_mode (innertype, ag_mode);
8170 field_ptr_t = build_pointer_type (field_t);
8171 }
8172 break;
8173 default:
8174 gcc_assert (0);
8175 }
8176
8177 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
8178 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
8179 addr = t;
8180 t = fold_convert (field_ptr_t, addr);
8181 t = build2 (MODIFY_EXPR, field_t,
8182 build1 (INDIRECT_REF, field_t, tmp_ha),
8183 build1 (INDIRECT_REF, field_t, t));
8184
8185 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
8186 for (i = 1; i < nregs; ++i)
8187 {
8188 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
8189 u = fold_convert (field_ptr_t, addr);
8190 u = build2 (MODIFY_EXPR, field_t,
8191 build2 (MEM_REF, field_t, tmp_ha,
8192 build_int_cst (field_ptr_t,
8193 (i *
8194 int_size_in_bytes (field_t)))),
8195 build1 (INDIRECT_REF, field_t, u));
8196 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
8197 }
8198
8199 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
8200 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
8201 }
8202
8203 COND_EXPR_ELSE (cond2) = t;
8204 addr = fold_convert (build_pointer_type (type), cond1);
8205 addr = build_va_arg_indirect_ref (addr);
8206
8207 if (indirect_p)
8208 addr = build_va_arg_indirect_ref (addr);
8209
8210 return addr;
8211}
8212
8213/* Implement TARGET_SETUP_INCOMING_VARARGS. */
8214
8215static void
ef4bddc2 8216aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8217 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8218 int no_rtl)
8219{
8220 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8221 CUMULATIVE_ARGS local_cum;
8222 int gr_saved, vr_saved;
8223
8224 /* The caller has advanced CUM up to, but not beyond, the last named
8225 argument. Advance a local copy of CUM past the last "real" named
8226 argument, to find out how many registers are left over. */
8227 local_cum = *cum;
8228 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
8229
8230 /* Find out how many registers we need to save. */
8231 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
8232 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
8233
d5726973 8234 if (!TARGET_FLOAT)
43e9d192 8235 {
261fb553 8236 gcc_assert (local_cum.aapcs_nvrn == 0);
8237 vr_saved = 0;
8238 }
8239
8240 if (!no_rtl)
8241 {
8242 if (gr_saved > 0)
8243 {
8244 rtx ptr, mem;
8245
8246 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
8247 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
8248 - gr_saved * UNITS_PER_WORD);
8249 mem = gen_frame_mem (BLKmode, ptr);
8250 set_mem_alias_set (mem, get_varargs_alias_set ());
8251
8252 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
8253 mem, gr_saved);
8254 }
8255 if (vr_saved > 0)
8256 {
8257 /* We can't use move_block_from_reg, because it will use
8258 the wrong mode, storing D regs only. */
ef4bddc2 8259 machine_mode mode = TImode;
8260 int off, i;
8261
8262 /* Set OFF to the offset from virtual_incoming_args_rtx of
8263 the first vector register. The VR save area lies below
8264 the GR one, and is aligned to 16 bytes. */
8265 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
8266 STACK_BOUNDARY / BITS_PER_UNIT);
8267 off -= vr_saved * UNITS_PER_VREG;
8268
8269 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
8270 {
8271 rtx ptr, mem;
8272
8273 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
8274 mem = gen_frame_mem (mode, ptr);
8275 set_mem_alias_set (mem, get_varargs_alias_set ());
8276 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
8277 off += UNITS_PER_VREG;
8278 }
8279 }
8280 }
8281
8282 /* We don't save the size into *PRETEND_SIZE because we want to avoid
8283 any complication of having crtl->args.pretend_args_size changed. */
8799637a 8284 cfun->machine->frame.saved_varargs_size
8285 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
8286 STACK_BOUNDARY / BITS_PER_UNIT)
8287 + vr_saved * UNITS_PER_VREG);
8288}
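/* Worked example of the arithmetic above (illustrative): if the named
   arguments consumed 5 of the 8 GRs and 6 of the 8 VRs, then gr_saved = 3
   and vr_saved = 2; the GR save area takes 3 * 8 = 24 bytes, rounded up to
   32 for the 16-byte stack boundary, the VR save area takes 2 * 16 = 32
   bytes below that, and saved_varargs_size = 32 + 32 = 64.  */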
8289
8290static void
8291aarch64_conditional_register_usage (void)
8292{
8293 int i;
8294 if (!TARGET_FLOAT)
8295 {
8296 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
8297 {
8298 fixed_regs[i] = 1;
8299 call_used_regs[i] = 1;
8300 }
8301 }
8302}
8303
8304/* Walk down the type tree of TYPE counting consecutive base elements.
8305 If *MODEP is VOIDmode, then set it to the first valid floating point
8306 type. If a non-floating point type is found, or if a floating point
8307 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
8308 otherwise return the count in the sub-tree. */
8309static int
ef4bddc2 8310aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 8311{
ef4bddc2 8312 machine_mode mode;
8313 HOST_WIDE_INT size;
8314
8315 switch (TREE_CODE (type))
8316 {
8317 case REAL_TYPE:
8318 mode = TYPE_MODE (type);
8319 if (mode != DFmode && mode != SFmode && mode != TFmode)
8320 return -1;
8321
8322 if (*modep == VOIDmode)
8323 *modep = mode;
8324
8325 if (*modep == mode)
8326 return 1;
8327
8328 break;
8329
8330 case COMPLEX_TYPE:
8331 mode = TYPE_MODE (TREE_TYPE (type));
8332 if (mode != DFmode && mode != SFmode && mode != TFmode)
8333 return -1;
8334
8335 if (*modep == VOIDmode)
8336 *modep = mode;
8337
8338 if (*modep == mode)
8339 return 2;
8340
8341 break;
8342
8343 case VECTOR_TYPE:
8344 /* Use V2SImode and V4SImode as representatives of all 64-bit
8345 and 128-bit vector types. */
8346 size = int_size_in_bytes (type);
8347 switch (size)
8348 {
8349 case 8:
8350 mode = V2SImode;
8351 break;
8352 case 16:
8353 mode = V4SImode;
8354 break;
8355 default:
8356 return -1;
8357 }
8358
8359 if (*modep == VOIDmode)
8360 *modep = mode;
8361
8362 /* Vector modes are considered to be opaque: two vectors are
8363 equivalent for the purposes of being homogeneous aggregates
8364 if they are the same size. */
8365 if (*modep == mode)
8366 return 1;
8367
8368 break;
8369
8370 case ARRAY_TYPE:
8371 {
8372 int count;
8373 tree index = TYPE_DOMAIN (type);
8374
8375 /* Can't handle incomplete types nor sizes that are not
8376 fixed. */
8377 if (!COMPLETE_TYPE_P (type)
8378 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8379 return -1;
8380
8381 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
8382 if (count == -1
8383 || !index
8384 || !TYPE_MAX_VALUE (index)
cc269bb6 8385 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 8386 || !TYPE_MIN_VALUE (index)
cc269bb6 8387 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
8388 || count < 0)
8389 return -1;
8390
8391 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
8392 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
8393
8394 /* There must be no padding. */
807e902e 8395 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8396 return -1;
8397
8398 return count;
8399 }
8400
8401 case RECORD_TYPE:
8402 {
8403 int count = 0;
8404 int sub_count;
8405 tree field;
8406
8407 /* Can't handle incomplete types nor sizes that are not
8408 fixed. */
8409 if (!COMPLETE_TYPE_P (type)
8410 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8411 return -1;
8412
8413 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
8414 {
8415 if (TREE_CODE (field) != FIELD_DECL)
8416 continue;
8417
8418 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
8419 if (sub_count < 0)
8420 return -1;
8421 count += sub_count;
8422 }
8423
8424 /* There must be no padding. */
807e902e 8425 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8426 return -1;
8427
8428 return count;
8429 }
8430
8431 case UNION_TYPE:
8432 case QUAL_UNION_TYPE:
8433 {
8434 /* These aren't very interesting except in a degenerate case. */
8435 int count = 0;
8436 int sub_count;
8437 tree field;
8438
8439 /* Can't handle incomplete types nor sizes that are not
8440 fixed. */
8441 if (!COMPLETE_TYPE_P (type)
8442 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8443 return -1;
8444
8445 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
8446 {
8447 if (TREE_CODE (field) != FIELD_DECL)
8448 continue;
8449
8450 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
8451 if (sub_count < 0)
8452 return -1;
8453 count = count > sub_count ? count : sub_count;
8454 }
8455
8456 /* There must be no padding. */
807e902e 8457 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8458 return -1;
8459
8460 return count;
8461 }
8462
8463 default:
8464 break;
8465 }
8466
8467 return -1;
8468}
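/* Illustrative examples (not part of GCC) of how the walk above counts
   consecutive base elements:  */
#if 0
struct hfa3   { float a, b, c; };      /* count 3, base mode SFmode.  */
struct hfa4   { double d[4]; };        /* count 4, base mode DFmode.  */
struct mixed  { float a; int b; };     /* integer field: returns -1.  */
struct mixed2 { float a; double b; };  /* SFmode then DFmode: returns -1.  */
#endif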
8469
8470/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
8471 type as described in AAPCS64 \S 4.1.2.
8472
8473 See the comment above aarch64_composite_type_p for the notes on MODE. */
8474
8475static bool
8476aarch64_short_vector_p (const_tree type,
8477 machine_mode mode)
8478{
8479 HOST_WIDE_INT size = -1;
8480
8481 if (type && TREE_CODE (type) == VECTOR_TYPE)
8482 size = int_size_in_bytes (type);
8483 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8484 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8485 size = GET_MODE_SIZE (mode);
8486
8487 return (size == 8 || size == 16);
8488}
8489
8490/* Return TRUE if the type, as described by TYPE and MODE, is a composite
8491 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
8492 array types. The C99 floating-point complex types are also considered
8493 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
8494 types, which are GCC extensions and out of the scope of AAPCS64, are
8495 treated as composite types here as well.
8496
8497 Note that MODE itself is not sufficient in determining whether a type
8498 is such a composite type or not. This is because
8499 stor-layout.c:compute_record_mode may have already changed the MODE
8500 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
8501 structure with only one field may have its MODE set to the mode of the
8502 field. Also an integer mode whose size matches the size of the
8503 RECORD_TYPE type may be used to substitute the original mode
8504 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
8505 solely relied on. */
8506
8507static bool
8508aarch64_composite_type_p (const_tree type,
ef4bddc2 8509 machine_mode mode)
43e9d192 8510{
8511 if (aarch64_short_vector_p (type, mode))
8512 return false;
8513
8514 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
8515 return true;
8516
8517 if (mode == BLKmode
8518 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
8519 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
8520 return true;
8521
8522 return false;
8523}
8524
8525/* Return TRUE if an argument, whose type is described by TYPE and MODE,
8526 shall be passed or returned in simd/fp register(s) (providing these
8527 parameter passing registers are available).
8528
8529 Upon successful return, *COUNT returns the number of needed registers,
8530 *BASE_MODE returns the mode of the individual register and, when IS_HA
8531 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
8532 floating-point aggregate or a homogeneous short-vector aggregate. */
8533
8534static bool
ef4bddc2 8535aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 8536 const_tree type,
ef4bddc2 8537 machine_mode *base_mode,
8538 int *count,
8539 bool *is_ha)
8540{
ef4bddc2 8541 machine_mode new_mode = VOIDmode;
8542 bool composite_p = aarch64_composite_type_p (type, mode);
8543
8544 if (is_ha != NULL) *is_ha = false;
8545
8546 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
8547 || aarch64_short_vector_p (type, mode))
8548 {
8549 *count = 1;
8550 new_mode = mode;
8551 }
8552 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
8553 {
8554 if (is_ha != NULL) *is_ha = true;
8555 *count = 2;
8556 new_mode = GET_MODE_INNER (mode);
8557 }
8558 else if (type && composite_p)
8559 {
8560 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
8561
8562 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
8563 {
8564 if (is_ha != NULL) *is_ha = true;
8565 *count = ag_count;
8566 }
8567 else
8568 return false;
8569 }
8570 else
8571 return false;
8572
8573 *base_mode = new_mode;
8574 return true;
8575}
8576
8577/* Implement TARGET_STRUCT_VALUE_RTX. */
8578
8579static rtx
8580aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
8581 int incoming ATTRIBUTE_UNUSED)
8582{
8583 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
8584}
8585
8586/* Implements target hook vector_mode_supported_p. */
8587static bool
ef4bddc2 8588aarch64_vector_mode_supported_p (machine_mode mode)
8589{
8590 if (TARGET_SIMD
8591 && (mode == V4SImode || mode == V8HImode
8592 || mode == V16QImode || mode == V2DImode
8593 || mode == V2SImode || mode == V4HImode
8594 || mode == V8QImode || mode == V2SFmode
8595 || mode == V4SFmode || mode == V2DFmode
8596 || mode == V1DFmode))
8597 return true;
8598
8599 return false;
8600}
8601
8602/* Return appropriate SIMD container
8603 for MODE within a vector of WIDTH bits. */
8604static machine_mode
8605aarch64_simd_container_mode (machine_mode mode, unsigned width)
43e9d192 8606{
b7342d25 8607 gcc_assert (width == 64 || width == 128);
43e9d192 8608 if (TARGET_SIMD)
8609 {
8610 if (width == 128)
8611 switch (mode)
8612 {
8613 case DFmode:
8614 return V2DFmode;
8615 case SFmode:
8616 return V4SFmode;
8617 case SImode:
8618 return V4SImode;
8619 case HImode:
8620 return V8HImode;
8621 case QImode:
8622 return V16QImode;
8623 case DImode:
8624 return V2DImode;
8625 default:
8626 break;
8627 }
8628 else
8629 switch (mode)
8630 {
8631 case SFmode:
8632 return V2SFmode;
8633 case SImode:
8634 return V2SImode;
8635 case HImode:
8636 return V4HImode;
8637 case QImode:
8638 return V8QImode;
8639 default:
8640 break;
8641 }
8642 }
8643 return word_mode;
8644}
8645
b7342d25 8646/* Return 128-bit container as the preferred SIMD mode for MODE. */
8647static machine_mode
8648aarch64_preferred_simd_mode (machine_mode mode)
8649{
8650 return aarch64_simd_container_mode (mode, 128);
8651}
8652
8653/* Return the bitmask of possible vector sizes for the vectorizer
8654 to iterate over. */
8655static unsigned int
8656aarch64_autovectorize_vector_sizes (void)
8657{
8658 return (16 | 8);
8659}
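/* I.e. bits for 16-byte and 8-byte vectors: the vectorizer tries 128-bit
   vector modes first and falls back to 64-bit ones.  */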
8660
8661/* Implement TARGET_MANGLE_TYPE. */
8662
6f549691 8663static const char *
8664aarch64_mangle_type (const_tree type)
8665{
8666 /* The AArch64 ABI documents say that "__va_list" has to be
8667 mangled as if it is in the "std" namespace. */
8668 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
8669 return "St9__va_list";
8670
8671 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
8672 builtin types. */
8673 if (TYPE_NAME (type) != NULL)
8674 return aarch64_mangle_builtin_type (type);
c6fc9e43 8675
8676 /* Use the default mangling. */
8677 return NULL;
8678}
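/* For example (illustrative): a C++ declaration "void f (va_list);"
   should mangle on AArch64 to "_Z1fSt9__va_list", exactly as if the
   parameter were std::__va_list.  */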
8679
8680
8681/* Return true if the rtx_insn contains a MEM RTX somewhere
8682 in it. */
8683
8684static bool
8baff86e 8685has_memory_op (rtx_insn *mem_insn)
75cf1494 8686{
8687 subrtx_iterator::array_type array;
8688 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
8689 if (MEM_P (*iter))
8690 return true;
8691
8692 return false;
8693}
8694
8695/* Find the first rtx_insn before insn that will generate an assembly
8696 instruction. */
8697
8698static rtx_insn *
8699aarch64_prev_real_insn (rtx_insn *insn)
8700{
8701 if (!insn)
8702 return NULL;
8703
8704 do
8705 {
8706 insn = prev_real_insn (insn);
8707 }
8708 while (insn && recog_memoized (insn) < 0);
8709
8710 return insn;
8711}
8712
8713static bool
8714is_madd_op (enum attr_type t1)
8715{
8716 unsigned int i;
8717 /* A number of these may be AArch32 only. */
8718 enum attr_type mlatypes[] = {
8719 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
8720 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
8721 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
8722 };
8723
8724 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
8725 {
8726 if (t1 == mlatypes[i])
8727 return true;
8728 }
8729
8730 return false;
8731}
8732
8733/* Check if there is a register dependency between a load and the insn
8734 for which we hold recog_data. */
8735
8736static bool
8737dep_between_memop_and_curr (rtx memop)
8738{
8739 rtx load_reg;
8740 int opno;
8741
8baff86e 8742 gcc_assert (GET_CODE (memop) == SET);
8743
8744 if (!REG_P (SET_DEST (memop)))
8745 return false;
8746
8747 load_reg = SET_DEST (memop);
8baff86e 8748 for (opno = 1; opno < recog_data.n_operands; opno++)
8749 {
8750 rtx operand = recog_data.operand[opno];
8751 if (REG_P (operand)
8752 && reg_overlap_mentioned_p (load_reg, operand))
8753 return true;
8754
8755 }
8756 return false;
8757}
8758
8759
8760/* When working around the Cortex-A53 erratum 835769,
8761 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
8762 instruction and has a preceding memory instruction such that a NOP
8763 should be inserted between them. */
8764
8765bool
8766aarch64_madd_needs_nop (rtx_insn* insn)
8767{
8768 enum attr_type attr_type;
8769 rtx_insn *prev;
8770 rtx body;
8771
8772 if (!aarch64_fix_a53_err835769)
8773 return false;
8774
8775 if (recog_memoized (insn) < 0)
8776 return false;
8777
8778 attr_type = get_attr_type (insn);
8779 if (!is_madd_op (attr_type))
8780 return false;
8781
8782 prev = aarch64_prev_real_insn (insn);
8783 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
8784 Restore recog state to INSN to avoid state corruption. */
8785 extract_constrain_insn_cached (insn);
8786
8baff86e 8787 if (!prev || !has_memory_op (prev))
8788 return false;
8789
8790 body = single_set (prev);
8791
8792 /* If the previous insn is a memory op and there is no dependency between
8793 it and the DImode madd, emit a NOP between them. If body is NULL then we
8794 have a complex memory operation, probably a load/store pair.
8795 Be conservative for now and emit a NOP. */
8796 if (GET_MODE (recog_data.operand[0]) == DImode
8797 && (!body || !dep_between_memop_and_curr (body)))
8798 return true;
8799
8800 return false;
8801
8802}
8803
8804
8805/* Implement FINAL_PRESCAN_INSN. */
8806
8807void
8808aarch64_final_prescan_insn (rtx_insn *insn)
8809{
8810 if (aarch64_madd_needs_nop (insn))
8811 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
8812}
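/* Illustrative effect of the workaround (hypothetical assembly): given
   the sequence

     ldr  x2, [x10]
     madd x3, x4, x5, x6

   with -mfix-cortex-a53-835769 the output becomes

     ldr  x2, [x10]
     nop // between mem op and mult-accumulate
     madd x3, x4, x5, x6

   because the 64-bit multiply-accumulate directly follows a memory
   operation with no register dependency between them.  */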
8813
8814
43e9d192 8815/* Return the equivalent letter for size. */
81c2dfb9 8816static char
8817sizetochar (int size)
8818{
8819 switch (size)
8820 {
8821 case 64: return 'd';
8822 case 32: return 's';
8823 case 16: return 'h';
8824 case 8 : return 'b';
8825 default: gcc_unreachable ();
8826 }
8827}
8828
8829/* Return true iff x is a uniform vector of floating-point
8830 constants, and the constant can be represented in
8831 quarter-precision form. Note, as aarch64_float_const_representable
8832 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
8833static bool
8834aarch64_vect_float_const_representable_p (rtx x)
8835{
8836 int i = 0;
8837 REAL_VALUE_TYPE r0, ri;
8838 rtx x0, xi;
8839
8840 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
8841 return false;
8842
8843 x0 = CONST_VECTOR_ELT (x, 0);
8844 if (!CONST_DOUBLE_P (x0))
8845 return false;
8846
8847 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
8848
8849 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
8850 {
8851 xi = CONST_VECTOR_ELT (x, i);
8852 if (!CONST_DOUBLE_P (xi))
8853 return false;
8854
8855 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
8856 if (!REAL_VALUES_EQUAL (r0, ri))
8857 return false;
8858 }
8859
8860 return aarch64_float_const_representable_p (x0);
8861}
8862
d8edd899 8863/* Return true for valid and false for invalid. */
3ea63f60 8864bool
ef4bddc2 8865aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
48063b9d 8866 struct simd_immediate_info *info)
8867{
8868#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
8869 matches = 1; \
8870 for (i = 0; i < idx; i += (STRIDE)) \
8871 if (!(TEST)) \
8872 matches = 0; \
8873 if (matches) \
8874 { \
8875 immtype = (CLASS); \
8876 elsize = (ELSIZE); \
8877 eshift = (SHIFT); \
8878 emvn = (NEG); \
8879 break; \
8880 }
8881
8882 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8883 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8884 unsigned char bytes[16];
8885 int immtype = -1, matches;
8886 unsigned int invmask = inverse ? 0xff : 0;
8887 int eshift, emvn;
8888
43e9d192 8889 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 8890 {
8891 if (! (aarch64_simd_imm_zero_p (op, mode)
8892 || aarch64_vect_float_const_representable_p (op)))
d8edd899 8893 return false;
3520f7cc 8894
8895 if (info)
8896 {
8897 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 8898 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
8899 info->mvn = false;
8900 info->shift = 0;
8901 }
3520f7cc 8902
d8edd899 8903 return true;
3520f7cc 8904 }
8905
8906 /* Splat vector constant out into a byte vector. */
8907 for (i = 0; i < n_elts; i++)
8908 {
8909 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
8910 it must be laid out in the vector register in reverse order. */
8911 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
8912 unsigned HOST_WIDE_INT elpart;
8913 unsigned int part, parts;
8914
4aa81c2e 8915 if (CONST_INT_P (el))
8916 {
8917 elpart = INTVAL (el);
8918 parts = 1;
8919 }
8920 else if (GET_CODE (el) == CONST_DOUBLE)
8921 {
8922 elpart = CONST_DOUBLE_LOW (el);
8923 parts = 2;
8924 }
8925 else
8926 gcc_unreachable ();
8927
8928 for (part = 0; part < parts; part++)
8929 {
8930 unsigned int byte;
8931 for (byte = 0; byte < innersize; byte++)
8932 {
8933 bytes[idx++] = (elpart & 0xff) ^ invmask;
8934 elpart >>= BITS_PER_UNIT;
8935 }
8936 if (GET_CODE (el) == CONST_DOUBLE)
8937 elpart = CONST_DOUBLE_HIGH (el);
8938 }
8939 }
8940
8941 /* Sanity check. */
8942 gcc_assert (idx == GET_MODE_SIZE (mode));
8943
8944 do
8945 {
8946 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8947 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
8948
8949 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8950 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
8951
8952 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8953 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
8954
8955 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8956 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
8957
8958 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
8959
8960 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
8961
8962 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8963 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
8964
8965 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8966 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
8967
8968 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8969 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
8970
8971 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8972 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
8973
8974 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
8975
8976 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
8977
8978 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 8979 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
8980
8981 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 8982 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
8983
8984 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 8985 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
8986
8987 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 8988 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
8989
8990 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
8991
8992 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8993 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
8994 }
8995 while (0);
8996
e4f0f84d 8997 if (immtype == -1)
d8edd899 8998 return false;
43e9d192 8999
48063b9d 9000 if (info)
43e9d192 9001 {
48063b9d 9002 info->element_width = elsize;
9003 info->mvn = emvn != 0;
9004 info->shift = eshift;
9005
9006 unsigned HOST_WIDE_INT imm = 0;
9007
9008 if (immtype >= 12 && immtype <= 15)
9009 info->msl = true;
9010
9011 /* Un-invert bytes of recognized vector, if necessary. */
9012 if (invmask != 0)
9013 for (i = 0; i < idx; i++)
9014 bytes[i] ^= invmask;
9015
9016 if (immtype == 17)
9017 {
9018 /* FIXME: Broken on 32-bit H_W_I hosts. */
9019 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9020
9021 for (i = 0; i < 8; i++)
9022 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9023 << (i * BITS_PER_UNIT);
9024
43e9d192 9025
9026 info->value = GEN_INT (imm);
9027 }
9028 else
9029 {
9030 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9031 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9032
9033 /* Construct 'abcdefgh' because the assembler cannot handle
9034 generic constants. */
9035 if (info->mvn)
43e9d192 9036 imm = ~imm;
9037 imm = (imm >> info->shift) & 0xff;
9038 info->value = GEN_INT (imm);
9039 }
9040 }
9041
48063b9d 9042 return true;
9043#undef CHECK
9044}
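/* Worked example of the matching above (illustrative): for a V4SImode
   constant whose every element is 0x0000ab00, the splatted byte vector is
   00 ab 00 00 repeated four times, which satisfies CHECK (4, 32, 1, ...)
   with ELSIZE 32 and SHIFT 8.  The reported immediate is then
   (0x0000ab00 >> 8) & 0xff = 0xab, i.e. a MOVI of #0xab with a left shift
   of 8.  */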
9045
9046/* Check whether immediate shift constants are within range. */
9047bool
ef4bddc2 9048aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
9049{
9050 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
9051 if (left)
ddeabd3e 9052 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 9053 else
ddeabd3e 9054 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
9055}
9056
9057/* Return true if X is a uniform vector where all elements
9058 are either the floating-point constant 0.0 or the
9059 integer constant 0. */
43e9d192 9060bool
ef4bddc2 9061aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
43e9d192 9062{
3520f7cc 9063 return x == CONST0_RTX (mode);
9064}
9065
9066bool
ef4bddc2 9067aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
9068{
9069 HOST_WIDE_INT imm = INTVAL (x);
9070 int i;
9071
9072 for (i = 0; i < 8; i++)
9073 {
9074 unsigned int byte = imm & 0xff;
9075 if (byte != 0xff && byte != 0)
9076 return false;
9077 imm >>= 8;
9078 }
9079
9080 return true;
9081}
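/* For example (illustrative): 0xff00ff00ff00ff00 is accepted, since every
   byte is either 0x00 or 0xff, whereas 0x1200000000000000 is rejected
   because of the 0x12 byte.  */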
9082
9083bool
9084aarch64_mov_operand_p (rtx x,
a5350ddc 9085 enum aarch64_symbol_context context,
ef4bddc2 9086 machine_mode mode)
83f8c414 9087{
9088 if (GET_CODE (x) == HIGH
9089 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
9090 return true;
9091
82614948 9092 if (CONST_INT_P (x))
9093 return true;
9094
9095 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
9096 return true;
9097
9098 return aarch64_classify_symbolic_expression (x, context)
9099 == SYMBOL_TINY_ABSOLUTE;
9100}
9101
9102/* Return a const_int vector of VAL. */
9103rtx
ef4bddc2 9104aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
9105{
9106 int nunits = GET_MODE_NUNITS (mode);
9107 rtvec v = rtvec_alloc (nunits);
9108 int i;
9109
9110 for (i=0; i < nunits; i++)
9111 RTVEC_ELT (v, i) = GEN_INT (val);
9112
9113 return gen_rtx_CONST_VECTOR (mode, v);
9114}
9115
9116/* Check OP is a legal scalar immediate for the MOVI instruction. */
9117
9118bool
ef4bddc2 9119aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
051d0e2f 9120{
ef4bddc2 9121 machine_mode vmode;
9122
9123 gcc_assert (!VECTOR_MODE_P (mode));
9124 vmode = aarch64_preferred_simd_mode (mode);
9125 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 9126 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
9127}
9128
9129/* Construct and return a PARALLEL RTX vector with elements numbering the
9130 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
9131 the vector - from the perspective of the architecture. This does not
9132 line up with GCC's perspective on lane numbers, so we end up with
9133 different masks depending on our target endian-ness. The diagram
9134 below may help. We must draw the distinction when building masks
9135 which select one half of the vector. An instruction selecting
9136 architectural low-lanes for a big-endian target, must be described using
9137 a mask selecting GCC high-lanes.
9138
9139 Big-Endian Little-Endian
9140
9141GCC 0 1 2 3 3 2 1 0
9142 | x | x | x | x | | x | x | x | x |
9143Architecture 3 2 1 0 3 2 1 0
9144
9145Low Mask: { 2, 3 } { 0, 1 }
9146High Mask: { 0, 1 } { 2, 3 }
9147*/
9148
43e9d192 9149rtx
ef4bddc2 9150aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
9151{
9152 int nunits = GET_MODE_NUNITS (mode);
9153 rtvec v = rtvec_alloc (nunits / 2);
9154 int high_base = nunits / 2;
9155 int low_base = 0;
9156 int base;
9157 rtx t1;
9158 int i;
9159
9160 if (BYTES_BIG_ENDIAN)
9161 base = high ? low_base : high_base;
9162 else
9163 base = high ? high_base : low_base;
9164
9165 for (i = 0; i < nunits / 2; i++)
9166 RTVEC_ELT (v, i) = GEN_INT (base + i);
9167
9168 t1 = gen_rtx_PARALLEL (mode, v);
9169 return t1;
9170}
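/* For example (following the diagram above): V4SImode with HIGH == true
   yields (parallel [2 3]) on little-endian but (parallel [0 1]) on
   big-endian; both describe the architectural high half of the vector.  */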
9171
9172/* Check OP for validity as a PARALLEL RTX vector with elements
9173 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
9174 from the perspective of the architecture. See the diagram above
9175 aarch64_simd_vect_par_cnst_half for more details. */
9176
9177bool
ef4bddc2 9178aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
9179 bool high)
9180{
9181 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
9182 HOST_WIDE_INT count_op = XVECLEN (op, 0);
9183 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
9184 int i = 0;
9185
9186 if (!VECTOR_MODE_P (mode))
9187 return false;
9188
9189 if (count_op != count_ideal)
9190 return false;
9191
9192 for (i = 0; i < count_ideal; i++)
9193 {
9194 rtx elt_op = XVECEXP (op, 0, i);
9195 rtx elt_ideal = XVECEXP (ideal, 0, i);
9196
4aa81c2e 9197 if (!CONST_INT_P (elt_op)
9198 || INTVAL (elt_ideal) != INTVAL (elt_op))
9199 return false;
9200 }
9201 return true;
9202}
9203
9204/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
9205 HIGH (exclusive). */
9206void
9207aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9208 const_tree exp)
9209{
9210 HOST_WIDE_INT lane;
4aa81c2e 9211 gcc_assert (CONST_INT_P (operand));
9212 lane = INTVAL (operand);
9213
9214 if (lane < low || lane >= high)
9215 {
9216 if (exp)
cf0c27ef 9217 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 9218 else
cf0c27ef 9219 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 9220 }
9221}
9222
9223/* Return TRUE if OP is a valid vector addressing mode. */
9224bool
9225aarch64_simd_mem_operand_p (rtx op)
9226{
9227 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 9228 || REG_P (XEXP (op, 0)));
9229}
9230
9231/* Emit a register copy from operand to operand, taking care not to
9232 early-clobber source registers in the process.
43e9d192 9233
9234 COUNT is the number of components into which the copy needs to be
9235 decomposed. */
43e9d192 9236void
9237aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
9238 unsigned int count)
9239{
9240 unsigned int i;
9241 int rdest = REGNO (operands[0]);
9242 int rsrc = REGNO (operands[1]);
9243
9244 if (!reg_overlap_mentioned_p (operands[0], operands[1])
9245 || rdest < rsrc)
9246 for (i = 0; i < count; i++)
9247 emit_move_insn (gen_rtx_REG (mode, rdest + i),
9248 gen_rtx_REG (mode, rsrc + i));
43e9d192 9249 else
9250 for (i = 0; i < count; i++)
9251 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
9252 gen_rtx_REG (mode, rsrc + count - i - 1));
9253}
9254
9255/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
9256 one of VSTRUCT modes: OI, CI or XI. */
9257int
647d790d 9258aarch64_simd_attr_length_move (rtx_insn *insn)
43e9d192 9259{
ef4bddc2 9260 machine_mode mode;
9261
9262 extract_insn_cached (insn);
9263
9264 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
9265 {
9266 mode = GET_MODE (recog_data.operand[0]);
9267 switch (mode)
9268 {
9269 case OImode:
9270 return 8;
9271 case CImode:
9272 return 12;
9273 case XImode:
9274 return 16;
9275 default:
9276 gcc_unreachable ();
9277 }
9278 }
9279 return 4;
9280}
9281
9282/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
9283 one of VSTRUCT modes: OI, CI, EI, or XI. */
9284int
9285aarch64_simd_attr_length_rglist (enum machine_mode mode)
9286{
9287 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
9288}
9289
9290/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
9291 alignment of a vector to 128 bits. */
9292static HOST_WIDE_INT
9293aarch64_simd_vector_alignment (const_tree type)
9294{
9439e9a1 9295 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
9296 return MIN (align, 128);
9297}
9298
9299/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
9300static bool
9301aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
9302{
9303 if (is_packed)
9304 return false;
9305
9306 /* We guarantee alignment for vectors up to 128-bits. */
9307 if (tree_int_cst_compare (TYPE_SIZE (type),
9308 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
9309 return false;
9310
9311 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
9312 return true;
9313}
9314
9315/* If VALS is a vector constant that can be loaded into a register
9316 using DUP, generate instructions to do so and return an RTX to
9317 assign to the register. Otherwise return NULL_RTX. */
9318static rtx
9319aarch64_simd_dup_constant (rtx vals)
9320{
ef4bddc2
RS
9321 machine_mode mode = GET_MODE (vals);
9322 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e
TB
9323 int n_elts = GET_MODE_NUNITS (mode);
9324 bool all_same = true;
9325 rtx x;
9326 int i;
9327
9328 if (GET_CODE (vals) != CONST_VECTOR)
9329 return NULL_RTX;
9330
9331 for (i = 1; i < n_elts; ++i)
9332 {
9333 x = CONST_VECTOR_ELT (vals, i);
9334 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
9335 all_same = false;
9336 }
9337
9338 if (!all_same)
9339 return NULL_RTX;
9340
9341 /* We can load this constant by using DUP and a constant in a
9342 single ARM register. This will be cheaper than a vector
9343 load. */
9344 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
9345 return gen_rtx_VEC_DUPLICATE (mode, x);
9346}


/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */
static rtx
aarch64_simd_make_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  rtx const_dup;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL_RTX
      && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
    /* Load using MOVI/MVNI.  */
    return const_vec;
  else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
    /* Loaded using DUP.  */
    return const_dup;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  We can not take advantage of single-cycle
       LD1 because we need a PC-relative addressing mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}

void
aarch64_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  rtx any_const = NULL_RTX;
  bool all_same = true;

  for (int i = 0; i < n_elts; ++i)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
	++n_var;
      else
	any_const = x;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = aarch64_simd_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same)
    {
      rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
      return;
    }

  /* Half the fields (or fewer) are non-constant.  Load the constant part,
     then overwrite the varying fields.  Hope that this is more efficient
     than using the stack.  */
  if (n_var <= n_elts / 2)
    {
      rtx copy = copy_rtx (vals);

      /* Load constant part of vector.  We really don't care what goes into the
	 parts we will overwrite, but we're more likely to be able to load the
	 constant efficiently if it has fewer, larger, repeating parts
	 (see aarch64_simd_valid_immediate).  */
      for (int i = 0; i < n_elts; i++)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    continue;
	  rtx subst = any_const;
	  for (int bit = n_elts / 2; bit > 0; bit /= 2)
	    {
	      /* Look in the copied vector, as more elements are const.  */
	      rtx test = XVECEXP (copy, 0, i ^ bit);
	      if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
		{
		  subst = test;
		  break;
		}
	    }
	  XVECEXP (copy, 0, i) = subst;
	}
      aarch64_expand_vector_init (target, copy);

      /* Insert variables.  */
      enum insn_code icode = optab_handler (vec_set_optab, mode);
      gcc_assert (icode != CODE_FOR_nothing);

      for (int i = 0; i < n_elts; i++)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    continue;
	  x = copy_to_mode_reg (inner_mode, x);
	  emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
	}
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (int i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
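
/* A worked example of the i ^ bit search above (illustrative only): for a
   V8HI initializer {c, v, c, c, v, c, c, c} with variable lanes 1 and 4,
   lane 1 first probes lane 1 ^ 4 == 5 (constant, chosen), and lane 4
   probes 4 ^ 4 == 0 (constant, chosen).  Flipping single index bits visits
   lanes at decreasing distance, so a nearby constant is found in at most
   log2(nelt) probes and the interim constant vector stays as repetitive as
   possible, which aarch64_simd_valid_immediate rewards.  */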

static unsigned HOST_WIDE_INT
aarch64_shift_truncation_mask (machine_mode mode)
{
  return
    (aarch64_vector_mode_supported_p (mode)
     || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
}

#ifndef TLS_SECTION_ASM_FLAG
#define TLS_SECTION_ASM_FLAG 'T'
#endif

void
aarch64_elf_asm_named_section (const char *name, unsigned int flags,
			       tree decl ATTRIBUTE_UNUSED)
{
  char flagchars[10], *f = flagchars;

  /* If we have already declared this section, we can use an
     abbreviated form to switch back to it -- unless this section is
     part of a COMDAT group, in which case GAS requires the full
     declaration every time.  */
  if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
      && (flags & SECTION_DECLARED))
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  if (!(flags & SECTION_DEBUG))
    *f++ = 'a';
  if (flags & SECTION_WRITE)
    *f++ = 'w';
  if (flags & SECTION_CODE)
    *f++ = 'x';
  if (flags & SECTION_SMALL)
    *f++ = 's';
  if (flags & SECTION_MERGE)
    *f++ = 'M';
  if (flags & SECTION_STRINGS)
    *f++ = 'S';
  if (flags & SECTION_TLS)
    *f++ = TLS_SECTION_ASM_FLAG;
  if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
    *f++ = 'G';
  *f = '\0';

  fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);

  if (!(flags & SECTION_NOTYPE))
    {
      const char *type;
      const char *format;

      if (flags & SECTION_BSS)
	type = "nobits";
      else
	type = "progbits";

#ifdef TYPE_OPERAND_FMT
      format = "," TYPE_OPERAND_FMT;
#else
      format = ",@%s";
#endif

      fprintf (asm_out_file, format, type);

      if (flags & SECTION_ENTSIZE)
	fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	{
	  if (TREE_CODE (decl) == IDENTIFIER_NODE)
	    fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
	  else
	    fprintf (asm_out_file, ",%s,comdat",
		     IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
	}
    }

  putc ('\n', asm_out_file);
}
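
/* For illustration: a COMDAT text section (SECTION_CODE | SECTION_LINKONCE,
   not a debug section) collects the flag characters "axG", so the routine
   above would print something like

	.section	.text._Z3foov,"axG",@progbits,_Z3foov,comdat

   where ".text._Z3foov" is a hypothetical section name and the trailing
   group name comes from DECL_COMDAT_GROUP.  */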

/* Select a format to encode pointers in exception handling data.  */
int
aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
{
  int type;
  switch (aarch64_cmodel)
    {
    case AARCH64_CMODEL_TINY:
    case AARCH64_CMODEL_TINY_PIC:
    case AARCH64_CMODEL_SMALL:
    case AARCH64_CMODEL_SMALL_PIC:
    case AARCH64_CMODEL_SMALL_SPIC:
      /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
	 for everything.  */
      type = DW_EH_PE_sdata4;
      break;
    default:
      /* No assumptions here.  8-byte relocs required.  */
      type = DW_EH_PE_sdata8;
      break;
    }
  return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}

/* Emit load exclusive.  */

static void
aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
			     rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_load_exclusiveqi; break;
    case HImode: gen = gen_aarch64_load_exclusivehi; break;
    case SImode: gen = gen_aarch64_load_exclusivesi; break;
    case DImode: gen = gen_aarch64_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, model_rtx));
}

/* Emit store exclusive.  */

static void
aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
			      rtx rval, rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_store_exclusiveqi; break;
    case HImode: gen = gen_aarch64_store_exclusivehi; break;
    case SImode: gen = gen_aarch64_store_exclusivesi; break;
    case DImode: gen = gen_aarch64_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem, model_rtx));
}

/* Mark the previous jump instruction as unlikely.  */

static void
aarch64_emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}

/* Expand a compare and swap pattern.  */

void
aarch64_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode, cmp_mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);
  cmp_mode = mode;

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For short modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      cmp_mode = SImode;
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* Fall through.  */

    case SImode:
    case DImode:
      /* Force the value into a register if needed.  */
      if (!aarch64_plus_operand (oldval, mode))
	oldval = force_reg (cmp_mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (bval, x));
}

/* Emit a barrier that is appropriate for memory model MODEL, at the end of a
   sequence implementing an atomic operation.  */

static void
aarch64_emit_post_barrier (enum memmodel model)
{
  const enum memmodel base_model = memmodel_base (model);

  if (is_mm_sync (model)
      && (base_model == MEMMODEL_ACQUIRE
	  || base_model == MEMMODEL_ACQ_REL
	  || base_model == MEMMODEL_SEQ_CST))
    {
      emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
    }
}

/* Split a compare and swap pattern.  */

void
aarch64_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  machine_mode mode;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;
  enum memmodel model;
  rtx model_rtx;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  model_rtx = operands[5];
  scratch = operands[7];
  mode = GET_MODE (mem);
  model = memmodel_from_int (INTVAL (model_rtx));

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  /* The initial load can be relaxed for a __sync operation since a final
     barrier will be emitted to stop code hoisting.  */
  if (is_mm_sync (model))
    aarch64_emit_load_exclusive (mode, rval, mem,
				 GEN_INT (MEMMODEL_RELAXED));
  else
    aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);

  cond = aarch64_gen_compare_reg (NE, rval, oldval);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));

  aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }

  emit_label (label2);

  /* Emit any final barrier needed for a __sync operation.  */
  if (is_mm_sync (model))
    aarch64_emit_post_barrier (model);
}
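
/* For reference, a strong SImode compare-and-swap split by the routine
   above expands to a load-exclusive/store-exclusive loop of roughly this
   shape (illustrative assembly; register choices are arbitrary and the
   exact acquire/release variants depend on the memory model):

   .L1:	ldaxr	w0, [x2]	// load-exclusive RVAL
	cmp	w0, w3		// compare against OLDVAL
	b.ne	.L2		// unlikely exit on mismatch
	stlxr	w4, w5, [x2]	// store-exclusive NEWVAL; w4 = SCRATCH
	cbnz	w4, .L1		// retry if the exclusive store failed
   .L2:
*/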

/* Split an atomic operation.  */

void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
			 rtx value, rtx model_rtx, rtx cond)
{
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  const bool is_sync = is_mm_sync (model);
  rtx_code_label *label;
  rtx x;

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  /* The initial load can be relaxed for a __sync operation since a final
     barrier will be emitted to stop code hoisting.  */
  if (is_sync)
    aarch64_emit_load_exclusive (mode, old_out, mem,
				 GEN_INT (MEMMODEL_RELAXED));
  else
    aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* Fall through.  */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  aarch64_emit_store_exclusive (mode, cond, mem,
				gen_lowpart (mode, new_out), model_rtx);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));

  /* Emit any final barrier needed for a __sync operation.  */
  if (is_sync)
    aarch64_emit_post_barrier (model);
}
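
/* Similarly for reference, an SImode atomic fetch-and-add split by the
   routine above becomes a loop of roughly this shape (illustrative
   assembly only):

   .L1:	ldxr	w0, [x2]	// OLD_OUT
	add	w1, w0, w3	// NEW_OUT = OLD_OUT + VALUE
	stxr	w4, w1, [x2]	// COND = exclusive-store status
	cbnz	w4, .L1		// retry on contention
*/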

static void
aarch64_print_extension (void)
{
  const struct aarch64_option_extension *opt = NULL;

  for (opt = all_extensions; opt->name != NULL; opt++)
    if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
      asm_fprintf (asm_out_file, "+%s", opt->name);

  asm_fprintf (asm_out_file, "\n");
}

static void
aarch64_start_file (void)
{
  if (selected_arch)
    {
      asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
      aarch64_print_extension ();
    }
  else if (selected_cpu)
    {
      const char *truncated_name
	= aarch64_rewrite_selected_cpu (selected_cpu->name);
      asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
      aarch64_print_extension ();
    }
  default_file_start ();
}

/* Target hook for c_mode_for_suffix.  */
static machine_mode
aarch64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;

  return VOIDmode;
}

/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
   by:

   (-1)^s * (n/16) * 2^r

   Where:
     's' is the sign bit.
     'n' is an integer in the range 16 <= n <= 31.
     'r' is an integer in the range -3 <= r <= 4.  */

/* Return true iff X can be represented by a quarter-precision
   floating point immediate operand X.  Note, we cannot represent 0.0.  */
bool
aarch64_float_const_representable_p (rtx x)
{
  /* This represents our current view of how many bits
     make up the mantissa.  */
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  int exponent;
  unsigned HOST_WIDE_INT mantissa, mask;
  REAL_VALUE_TYPE r, m;
  bool fail;

  if (!CONST_DOUBLE_P (x))
    return false;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We cannot represent infinities, NaNs or +/-zero.  We won't
     know if we have +zero until we analyse the mantissa, but we
     can reject the other invalid values.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
      || REAL_VALUE_MINUS_ZERO (r))
    return false;

  /* Extract exponent.  */
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     m1 holds the low part of the mantissa, m2 the high part.
     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
     bits for the mantissa, this can fail (low bits will be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);

  /* If the low part of the mantissa has bits set we cannot represent
     the value.  */
  if (w.elt (0) != 0)
    return false;
  /* We have rejected the lower HOST_WIDE_INT, so update our
     understanding of how many bits lie in the mantissa and
     look only at the high HOST_WIDE_INT.  */
  mantissa = w.elt (1);
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can only represent values with a mantissa of the form 1.xxxx.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return false;

  /* Having filtered unrepresentable values, we may now remove all
     but the highest 5 bits.  */
  mantissa >>= point_pos - 5;

  /* We cannot represent the value 0.0, so reject it.  This is handled
     elsewhere.  */
  if (mantissa == 0)
    return false;

  /* Then, as bit 4 is always set, we can mask it off, leaving
     the mantissa in the range [0, 15].  */
  mantissa &= ~(1 << 4);
  gcc_assert (mantissa <= 15);

  /* GCC internally does not use IEEE754-like encoding (where normalized
     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
     Our mantissa values are shifted 4 places to the left relative to
     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
     by 5 places to correct for GCC's representation.  */
  exponent = 5 - exponent;

  return (exponent >= 0 && exponent <= 7);
}
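
/* A standalone, host-side sketch of the same predicate (illustrative only;
   not part of the compiler, and the function name is made up).  It simply
   enumerates the positive values of the (-1)^s * (n/16) * 2^r form.  For
   example, 0.25 = (16/16) * 2^-2 is representable, while 0.1 has no such
   form and is not.  */
#if 0	/* Example only; requires <math.h> on the host.  */
static int
quarter_precision_representable_p (double d)
{
  if (d < 0.0)
    d = -d;
  if (d == 0.0)
    return 0;			/* 0.0 is rejected, as above.  */
  for (int r = -3; r <= 4; r++)
    for (int n = 16; n <= 31; n++)
      if (d == ldexp (n / 16.0, r))	/* (n/16) * 2^r  */
	return 1;
  return 0;
}
#endif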

char*
aarch64_output_simd_mov_immediate (rtx const_vector,
				   machine_mode mode,
				   unsigned width)
{
  bool is_valid;
  static char templ[40];
  const char *mnemonic;
  const char *shift_op;
  unsigned int lane_count = 0;
  char element_char;

  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };

  /* This will return true to show const_vector is legal for use as either
     an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
  gcc_assert (is_valid);

  element_char = sizetochar (info.element_width);
  lane_count = width / info.element_width;

  mode = GET_MODE_INNER (mode);
  if (mode == SFmode || mode == DFmode)
    {
      gcc_assert (info.shift == 0 && ! info.mvn);
      if (aarch64_float_const_zero_rtx_p (info.value))
	info.value = GEN_INT (0);
      else
	{
#define buf_size 20
	  REAL_VALUE_TYPE r;
	  REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
	  char float_buf[buf_size] = {'\0'};
	  real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
#undef buf_size

	  if (lane_count == 1)
	    snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
	  else
	    snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
		      lane_count, element_char, float_buf);
	  return templ;
	}
    }

  mnemonic = info.mvn ? "mvni" : "movi";
  shift_op = info.msl ? "msl" : "lsl";

  if (lane_count == 1)
    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, UINTVAL (info.value));
  else if (info.shift)
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
	      ", %s %d", mnemonic, lane_count, element_char,
	      UINTVAL (info.value), shift_op, info.shift);
  else
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, lane_count, element_char, UINTVAL (info.value));
  return templ;
}

char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate,
					  machine_mode mode)
{
  machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_simd_container_mode (mode, 64);
  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
}

/* Split operands into moves from op[1] + op[2] into op[0].  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
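
/* The three XORs above are the classic in-place swap: starting from
   A = x and B = y, the sequence A ^= B; B ^= A; A ^= B leaves A = y and
   B = x, exchanging the two halves without needing a scratch vector
   register.  */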

/* vec_perm support.  */

#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};

/* Generate a variable permutation.  */

static void
aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_SIMD);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	{
	  /* Expand the argument to a V16QI mode by duplicating it.  */
	  rtx pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
	}
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
	}
    }
}

void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx mask;

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = aarch64_simd_gen_const_vector_dup (vmode,
					    one_vector_p ? nelt - 1 : 2 * nelt - 1);
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  /* For big-endian, we also need to reverse the index within the vector
     (but not which vector).  */
  if (BYTES_BIG_ENDIAN)
    {
      /* If one_vector_p, mask is a vector of (nelt - 1)'s already.  */
      if (!one_vector_p)
	mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
      sel = expand_simple_binop (vmode, XOR, sel, mask,
				 NULL, 0, OPTAB_LIB_WIDEN);
    }
  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
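
/* A worked example of the masking above (illustrative): for a two-operand
   V16QImode permutation, nelt is 16, so each selector byte is ANDed with
   2 * 16 - 1 = 31; an out-of-range index such as 40 becomes 40 & 31 = 8,
   i.e. element 8 of the pair of input vectors.  This supplies the modulo
   behaviour that TBL itself does not provide -- TBL writes zero for
   out-of-range indices.  */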

/* Recognize patterns suitable for the TRN instructions.  */
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn2v16qi; break;
	case V8QImode: gen = gen_aarch64_trn2v8qi; break;
	case V8HImode: gen = gen_aarch64_trn2v8hi; break;
	case V4HImode: gen = gen_aarch64_trn2v4hi; break;
	case V4SImode: gen = gen_aarch64_trn2v4si; break;
	case V2SImode: gen = gen_aarch64_trn2v2si; break;
	case V2DImode: gen = gen_aarch64_trn2v2di; break;
	case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn1v16qi; break;
	case V8QImode: gen = gen_aarch64_trn1v8qi; break;
	case V8HImode: gen = gen_aarch64_trn1v8hi; break;
	case V4HImode: gen = gen_aarch64_trn1v4hi; break;
	case V4SImode: gen = gen_aarch64_trn1v4si; break;
	case V2SImode: gen = gen_aarch64_trn1v2si; break;
	case V2DImode: gen = gen_aarch64_trn1v2di; break;
	case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
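
/* A worked example (illustrative): for V4SImode with two distinct operands
   {a0,a1,a2,a3} and {b0,b1,b2,b3}, the test above accepts the selector
   {0,4,2,6} (odd == 0, TRN1 -> {a0,b0,a2,b2}) and
   {1,5,3,7} (odd == 1, TRN2 -> {a1,b1,a3,b3}).  */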

/* Recognize patterns suitable for the UZP instructions.  */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp2v4si; break;
	case V2SImode: gen = gen_aarch64_uzp2v2si; break;
	case V2DImode: gen = gen_aarch64_uzp2v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp1v4si; break;
	case V2SImode: gen = gen_aarch64_uzp1v2si; break;
	case V2DImode: gen = gen_aarch64_uzp1v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
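
/* A worked example (illustrative): for V4SImode with two distinct operands,
   the test above accepts {0,2,4,6} (odd == 0, UZP1, the even-numbered
   elements {a0,a2,b0,b2}) and {1,3,5,7} (odd == 1, UZP2, the odd-numbered
   elements {a1,a3,b1,b3}).  */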

/* Recognize patterns suitable for the ZIP instructions.  */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    /* Do Nothing.  */
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip2v16qi; break;
	case V8QImode: gen = gen_aarch64_zip2v8qi; break;
	case V8HImode: gen = gen_aarch64_zip2v8hi; break;
	case V4HImode: gen = gen_aarch64_zip2v4hi; break;
	case V4SImode: gen = gen_aarch64_zip2v4si; break;
	case V2SImode: gen = gen_aarch64_zip2v2si; break;
	case V2DImode: gen = gen_aarch64_zip2v2di; break;
	case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip1v16qi; break;
	case V8QImode: gen = gen_aarch64_zip1v8qi; break;
	case V8HImode: gen = gen_aarch64_zip1v8hi; break;
	case V4HImode: gen = gen_aarch64_zip1v4hi; break;
	case V4SImode: gen = gen_aarch64_zip1v4si; break;
	case V2SImode: gen = gen_aarch64_zip1v2si; break;
	case V2DImode: gen = gen_aarch64_zip1v2di; break;
	case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
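
/* A worked example (illustrative): for V4SImode with two distinct operands,
   the test above accepts {0,4,1,5} (high == 0, ZIP1, interleaving the low
   halves into {a0,b0,a1,b1}) and {2,6,3,7} (high == nelt/2, ZIP2,
   interleaving the high halves into {a2,b2,a3,b3}).  */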

/* Recognize patterns for the EXT insn.  */

static bool
aarch64_evpc_ext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location = d->perm[0]; /* Always < nelt.  */

  /* Check if the extracted indices are increasing by one.  */
  for (i = 1; i < nelt; i++)
    {
      unsigned int required = location + i;
      if (d->one_vector_p)
	{
	  /* We'll pass the same vector in twice, so allow indices to wrap.  */
	  required &= (nelt - 1);
	}
      if (d->perm[i] != required)
	return false;
    }

  switch (d->vmode)
    {
    case V16QImode: gen = gen_aarch64_extv16qi; break;
    case V8QImode: gen = gen_aarch64_extv8qi; break;
    case V4HImode: gen = gen_aarch64_extv4hi; break;
    case V8HImode: gen = gen_aarch64_extv8hi; break;
    case V2SImode: gen = gen_aarch64_extv2si; break;
    case V4SImode: gen = gen_aarch64_extv4si; break;
    case V2SFmode: gen = gen_aarch64_extv2sf; break;
    case V4SFmode: gen = gen_aarch64_extv4sf; break;
    case V2DImode: gen = gen_aarch64_extv2di; break;
    case V2DFmode: gen = gen_aarch64_extv2df; break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  /* The case where (location == 0) is a no-op for both big- and little-endian,
     and is removed by the mid-end at optimization levels -O1 and higher.  */

  if (BYTES_BIG_ENDIAN && (location != 0))
    {
      /* After setup, we want the high elements of the first vector (stored
	 at the LSB end of the register), and the low elements of the second
	 vector (stored at the MSB end of the register).  So swap.  */
      std::swap (d->op0, d->op1);
      /* location != 0 (above), so safe to assume (nelt - location) < nelt.  */
      location = nelt - location;
    }

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
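
/* A worked example (illustrative): the V4SImode selector {1,2,3,4} passes
   the increasing-by-one test with location == 1, selecting {a1,a2,a3,b0}
   from the concatenation of the two operands -- exactly what EXT with an
   element offset of 1 extracts.  With a single operand, {3,0,1,2} wraps
   under the nelt - 1 mask and performs a rotate.  */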

/* Recognize patterns for the REV insns.  */

static bool
aarch64_evpc_rev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen) (rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_aarch64_rev64v16qi; break;
	case V8QImode: gen = gen_aarch64_rev64v8qi; break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_aarch64_rev32v16qi; break;
	case V8QImode: gen = gen_aarch64_rev32v8qi; break;
	case V8HImode: gen = gen_aarch64_rev64v8hi; break;
	case V4HImode: gen = gen_aarch64_rev64v4hi; break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_aarch64_rev16v16qi; break;
	case V8QImode: gen = gen_aarch64_rev16v8qi; break;
	case V8HImode: gen = gen_aarch64_rev32v8hi; break;
	case V4HImode: gen = gen_aarch64_rev32v4hi; break;
	case V4SImode: gen = gen_aarch64_rev64v4si; break;
	case V2SImode: gen = gen_aarch64_rev64v2si; break;
	case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
	case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
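
/* A worked example (illustrative): diff == d->perm[0] is the distance to
   the last element of each reversed group.  For V8HImode, the selector
   {3,2,1,0,7,6,5,4} gives diff == 3 and maps to REV64 (reverse the 16-bit
   lanes within each 64-bit chunk), while {1,0,3,2,5,4,7,6} gives diff == 1
   and maps to REV32.  */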

static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
  rtx (*gen) (rtx, rtx, rtx);
  rtx out = d->target;
  rtx in0;
  machine_mode vmode = d->vmode;
  unsigned int i, elt, nelt = d->nelt;
  rtx lane;

  elt = d->perm[0];
  for (i = 1; i < nelt; i++)
    {
      if (elt != d->perm[i])
	return false;
    }

  /* The generic preparation in aarch64_expand_vec_perm_const_1
     swaps the operand order and the permute indices if it finds
     d->perm[0] to be in the second operand.  Thus, we can always
     use d->op0 and need not do any extra arithmetic to get the
     correct lane number.  */
  in0 = d->op0;
  lane = GEN_INT (elt); /* The pattern corrects for big-endian.  */

  switch (vmode)
    {
    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
    default:
      return false;
    }

  emit_insn (gen (out, in0, lane));
  return true;
}

static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    {
      int nunits = GET_MODE_NUNITS (vmode);

      /* If big-endian and two vectors we end up with a weird mixed-endian
	 mode on NEON.  Reverse the index within each word but not the word
	 itself.  */
      rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
					   : d->perm[i]);
    }
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}

static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;

      gcc_assert (nelt == (nelt & -nelt));
      for (i = 0; i < nelt; ++i)
	d->perm[i] ^= nelt; /* Keep the same index, but in the other vector.  */

      std::swap (d->op0, d->op1);
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_rev (d))
	return true;
      else if (aarch64_evpc_ext (d))
	return true;
      else if (aarch64_evpc_dup (d))
	return true;
      else if (aarch64_evpc_zip (d))
	return true;
      else if (aarch64_evpc_uzp (d))
	return true;
      else if (aarch64_evpc_trn (d))
	return true;
      return aarch64_evpc_tbl (d);
    }
  return false;
}
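
/* A worked example of the swap above (illustrative): for V4SImode, the
   selector {4,0,6,2} begins in the second operand, so each index is XORed
   with nelt == 4 to give {0,4,2,6} and the operands are exchanged; the
   result is then recognized by aarch64_evpc_trn as TRN1 on the swapped
   operands.  */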

/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* Fall Through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}

static bool
aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
				     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}

rtx
aarch64_reverse_mask (enum machine_mode mode)
{
  /* We have to reverse each vector because we don't have
     a permuted load that can reverse-load according to ABI rules.  */
  rtx mask;
  rtvec v = rtvec_alloc (16);
  int i, j;
  int nunits = GET_MODE_NUNITS (mode);
  int usize = GET_MODE_UNIT_SIZE (mode);

  gcc_assert (BYTES_BIG_ENDIAN);
  gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));

  for (i = 0; i < nunits; i++)
    for (j = 0; j < usize; j++)
      RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
  mask = gen_rtx_CONST_VECTOR (V16QImode, v);
  return force_reg (V16QImode, mask);
}
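
/* A worked example (illustrative): for V4SImode (nunits == 4, usize == 4)
   the loop above builds the byte selector
   {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}, which byte-reverses each
   32-bit element when used as a TBL selector.  */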

/* Implement MODES_TIEABLE_P.  */

bool
aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_SIMD
      && aarch64_vector_mode_p (mode1)
      && aarch64_vector_mode_p (mode2))
    return true;

  return false;
}

/* Return a new RTX holding the result of moving POINTER forward by
   AMOUNT bytes.  */

static rtx
aarch64_move_pointer (rtx pointer, int amount)
{
  rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);

  return adjust_automodify_address (pointer, GET_MODE (pointer),
				    next, amount);
}

/* Return a new RTX holding the result of moving POINTER forward by the
   size of the mode it points to.  */

static rtx
aarch64_progress_pointer (rtx pointer)
{
  HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));

  return aarch64_move_pointer (pointer, amount);
}

/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
   MODE bytes.  */

static void
aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
					      machine_mode mode)
{
  rtx reg = gen_reg_rtx (mode);

  /* "Cast" the pointers to the correct mode.  */
  *src = adjust_address (*src, mode, 0);
  *dst = adjust_address (*dst, mode, 0);
  /* Emit the memcpy.  */
  emit_move_insn (reg, *src);
  emit_move_insn (*dst, reg);
  /* Move the pointers forward.  */
  *src = aarch64_progress_pointer (*src);
  *dst = aarch64_progress_pointer (*dst);
}

/* Expand movmem, as if from a __builtin_memcpy.  Return true if
   we succeed, otherwise return false.  */

bool
aarch64_expand_movmem (rtx *operands)
{
  unsigned int n;
  rtx dst = operands[0];
  rtx src = operands[1];
  rtx base;
  bool speed_p = !optimize_function_for_size_p (cfun);

  /* When optimizing for size, give a better estimate of the length of a
     memcpy call, but use the default otherwise.  */
  unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;

  /* We can't do anything smart if the amount to copy is not constant.  */
  if (!CONST_INT_P (operands[2]))
    return false;

  n = UINTVAL (operands[2]);

  /* Try to keep the number of instructions low.  For cases below 16 bytes we
     need to make at most two moves.  For cases above 16 bytes it will be one
     move for each 16 byte chunk, then at most two additional moves.  */
  if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
    return false;

  base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  base = copy_to_mode_reg (Pmode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  /* Simple cases.  Copy 0-3 bytes, as (if applicable) a 2-byte, then a
     1-byte chunk.  */
  if (n < 4)
    {
      if (n >= 2)
	{
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
	  n -= 2;
	}

      if (n == 1)
	aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);

      return true;
    }

  /* Copy 4-8 bytes.  First a 4-byte chunk, then (if applicable) a second
     4-byte chunk, partially overlapping with the previously copied chunk.  */
  if (n < 8)
    {
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
      n -= 4;
      if (n > 0)
	{
	  int move = n - 4;

	  src = aarch64_move_pointer (src, move);
	  dst = aarch64_move_pointer (dst, move);
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
	}
      return true;
    }

  /* Copy more than 8 bytes.  Copy chunks of 16 bytes until we run out of
     them, then (if applicable) an 8-byte chunk.  */
  while (n >= 8)
    {
      if (n / 16)
	{
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
	  n -= 16;
	}
      else
	{
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
	  n -= 8;
	}
    }

  /* Finish the final bytes of the copy.  We can always do this in one
     instruction.  We either copy the exact amount we need, or partially
     overlap with the previous chunk we copied and copy 8-bytes.  */
  if (n == 0)
    return true;
  else if (n == 1)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
  else if (n == 2)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
  else if (n == 4)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
  else
    {
      if (n == 3)
	{
	  src = aarch64_move_pointer (src, -1);
	  dst = aarch64_move_pointer (dst, -1);
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
	}
      else
	{
	  int move = n - 8;

	  src = aarch64_move_pointer (src, move);
	  dst = aarch64_move_pointer (dst, move);
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
	}
    }

  return true;
}
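
/* A worked example of the chunking above (illustrative): for n == 23, the
   expansion is one 16-byte TImode copy (leaving n == 7), after which the
   final else clause backs both pointers up by one byte
   (move == 7 - 8 == -1) and copies a DImode chunk that overlaps the byte
   already written -- 23 bytes in two load/store pairs, with no
   byte-by-byte tail loop.  */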

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
aarch64_asan_shadow_offset (void)
{
  return (HOST_WIDE_INT_1 << 36);
}
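
/* For reference: AddressSanitizer uses the standard 1-to-8 mapping
   Shadow = (Mem >> 3) + Offset, so with this hook the shadow byte for an
   application address Addr lives at (Addr >> 3) + (1 << 36); the 1 << 36
   base is chosen to fit AArch64's virtual address space layouts.  */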

static bool
aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
					unsigned int align,
					enum by_pieces_operation op,
					bool speed_p)
{
  /* STORE_BY_PIECES can be used when copying a constant string, but
     in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
     For now we always fail this and let the move_by_pieces code copy
     the string from read-only memory.  */
  if (op == STORE_BY_PIECES)
    return false;

  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
}

static enum machine_mode
aarch64_code_to_ccmode (enum rtx_code code)
{
  switch (code)
    {
    case NE:
      return CC_DNEmode;

    case EQ:
      return CC_DEQmode;

    case LE:
      return CC_DLEmode;

    case LT:
      return CC_DLTmode;

    case GE:
      return CC_DGEmode;

    case GT:
      return CC_DGTmode;

    case LEU:
      return CC_DLEUmode;

    case LTU:
      return CC_DLTUmode;

    case GEU:
      return CC_DGEUmode;

    case GTU:
      return CC_DGTUmode;

    default:
      return CCmode;
    }
}

static rtx
aarch64_gen_ccmp_first (rtx *prep_seq, rtx *gen_seq,
			int code, tree treeop0, tree treeop1)
{
  enum machine_mode op_mode, cmp_mode, cc_mode;
  rtx op0, op1, cmp, target;
  int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
  enum insn_code icode;
  struct expand_operand ops[4];

  cc_mode = aarch64_code_to_ccmode ((enum rtx_code) code);
  if (cc_mode == CCmode)
    return NULL_RTX;

  start_sequence ();
  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);

  op_mode = GET_MODE (op0);
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (op1);

  switch (op_mode)
    {
    case QImode:
    case HImode:
    case SImode:
      cmp_mode = SImode;
      icode = CODE_FOR_cmpsi;
      break;

    case DImode:
      cmp_mode = DImode;
      icode = CODE_FOR_cmpdi;
      break;

    default:
      end_sequence ();
      return NULL_RTX;
    }

  op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
  op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
  if (!op0 || !op1)
    {
      end_sequence ();
      return NULL_RTX;
    }
  *prep_seq = get_insns ();
  end_sequence ();

  cmp = gen_rtx_fmt_ee ((enum rtx_code) code, cmp_mode, op0, op1);
  target = gen_rtx_REG (CCmode, CC_REGNUM);

  create_output_operand (&ops[0], target, CCmode);
  create_fixed_operand (&ops[1], cmp);
  create_fixed_operand (&ops[2], op0);
  create_fixed_operand (&ops[3], op1);

  start_sequence ();
  if (!maybe_expand_insn (icode, 4, ops))
    {
      end_sequence ();
      return NULL_RTX;
    }
  *gen_seq = get_insns ();
  end_sequence ();

  return gen_rtx_REG (cc_mode, CC_REGNUM);
}

/* Implement TARGET_GEN_CCMP_NEXT.  Chain a further comparison onto the
   CC value PREV produced by an earlier aarch64_gen_ccmp_first or
   aarch64_gen_ccmp_next, combining the results with BIT_CODE (AND or
   IOR).  */

static rtx
aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code,
                       tree treeop0, tree treeop1, int bit_code)
{
  rtx op0, op1, cmp0, cmp1, target;
  enum machine_mode op_mode, cmp_mode, cc_mode;
  int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
  enum insn_code icode = CODE_FOR_ccmp_andsi;
  struct expand_operand ops[6];

  cc_mode = aarch64_code_to_ccmode ((enum rtx_code) cmp_code);
  if (cc_mode == CCmode)
    return NULL_RTX;

  push_to_sequence ((rtx_insn *) *prep_seq);
  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);

  op_mode = GET_MODE (op0);
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (op1);

  switch (op_mode)
    {
    case QImode:
    case HImode:
    case SImode:
      cmp_mode = SImode;
      icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_andsi
                                              : CODE_FOR_ccmp_iorsi;
      break;

    case DImode:
      cmp_mode = DImode;
      icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_anddi
                                              : CODE_FOR_ccmp_iordi;
      break;

    default:
      end_sequence ();
      return NULL_RTX;
    }

  op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
  op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
  if (!op0 || !op1)
    {
      end_sequence ();
      return NULL_RTX;
    }
  *prep_seq = get_insns ();
  end_sequence ();

  target = gen_rtx_REG (cc_mode, CC_REGNUM);
  cmp1 = gen_rtx_fmt_ee ((enum rtx_code) cmp_code, cmp_mode, op0, op1);
  cmp0 = gen_rtx_fmt_ee (NE, cmp_mode, prev, const0_rtx);

  create_fixed_operand (&ops[0], prev);
  create_fixed_operand (&ops[1], target);
  create_fixed_operand (&ops[2], op0);
  create_fixed_operand (&ops[3], op1);
  create_fixed_operand (&ops[4], cmp0);
  create_fixed_operand (&ops[5], cmp1);

  push_to_sequence ((rtx_insn *) *gen_seq);
  if (!maybe_expand_insn (icode, 6, ops))
    {
      end_sequence ();
      return NULL_RTX;
    }

  *gen_seq = get_insns ();
  end_sequence ();

  return target;
}

#undef TARGET_GEN_CCMP_FIRST
#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first

#undef TARGET_GEN_CCMP_NEXT
#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next

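/* As an illustration (register choices hypothetical), the two hooks
   above let a chained condition such as

     if (a == 0 && b > 17)

   be expanded without an intermediate branch:

     cmp   w0, #0
     ccmp  w1, #17, #4, eq

   The CCMP performs the second comparison only if the first one
   succeeded (EQ); otherwise it sets the flags to the immediate NZCV
   value #4 (Z set), which makes a subsequent GT test fail.  A single
   conditional branch can then test the combined result.  */
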
/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if the target
   supports instruction fusion of some sort.  */

static bool
aarch64_macro_fusion_p (void)
{
  return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
}


/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
   should be kept together during scheduling.  */

static bool
aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
{
  rtx set_dest;
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);
  /* prev and curr are simple SET insns, i.e. no flag setting or branching.  */
  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);

  if (!aarch64_macro_fusion_p ())
    return false;

  if (simple_sets_p
      && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_MOV_MOVK))
    {
      /* We are trying to match:
         prev (mov)  == (set (reg r0) (const_int imm16))
         curr (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 16))
                             (const_int imm16_1))  */

      set_dest = SET_DEST (curr_set);

      if (GET_CODE (set_dest) == ZERO_EXTRACT
          && CONST_INT_P (SET_SRC (curr_set))
          && CONST_INT_P (SET_SRC (prev_set))
          && CONST_INT_P (XEXP (set_dest, 2))
          && INTVAL (XEXP (set_dest, 2)) == 16
          && REG_P (XEXP (set_dest, 0))
          && REG_P (SET_DEST (prev_set))
          && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
        {
          return true;
        }
    }

  if (simple_sets_p
      && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_ADRP_ADD))
    {
      /* We're trying to match:
         prev (adrp) == (set (reg r1)
                             (high (symbol_ref ("SYM"))))
         curr (add)  == (set (reg r0)
                             (lo_sum (reg r1)
                                     (symbol_ref ("SYM"))))
         Note that r0 need not necessarily be the same as r1, especially
         during pre-regalloc scheduling.  */

      if (satisfies_constraint_Ush (SET_SRC (prev_set))
          && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
        {
          if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
              && REG_P (XEXP (SET_SRC (curr_set), 0))
              && REGNO (XEXP (SET_SRC (curr_set), 0))
                 == REGNO (SET_DEST (prev_set))
              && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
                              XEXP (SET_SRC (curr_set), 1)))
            return true;
        }
    }

  if (simple_sets_p
      && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_MOVK_MOVK))
    {
      /* We're trying to match:
         prev (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 32))
                             (const_int imm16_1))
         curr (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 48))
                             (const_int imm16_2))  */

      if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
          && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
          && REG_P (XEXP (SET_DEST (prev_set), 0))
          && REG_P (XEXP (SET_DEST (curr_set), 0))
          && REGNO (XEXP (SET_DEST (prev_set), 0))
             == REGNO (XEXP (SET_DEST (curr_set), 0))
          && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
          && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
          && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
          && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
          && CONST_INT_P (SET_SRC (prev_set))
          && CONST_INT_P (SET_SRC (curr_set)))
        return true;
    }

  if (simple_sets_p
      && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_ADRP_LDR))
    {
      /* We're trying to match:
         prev (adrp) == (set (reg r0)
                             (high (symbol_ref ("SYM"))))
         curr (ldr)  == (set (reg r1)
                             (mem (lo_sum (reg r0)
                                          (symbol_ref ("SYM")))))
         or
         curr (ldr)  == (set (reg r1)
                             (zero_extend (mem
                                           (lo_sum (reg r0)
                                                   (symbol_ref ("SYM"))))))  */
      if (satisfies_constraint_Ush (SET_SRC (prev_set))
          && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
        {
          rtx curr_src = SET_SRC (curr_set);

          if (GET_CODE (curr_src) == ZERO_EXTEND)
            curr_src = XEXP (curr_src, 0);

          if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
              && REG_P (XEXP (XEXP (curr_src, 0), 0))
              && REGNO (XEXP (XEXP (curr_src, 0), 0))
                 == REGNO (SET_DEST (prev_set))
              && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
                              XEXP (SET_SRC (prev_set), 0)))
            return true;
        }
    }

  if ((aarch64_tune_params.fusible_ops & AARCH64_FUSE_CMP_BRANCH)
      && any_condjump_p (curr))
    {
      enum attr_type prev_type = get_attr_type (prev);

      /* FIXME: this misses some instructions that ThunderX considers
         simple arithmetic, e.g. simple shifts.  */
      if (prev_type == TYPE_ALUS_SREG
          || prev_type == TYPE_ALUS_IMM
          || prev_type == TYPE_LOGICS_REG
          || prev_type == TYPE_LOGICS_IMM)
        return true;
    }

  return false;
}
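
/* As an illustration (registers chosen arbitrarily), typical pairs
   matched above are, for AARCH64_FUSE_MOV_MOVK:

     mov   x0, #0x5678
     movk  x0, #0x1234, lsl #16

   and for AARCH64_FUSE_ADRP_ADD:

     adrp  x1, sym
     add   x0, x1, :lo12:sym

   Keeping such pairs adjacent allows cores that implement macro-fusion
   to execute them as a single operation.  */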

/* If MEM is in the form of [base+offset], extract the two parts of the
   address and store them in BASE and OFFSET; otherwise clear BASE and
   OFFSET and return false.  */

bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr))
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}

/* Types for scheduling fusion.  */
enum sched_fusion_type
{
  SCHED_FUSION_NONE = 0,
  SCHED_FUSION_LD_SIGN_EXTEND,
  SCHED_FUSION_LD_ZERO_EXTEND,
  SCHED_FUSION_LD,
  SCHED_FUSION_ST,
  SCHED_FUSION_NUM
};

/* If INSN is a load or store whose address is in the form of
   [base+offset], extract the two parts into BASE and OFFSET.  Return
   the scheduling fusion type of INSN.  */

static enum sched_fusion_type
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
{
  rtx x, dest, src;
  enum sched_fusion_type fusion = SCHED_FUSION_LD;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return SCHED_FUSION_NONE;

  src = SET_SRC (x);
  dest = SET_DEST (x);

  if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
      && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
    return SCHED_FUSION_NONE;

  if (GET_CODE (src) == SIGN_EXTEND)
    {
      fusion = SCHED_FUSION_LD_SIGN_EXTEND;
      src = XEXP (src, 0);
      if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
        return SCHED_FUSION_NONE;
    }
  else if (GET_CODE (src) == ZERO_EXTEND)
    {
      fusion = SCHED_FUSION_LD_ZERO_EXTEND;
      src = XEXP (src, 0);
      if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
        return SCHED_FUSION_NONE;
    }

  if (GET_CODE (src) == MEM && REG_P (dest))
    extract_base_offset_in_addr (src, base, offset);
  else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
    {
      fusion = SCHED_FUSION_ST;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else
    return SCHED_FUSION_NONE;

  if (*base == NULL_RTX || *offset == NULL_RTX)
    fusion = SCHED_FUSION_NONE;

  return fusion;
}

/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so
   FUSION_PRI and PRI are only calculated for them.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI - 1.
   In the future, fusion of other instruction types can be added by
   returning different priorities.

   It's important that irrelevant instructions get the largest
   FUSION_PRI.  */

static void
aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
                               int *fusion_pri, int *pri)
{
  int tmp, off_val;
  rtx base, offset;
  enum sched_fusion_type fusion;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  fusion = fusion_load_store (insn, &base, &offset);
  if (fusion == SCHED_FUSION_NONE)
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Set FUSION_PRI according to fusion type and base register.  */
  *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);

  /* Calculate PRI.  */
  tmp /= 2;

  /* INSN with smaller offset goes first.  */
  off_val = (int) (INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
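
/* As an illustration (registers chosen arbitrarily), the two loads

     ldr  w0, [x2, #4]
     ldr  w1, [x2, #8]

   receive the same FUSION_PRI (same fusion type and base register) and
   PRI values that differ only by their offsets, so the scheduler tends
   to keep them adjacent and in offset order, ready to be merged into a
   single LDP.  */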

/* Given OPERANDS of consecutive load/store, check if we can merge
   them into ldp/stp.  LOAD is true if they are load instructions.
   MODE is the mode of memory operands.  */

bool
aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
                                enum machine_mode mode)
{
  HOST_WIDE_INT offval_1, offval_2, msize;
  enum reg_class rclass_1, rclass_2;
  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      reg_1 = operands[0];
      reg_2 = operands[2];
      gcc_assert (REG_P (reg_1) && REG_P (reg_2));
      if (REGNO (reg_1) == REGNO (reg_2))
        return false;
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      reg_1 = operands[1];
      reg_2 = operands[3];
    }

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  msize = GET_MODE_SIZE (mode);
  /* Check if the offsets are consecutive.  */
  if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
    return false;

  /* Check if the addresses are clobbered by the loads.  */
  if (load)
    {
      if (reg_mentioned_p (reg_1, mem_1))
        return false;

      /* In increasing order, the last load can clobber the address.  */
      if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
        return false;
    }

  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
    rclass_1 = FP_REGS;
  else
    rclass_1 = GENERAL_REGS;

  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
    rclass_2 = FP_REGS;
  else
    rclass_2 = GENERAL_REGS;

  /* Check if the registers are of the same class.  */
  if (rclass_1 != rclass_2)
    return false;

  return true;
}
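
/* As an illustration (registers chosen arbitrarily), the check above
   accepts the pair

     ldr  w0, [x2]
     ldr  w1, [x2, #4]

   which the peephole patterns in the machine description can then
   rewrite as

     ldp  w0, w1, [x2]  */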

/* Given OPERANDS of consecutive load/store, check if we can merge
   them into ldp/stp by adjusting the offset.  LOAD is true if they
   are load instructions.  MODE is the mode of memory operands.

   Given the consecutive stores below:

     str  w1, [xb, 0x100]
     str  w1, [xb, 0x104]
     str  w1, [xb, 0x108]
     str  w1, [xb, 0x10c]

   Though the offsets are out of the range supported by stp, we can
   still pair them after adjusting the offset, like:

     add  scratch, xb, 0x100
     stp  w1, w1, [scratch]
     stp  w1, w1, [scratch, 0x8]

   The peephole patterns detecting this opportunity should guarantee
   that the scratch register is available.  */

bool
aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
                                       enum machine_mode mode)
{
  enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
  HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
  rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
  rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;

  if (load)
    {
      reg_1 = operands[0];
      mem_1 = operands[1];
      reg_2 = operands[2];
      mem_2 = operands[3];
      reg_3 = operands[4];
      mem_3 = operands[5];
      reg_4 = operands[6];
      mem_4 = operands[7];
      gcc_assert (REG_P (reg_1) && REG_P (reg_2)
                  && REG_P (reg_3) && REG_P (reg_4));
      if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
        return false;
    }
  else
    {
      mem_1 = operands[0];
      reg_1 = operands[1];
      mem_2 = operands[2];
      reg_2 = operands[3];
      mem_3 = operands[4];
      reg_3 = operands[5];
      mem_4 = operands[6];
      reg_4 = operands[7];
    }
  /* Skip if the memory operand is by itself valid for ldp/stp.  */
  if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
    return false;

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
      || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
  if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
  if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2)
      || !rtx_equal_p (base_2, base_3)
      || !rtx_equal_p (base_3, base_4))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  offval_3 = INTVAL (offset_3);
  offval_4 = INTVAL (offset_4);
  msize = GET_MODE_SIZE (mode);
  /* Check that the offsets are consecutive, in either increasing or
     decreasing order.  */
  if ((offval_1 != (offval_2 + msize)
       || offval_1 != (offval_3 + msize * 2)
       || offval_1 != (offval_4 + msize * 3))
      && (offval_4 != (offval_3 + msize)
          || offval_4 != (offval_2 + msize * 2)
          || offval_4 != (offval_1 + msize * 3)))
    return false;

  /* Check if the addresses are clobbered by the loads.  */
  if (load)
    {
      if (reg_mentioned_p (reg_1, mem_1)
          || reg_mentioned_p (reg_2, mem_2)
          || reg_mentioned_p (reg_3, mem_3))
        return false;

      /* In increasing order, the last load can clobber the address.  */
      if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
        return false;
    }

  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
    rclass_1 = FP_REGS;
  else
    rclass_1 = GENERAL_REGS;

  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
    rclass_2 = FP_REGS;
  else
    rclass_2 = GENERAL_REGS;

  if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
    rclass_3 = FP_REGS;
  else
    rclass_3 = GENERAL_REGS;

  if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
    rclass_4 = FP_REGS;
  else
    rclass_4 = GENERAL_REGS;

  /* Check if the registers are of the same class.  */
  if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
    return false;

  return true;
}

/* Given OPERANDS of consecutive load/store, this function pairs them
   into ldp/stp after adjusting the offset.  It depends on the fact
   that the addresses of load/store instructions are in increasing order.
   MODE is the mode of memory operands.  CODE is the rtl operator
   which should be applied to all memory operands; it's SIGN_EXTEND,
   ZERO_EXTEND or UNKNOWN.  */

bool
aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
                             enum machine_mode mode, RTX_CODE code)
{
  rtx base, offset, t1, t2;
  rtx mem_1, mem_2, mem_3, mem_4;
  HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      mem_3 = operands[5];
      mem_4 = operands[7];
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      mem_3 = operands[4];
      mem_4 = operands[6];
      gcc_assert (code == UNKNOWN);
    }

  extract_base_offset_in_addr (mem_1, &base, &offset);
  gcc_assert (base != NULL_RTX && offset != NULL_RTX);

  /* Adjust the offset so that it can fit in an ldp/stp instruction.  */
  msize = GET_MODE_SIZE (mode);
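  /* Note: the LDP/STP immediate is a signed 7-bit value scaled by the
     mode size, so only offsets in [-64 * msize, 63 * msize] are
     directly encodable; msize * 0x40 below is that bound.  */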
  stp_off_limit = msize * 0x40;
  off_val = INTVAL (offset);
  abs_off = (off_val < 0) ? -off_val : off_val;
  new_off = abs_off % stp_off_limit;
  adj_off = abs_off - new_off;

  /* Further adjust to make sure all offsets are OK.  */
  if ((new_off + msize * 2) >= stp_off_limit)
    {
      adj_off += stp_off_limit;
      new_off -= stp_off_limit;
    }

  /* Make sure the adjustment can be done with ADD/SUB instructions.  */
  if (adj_off >= 0x1000)
    return false;

  if (off_val < 0)
    {
      adj_off = -adj_off;
      new_off = -new_off;
    }

  /* Create new memory references.  */
  mem_1 = change_address (mem_1, VOIDmode,
                          plus_constant (DImode, operands[8], new_off));

  /* Check if the adjusted address is OK for ldp/stp.  */
  if (!aarch64_mem_pair_operand (mem_1, mode))
    return false;

  msize = GET_MODE_SIZE (mode);
  mem_2 = change_address (mem_2, VOIDmode,
                          plus_constant (DImode,
                                         operands[8],
                                         new_off + msize));
  mem_3 = change_address (mem_3, VOIDmode,
                          plus_constant (DImode,
                                         operands[8],
                                         new_off + msize * 2));
  mem_4 = change_address (mem_4, VOIDmode,
                          plus_constant (DImode,
                                         operands[8],
                                         new_off + msize * 3));

  if (code == ZERO_EXTEND)
    {
      mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
    }
  else if (code == SIGN_EXTEND)
    {
      mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
    }

  if (load)
    {
      operands[1] = mem_1;
      operands[3] = mem_2;
      operands[5] = mem_3;
      operands[7] = mem_4;
    }
  else
    {
      operands[0] = mem_1;
      operands[2] = mem_2;
      operands[4] = mem_3;
      operands[6] = mem_4;
    }

  /* Emit the adjusting instruction.  */
  emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, adj_off)));
  /* Emit ldp/stp instructions.  */
  t1 = gen_rtx_SET (operands[0], operands[1]);
  t2 = gen_rtx_SET (operands[2], operands[3]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
  t1 = gen_rtx_SET (operands[4], operands[5]);
  t2 = gen_rtx_SET (operands[6], operands[7]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
  return true;
}

/* Implement TARGET_USE_PSEUDO_PIC_REG.  Return true if a pseudo register
   should be created and used to hold the GOT address for PIC code.  */

bool
aarch64_use_pseudo_pic_reg (void)
{
  return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
}

/* Implement TARGET_UNSPEC_MAY_TRAP_P.  */

static int
aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
{
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTSMALLPIC:
    case UNSPEC_GOTSMALLPIC28K:
    case UNSPEC_GOTTINYPIC:
      return 0;
    default:
      break;
    }

  return default_unspec_may_trap_p (x, flags);
}

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  aarch64_sched_first_cycle_multipass_dfa_lookahead

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  aarch64_use_by_pieces_infrastructure_p

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"