gcc/config/aarch64/aarch64.c
bdb7bf8a 1/* Machine description for AArch64 architecture.
5624e564 2 Copyright (C) 2009-2015 Free Software Foundation, Inc.
43e9d192
IB
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
c7131fb2 24#include "backend.h"
9fdcd34e 25#include "cfghooks.h"
c7131fb2
AM
26#include "tree.h"
27#include "gimple.h"
43e9d192 28#include "rtl.h"
c7131fb2
AM
29#include "df.h"
30#include "insn-codes.h"
43e9d192 31#include "insn-attr.h"
40e23961 32#include "alias.h"
40e23961 33#include "fold-const.h"
d8a2d370
DN
34#include "stringpool.h"
35#include "stor-layout.h"
36#include "calls.h"
37#include "varasm.h"
43e9d192 38#include "regs.h"
60393bbc
AM
39#include "cfgrtl.h"
40#include "cfganal.h"
41#include "lcm.h"
42#include "cfgbuild.h"
43#include "cfgcleanup.h"
43e9d192 44#include "output.h"
36566b39 45#include "flags.h"
36566b39
PK
46#include "insn-config.h"
47#include "expmed.h"
48#include "dojump.h"
49#include "explow.h"
50#include "emit-rtl.h"
51#include "stmt.h"
43e9d192
IB
52#include "expr.h"
53#include "reload.h"
54#include "toplev.h"
55#include "target.h"
43e9d192 56#include "targhooks.h"
43e9d192
IB
57#include "tm_p.h"
58#include "recog.h"
59#include "langhooks.h"
60#include "diagnostic-core.h"
2fb9a547
AM
61#include "internal-fn.h"
62#include "gimple-fold.h"
63#include "tree-eh.h"
45b0be94 64#include "gimplify.h"
43e9d192
IB
65#include "optabs.h"
66#include "dwarf2.h"
8990e73a
TB
67#include "cfgloop.h"
68#include "tree-vectorizer.h"
d1bcc29f 69#include "aarch64-cost-tables.h"
0ee859b5 70#include "dumpfile.h"
9b2b7279 71#include "builtins.h"
8baff86e 72#include "rtl-iter.h"
9bbe08fe 73#include "tm-constrs.h"
d03f7e44 74#include "sched-int.h"
fde9b31b 75#include "cortex-a57-fma-steering.h"
43e9d192 76
994c5d85 77/* This file should be included last. */
d58627a0
RS
78#include "target-def.h"
79
28514dda
YZ
80/* Defined for convenience. */
81#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
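/* For example, this is 8 for LP64 and 4 for ILP32, since POINTER_SIZE is
   64 or 32 bits respectively and BITS_PER_UNIT is 8.  */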
82
43e9d192
IB
83/* Classifies an address.
84
85 ADDRESS_REG_IMM
86 A simple base register plus immediate offset.
87
88 ADDRESS_REG_WB
89 A base register indexed by immediate offset with writeback.
90
91 ADDRESS_REG_REG
92 A base register indexed by (optionally scaled) register.
93
94 ADDRESS_REG_UXTW
95 A base register indexed by (optionally scaled) zero-extended register.
96
97 ADDRESS_REG_SXTW
98 A base register indexed by (optionally scaled) sign-extended register.
99
100 ADDRESS_LO_SUM
101 A LO_SUM rtx with a base register and "LO12" symbol relocation.
102
 103   ADDRESS_SYMBOLIC
 104      A constant symbolic address, in the pc-relative literal pool.  */
105
106enum aarch64_address_type {
107 ADDRESS_REG_IMM,
108 ADDRESS_REG_WB,
109 ADDRESS_REG_REG,
110 ADDRESS_REG_UXTW,
111 ADDRESS_REG_SXTW,
112 ADDRESS_LO_SUM,
113 ADDRESS_SYMBOLIC
114};
115
116struct aarch64_address_info {
117 enum aarch64_address_type type;
118 rtx base;
119 rtx offset;
120 int shift;
121 enum aarch64_symbol_type symbol_type;
122};
123
48063b9d
IB
124struct simd_immediate_info
125{
126 rtx value;
127 int shift;
128 int element_width;
48063b9d 129 bool mvn;
e4f0f84d 130 bool msl;
48063b9d
IB
131};
132
43e9d192
IB
133/* The current code model. */
134enum aarch64_code_model aarch64_cmodel;
135
136#ifdef HAVE_AS_TLS
137#undef TARGET_HAVE_TLS
138#define TARGET_HAVE_TLS 1
139#endif
140
ef4bddc2
RS
141static bool aarch64_composite_type_p (const_tree, machine_mode);
142static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
43e9d192 143 const_tree,
ef4bddc2 144 machine_mode *, int *,
43e9d192
IB
145 bool *);
146static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
147static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
43e9d192 148static void aarch64_override_options_after_change (void);
ef4bddc2 149static bool aarch64_vector_mode_supported_p (machine_mode);
43e9d192 150static unsigned bit_count (unsigned HOST_WIDE_INT);
ef4bddc2 151static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
88b08073 152 const unsigned char *sel);
ef4bddc2 153static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
88b08073 154
0c6caaf8
RL
155/* Major revision number of the ARM Architecture implemented by the target. */
156unsigned aarch64_architecture_version;
157
43e9d192 158/* The processor for which instructions should be scheduled. */
02fdbd5b 159enum aarch64_processor aarch64_tune = cortexa53;
43e9d192 160
43e9d192
IB
161/* Mask to specify which instructions we are allowed to generate. */
162unsigned long aarch64_isa_flags = 0;
163
164/* Mask to specify which instruction scheduling options should be used. */
165unsigned long aarch64_tune_flags = 0;
166
8dec06f2
JG
167/* Support for command line parsing of boolean flags in the tuning
168 structures. */
169struct aarch64_flag_desc
170{
171 const char* name;
172 unsigned int flag;
173};
174
175#define AARCH64_FUSION_PAIR(name, internal_name, y) \
176 { name, AARCH64_FUSE_##internal_name },
177static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
178{
179 { "none", AARCH64_FUSE_NOTHING },
180#include "aarch64-fusion-pairs.def"
181 { "all", AARCH64_FUSE_ALL },
182 { NULL, AARCH64_FUSE_NOTHING }
183};
 184#undef AARCH64_FUSION_PAIR
185
186#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name, y) \
187 { name, AARCH64_EXTRA_TUNE_##internal_name },
188static const struct aarch64_flag_desc aarch64_tuning_flags[] =
189{
190 { "none", AARCH64_EXTRA_TUNE_NONE },
191#include "aarch64-tuning-flags.def"
192 { "all", AARCH64_EXTRA_TUNE_ALL },
193 { NULL, AARCH64_EXTRA_TUNE_NONE }
194};
195#undef AARCH64_EXTRA_TUNING_OPTION
196
43e9d192
IB
197/* Tuning parameters. */
198
43e9d192
IB
199static const struct cpu_addrcost_table generic_addrcost_table =
200{
67747367 201 {
bd95e655
JG
202 0, /* hi */
203 0, /* si */
204 0, /* di */
205 0, /* ti */
67747367 206 },
bd95e655
JG
207 0, /* pre_modify */
208 0, /* post_modify */
209 0, /* register_offset */
210 0, /* register_extend */
211 0 /* imm_offset */
43e9d192
IB
212};
213
60bff090
JG
214static const struct cpu_addrcost_table cortexa57_addrcost_table =
215{
60bff090 216 {
bd95e655
JG
217 1, /* hi */
218 0, /* si */
219 0, /* di */
220 1, /* ti */
60bff090 221 },
bd95e655
JG
222 0, /* pre_modify */
223 0, /* post_modify */
224 0, /* register_offset */
225 0, /* register_extend */
226 0, /* imm_offset */
60bff090
JG
227};
228
381e27aa
PT
229static const struct cpu_addrcost_table xgene1_addrcost_table =
230{
381e27aa 231 {
bd95e655
JG
232 1, /* hi */
233 0, /* si */
234 0, /* di */
235 1, /* ti */
381e27aa 236 },
bd95e655
JG
237 1, /* pre_modify */
238 0, /* post_modify */
239 0, /* register_offset */
240 1, /* register_extend */
241 0, /* imm_offset */
381e27aa
PT
242};
243
43e9d192
IB
244static const struct cpu_regmove_cost generic_regmove_cost =
245{
bd95e655 246 1, /* GP2GP */
3969c510
WD
247 /* Avoid the use of slow int<->fp moves for spilling by setting
248 their cost higher than memmov_cost. */
bd95e655
JG
249 5, /* GP2FP */
250 5, /* FP2GP */
251 2 /* FP2FP */
43e9d192
IB
252};
253
e4a9c55a
WD
254static const struct cpu_regmove_cost cortexa57_regmove_cost =
255{
bd95e655 256 1, /* GP2GP */
e4a9c55a
WD
257 /* Avoid the use of slow int<->fp moves for spilling by setting
258 their cost higher than memmov_cost. */
bd95e655
JG
259 5, /* GP2FP */
260 5, /* FP2GP */
261 2 /* FP2FP */
e4a9c55a
WD
262};
263
264static const struct cpu_regmove_cost cortexa53_regmove_cost =
265{
bd95e655 266 1, /* GP2GP */
e4a9c55a
WD
267 /* Avoid the use of slow int<->fp moves for spilling by setting
268 their cost higher than memmov_cost. */
bd95e655
JG
269 5, /* GP2FP */
270 5, /* FP2GP */
271 2 /* FP2FP */
e4a9c55a
WD
272};
273
d1bcc29f
AP
274static const struct cpu_regmove_cost thunderx_regmove_cost =
275{
bd95e655
JG
276 2, /* GP2GP */
277 2, /* GP2FP */
278 6, /* FP2GP */
279 4 /* FP2FP */
d1bcc29f
AP
280};
281
381e27aa
PT
282static const struct cpu_regmove_cost xgene1_regmove_cost =
283{
bd95e655 284 1, /* GP2GP */
381e27aa
PT
285 /* Avoid the use of slow int<->fp moves for spilling by setting
286 their cost higher than memmov_cost. */
bd95e655
JG
287 8, /* GP2FP */
288 8, /* FP2GP */
289 2 /* FP2FP */
381e27aa
PT
290};
291
8990e73a 292/* Generic costs for vector insn classes. */
8990e73a
TB
293static const struct cpu_vector_cost generic_vector_cost =
294{
bd95e655
JG
295 1, /* scalar_stmt_cost */
296 1, /* scalar_load_cost */
297 1, /* scalar_store_cost */
298 1, /* vec_stmt_cost */
299 1, /* vec_to_scalar_cost */
300 1, /* scalar_to_vec_cost */
301 1, /* vec_align_load_cost */
302 1, /* vec_unalign_load_cost */
303 1, /* vec_unalign_store_cost */
304 1, /* vec_store_cost */
305 3, /* cond_taken_branch_cost */
306 1 /* cond_not_taken_branch_cost */
8990e73a
TB
307};
308
60bff090 309/* Costs for vector insn classes for Cortex-A57.  */
60bff090
JG
310static const struct cpu_vector_cost cortexa57_vector_cost =
311{
bd95e655
JG
312 1, /* scalar_stmt_cost */
313 4, /* scalar_load_cost */
314 1, /* scalar_store_cost */
315 3, /* vec_stmt_cost */
316 8, /* vec_to_scalar_cost */
317 8, /* scalar_to_vec_cost */
318 5, /* vec_align_load_cost */
319 5, /* vec_unalign_load_cost */
320 1, /* vec_unalign_store_cost */
321 1, /* vec_store_cost */
322 1, /* cond_taken_branch_cost */
323 1 /* cond_not_taken_branch_cost */
60bff090
JG
324};
325
381e27aa 326/* Costs for vector insn classes for X-Gene 1.  */
381e27aa
PT
327static const struct cpu_vector_cost xgene1_vector_cost =
328{
bd95e655
JG
329 1, /* scalar_stmt_cost */
330 5, /* scalar_load_cost */
331 1, /* scalar_store_cost */
332 2, /* vec_stmt_cost */
333 4, /* vec_to_scalar_cost */
334 4, /* scalar_to_vec_cost */
335 10, /* vec_align_load_cost */
336 10, /* vec_unalign_load_cost */
337 2, /* vec_unalign_store_cost */
338 2, /* vec_store_cost */
339 2, /* cond_taken_branch_cost */
340 1 /* cond_not_taken_branch_cost */
381e27aa
PT
341};
342
b9066f5a
MW
343/* Generic costs for branch instructions. */
344static const struct cpu_branch_cost generic_branch_cost =
345{
346 2, /* Predictable. */
347 2 /* Unpredictable. */
348};
349
43e9d192
IB
350static const struct tune_params generic_tunings =
351{
4e2cd668 352 &cortexa57_extra_costs,
43e9d192
IB
353 &generic_addrcost_table,
354 &generic_regmove_cost,
8990e73a 355 &generic_vector_cost,
b9066f5a 356 &generic_branch_cost,
bd95e655
JG
357 4, /* memmov_cost */
358 2, /* issue_rate */
e9a3a175 359 AARCH64_FUSE_NOTHING, /* fusible_ops */
0b82a5a2
WD
360 8, /* function_align. */
361 8, /* jump_align. */
362 4, /* loop_align. */
cee66c68
WD
363 2, /* int_reassoc_width. */
364 4, /* fp_reassoc_width. */
50093a33
WD
365 1, /* vec_reassoc_width. */
366 2, /* min_div_recip_mul_sf. */
dfba575f
JG
367 2, /* min_div_recip_mul_df. */
368 (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
43e9d192
IB
369};
370
984239ad
KT
371static const struct tune_params cortexa53_tunings =
372{
373 &cortexa53_extra_costs,
374 &generic_addrcost_table,
e4a9c55a 375 &cortexa53_regmove_cost,
984239ad 376 &generic_vector_cost,
b9066f5a 377 &generic_branch_cost,
bd95e655
JG
378 4, /* memmov_cost */
379 2, /* issue_rate */
380 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
e9a3a175 381 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
0b82a5a2
WD
382 8, /* function_align. */
383 8, /* jump_align. */
384 4, /* loop_align. */
cee66c68
WD
385 2, /* int_reassoc_width. */
386 4, /* fp_reassoc_width. */
50093a33
WD
387 1, /* vec_reassoc_width. */
388 2, /* min_div_recip_mul_sf. */
dfba575f
JG
389 2, /* min_div_recip_mul_df. */
390 (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
984239ad
KT
391};
392
4fd92af6
KT
393static const struct tune_params cortexa57_tunings =
394{
395 &cortexa57_extra_costs,
60bff090 396 &cortexa57_addrcost_table,
e4a9c55a 397 &cortexa57_regmove_cost,
60bff090 398 &cortexa57_vector_cost,
b9066f5a 399 &generic_branch_cost,
bd95e655
JG
400 4, /* memmov_cost */
401 3, /* issue_rate */
402 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
e9a3a175 403 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
0b82a5a2
WD
404 16, /* function_align. */
405 8, /* jump_align. */
406 4, /* loop_align. */
cee66c68
WD
407 2, /* int_reassoc_width. */
408 4, /* fp_reassoc_width. */
50093a33
WD
409 1, /* vec_reassoc_width. */
410 2, /* min_div_recip_mul_sf. */
dfba575f
JG
411 2, /* min_div_recip_mul_df. */
412 (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags. */
413};
414
415static const struct tune_params cortexa72_tunings =
416{
417 &cortexa57_extra_costs,
418 &cortexa57_addrcost_table,
419 &cortexa57_regmove_cost,
420 &cortexa57_vector_cost,
421 &generic_branch_cost,
422 4, /* memmov_cost */
423 3, /* issue_rate */
424 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
425 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
426 16, /* function_align. */
427 8, /* jump_align. */
428 4, /* loop_align. */
429 2, /* int_reassoc_width. */
430 4, /* fp_reassoc_width. */
431 1, /* vec_reassoc_width. */
432 2, /* min_div_recip_mul_sf. */
433 2, /* min_div_recip_mul_df. */
434 (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
4fd92af6
KT
435};
436
d1bcc29f
AP
437static const struct tune_params thunderx_tunings =
438{
439 &thunderx_extra_costs,
440 &generic_addrcost_table,
441 &thunderx_regmove_cost,
442 &generic_vector_cost,
b9066f5a 443 &generic_branch_cost,
bd95e655
JG
444 6, /* memmov_cost */
445 2, /* issue_rate */
e9a3a175 446 AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
0b82a5a2
WD
447 8, /* function_align. */
448 8, /* jump_align. */
449 8, /* loop_align. */
cee66c68
WD
450 2, /* int_reassoc_width. */
451 4, /* fp_reassoc_width. */
50093a33
WD
452 1, /* vec_reassoc_width. */
453 2, /* min_div_recip_mul_sf. */
dfba575f
JG
454 2, /* min_div_recip_mul_df. */
455 (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
d1bcc29f
AP
456};
457
381e27aa
PT
458static const struct tune_params xgene1_tunings =
459{
460 &xgene1_extra_costs,
461 &xgene1_addrcost_table,
462 &xgene1_regmove_cost,
463 &xgene1_vector_cost,
b9066f5a 464 &generic_branch_cost,
bd95e655
JG
465 6, /* memmov_cost */
466 4, /* issue_rate */
e9a3a175 467 AARCH64_FUSE_NOTHING, /* fusible_ops */
381e27aa
PT
468 16, /* function_align. */
469 8, /* jump_align. */
470 16, /* loop_align. */
471 2, /* int_reassoc_width. */
472 4, /* fp_reassoc_width. */
50093a33
WD
473 1, /* vec_reassoc_width. */
474 2, /* min_div_recip_mul_sf. */
dfba575f
JG
475 2, /* min_div_recip_mul_df. */
476 (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
381e27aa
PT
477};
478
8dec06f2
JG
479/* Support for fine-grained override of the tuning structures. */
480struct aarch64_tuning_override_function
481{
482 const char* name;
483 void (*parse_override)(const char*, struct tune_params*);
484};
485
486static void aarch64_parse_fuse_string (const char*, struct tune_params*);
487static void aarch64_parse_tune_string (const char*, struct tune_params*);
488
489static const struct aarch64_tuning_override_function
490aarch64_tuning_override_functions[] =
491{
492 { "fuse", aarch64_parse_fuse_string },
493 { "tune", aarch64_parse_tune_string },
494 { NULL, NULL }
495};
496
43e9d192
IB
497/* A processor implementing AArch64. */
498struct processor
499{
500 const char *const name;
46806c44
KT
501 enum aarch64_processor ident;
502 enum aarch64_processor sched_core;
393ae126 503 enum aarch64_arch arch;
0c6caaf8 504 unsigned architecture_version;
43e9d192
IB
505 const unsigned long flags;
506 const struct tune_params *const tune;
507};
508
393ae126
KT
509/* Architectures implementing AArch64. */
510static const struct processor all_architectures[] =
511{
512#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
513 {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
514#include "aarch64-arches.def"
515#undef AARCH64_ARCH
516 {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
517};
518
43e9d192
IB
519/* Processor cores implementing AArch64. */
520static const struct processor all_cores[] =
521{
7e1bcce3 522#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
393ae126
KT
523 {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
524 all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
525 FLAGS, &COSTS##_tunings},
43e9d192
IB
526#include "aarch64-cores.def"
527#undef AARCH64_CORE
393ae126
KT
528 {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
529 AARCH64_FL_FOR_ARCH8, &generic_tunings},
530 {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
43e9d192
IB
531};
532
43e9d192
IB
533
 534/* Target specification.  These are populated as command-line arguments
535 are processed, or NULL if not specified. */
536static const struct processor *selected_arch;
537static const struct processor *selected_cpu;
538static const struct processor *selected_tune;
539
b175b679
JG
540/* The current tuning set. */
541struct tune_params aarch64_tune_params = generic_tunings;
542
43e9d192
IB
543#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
544
545/* An ISA extension in the co-processor and main instruction set space. */
546struct aarch64_option_extension
547{
548 const char *const name;
549 const unsigned long flags_on;
550 const unsigned long flags_off;
551};
552
553/* ISA extensions in AArch64. */
554static const struct aarch64_option_extension all_extensions[] =
555{
7e1bcce3 556#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \
43e9d192
IB
557 {NAME, FLAGS_ON, FLAGS_OFF},
558#include "aarch64-option-extensions.def"
559#undef AARCH64_OPT_EXTENSION
560 {NULL, 0, 0}
561};
562
563/* Used to track the size of an address when generating a pre/post
564 increment address. */
ef4bddc2 565static machine_mode aarch64_memory_reference_mode;
43e9d192 566
43e9d192
IB
567/* A table of valid AArch64 "bitmask immediate" values for
568 logical instructions. */
569
570#define AARCH64_NUM_BITMASKS 5334
571static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
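/* A "bitmask immediate" is a value formed by replicating, across the 64-bit
   register, an element of 2, 4, 8, 16, 32 or 64 bits that contains a single
   (possibly rotated) run of set bits.  For example, 0x00ff00ff00ff00ff
   (16-bit element, eight ones) and 0x5555555555555555 (2-bit element, one
   bit set) are representable, while 0x1234567812345678 is not.  There are
   5334 such 64-bit values, hence AARCH64_NUM_BITMASKS above.  */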
572
43e9d192
IB
573typedef enum aarch64_cond_code
574{
575 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
576 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
577 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
578}
579aarch64_cc;
580
581#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
582
583/* The condition codes of the processor, and the inverse function. */
584static const char * const aarch64_condition_codes[] =
585{
586 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
587 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
588};
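/* Each condition and its inverse differ only in the low bit of the encoding,
   so AARCH64_INVERSE_CONDITION_CODE simply flips that bit; for example it
   maps AARCH64_EQ ("eq") to AARCH64_NE ("ne") and AARCH64_GE ("ge") to
   AARCH64_LT ("lt").  */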
589
261fb553
AL
590void
591aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
592{
593 const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
594 if (TARGET_GENERAL_REGS_ONLY)
595 error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
596 else
597 error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg);
598}
599
26e0ff94 600static unsigned int
50093a33 601aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
26e0ff94 602{
50093a33 603 if (GET_MODE_UNIT_SIZE (mode) == 4)
b175b679
JG
604 return aarch64_tune_params.min_div_recip_mul_sf;
605 return aarch64_tune_params.min_div_recip_mul_df;
26e0ff94
WD
606}
607
cee66c68
WD
608static int
609aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
610 enum machine_mode mode)
611{
612 if (VECTOR_MODE_P (mode))
b175b679 613 return aarch64_tune_params.vec_reassoc_width;
cee66c68 614 if (INTEGRAL_MODE_P (mode))
b175b679 615 return aarch64_tune_params.int_reassoc_width;
cee66c68 616 if (FLOAT_MODE_P (mode))
b175b679 617 return aarch64_tune_params.fp_reassoc_width;
cee66c68
WD
618 return 1;
619}
620
43e9d192
IB
 621/* Provide a mapping from GCC register numbers to DWARF register numbers.  */
622unsigned
623aarch64_dbx_register_number (unsigned regno)
624{
625 if (GP_REGNUM_P (regno))
626 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
627 else if (regno == SP_REGNUM)
628 return AARCH64_DWARF_SP;
629 else if (FP_REGNUM_P (regno))
630 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
631
632 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
633 equivalent DWARF register. */
634 return DWARF_FRAME_REGISTERS;
635}
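/* For example, with the usual AArch64 DWARF numbering (R0..R30 -> 0..30,
   SP -> 31, V0..V31 -> 64..95), x5 maps to DWARF register 5 and v3 maps to
   DWARF register 67.  */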
636
637/* Return TRUE if MODE is any of the large INT modes. */
638static bool
ef4bddc2 639aarch64_vect_struct_mode_p (machine_mode mode)
43e9d192
IB
640{
641 return mode == OImode || mode == CImode || mode == XImode;
642}
643
644/* Return TRUE if MODE is any of the vector modes. */
645static bool
ef4bddc2 646aarch64_vector_mode_p (machine_mode mode)
43e9d192
IB
647{
648 return aarch64_vector_mode_supported_p (mode)
649 || aarch64_vect_struct_mode_p (mode);
650}
651
652/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
653static bool
ef4bddc2 654aarch64_array_mode_supported_p (machine_mode mode,
43e9d192
IB
655 unsigned HOST_WIDE_INT nelems)
656{
657 if (TARGET_SIMD
658 && AARCH64_VALID_SIMD_QREG_MODE (mode)
659 && (nelems >= 2 && nelems <= 4))
660 return true;
661
662 return false;
663}
664
665/* Implement HARD_REGNO_NREGS. */
666
667int
ef4bddc2 668aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
43e9d192
IB
669{
670 switch (aarch64_regno_regclass (regno))
671 {
672 case FP_REGS:
673 case FP_LO_REGS:
674 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
675 default:
676 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
677 }
678 gcc_unreachable ();
679}
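/* For example, a 16-byte TImode value needs two registers in the
   general-purpose classes (UNITS_PER_WORD == 8) but only one in the FP/SIMD
   classes, where UNITS_PER_VREG == 16.  */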
680
681/* Implement HARD_REGNO_MODE_OK. */
682
683int
ef4bddc2 684aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
43e9d192
IB
685{
686 if (GET_MODE_CLASS (mode) == MODE_CC)
687 return regno == CC_REGNUM;
688
9259db42
YZ
689 if (regno == SP_REGNUM)
690 /* The purpose of comparing with ptr_mode is to support the
691 global register variable associated with the stack pointer
692 register via the syntax of asm ("wsp") in ILP32. */
693 return mode == Pmode || mode == ptr_mode;
694
695 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
43e9d192
IB
696 return mode == Pmode;
697
698 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
699 return 1;
700
701 if (FP_REGNUM_P (regno))
702 {
703 if (aarch64_vect_struct_mode_p (mode))
704 return
705 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
706 else
707 return 1;
708 }
709
710 return 0;
711}
712
73d9ac6a 713/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
ef4bddc2 714machine_mode
73d9ac6a 715aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
ef4bddc2 716 machine_mode mode)
73d9ac6a
IB
717{
718 /* Handle modes that fit within single registers. */
719 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
720 {
721 if (GET_MODE_SIZE (mode) >= 4)
722 return mode;
723 else
724 return SImode;
725 }
726 /* Fall back to generic for multi-reg and very large modes. */
727 else
728 return choose_hard_reg_mode (regno, nregs, false);
729}
730
43e9d192
IB
731/* Return true if calls to DECL should be treated as
 732 long-calls (i.e. called via a register).  */
733static bool
734aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
735{
736 return false;
737}
738
739/* Return true if calls to symbol-ref SYM should be treated as
 740 long-calls (i.e. called via a register).  */
741bool
742aarch64_is_long_call_p (rtx sym)
743{
744 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
745}
746
747/* Return true if the offsets to a zero/sign-extract operation
748 represent an expression that matches an extend operation. The
 749 operands represent the parameters from
750
4745e701 751 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
43e9d192 752bool
ef4bddc2 753aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
43e9d192
IB
754 rtx extract_imm)
755{
756 HOST_WIDE_INT mult_val, extract_val;
757
758 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
759 return false;
760
761 mult_val = INTVAL (mult_imm);
762 extract_val = INTVAL (extract_imm);
763
764 if (extract_val > 8
765 && extract_val < GET_MODE_BITSIZE (mode)
766 && exact_log2 (extract_val & ~7) > 0
767 && (extract_val & 7) <= 4
768 && mult_val == (1 << (extract_val & 7)))
769 return true;
770
771 return false;
772}
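/* For example, in DImode an extract of the low 34 bits of (reg * 4)
   corresponds to a 32-bit extend of reg followed by a left shift of 2
   (as in an extended-register operand such as "sxtw #2"), so
   mult_imm == 4 and extract_imm == 34 is accepted above.  */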
773
774/* Emit an insn that's a simple single-set. Both the operands must be
775 known to be valid. */
776inline static rtx
777emit_set_insn (rtx x, rtx y)
778{
f7df4a84 779 return emit_insn (gen_rtx_SET (x, y));
43e9d192
IB
780}
781
782/* X and Y are two things to compare using CODE. Emit the compare insn and
 783 return the rtx for the CC register in the appropriate mode.  */
784rtx
785aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
786{
ef4bddc2 787 machine_mode mode = SELECT_CC_MODE (code, x, y);
43e9d192
IB
788 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
789
790 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
791 return cc_reg;
792}
793
794/* Build the SYMBOL_REF for __tls_get_addr. */
795
796static GTY(()) rtx tls_get_addr_libfunc;
797
798rtx
799aarch64_tls_get_addr (void)
800{
801 if (!tls_get_addr_libfunc)
802 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
803 return tls_get_addr_libfunc;
804}
805
806/* Return the TLS model to use for ADDR. */
807
808static enum tls_model
809tls_symbolic_operand_type (rtx addr)
810{
811 enum tls_model tls_kind = TLS_MODEL_NONE;
812 rtx sym, addend;
813
814 if (GET_CODE (addr) == CONST)
815 {
816 split_const (addr, &sym, &addend);
817 if (GET_CODE (sym) == SYMBOL_REF)
818 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
819 }
820 else if (GET_CODE (addr) == SYMBOL_REF)
821 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
822
823 return tls_kind;
824}
825
 826/* We accept lo_sum's in legitimate addresses so that combine can take
 827 care of combining addresses where necessary, but for generation
 828 purposes, we generate the address as:
 829
830 RTL Absolute
831 tmp = hi (symbol_ref); adrp x1, foo
832 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
833 nop
834
835 PIC TLS
836 adrp x1, :got:foo adrp tmp, :tlsgd:foo
837 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
838 bl __tls_get_addr
839 nop
840
841 Load TLS symbol, depending on TLS mechanism and TLS access model.
842
843 Global Dynamic - Traditional TLS:
844 adrp tmp, :tlsgd:imm
845 add dest, tmp, #:tlsgd_lo12:imm
846 bl __tls_get_addr
847
848 Global Dynamic - TLS Descriptors:
849 adrp dest, :tlsdesc:imm
850 ldr tmp, [dest, #:tlsdesc_lo12:imm]
851 add dest, dest, #:tlsdesc_lo12:imm
852 blr tmp
853 mrs tp, tpidr_el0
854 add dest, dest, tp
855
856 Initial Exec:
857 mrs tp, tpidr_el0
858 adrp tmp, :gottprel:imm
859 ldr dest, [tmp, #:gottprel_lo12:imm]
860 add dest, dest, tp
861
862 Local Exec:
863 mrs tp, tpidr_el0
0699caae
RL
864 add t0, tp, #:tprel_hi12:imm, lsl #12
865 add t0, t0, #:tprel_lo12_nc:imm
43e9d192
IB
866*/
867
868static void
869aarch64_load_symref_appropriately (rtx dest, rtx imm,
870 enum aarch64_symbol_type type)
871{
872 switch (type)
873 {
874 case SYMBOL_SMALL_ABSOLUTE:
875 {
28514dda 876 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 877 rtx tmp_reg = dest;
ef4bddc2 878 machine_mode mode = GET_MODE (dest);
28514dda
YZ
879
880 gcc_assert (mode == Pmode || mode == ptr_mode);
881
43e9d192 882 if (can_create_pseudo_p ())
28514dda 883 tmp_reg = gen_reg_rtx (mode);
43e9d192 884
28514dda 885 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
43e9d192
IB
886 emit_insn (gen_add_losym (dest, tmp_reg, imm));
887 return;
888 }
889
a5350ddc 890 case SYMBOL_TINY_ABSOLUTE:
f7df4a84 891 emit_insn (gen_rtx_SET (dest, imm));
a5350ddc
CSS
892 return;
893
1b1e81f8
JW
894 case SYMBOL_SMALL_GOT_28K:
895 {
896 machine_mode mode = GET_MODE (dest);
897 rtx gp_rtx = pic_offset_table_rtx;
53021678
JW
898 rtx insn;
899 rtx mem;
1b1e81f8
JW
900
901 /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
 902 here before RTL expansion.  Tree IVOPTS will generate RTL patterns to
 903 decide rtx costs, in which case pic_offset_table_rtx is not
 904 initialized.  In that case there is no need to generate the first adrp
026c3cfd 905 instruction, as the final cost of a global variable access is
1b1e81f8
JW
906 one instruction. */
907 if (gp_rtx != NULL)
908 {
 909 /* -fpic for -mcmodel=small allows a 32K GOT table size (but since we
 910 use the page base as the GOT base, the first page may be wasted;
 911 in the worst case there is only 28K of space for the GOT).
 912
 913 The generated instruction sequence for accessing a global
 914 variable is:
 915
 916 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
 917
 918 Only one instruction is needed.  But we must initialize
 919 pic_offset_table_rtx properly.  We generate an initialization insn
 920 for every global access, and rely on CSE to remove all redundant ones.
 921
 922 The final instruction sequence will look like the following
 923 for multiple global variable accesses.
924
925 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
926
927 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
928 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
929 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
930 ... */
931
932 rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
933 crtl->uses_pic_offset_table = 1;
934 emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));
935
936 if (mode != GET_MODE (gp_rtx))
937 gp_rtx = simplify_gen_subreg (mode, gp_rtx, GET_MODE (gp_rtx), 0);
938 }
939
940 if (mode == ptr_mode)
941 {
942 if (mode == DImode)
53021678 943 insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
1b1e81f8 944 else
53021678
JW
945 insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);
946
947 mem = XVECEXP (SET_SRC (insn), 0, 0);
1b1e81f8
JW
948 }
949 else
950 {
951 gcc_assert (mode == Pmode);
53021678
JW
952
953 insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
954 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
1b1e81f8
JW
955 }
956
53021678
JW
 957 /* The operand is expected to be a MEM.  Whenever the related insn
 958 pattern changes, the code above that calculates MEM should be
 959 updated.  */
960 gcc_assert (GET_CODE (mem) == MEM);
961 MEM_READONLY_P (mem) = 1;
962 MEM_NOTRAP_P (mem) = 1;
963 emit_insn (insn);
1b1e81f8
JW
964 return;
965 }
966
6642bdb4 967 case SYMBOL_SMALL_GOT_4G:
43e9d192 968 {
28514dda
YZ
969 /* In ILP32, the mode of dest can be either SImode or DImode,
970 while the got entry is always of SImode size. The mode of
971 dest depends on how dest is used: if dest is assigned to a
972 pointer (e.g. in the memory), it has SImode; it may have
 973 DImode if dest is dereferenced to access the memory.
974 This is why we have to handle three different ldr_got_small
975 patterns here (two patterns for ILP32). */
53021678
JW
976
977 rtx insn;
978 rtx mem;
43e9d192 979 rtx tmp_reg = dest;
ef4bddc2 980 machine_mode mode = GET_MODE (dest);
28514dda 981
43e9d192 982 if (can_create_pseudo_p ())
28514dda
YZ
983 tmp_reg = gen_reg_rtx (mode);
984
985 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
986 if (mode == ptr_mode)
987 {
988 if (mode == DImode)
53021678 989 insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
28514dda 990 else
53021678
JW
991 insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
992
993 mem = XVECEXP (SET_SRC (insn), 0, 0);
28514dda
YZ
994 }
995 else
996 {
997 gcc_assert (mode == Pmode);
53021678
JW
998
999 insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
1000 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
28514dda
YZ
1001 }
1002
53021678
JW
1003 gcc_assert (GET_CODE (mem) == MEM);
1004 MEM_READONLY_P (mem) = 1;
1005 MEM_NOTRAP_P (mem) = 1;
1006 emit_insn (insn);
43e9d192
IB
1007 return;
1008 }
1009
1010 case SYMBOL_SMALL_TLSGD:
1011 {
5d8a22a5 1012 rtx_insn *insns;
43e9d192
IB
1013 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
1014
1015 start_sequence ();
78607708 1016 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
43e9d192
IB
1017 insns = get_insns ();
1018 end_sequence ();
1019
1020 RTL_CONST_CALL_P (insns) = 1;
1021 emit_libcall_block (insns, dest, result, imm);
1022 return;
1023 }
1024
1025 case SYMBOL_SMALL_TLSDESC:
1026 {
ef4bddc2 1027 machine_mode mode = GET_MODE (dest);
621ad2de 1028 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
1029 rtx tp;
1030
621ad2de
AP
1031 gcc_assert (mode == Pmode || mode == ptr_mode);
1032
1033 /* In ILP32, the got entry is always of SImode size. Unlike
1034 small GOT, the dest is fixed at reg 0. */
1035 if (TARGET_ILP32)
1036 emit_insn (gen_tlsdesc_small_si (imm));
1037 else
1038 emit_insn (gen_tlsdesc_small_di (imm));
43e9d192 1039 tp = aarch64_load_tp (NULL);
621ad2de
AP
1040
1041 if (mode != Pmode)
1042 tp = gen_lowpart (mode, tp);
1043
f7df4a84 1044 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
43e9d192
IB
1045 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1046 return;
1047 }
1048
1049 case SYMBOL_SMALL_GOTTPREL:
1050 {
621ad2de
AP
1051 /* In ILP32, the mode of dest can be either SImode or DImode,
1052 while the got entry is always of SImode size. The mode of
1053 dest depends on how dest is used: if dest is assigned to a
1054 pointer (e.g. in the memory), it has SImode; it may have
1055 DImode if dest is dereferenced to access the memeory.
1056 This is why we have to handle three different tlsie_small
1057 patterns here (two patterns for ILP32). */
ef4bddc2 1058 machine_mode mode = GET_MODE (dest);
621ad2de 1059 rtx tmp_reg = gen_reg_rtx (mode);
43e9d192 1060 rtx tp = aarch64_load_tp (NULL);
621ad2de
AP
1061
1062 if (mode == ptr_mode)
1063 {
1064 if (mode == DImode)
1065 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
1066 else
1067 {
1068 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
1069 tp = gen_lowpart (mode, tp);
1070 }
1071 }
1072 else
1073 {
1074 gcc_assert (mode == Pmode);
1075 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
1076 }
1077
f7df4a84 1078 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
43e9d192
IB
1079 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1080 return;
1081 }
1082
8fd17b98 1083 case SYMBOL_TLSLE:
43e9d192
IB
1084 {
1085 rtx tp = aarch64_load_tp (NULL);
e6f7f0e9
AP
1086
1087 if (GET_MODE (dest) != Pmode)
1088 tp = gen_lowpart (GET_MODE (dest), tp);
1089
8fd17b98 1090 emit_insn (gen_tlsle (dest, tp, imm));
43e9d192
IB
1091 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1092 return;
1093 }
1094
87dd8ab0
MS
1095 case SYMBOL_TINY_GOT:
1096 emit_insn (gen_ldr_got_tiny (dest, imm));
1097 return;
1098
43e9d192
IB
1099 default:
1100 gcc_unreachable ();
1101 }
1102}
1103
1104/* Emit a move from SRC to DEST. Assume that the move expanders can
1105 handle all moves if !can_create_pseudo_p (). The distinction is
1106 important because, unlike emit_move_insn, the move expanders know
1107 how to force Pmode objects into the constant pool even when the
1108 constant pool address is not itself legitimate. */
1109static rtx
1110aarch64_emit_move (rtx dest, rtx src)
1111{
1112 return (can_create_pseudo_p ()
1113 ? emit_move_insn (dest, src)
1114 : emit_move_insn_1 (dest, src));
1115}
1116
030d03b8
RE
1117/* Split a 128-bit move operation into two 64-bit move operations,
1118 taking care to handle partial overlap of register to register
1119 copies. Special cases are needed when moving between GP regs and
1120 FP regs. SRC can be a register, constant or memory; DST a register
1121 or memory. If either operand is memory it must not have any side
1122 effects. */
43e9d192
IB
1123void
1124aarch64_split_128bit_move (rtx dst, rtx src)
1125{
030d03b8
RE
1126 rtx dst_lo, dst_hi;
1127 rtx src_lo, src_hi;
43e9d192 1128
ef4bddc2 1129 machine_mode mode = GET_MODE (dst);
12dc6974 1130
030d03b8
RE
1131 gcc_assert (mode == TImode || mode == TFmode);
1132 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
1133 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
43e9d192
IB
1134
1135 if (REG_P (dst) && REG_P (src))
1136 {
030d03b8
RE
1137 int src_regno = REGNO (src);
1138 int dst_regno = REGNO (dst);
43e9d192 1139
030d03b8 1140 /* Handle FP <-> GP regs. */
43e9d192
IB
1141 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
1142 {
030d03b8
RE
1143 src_lo = gen_lowpart (word_mode, src);
1144 src_hi = gen_highpart (word_mode, src);
1145
1146 if (mode == TImode)
1147 {
1148 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
1149 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
1150 }
1151 else
1152 {
1153 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
1154 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
1155 }
1156 return;
43e9d192
IB
1157 }
1158 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
1159 {
030d03b8
RE
1160 dst_lo = gen_lowpart (word_mode, dst);
1161 dst_hi = gen_highpart (word_mode, dst);
1162
1163 if (mode == TImode)
1164 {
1165 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
1166 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
1167 }
1168 else
1169 {
1170 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
1171 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
1172 }
1173 return;
43e9d192 1174 }
43e9d192
IB
1175 }
1176
030d03b8
RE
1177 dst_lo = gen_lowpart (word_mode, dst);
1178 dst_hi = gen_highpart (word_mode, dst);
1179 src_lo = gen_lowpart (word_mode, src);
1180 src_hi = gen_highpart_mode (word_mode, mode, src);
1181
1182 /* At most one pairing may overlap. */
1183 if (reg_overlap_mentioned_p (dst_lo, src_hi))
1184 {
1185 aarch64_emit_move (dst_hi, src_hi);
1186 aarch64_emit_move (dst_lo, src_lo);
1187 }
1188 else
1189 {
1190 aarch64_emit_move (dst_lo, src_lo);
1191 aarch64_emit_move (dst_hi, src_hi);
1192 }
43e9d192
IB
1193}
1194
1195bool
1196aarch64_split_128bit_move_p (rtx dst, rtx src)
1197{
1198 return (! REG_P (src)
1199 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
1200}
1201
8b033a8a
SN
1202/* Split a complex SIMD combine. */
1203
1204void
1205aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
1206{
ef4bddc2
RS
1207 machine_mode src_mode = GET_MODE (src1);
1208 machine_mode dst_mode = GET_MODE (dst);
8b033a8a
SN
1209
1210 gcc_assert (VECTOR_MODE_P (dst_mode));
1211
1212 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
1213 {
1214 rtx (*gen) (rtx, rtx, rtx);
1215
1216 switch (src_mode)
1217 {
1218 case V8QImode:
1219 gen = gen_aarch64_simd_combinev8qi;
1220 break;
1221 case V4HImode:
1222 gen = gen_aarch64_simd_combinev4hi;
1223 break;
1224 case V2SImode:
1225 gen = gen_aarch64_simd_combinev2si;
1226 break;
1227 case V2SFmode:
1228 gen = gen_aarch64_simd_combinev2sf;
1229 break;
1230 case DImode:
1231 gen = gen_aarch64_simd_combinedi;
1232 break;
1233 case DFmode:
1234 gen = gen_aarch64_simd_combinedf;
1235 break;
1236 default:
1237 gcc_unreachable ();
1238 }
1239
1240 emit_insn (gen (dst, src1, src2));
1241 return;
1242 }
1243}
1244
fd4842cd
SN
1245/* Split a complex SIMD move. */
1246
1247void
1248aarch64_split_simd_move (rtx dst, rtx src)
1249{
ef4bddc2
RS
1250 machine_mode src_mode = GET_MODE (src);
1251 machine_mode dst_mode = GET_MODE (dst);
fd4842cd
SN
1252
1253 gcc_assert (VECTOR_MODE_P (dst_mode));
1254
1255 if (REG_P (dst) && REG_P (src))
1256 {
c59b7e28
SN
1257 rtx (*gen) (rtx, rtx);
1258
fd4842cd
SN
1259 gcc_assert (VECTOR_MODE_P (src_mode));
1260
1261 switch (src_mode)
1262 {
1263 case V16QImode:
c59b7e28 1264 gen = gen_aarch64_split_simd_movv16qi;
fd4842cd
SN
1265 break;
1266 case V8HImode:
c59b7e28 1267 gen = gen_aarch64_split_simd_movv8hi;
fd4842cd
SN
1268 break;
1269 case V4SImode:
c59b7e28 1270 gen = gen_aarch64_split_simd_movv4si;
fd4842cd
SN
1271 break;
1272 case V2DImode:
c59b7e28 1273 gen = gen_aarch64_split_simd_movv2di;
fd4842cd
SN
1274 break;
1275 case V4SFmode:
c59b7e28 1276 gen = gen_aarch64_split_simd_movv4sf;
fd4842cd
SN
1277 break;
1278 case V2DFmode:
c59b7e28 1279 gen = gen_aarch64_split_simd_movv2df;
fd4842cd
SN
1280 break;
1281 default:
1282 gcc_unreachable ();
1283 }
c59b7e28
SN
1284
1285 emit_insn (gen (dst, src));
fd4842cd
SN
1286 return;
1287 }
1288}
1289
43e9d192 1290static rtx
ef4bddc2 1291aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
43e9d192
IB
1292{
1293 if (can_create_pseudo_p ())
e18b4a81 1294 return force_reg (mode, value);
43e9d192
IB
1295 else
1296 {
1297 x = aarch64_emit_move (x, value);
1298 return x;
1299 }
1300}
1301
1302
1303static rtx
ef4bddc2 1304aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
43e9d192 1305{
9c023bf0 1306 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
43e9d192
IB
1307 {
1308 rtx high;
1309 /* Load the full offset into a register. This
1310 might be improvable in the future. */
1311 high = GEN_INT (offset);
1312 offset = 0;
e18b4a81
YZ
1313 high = aarch64_force_temporary (mode, temp, high);
1314 reg = aarch64_force_temporary (mode, temp,
1315 gen_rtx_PLUS (mode, high, reg));
43e9d192
IB
1316 }
1317 return plus_constant (mode, reg, offset);
1318}
1319
82614948
RR
1320static int
1321aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
1322 machine_mode mode)
43e9d192 1323{
43e9d192
IB
1324 unsigned HOST_WIDE_INT mask;
1325 int i;
1326 bool first;
1327 unsigned HOST_WIDE_INT val;
1328 bool subtargets;
1329 rtx subtarget;
c747993a 1330 int one_match, zero_match, first_not_ffff_match;
82614948 1331 int num_insns = 0;
43e9d192
IB
1332
1333 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1334 {
82614948 1335 if (generate)
f7df4a84 1336 emit_insn (gen_rtx_SET (dest, imm));
82614948
RR
1337 num_insns++;
1338 return num_insns;
43e9d192
IB
1339 }
1340
1341 if (mode == SImode)
1342 {
1343 /* We know we can't do this in 1 insn, and we must be able to do it
1344 in two; so don't mess around looking for sequences that don't buy
1345 us anything. */
82614948
RR
1346 if (generate)
1347 {
f7df4a84 1348 emit_insn (gen_rtx_SET (dest, GEN_INT (INTVAL (imm) & 0xffff)));
82614948
RR
1349 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1350 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1351 }
1352 num_insns += 2;
1353 return num_insns;
43e9d192
IB
1354 }
1355
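  /* This SImode path is only reached when the constant cannot be built in a
     single instruction; it emits a MOV of the low halfword followed by a
     MOVK of the high halfword, so 0x12345678 would typically become:
	 mov  w0, #0x5678
	 movk w0, #0x1234, lsl #16  */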
1356 /* Remaining cases are all for DImode. */
1357
1358 val = INTVAL (imm);
1359 subtargets = optimize && can_create_pseudo_p ();
1360
1361 one_match = 0;
1362 zero_match = 0;
1363 mask = 0xffff;
c747993a 1364 first_not_ffff_match = -1;
43e9d192
IB
1365
1366 for (i = 0; i < 64; i += 16, mask <<= 16)
1367 {
c747993a 1368 if ((val & mask) == mask)
43e9d192 1369 one_match++;
c747993a
IB
1370 else
1371 {
1372 if (first_not_ffff_match < 0)
1373 first_not_ffff_match = i;
1374 if ((val & mask) == 0)
1375 zero_match++;
1376 }
43e9d192
IB
1377 }
1378
1379 if (one_match == 2)
1380 {
c747993a
IB
1381 /* Set one of the quarters and then insert back into result. */
1382 mask = 0xffffll << first_not_ffff_match;
82614948
RR
1383 if (generate)
1384 {
f7df4a84 1385 emit_insn (gen_rtx_SET (dest, GEN_INT (val | mask)));
82614948
RR
1386 emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
1387 GEN_INT ((val >> first_not_ffff_match)
1388 & 0xffff)));
1389 }
1390 num_insns += 2;
1391 return num_insns;
c747993a
IB
1392 }
1393
43e9d192
IB
1394 if (zero_match == 2)
1395 goto simple_sequence;
1396
1397 mask = 0x0ffff0000UL;
1398 for (i = 16; i < 64; i += 16, mask <<= 16)
1399 {
1400 HOST_WIDE_INT comp = mask & ~(mask - 1);
1401
1402 if (aarch64_uimm12_shift (val - (val & mask)))
1403 {
82614948
RR
1404 if (generate)
1405 {
1406 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
f7df4a84 1407 emit_insn (gen_rtx_SET (subtarget, GEN_INT (val & mask)));
82614948
RR
1408 emit_insn (gen_adddi3 (dest, subtarget,
1409 GEN_INT (val - (val & mask))));
1410 }
1411 num_insns += 2;
1412 return num_insns;
43e9d192
IB
1413 }
1414 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1415 {
82614948
RR
1416 if (generate)
1417 {
1418 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
f7df4a84 1419 emit_insn (gen_rtx_SET (subtarget,
82614948
RR
1420 GEN_INT ((val + comp) & mask)));
1421 emit_insn (gen_adddi3 (dest, subtarget,
1422 GEN_INT (val - ((val + comp) & mask))));
1423 }
1424 num_insns += 2;
1425 return num_insns;
43e9d192
IB
1426 }
1427 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1428 {
82614948
RR
1429 if (generate)
1430 {
1431 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
f7df4a84 1432 emit_insn (gen_rtx_SET (subtarget,
82614948
RR
1433 GEN_INT ((val - comp) | ~mask)));
1434 emit_insn (gen_adddi3 (dest, subtarget,
1435 GEN_INT (val - ((val - comp) | ~mask))));
1436 }
1437 num_insns += 2;
1438 return num_insns;
43e9d192
IB
1439 }
1440 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1441 {
82614948
RR
1442 if (generate)
1443 {
1444 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
f7df4a84 1445 emit_insn (gen_rtx_SET (subtarget, GEN_INT (val | ~mask)));
82614948
RR
1446 emit_insn (gen_adddi3 (dest, subtarget,
1447 GEN_INT (val - (val | ~mask))));
1448 }
1449 num_insns += 2;
1450 return num_insns;
43e9d192
IB
1451 }
1452 }
1453
1454 /* See if we can do it by arithmetically combining two
1455 immediates. */
1456 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1457 {
1458 int j;
1459 mask = 0xffff;
1460
1461 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1462 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1463 {
82614948
RR
1464 if (generate)
1465 {
1466 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
f7df4a84 1467 emit_insn (gen_rtx_SET (subtarget,
82614948
RR
1468 GEN_INT (aarch64_bitmasks[i])));
1469 emit_insn (gen_adddi3 (dest, subtarget,
1470 GEN_INT (val - aarch64_bitmasks[i])));
1471 }
1472 num_insns += 2;
1473 return num_insns;
43e9d192
IB
1474 }
1475
1476 for (j = 0; j < 64; j += 16, mask <<= 16)
1477 {
1478 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1479 {
82614948
RR
1480 if (generate)
1481 {
f7df4a84 1482 emit_insn (gen_rtx_SET (dest,
82614948
RR
1483 GEN_INT (aarch64_bitmasks[i])));
1484 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1485 GEN_INT ((val >> j) & 0xffff)));
1486 }
1487 num_insns += 2;
1488 return num_insns;
43e9d192
IB
1489 }
1490 }
1491 }
1492
1493 /* See if we can do it by logically combining two immediates. */
1494 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1495 {
1496 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1497 {
1498 int j;
1499
1500 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1501 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1502 {
82614948
RR
1503 if (generate)
1504 {
1505 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
f7df4a84 1506 emit_insn (gen_rtx_SET (subtarget,
82614948
RR
1507 GEN_INT (aarch64_bitmasks[i])));
1508 emit_insn (gen_iordi3 (dest, subtarget,
1509 GEN_INT (aarch64_bitmasks[j])));
1510 }
1511 num_insns += 2;
1512 return num_insns;
43e9d192
IB
1513 }
1514 }
1515 else if ((val & aarch64_bitmasks[i]) == val)
1516 {
1517 int j;
1518
1519 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1520 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1521 {
82614948
RR
1522 if (generate)
1523 {
1524 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
f7df4a84 1525 emit_insn (gen_rtx_SET (subtarget,
82614948
RR
1526 GEN_INT (aarch64_bitmasks[j])));
1527 emit_insn (gen_anddi3 (dest, subtarget,
1528 GEN_INT (aarch64_bitmasks[i])));
1529 }
1530 num_insns += 2;
1531 return num_insns;
43e9d192
IB
1532 }
1533 }
1534 }
1535
2c274197
KT
1536 if (one_match > zero_match)
1537 {
1538 /* Set either first three quarters or all but the third. */
1539 mask = 0xffffll << (16 - first_not_ffff_match);
82614948 1540 if (generate)
f7df4a84 1541 emit_insn (gen_rtx_SET (dest,
82614948
RR
1542 GEN_INT (val | mask | 0xffffffff00000000ull)));
1543 num_insns ++;
2c274197
KT
1544
1545 /* Now insert other two quarters. */
1546 for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
1547 i < 64; i += 16, mask <<= 16)
1548 {
1549 if ((val & mask) != mask)
82614948
RR
1550 {
1551 if (generate)
1552 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1553 GEN_INT ((val >> i) & 0xffff)));
1554 num_insns ++;
1555 }
2c274197 1556 }
82614948 1557 return num_insns;
2c274197
KT
1558 }
1559
43e9d192
IB
1560 simple_sequence:
1561 first = true;
1562 mask = 0xffff;
1563 for (i = 0; i < 64; i += 16, mask <<= 16)
1564 {
1565 if ((val & mask) != 0)
1566 {
1567 if (first)
1568 {
82614948 1569 if (generate)
f7df4a84 1570 emit_insn (gen_rtx_SET (dest, GEN_INT (val & mask)));
82614948 1571 num_insns ++;
43e9d192
IB
1572 first = false;
1573 }
1574 else
82614948
RR
1575 {
1576 if (generate)
1577 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1578 GEN_INT ((val >> i) & 0xffff)));
1579 num_insns ++;
1580 }
1581 }
1582 }
1583
1584 return num_insns;
1585}
1586
1587
1588void
1589aarch64_expand_mov_immediate (rtx dest, rtx imm)
1590{
1591 machine_mode mode = GET_MODE (dest);
1592
1593 gcc_assert (mode == SImode || mode == DImode);
1594
1595 /* Check on what type of symbol it is. */
1596 if (GET_CODE (imm) == SYMBOL_REF
1597 || GET_CODE (imm) == LABEL_REF
1598 || GET_CODE (imm) == CONST)
1599 {
1600 rtx mem, base, offset;
1601 enum aarch64_symbol_type sty;
1602
1603 /* If we have (const (plus symbol offset)), separate out the offset
1604 before we start classifying the symbol. */
1605 split_const (imm, &base, &offset);
1606
f8b756b7 1607 sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
82614948
RR
1608 switch (sty)
1609 {
1610 case SYMBOL_FORCE_TO_MEM:
1611 if (offset != const0_rtx
1612 && targetm.cannot_force_const_mem (mode, imm))
1613 {
1614 gcc_assert (can_create_pseudo_p ());
1615 base = aarch64_force_temporary (mode, dest, base);
1616 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1617 aarch64_emit_move (dest, base);
1618 return;
1619 }
1620 mem = force_const_mem (ptr_mode, imm);
1621 gcc_assert (mem);
1622 if (mode != ptr_mode)
1623 mem = gen_rtx_ZERO_EXTEND (mode, mem);
f7df4a84 1624 emit_insn (gen_rtx_SET (dest, mem));
82614948
RR
1625 return;
1626
1627 case SYMBOL_SMALL_TLSGD:
1628 case SYMBOL_SMALL_TLSDESC:
1629 case SYMBOL_SMALL_GOTTPREL:
1b1e81f8 1630 case SYMBOL_SMALL_GOT_28K:
6642bdb4 1631 case SYMBOL_SMALL_GOT_4G:
82614948
RR
1632 case SYMBOL_TINY_GOT:
1633 if (offset != const0_rtx)
1634 {
 1635 gcc_assert (can_create_pseudo_p ());
1636 base = aarch64_force_temporary (mode, dest, base);
1637 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1638 aarch64_emit_move (dest, base);
1639 return;
1640 }
1641 /* FALLTHRU */
1642
82614948
RR
1643 case SYMBOL_SMALL_ABSOLUTE:
1644 case SYMBOL_TINY_ABSOLUTE:
8fd17b98 1645 case SYMBOL_TLSLE:
82614948
RR
1646 aarch64_load_symref_appropriately (dest, imm, sty);
1647 return;
1648
1649 default:
1650 gcc_unreachable ();
1651 }
1652 }
1653
1654 if (!CONST_INT_P (imm))
1655 {
1656 if (GET_CODE (imm) == HIGH)
f7df4a84 1657 emit_insn (gen_rtx_SET (dest, imm));
82614948
RR
1658 else
1659 {
1660 rtx mem = force_const_mem (mode, imm);
1661 gcc_assert (mem);
f7df4a84 1662 emit_insn (gen_rtx_SET (dest, mem));
43e9d192 1663 }
82614948
RR
1664
1665 return;
43e9d192 1666 }
82614948
RR
1667
1668 aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
43e9d192
IB
1669}
1670
1671static bool
fee9ba42
JW
1672aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1673 tree exp ATTRIBUTE_UNUSED)
43e9d192 1674{
fee9ba42 1675 /* Currently, always true. */
43e9d192
IB
1676 return true;
1677}
1678
1679/* Implement TARGET_PASS_BY_REFERENCE. */
1680
1681static bool
1682aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 1683 machine_mode mode,
43e9d192
IB
1684 const_tree type,
1685 bool named ATTRIBUTE_UNUSED)
1686{
1687 HOST_WIDE_INT size;
ef4bddc2 1688 machine_mode dummymode;
43e9d192
IB
1689 int nregs;
1690
1691 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1692 size = (mode == BLKmode && type)
1693 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1694
aadc1c43
MHD
1695 /* Aggregates are passed by reference based on their size. */
1696 if (type && AGGREGATE_TYPE_P (type))
43e9d192 1697 {
aadc1c43 1698 size = int_size_in_bytes (type);
43e9d192
IB
1699 }
1700
1701 /* Variable sized arguments are always returned by reference. */
1702 if (size < 0)
1703 return true;
1704
1705 /* Can this be a candidate to be passed in fp/simd register(s)? */
1706 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1707 &dummymode, &nregs,
1708 NULL))
1709 return false;
1710
1711 /* Arguments which are variable sized or larger than 2 registers are
 1712 passed by reference unless they are a homogeneous floating-point
1713 aggregate. */
1714 return size > 2 * UNITS_PER_WORD;
1715}
1716
1717/* Return TRUE if VALTYPE is padded to its least significant bits. */
1718static bool
1719aarch64_return_in_msb (const_tree valtype)
1720{
ef4bddc2 1721 machine_mode dummy_mode;
43e9d192
IB
1722 int dummy_int;
1723
1724 /* Never happens in little-endian mode. */
1725 if (!BYTES_BIG_ENDIAN)
1726 return false;
1727
1728 /* Only composite types smaller than or equal to 16 bytes can
1729 be potentially returned in registers. */
1730 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1731 || int_size_in_bytes (valtype) <= 0
1732 || int_size_in_bytes (valtype) > 16)
1733 return false;
1734
1735 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1736 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1737 is always passed/returned in the least significant bits of fp/simd
1738 register(s). */
1739 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1740 &dummy_mode, &dummy_int, NULL))
1741 return false;
1742
1743 return true;
1744}
1745
1746/* Implement TARGET_FUNCTION_VALUE.
1747 Define how to find the value returned by a function. */
1748
1749static rtx
1750aarch64_function_value (const_tree type, const_tree func,
1751 bool outgoing ATTRIBUTE_UNUSED)
1752{
ef4bddc2 1753 machine_mode mode;
43e9d192
IB
1754 int unsignedp;
1755 int count;
ef4bddc2 1756 machine_mode ag_mode;
43e9d192
IB
1757
1758 mode = TYPE_MODE (type);
1759 if (INTEGRAL_TYPE_P (type))
1760 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1761
1762 if (aarch64_return_in_msb (type))
1763 {
1764 HOST_WIDE_INT size = int_size_in_bytes (type);
1765
1766 if (size % UNITS_PER_WORD != 0)
1767 {
1768 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1769 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1770 }
1771 }
1772
1773 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1774 &ag_mode, &count, NULL))
1775 {
1776 if (!aarch64_composite_type_p (type, mode))
1777 {
1778 gcc_assert (count == 1 && mode == ag_mode);
1779 return gen_rtx_REG (mode, V0_REGNUM);
1780 }
1781 else
1782 {
1783 int i;
1784 rtx par;
1785
1786 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1787 for (i = 0; i < count; i++)
1788 {
1789 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1790 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1791 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1792 XVECEXP (par, 0, i) = tmp;
1793 }
1794 return par;
1795 }
1796 }
1797 else
1798 return gen_rtx_REG (mode, R0_REGNUM);
1799}
1800
1801/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1802 Return true if REGNO is the number of a hard register in which the values
1803 of called function may come back. */
1804
1805static bool
1806aarch64_function_value_regno_p (const unsigned int regno)
1807{
1808 /* Maximum of 16 bytes can be returned in the general registers. Examples
1809 of 16-byte return values are: 128-bit integers and 16-byte small
1810 structures (excluding homogeneous floating-point aggregates). */
1811 if (regno == R0_REGNUM || regno == R1_REGNUM)
1812 return true;
1813
1814 /* Up to four fp/simd registers can return a function value, e.g. a
1815 homogeneous floating-point aggregate having four members. */
1816 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 1817 return TARGET_FLOAT;
43e9d192
IB
1818
1819 return false;
1820}
1821
1822/* Implement TARGET_RETURN_IN_MEMORY.
1823
1824 If the type T of the result of a function is such that
1825 void func (T arg)
1826 would require that arg be passed as a value in a register (or set of
1827 registers) according to the parameter passing rules, then the result
1828 is returned in the same registers as would be used for such an
1829 argument. */
1830
1831static bool
1832aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1833{
1834 HOST_WIDE_INT size;
ef4bddc2 1835 machine_mode ag_mode;
43e9d192
IB
1836 int count;
1837
1838 if (!AGGREGATE_TYPE_P (type)
1839 && TREE_CODE (type) != COMPLEX_TYPE
1840 && TREE_CODE (type) != VECTOR_TYPE)
1841 /* Simple scalar types are always returned in registers. */
1842 return false;
1843
1844 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1845 type,
1846 &ag_mode,
1847 &count,
1848 NULL))
1849 return false;
1850
1851 /* Types larger than 2 registers are returned in memory. */
1852 size = int_size_in_bytes (type);
1853 return (size < 0 || size > 2 * UNITS_PER_WORD);
1854}
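/* Illustrative note (added commentary, not from the original source): with
   the checks above, a 24-byte struct { long a, b, c; } is returned in
   memory (the caller supplies the result address in x8 under AAPCS64),
   a 16-byte struct { long a, b; } comes back in x0/x1, and an HFA such as
   struct { double d[4]; } is returned in d0-d3 even though it is larger
   than 16 bytes, because the HFA/HVA test returns before the size check. */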
1855
1856static bool
ef4bddc2 1857aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
1858 const_tree type, int *nregs)
1859{
1860 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1861 return aarch64_vfp_is_call_or_return_candidate (mode,
1862 type,
1863 &pcum->aapcs_vfp_rmode,
1864 nregs,
1865 NULL);
1866}
1867
1868/* Given MODE and TYPE of a function argument, return the alignment in
1869 bits. The idea is to suppress any stronger alignment requested by
1870 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1871 This is a helper function for local use only. */
1872
1873static unsigned int
ef4bddc2 1874aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192
IB
1875{
1876 unsigned int alignment;
1877
1878 if (type)
1879 {
1880 if (!integer_zerop (TYPE_SIZE (type)))
1881 {
1882 if (TYPE_MODE (type) == mode)
1883 alignment = TYPE_ALIGN (type);
1884 else
1885 alignment = GET_MODE_ALIGNMENT (mode);
1886 }
1887 else
1888 alignment = 0;
1889 }
1890 else
1891 alignment = GET_MODE_ALIGNMENT (mode);
1892
1893 return alignment;
1894}
1895
1896/* Lay out a function argument according to the AAPCS64 rules. The rule
1897 numbers below refer to the corresponding rules in the AAPCS64. */
1898
1899static void
ef4bddc2 1900aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
1901 const_tree type,
1902 bool named ATTRIBUTE_UNUSED)
1903{
1904 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1905 int ncrn, nvrn, nregs;
1906 bool allocate_ncrn, allocate_nvrn;
3abf17cf 1907 HOST_WIDE_INT size;
43e9d192
IB
1908
1909 /* We need to do this once per argument. */
1910 if (pcum->aapcs_arg_processed)
1911 return;
1912
1913 pcum->aapcs_arg_processed = true;
1914
3abf17cf
YZ
1915 /* Size in bytes, rounded up to a multiple of 8 bytes. */
1916 size
1917 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1918 UNITS_PER_WORD);
1919
43e9d192
IB
1920 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1921 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1922 mode,
1923 type,
1924 &nregs);
1925
1926 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1927 The following code thus handles passing in SIMD/FP registers first. */
1928
1929 nvrn = pcum->aapcs_nvrn;
1930
1931 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1932 and homogeneous short-vector aggregates (HVA). */
1933 if (allocate_nvrn)
1934 {
261fb553
AL
1935 if (!TARGET_FLOAT)
1936 aarch64_err_no_fpadvsimd (mode, "argument");
1937
43e9d192
IB
1938 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1939 {
1940 pcum->aapcs_nextnvrn = nvrn + nregs;
1941 if (!aarch64_composite_type_p (type, mode))
1942 {
1943 gcc_assert (nregs == 1);
1944 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1945 }
1946 else
1947 {
1948 rtx par;
1949 int i;
1950 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1951 for (i = 0; i < nregs; i++)
1952 {
1953 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1954 V0_REGNUM + nvrn + i);
1955 tmp = gen_rtx_EXPR_LIST
1956 (VOIDmode, tmp,
1957 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1958 XVECEXP (par, 0, i) = tmp;
1959 }
1960 pcum->aapcs_reg = par;
1961 }
1962 return;
1963 }
1964 else
1965 {
1966 /* C.3 NSRN is set to 8. */
1967 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1968 goto on_stack;
1969 }
1970 }
1971
1972 ncrn = pcum->aapcs_ncrn;
3abf17cf 1973 nregs = size / UNITS_PER_WORD;
43e9d192
IB
1974
1975 /* C6 - C9, though the sign and zero extension semantics are
1976 handled elsewhere. This is the case where the argument fits
1977 entirely in general registers. */
1978 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1979 {
1980 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1981
1982 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1983
1984 /* C.8 if the argument has an alignment of 16 then the NGRN is
1985 rounded up to the next even number. */
1986 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1987 {
1988 ++ncrn;
1989 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1990 }
1991 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1992 A reg is still generated for it, but the caller should be smart
1993 enough not to use it. */
1994 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1995 {
1996 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1997 }
1998 else
1999 {
2000 rtx par;
2001 int i;
2002
2003 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2004 for (i = 0; i < nregs; i++)
2005 {
2006 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
2007 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2008 GEN_INT (i * UNITS_PER_WORD));
2009 XVECEXP (par, 0, i) = tmp;
2010 }
2011 pcum->aapcs_reg = par;
2012 }
2013
2014 pcum->aapcs_nextncrn = ncrn + nregs;
2015 return;
2016 }
2017
2018 /* C.11 */
2019 pcum->aapcs_nextncrn = NUM_ARG_REGS;
2020
2021 /* The argument is passed on the stack; record the number of words needed for
3abf17cf 2022 this argument and align the total size if necessary. */
43e9d192 2023on_stack:
3abf17cf 2024 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
43e9d192
IB
2025 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
2026 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
3abf17cf 2027 16 / UNITS_PER_WORD);
43e9d192
IB
2028 return;
2029}
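/* Worked example (added commentary, not from the original source): for
       void f (int a, double d, struct { float x, y; } h, __int128 q);
   the layout above assigns a to w0, d to v0, h - an HFA of two floats -
   to s1 and s2, and q, whose alignment is 16, to the x2/x3 pair after
   the C.8 rule rounds the NGRN up from 1 to 2. */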
2030
2031/* Implement TARGET_FUNCTION_ARG. */
2032
2033static rtx
ef4bddc2 2034aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2035 const_tree type, bool named)
2036{
2037 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2038 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
2039
2040 if (mode == VOIDmode)
2041 return NULL_RTX;
2042
2043 aarch64_layout_arg (pcum_v, mode, type, named);
2044 return pcum->aapcs_reg;
2045}
2046
2047void
2048aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
2049 const_tree fntype ATTRIBUTE_UNUSED,
2050 rtx libname ATTRIBUTE_UNUSED,
2051 const_tree fndecl ATTRIBUTE_UNUSED,
2052 unsigned n_named ATTRIBUTE_UNUSED)
2053{
2054 pcum->aapcs_ncrn = 0;
2055 pcum->aapcs_nvrn = 0;
2056 pcum->aapcs_nextncrn = 0;
2057 pcum->aapcs_nextnvrn = 0;
2058 pcum->pcs_variant = ARM_PCS_AAPCS64;
2059 pcum->aapcs_reg = NULL_RTX;
2060 pcum->aapcs_arg_processed = false;
2061 pcum->aapcs_stack_words = 0;
2062 pcum->aapcs_stack_size = 0;
2063
261fb553
AL
2064 if (!TARGET_FLOAT
2065 && fndecl && TREE_PUBLIC (fndecl)
2066 && fntype && fntype != error_mark_node)
2067 {
2068 const_tree type = TREE_TYPE (fntype);
2069 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
2070 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
2071 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
2072 &mode, &nregs, NULL))
2073 aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type");
2074 }
43e9d192
IB
2075 return;
2076}
2077
2078static void
2079aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 2080 machine_mode mode,
43e9d192
IB
2081 const_tree type,
2082 bool named)
2083{
2084 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2085 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
2086 {
2087 aarch64_layout_arg (pcum_v, mode, type, named);
2088 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
2089 != (pcum->aapcs_stack_words != 0));
2090 pcum->aapcs_arg_processed = false;
2091 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
2092 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
2093 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
2094 pcum->aapcs_stack_words = 0;
2095 pcum->aapcs_reg = NULL_RTX;
2096 }
2097}
2098
2099bool
2100aarch64_function_arg_regno_p (unsigned regno)
2101{
2102 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
2103 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
2104}
2105
2106/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
2107 PARM_BOUNDARY bits of alignment, but will be given anything up
2108 to STACK_BOUNDARY bits if the type requires it. This makes sure
2109 that both before and after the layout of each argument, the Next
2110 Stacked Argument Address (NSAA) will have a minimum alignment of
2111 8 bytes. */
2112
2113static unsigned int
ef4bddc2 2114aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192
IB
2115{
2116 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2117
2118 if (alignment < PARM_BOUNDARY)
2119 alignment = PARM_BOUNDARY;
2120 if (alignment > STACK_BOUNDARY)
2121 alignment = STACK_BOUNDARY;
2122 return alignment;
2123}
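/* Illustrative note (added commentary): with the usual AArch64 definitions
   of PARM_BOUNDARY (64) and STACK_BOUNDARY (128), a char argument is still
   given a 64-bit slot boundary here, while a type requesting 256-bit
   alignment is clamped to 128 bits, keeping the NSAA 8-byte aligned as
   described above. */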
2124
2125/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
2126
2127 Return true if an argument passed on the stack should be padded upwards,
2128 i.e. if the least-significant byte of the stack slot has useful data.
2129
2130 Small aggregate types are placed at the lowest memory address.
2131
2132 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
2133
2134bool
ef4bddc2 2135aarch64_pad_arg_upward (machine_mode mode, const_tree type)
43e9d192
IB
2136{
2137 /* On little-endian targets, the least significant byte of every stack
2138 argument is passed at the lowest byte address of the stack slot. */
2139 if (!BYTES_BIG_ENDIAN)
2140 return true;
2141
00edcfbe 2142 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
2143 the least significant byte of a stack argument is passed at the highest
2144 byte address of the stack slot. */
2145 if (type
00edcfbe
YZ
2146 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
2147 || POINTER_TYPE_P (type))
43e9d192
IB
2148 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
2149 return false;
2150
2151 /* Everything else is padded upward, i.e. the data is in the first byte of the stack slot. */
2152 return true;
2153}
2154
2155/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
2156
2157 It specifies padding for the last (possibly the only)
2158 element of a block move between registers and memory. Assuming
2159 the block is in memory, padding upward means that the last
2160 element is padded after its most significant byte, while with
2161 downward padding the last element is padded at its least
2162 significant byte side.
2163
2164 Small aggregates and small complex types are always padded
2165 upwards.
2166
2167 We don't need to worry about homogeneous floating-point or
2168 short-vector aggregates; their move is not affected by the
2169 padding direction determined here. Regardless of endianness,
2170 each element of such an aggregate is put in the least
2171 significant bits of a fp/simd register.
2172
2173 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2174 register has useful data, and return the opposite if the most
2175 significant byte does. */
2176
2177bool
ef4bddc2 2178aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
2179 bool first ATTRIBUTE_UNUSED)
2180{
2181
2182 /* Small composite types are always padded upward. */
2183 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2184 {
2185 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2186 : GET_MODE_SIZE (mode));
2187 if (size < 2 * UNITS_PER_WORD)
2188 return true;
2189 }
2190
2191 /* Otherwise, use the default padding. */
2192 return !BYTES_BIG_ENDIAN;
2193}
2194
ef4bddc2 2195static machine_mode
43e9d192
IB
2196aarch64_libgcc_cmp_return_mode (void)
2197{
2198 return SImode;
2199}
2200
2201static bool
2202aarch64_frame_pointer_required (void)
2203{
0b7f8166
MS
2204 /* In aarch64_override_options_after_change
2205 flag_omit_leaf_frame_pointer turns off the frame pointer by
2206 default. Turn it back on now if we've not got a leaf
2207 function. */
2208 if (flag_omit_leaf_frame_pointer
2209 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2210 return true;
43e9d192 2211
0b7f8166 2212 return false;
43e9d192
IB
2213}
2214
2215/* Mark the registers that need to be saved by the callee and calculate
2216 the size of the callee-saved registers area and frame record (both FP
2217 and LR may be omitted). */
2218static void
2219aarch64_layout_frame (void)
2220{
2221 HOST_WIDE_INT offset = 0;
2222 int regno;
2223
2224 if (reload_completed && cfun->machine->frame.laid_out)
2225 return;
2226
97826595
MS
2227#define SLOT_NOT_REQUIRED (-2)
2228#define SLOT_REQUIRED (-1)
2229
363ffa50
JW
2230 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
2231 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
2232
43e9d192
IB
2233 /* First mark all the registers that really need to be saved... */
2234 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2235 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2236
2237 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2238 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2239
2240 /* ... that includes the eh data registers (if needed)... */
2241 if (crtl->calls_eh_return)
2242 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
2243 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2244 = SLOT_REQUIRED;
43e9d192
IB
2245
2246 /* ... and any callee saved register that dataflow says is live. */
2247 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2248 if (df_regs_ever_live_p (regno)
1c923b60
JW
2249 && (regno == R30_REGNUM
2250 || !call_used_regs[regno]))
97826595 2251 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2252
2253 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2254 if (df_regs_ever_live_p (regno)
2255 && !call_used_regs[regno])
97826595 2256 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2257
2258 if (frame_pointer_needed)
2259 {
2e1cdae5 2260 /* FP and LR are placed in the linkage record. */
43e9d192 2261 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 2262 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 2263 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 2264 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
43e9d192 2265 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
2e1cdae5 2266 offset += 2 * UNITS_PER_WORD;
43e9d192
IB
2267 }
2268
2269 /* Now assign stack slots for them. */
2e1cdae5 2270 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2271 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2272 {
2273 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
2274 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2275 cfun->machine->frame.wb_candidate1 = regno;
2276 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
2277 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2278 offset += UNITS_PER_WORD;
2279 }
2280
2281 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2282 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2283 {
2284 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
2285 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2286 cfun->machine->frame.wb_candidate1 = regno;
2287 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
2288 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2289 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2290 offset += UNITS_PER_WORD;
2291 }
2292
43e9d192
IB
2293 cfun->machine->frame.padding0 =
2294 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
2295 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2296
2297 cfun->machine->frame.saved_regs_size = offset;
1c960e02
MS
2298
2299 cfun->machine->frame.hard_fp_offset
2300 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
2301 + get_frame_size ()
2302 + cfun->machine->frame.saved_regs_size,
2303 STACK_BOUNDARY / BITS_PER_UNIT);
2304
2305 cfun->machine->frame.frame_size
2306 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
2307 + crtl->outgoing_args_size,
2308 STACK_BOUNDARY / BITS_PER_UNIT);
2309
43e9d192
IB
2310 cfun->machine->frame.laid_out = true;
2311}
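/* Worked example (added commentary, not from the original source): for a
   function that needs a frame pointer, has 16 bytes of locals, no varargs
   save area, and must save x19 and d8, the loops above give
       reg_offset[x29] = 0, reg_offset[x30] = 8,
       reg_offset[x19] = 16, reg_offset[d8] = 24,
   so saved_regs_size = 32, hard_fp_offset = ROUND_UP (16 + 32, 16) = 48
   and, with no outgoing arguments, frame_size = 48. */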
2312
43e9d192
IB
2313static bool
2314aarch64_register_saved_on_entry (int regno)
2315{
97826595 2316 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
2317}
2318
64dedd72
JW
2319static unsigned
2320aarch64_next_callee_save (unsigned regno, unsigned limit)
2321{
2322 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2323 regno ++;
2324 return regno;
2325}
43e9d192 2326
c5e1f66e 2327static void
ef4bddc2 2328aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
2329 HOST_WIDE_INT adjustment)
2330 {
2331 rtx base_rtx = stack_pointer_rtx;
2332 rtx insn, reg, mem;
2333
2334 reg = gen_rtx_REG (mode, regno);
2335 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2336 plus_constant (Pmode, base_rtx, -adjustment));
2337 mem = gen_rtx_MEM (mode, mem);
2338
2339 insn = emit_move_insn (mem, reg);
2340 RTX_FRAME_RELATED_P (insn) = 1;
2341}
2342
80c11907 2343static rtx
ef4bddc2 2344aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
2345 HOST_WIDE_INT adjustment)
2346{
2347 switch (mode)
2348 {
2349 case DImode:
2350 return gen_storewb_pairdi_di (base, base, reg, reg2,
2351 GEN_INT (-adjustment),
2352 GEN_INT (UNITS_PER_WORD - adjustment));
2353 case DFmode:
2354 return gen_storewb_pairdf_di (base, base, reg, reg2,
2355 GEN_INT (-adjustment),
2356 GEN_INT (UNITS_PER_WORD - adjustment));
2357 default:
2358 gcc_unreachable ();
2359 }
2360}
2361
2362static void
ef4bddc2 2363aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
80c11907
JW
2364 unsigned regno2, HOST_WIDE_INT adjustment)
2365{
5d8a22a5 2366 rtx_insn *insn;
80c11907
JW
2367 rtx reg1 = gen_rtx_REG (mode, regno1);
2368 rtx reg2 = gen_rtx_REG (mode, regno2);
2369
2370 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2371 reg2, adjustment));
2372 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
2373 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2374 RTX_FRAME_RELATED_P (insn) = 1;
2375}
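/* Illustrative note (added commentary): a call such as
       aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, 32);
   emits the pre-index write-back store pair that assembles to
       stp x29, x30, [sp, #-32]!
   with the constituent sets marked frame-related for CFI generation. */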
2376
159313d9 2377static rtx
ef4bddc2 2378aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
2379 HOST_WIDE_INT adjustment)
2380{
2381 switch (mode)
2382 {
2383 case DImode:
2384 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2385 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2386 case DFmode:
2387 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2388 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2389 default:
2390 gcc_unreachable ();
2391 }
2392}
2393
72df5c1f 2394static rtx
ef4bddc2 2395aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
2396 rtx reg2)
2397{
2398 switch (mode)
2399 {
2400 case DImode:
2401 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2402
2403 case DFmode:
2404 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2405
2406 default:
2407 gcc_unreachable ();
2408 }
2409}
2410
2411static rtx
ef4bddc2 2412aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
2413 rtx mem2)
2414{
2415 switch (mode)
2416 {
2417 case DImode:
2418 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2419
2420 case DFmode:
2421 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2422
2423 default:
2424 gcc_unreachable ();
2425 }
2426}
2427
43e9d192 2428
43e9d192 2429static void
ef4bddc2 2430aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
ae13fce3 2431 unsigned start, unsigned limit, bool skip_wb)
43e9d192 2432{
5d8a22a5 2433 rtx_insn *insn;
ef4bddc2 2434 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
a007a21c 2435 ? gen_frame_mem : gen_rtx_MEM);
43e9d192
IB
2436 unsigned regno;
2437 unsigned regno2;
2438
0ec74a1e 2439 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
2440 regno <= limit;
2441 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 2442 {
ae13fce3
JW
2443 rtx reg, mem;
2444 HOST_WIDE_INT offset;
64dedd72 2445
ae13fce3
JW
2446 if (skip_wb
2447 && (regno == cfun->machine->frame.wb_candidate1
2448 || regno == cfun->machine->frame.wb_candidate2))
2449 continue;
2450
2451 reg = gen_rtx_REG (mode, regno);
2452 offset = start_offset + cfun->machine->frame.reg_offset[regno];
0ec74a1e
JW
2453 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2454 offset));
64dedd72
JW
2455
2456 regno2 = aarch64_next_callee_save (regno + 1, limit);
2457
2458 if (regno2 <= limit
2459 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2460 == cfun->machine->frame.reg_offset[regno2]))
2461
43e9d192 2462 {
0ec74a1e 2463 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
2464 rtx mem2;
2465
2466 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
8ed2fc62
JW
2467 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2468 offset));
2469 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2470 reg2));
0b4a9743 2471
64dedd72
JW
2472 /* The first part of a frame-related parallel insn is
2473 always assumed to be relevant to the frame
2474 calculations; subsequent parts are only
2475 frame-related if explicitly marked. */
2476 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2477 regno = regno2;
2478 }
2479 else
8ed2fc62
JW
2480 insn = emit_move_insn (mem, reg);
2481
2482 RTX_FRAME_RELATED_P (insn) = 1;
2483 }
2484}
2485
2486static void
ef4bddc2 2487aarch64_restore_callee_saves (machine_mode mode,
8ed2fc62 2488 HOST_WIDE_INT start_offset, unsigned start,
dd991abb 2489 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 2490{
8ed2fc62 2491 rtx base_rtx = stack_pointer_rtx;
ef4bddc2 2492 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
8ed2fc62
JW
2493 ? gen_frame_mem : gen_rtx_MEM);
2494 unsigned regno;
2495 unsigned regno2;
2496 HOST_WIDE_INT offset;
2497
2498 for (regno = aarch64_next_callee_save (start, limit);
2499 regno <= limit;
2500 regno = aarch64_next_callee_save (regno + 1, limit))
2501 {
ae13fce3 2502 rtx reg, mem;
8ed2fc62 2503
ae13fce3
JW
2504 if (skip_wb
2505 && (regno == cfun->machine->frame.wb_candidate1
2506 || regno == cfun->machine->frame.wb_candidate2))
2507 continue;
2508
2509 reg = gen_rtx_REG (mode, regno);
8ed2fc62
JW
2510 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2511 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2512
2513 regno2 = aarch64_next_callee_save (regno + 1, limit);
2514
2515 if (regno2 <= limit
2516 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2517 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 2518 {
8ed2fc62
JW
2519 rtx reg2 = gen_rtx_REG (mode, regno2);
2520 rtx mem2;
2521
2522 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2523 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 2524 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 2525
dd991abb 2526 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 2527 regno = regno2;
43e9d192 2528 }
8ed2fc62 2529 else
dd991abb
RH
2530 emit_move_insn (reg, mem);
2531 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 2532 }
43e9d192
IB
2533}
2534
2535/* AArch64 stack frames generated by this compiler look like:
2536
2537 +-------------------------------+
2538 | |
2539 | incoming stack arguments |
2540 | |
34834420
MS
2541 +-------------------------------+
2542 | | <-- incoming stack pointer (aligned)
43e9d192
IB
2543 | callee-allocated save area |
2544 | for register varargs |
2545 | |
34834420
MS
2546 +-------------------------------+
2547 | local variables | <-- frame_pointer_rtx
43e9d192
IB
2548 | |
2549 +-------------------------------+
454fdba9
RL
2550 | padding0 | \
2551 +-------------------------------+ |
454fdba9 2552 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
2553 +-------------------------------+ |
2554 | LR' | |
2555 +-------------------------------+ |
34834420
MS
2556 | FP' | / <- hard_frame_pointer_rtx (aligned)
2557 +-------------------------------+
43e9d192
IB
2558 | dynamic allocation |
2559 +-------------------------------+
34834420
MS
2560 | padding |
2561 +-------------------------------+
2562 | outgoing stack arguments | <-- arg_pointer
2563 | |
2564 +-------------------------------+
2565 | | <-- stack_pointer_rtx (aligned)
43e9d192 2566
34834420
MS
2567 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2568 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2569 unchanged. */
43e9d192
IB
2570
2571/* Generate the prologue instructions for entry into a function.
2572 Establish the stack frame by decreasing the stack pointer with a
2573 properly calculated size and, if necessary, create a frame record
2574 filled with the values of LR and previous frame pointer. The
6991c977 2575 current FP is also set up if it is in use. */
43e9d192
IB
2576
2577void
2578aarch64_expand_prologue (void)
2579{
2580 /* sub sp, sp, #<frame_size>
2581 stp {fp, lr}, [sp, #<frame_size> - 16]
2582 add fp, sp, #<frame_size> - hardfp_offset
2583 stp {cs_reg}, [fp, #-16] etc.
2584
2585 sub sp, sp, <final_adjustment_if_any>
2586 */
43e9d192 2587 HOST_WIDE_INT frame_size, offset;
1c960e02 2588 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
dd991abb 2589 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2590 rtx_insn *insn;
43e9d192
IB
2591
2592 aarch64_layout_frame ();
43e9d192 2593
dd991abb
RH
2594 offset = frame_size = cfun->machine->frame.frame_size;
2595 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2596 fp_offset = frame_size - hard_fp_offset;
43e9d192 2597
dd991abb
RH
2598 if (flag_stack_usage_info)
2599 current_function_static_stack_size = frame_size;
43e9d192 2600
44c0e7b9 2601 /* Store pairs and load pairs have a range of only -512 to 504. */
43e9d192
IB
2602 if (offset >= 512)
2603 {
2604 /* When the frame is large, the stack pointer is first decreased to
2605 step over the callee-allocated save area for register varargs, the
2606 local variable area and/or the callee-saved register area. This
2607 allows the pre-index write-back
2608 store pair instructions to be used for setting up the stack frame
2609 efficiently. */
dd991abb 2610 offset = hard_fp_offset;
43e9d192
IB
2611 if (offset >= 512)
2612 offset = cfun->machine->frame.saved_regs_size;
2613
2614 frame_size -= (offset + crtl->outgoing_args_size);
2615 fp_offset = 0;
2616
2617 if (frame_size >= 0x1000000)
2618 {
2619 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2620 emit_move_insn (op0, GEN_INT (-frame_size));
dd991abb
RH
2621 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2622
2623 add_reg_note (insn, REG_CFA_ADJUST_CFA,
f7df4a84 2624 gen_rtx_SET (stack_pointer_rtx,
dd991abb
RH
2625 plus_constant (Pmode, stack_pointer_rtx,
2626 -frame_size)));
2627 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
2628 }
2629 else if (frame_size > 0)
2630 {
dd991abb
RH
2631 int hi_ofs = frame_size & 0xfff000;
2632 int lo_ofs = frame_size & 0x000fff;
2633
2634 if (hi_ofs)
43e9d192
IB
2635 {
2636 insn = emit_insn (gen_add2_insn
dd991abb 2637 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
43e9d192
IB
2638 RTX_FRAME_RELATED_P (insn) = 1;
2639 }
dd991abb 2640 if (lo_ofs)
43e9d192
IB
2641 {
2642 insn = emit_insn (gen_add2_insn
dd991abb 2643 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
43e9d192
IB
2644 RTX_FRAME_RELATED_P (insn) = 1;
2645 }
2646 }
2647 }
2648 else
2649 frame_size = -1;
2650
2651 if (offset > 0)
2652 {
ae13fce3
JW
2653 bool skip_wb = false;
2654
43e9d192
IB
2655 if (frame_pointer_needed)
2656 {
c5e1f66e
JW
2657 skip_wb = true;
2658
43e9d192
IB
2659 if (fp_offset)
2660 {
2661 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2662 GEN_INT (-offset)));
2663 RTX_FRAME_RELATED_P (insn) = 1;
80c11907
JW
2664
2665 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
c5e1f66e 2666 R30_REGNUM, false);
43e9d192
IB
2667 }
2668 else
80c11907 2669 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
43e9d192
IB
2670
2671 /* Set up frame pointer to point to the location of the
2672 previous frame pointer on the stack. */
2673 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2674 stack_pointer_rtx,
2675 GEN_INT (fp_offset)));
43e9d192 2676 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 2677 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192
IB
2678 }
2679 else
2680 {
c5e1f66e
JW
2681 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2682 unsigned reg2 = cfun->machine->frame.wb_candidate2;
80c11907 2683
c5e1f66e
JW
2684 if (fp_offset
2685 || reg1 == FIRST_PSEUDO_REGISTER
2686 || (reg2 == FIRST_PSEUDO_REGISTER
2687 && offset >= 256))
2688 {
2689 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2690 GEN_INT (-offset)));
2691 RTX_FRAME_RELATED_P (insn) = 1;
2692 }
2693 else
2694 {
ef4bddc2 2695 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
c5e1f66e
JW
2696
2697 skip_wb = true;
2698
2699 if (reg2 == FIRST_PSEUDO_REGISTER)
2700 aarch64_pushwb_single_reg (mode1, reg1, offset);
2701 else
2702 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2703 }
43e9d192
IB
2704 }
2705
c5e1f66e
JW
2706 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2707 skip_wb);
ae13fce3
JW
2708 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2709 skip_wb);
43e9d192
IB
2710 }
2711
2712 /* When offset >= 512,
2713 sub sp, sp, #<outgoing_args_size> */
2714 if (frame_size > -1)
2715 {
2716 if (crtl->outgoing_args_size > 0)
2717 {
2718 insn = emit_insn (gen_add2_insn
2719 (stack_pointer_rtx,
2720 GEN_INT (- crtl->outgoing_args_size)));
2721 RTX_FRAME_RELATED_P (insn) = 1;
2722 }
2723 }
2724}
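/* Illustrative note (added commentary, not from the original source): for a
   small 32-byte frame with a frame pointer and x19 to be saved, the code
   above emits approximately
       stp x29, x30, [sp, #-32]!
       add x29, sp, 0
       str x19, [sp, #16]
   whereas frames of 512 bytes or more first subtract the bulk of the frame
   from sp so that the write-back store pair offset stays in range. */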
2725
4f942779
RL
2726/* Return TRUE if we can use a simple_return insn.
2727
2728 This function checks whether the callee-saved stack is empty, which
2729 means no restore actions are needed. The pro_and_epilogue pass uses
2730 this to check whether the shrink-wrapping optimization is feasible. */
2731
2732bool
2733aarch64_use_return_insn_p (void)
2734{
2735 if (!reload_completed)
2736 return false;
2737
2738 if (crtl->profile)
2739 return false;
2740
2741 aarch64_layout_frame ();
2742
2743 return cfun->machine->frame.frame_size == 0;
2744}
2745
43e9d192
IB
2746/* Generate the epilogue instructions for returning from a function. */
2747void
2748aarch64_expand_epilogue (bool for_sibcall)
2749{
1c960e02 2750 HOST_WIDE_INT frame_size, offset;
43e9d192 2751 HOST_WIDE_INT fp_offset;
dd991abb 2752 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2753 rtx_insn *insn;
7e8c2bd5
JW
2754 /* We need to add a memory barrier to prevent reads from the deallocated stack. */
2755 bool need_barrier_p = (get_frame_size () != 0
2756 || cfun->machine->frame.saved_varargs_size);
43e9d192
IB
2757
2758 aarch64_layout_frame ();
43e9d192 2759
1c960e02 2760 offset = frame_size = cfun->machine->frame.frame_size;
dd991abb
RH
2761 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2762 fp_offset = frame_size - hard_fp_offset;
44c0e7b9
YZ
2763
2764 /* Store pairs and load pairs have a range of only -512 to 504. */
43e9d192
IB
2765 if (offset >= 512)
2766 {
dd991abb 2767 offset = hard_fp_offset;
43e9d192
IB
2768 if (offset >= 512)
2769 offset = cfun->machine->frame.saved_regs_size;
2770
2771 frame_size -= (offset + crtl->outgoing_args_size);
2772 fp_offset = 0;
2773 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2774 {
2775 insn = emit_insn (gen_add2_insn
2776 (stack_pointer_rtx,
2777 GEN_INT (crtl->outgoing_args_size)));
2778 RTX_FRAME_RELATED_P (insn) = 1;
2779 }
2780 }
2781 else
2782 frame_size = -1;
2783
2784 /* If there were outgoing arguments or we've done dynamic stack
2785 allocation, then restore the stack pointer from the frame
2786 pointer. This is at most one insn and more efficient than using
2787 GCC's internal mechanism. */
2788 if (frame_pointer_needed
2789 && (crtl->outgoing_args_size || cfun->calls_alloca))
2790 {
7e8c2bd5
JW
2791 if (cfun->calls_alloca)
2792 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2793
43e9d192
IB
2794 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2795 hard_frame_pointer_rtx,
8f454e9f
JW
2796 GEN_INT (0)));
2797 offset = offset - fp_offset;
43e9d192
IB
2798 }
2799
43e9d192
IB
2800 if (offset > 0)
2801 {
4b92caa1
JW
2802 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2803 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2804 bool skip_wb = true;
dd991abb 2805 rtx cfi_ops = NULL;
4b92caa1 2806
43e9d192 2807 if (frame_pointer_needed)
4b92caa1
JW
2808 fp_offset = 0;
2809 else if (fp_offset
2810 || reg1 == FIRST_PSEUDO_REGISTER
2811 || (reg2 == FIRST_PSEUDO_REGISTER
2812 && offset >= 256))
2813 skip_wb = false;
2814
2815 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
dd991abb 2816 skip_wb, &cfi_ops);
4b92caa1 2817 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
dd991abb 2818 skip_wb, &cfi_ops);
4b92caa1 2819
7e8c2bd5
JW
2820 if (need_barrier_p)
2821 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2822
4b92caa1 2823 if (skip_wb)
43e9d192 2824 {
ef4bddc2 2825 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
dd991abb 2826 rtx rreg1 = gen_rtx_REG (mode1, reg1);
4b92caa1 2827
dd991abb 2828 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
4b92caa1 2829 if (reg2 == FIRST_PSEUDO_REGISTER)
dd991abb
RH
2830 {
2831 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2832 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2833 mem = gen_rtx_MEM (mode1, mem);
2834 insn = emit_move_insn (rreg1, mem);
2835 }
4b92caa1
JW
2836 else
2837 {
dd991abb 2838 rtx rreg2 = gen_rtx_REG (mode1, reg2);
4b92caa1 2839
dd991abb
RH
2840 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2841 insn = emit_insn (aarch64_gen_loadwb_pair
2842 (mode1, stack_pointer_rtx, rreg1,
2843 rreg2, offset));
4b92caa1 2844 }
43e9d192 2845 }
43e9d192
IB
2846 else
2847 {
2848 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2849 GEN_INT (offset)));
43e9d192 2850 }
43e9d192 2851
dd991abb
RH
2852 /* Reset the CFA to be SP + FRAME_SIZE. */
2853 rtx new_cfa = stack_pointer_rtx;
2854 if (frame_size > 0)
2855 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2856 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2857 REG_NOTES (insn) = cfi_ops;
43e9d192 2858 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
2859 }
2860
dd991abb 2861 if (frame_size > 0)
43e9d192 2862 {
7e8c2bd5
JW
2863 if (need_barrier_p)
2864 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2865
43e9d192
IB
2866 if (frame_size >= 0x1000000)
2867 {
2868 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2869 emit_move_insn (op0, GEN_INT (frame_size));
dd991abb 2870 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
43e9d192 2871 }
dd991abb 2872 else
43e9d192 2873 {
dd991abb
RH
2874 int hi_ofs = frame_size & 0xfff000;
2875 int lo_ofs = frame_size & 0x000fff;
2876
2877 if (hi_ofs && lo_ofs)
43e9d192
IB
2878 {
2879 insn = emit_insn (gen_add2_insn
dd991abb 2880 (stack_pointer_rtx, GEN_INT (hi_ofs)));
43e9d192 2881 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 2882 frame_size = lo_ofs;
43e9d192 2883 }
dd991abb
RH
2884 insn = emit_insn (gen_add2_insn
2885 (stack_pointer_rtx, GEN_INT (frame_size)));
43e9d192
IB
2886 }
2887
dd991abb
RH
2888 /* Reset the CFA to be SP + 0. */
2889 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2890 RTX_FRAME_RELATED_P (insn) = 1;
2891 }
2892
2893 /* Stack adjustment for exception handler. */
2894 if (crtl->calls_eh_return)
2895 {
2896 /* We need to unwind the stack by the offset computed by
2897 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2898 to be SP; letting the CFA move during this adjustment
2899 is just as correct as retaining the CFA from the body
2900 of the function. Therefore, do nothing special. */
2901 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
2902 }
2903
2904 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2905 if (!for_sibcall)
2906 emit_jump_insn (ret_rtx);
2907}
2908
2909/* Return the place to copy the exception unwinding return address to.
2910 This will probably be a stack slot, but could (in theory) be the
2911 return register. */
2912rtx
2913aarch64_final_eh_return_addr (void)
2914{
1c960e02
MS
2915 HOST_WIDE_INT fp_offset;
2916
43e9d192 2917 aarch64_layout_frame ();
1c960e02
MS
2918
2919 fp_offset = cfun->machine->frame.frame_size
2920 - cfun->machine->frame.hard_fp_offset;
43e9d192
IB
2921
2922 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2923 return gen_rtx_REG (DImode, LR_REGNUM);
2924
2925 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2926 result in a store to save LR introduced by builtin_eh_return () being
2927 incorrectly deleted because the alias is not detected.
2928 So in the calculation of the address to copy the exception unwinding
2929 return address to, we note 2 cases.
2930 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2931 we return a SP-relative location since all the addresses are SP-relative
2932 in this case. This prevents the store from being optimized away.
2933 If the fp_offset is not 0, then the addresses will be FP-relative and
2934 therefore we return a FP-relative location. */
2935
2936 if (frame_pointer_needed)
2937 {
2938 if (fp_offset)
2939 return gen_frame_mem (DImode,
2940 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2941 else
2942 return gen_frame_mem (DImode,
2943 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2944 }
2945
2946 /* If FP is not needed, we calculate the location of LR, which would be
2947 at the top of the saved registers block. */
2948
2949 return gen_frame_mem (DImode,
2950 plus_constant (Pmode,
2951 stack_pointer_rtx,
2952 fp_offset
2953 + cfun->machine->frame.saved_regs_size
2954 - 2 * UNITS_PER_WORD));
2955}
2956
9dfc162c
JG
2957/* Possibly output code to build up a constant in a register. For
2958 the benefit of the costs infrastructure, returns the number of
2959 instructions which would be emitted. GENERATE inhibits or
2960 enables code generation. */
2961
2962static int
2963aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
43e9d192 2964{
9dfc162c
JG
2965 int insns = 0;
2966
43e9d192 2967 if (aarch64_bitmask_imm (val, DImode))
9dfc162c
JG
2968 {
2969 if (generate)
2970 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2971 insns = 1;
2972 }
43e9d192
IB
2973 else
2974 {
2975 int i;
2976 int ncount = 0;
2977 int zcount = 0;
2978 HOST_WIDE_INT valp = val >> 16;
2979 HOST_WIDE_INT valm;
2980 HOST_WIDE_INT tval;
2981
2982 for (i = 16; i < 64; i += 16)
2983 {
2984 valm = (valp & 0xffff);
2985
2986 if (valm != 0)
2987 ++ zcount;
2988
2989 if (valm != 0xffff)
2990 ++ ncount;
2991
2992 valp >>= 16;
2993 }
2994
2995 /* zcount contains the number of additional MOVK instructions
2996 required if the constant is built up with an initial MOVZ instruction,
2997 while ncount is the number of MOVK instructions required if starting
2998 with a MOVN instruction. Choose the sequence that yields the fewest
2999 instructions, preferring MOVZ instructions when the two counts are
3000 the same. */
3001 if (ncount < zcount)
3002 {
9dfc162c
JG
3003 if (generate)
3004 emit_move_insn (gen_rtx_REG (Pmode, regnum),
3005 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
43e9d192 3006 tval = 0xffff;
9dfc162c 3007 insns++;
43e9d192
IB
3008 }
3009 else
3010 {
9dfc162c
JG
3011 if (generate)
3012 emit_move_insn (gen_rtx_REG (Pmode, regnum),
3013 GEN_INT (val & 0xffff));
43e9d192 3014 tval = 0;
9dfc162c 3015 insns++;
43e9d192
IB
3016 }
3017
3018 val >>= 16;
3019
3020 for (i = 16; i < 64; i += 16)
3021 {
3022 if ((val & 0xffff) != tval)
9dfc162c
JG
3023 {
3024 if (generate)
3025 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
3026 GEN_INT (i),
3027 GEN_INT (val & 0xffff)));
3028 insns++;
3029 }
43e9d192
IB
3030 val >>= 16;
3031 }
3032 }
9dfc162c 3033 return insns;
43e9d192
IB
3034}
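/* Worked example (added commentary): for val = 0x1234000000005678 the three
   upper 16-bit chunks give zcount = 1 and ncount = 3, so the MOVZ path is
   chosen and two instructions are emitted:
       mov  x<regnum>, #0x5678
       movk x<regnum>, #0x1234, lsl #32
   whereas a mostly-ones value such as 0xffffffffffff1234 takes the MOVN
   path and needs only a single instruction. */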
3035
3036static void
d9600ae5 3037aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
43e9d192
IB
3038{
3039 HOST_WIDE_INT mdelta = delta;
d9600ae5
SN
3040 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
3041 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
43e9d192
IB
3042
3043 if (mdelta < 0)
3044 mdelta = -mdelta;
3045
3046 if (mdelta >= 4096 * 4096)
3047 {
9dfc162c 3048 (void) aarch64_build_constant (scratchreg, delta, true);
d9600ae5 3049 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
43e9d192
IB
3050 }
3051 else if (mdelta > 0)
3052 {
43e9d192 3053 if (mdelta >= 4096)
d9600ae5 3054 {
f7df4a84 3055 emit_insn (gen_rtx_SET (scratch_rtx, GEN_INT (mdelta / 4096)));
d9600ae5
SN
3056 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
3057 if (delta < 0)
f7df4a84 3058 emit_insn (gen_rtx_SET (this_rtx,
d9600ae5
SN
3059 gen_rtx_MINUS (Pmode, this_rtx, shift)));
3060 else
f7df4a84 3061 emit_insn (gen_rtx_SET (this_rtx,
d9600ae5
SN
3062 gen_rtx_PLUS (Pmode, this_rtx, shift)));
3063 }
43e9d192 3064 if (mdelta % 4096 != 0)
d9600ae5
SN
3065 {
3066 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
f7df4a84 3067 emit_insn (gen_rtx_SET (this_rtx,
d9600ae5
SN
3068 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
3069 }
43e9d192
IB
3070 }
3071}
3072
3073/* Output code to add DELTA to the first argument, and then jump
3074 to FUNCTION. Used for C++ multiple inheritance. */
3075static void
3076aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
3077 HOST_WIDE_INT delta,
3078 HOST_WIDE_INT vcall_offset,
3079 tree function)
3080{
3081 /* The this pointer is always in x0. Note that this differs from
3082 Arm, where the this pointer may be bumped to r1 if r0 is required
3083 to return a pointer to an aggregate. On AArch64 a result value
3084 pointer will be in x8. */
3085 int this_regno = R0_REGNUM;
5d8a22a5
DM
3086 rtx this_rtx, temp0, temp1, addr, funexp;
3087 rtx_insn *insn;
43e9d192 3088
75f1d6fc
SN
3089 reload_completed = 1;
3090 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
3091
3092 if (vcall_offset == 0)
d9600ae5 3093 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
3094 else
3095 {
28514dda 3096 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 3097
75f1d6fc
SN
3098 this_rtx = gen_rtx_REG (Pmode, this_regno);
3099 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
3100 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 3101
75f1d6fc
SN
3102 addr = this_rtx;
3103 if (delta != 0)
3104 {
3105 if (delta >= -256 && delta < 256)
3106 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
3107 plus_constant (Pmode, this_rtx, delta));
3108 else
d9600ae5 3109 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
3110 }
3111
28514dda
YZ
3112 if (Pmode == ptr_mode)
3113 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
3114 else
3115 aarch64_emit_move (temp0,
3116 gen_rtx_ZERO_EXTEND (Pmode,
3117 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 3118
28514dda 3119 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 3120 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
3121 else
3122 {
9dfc162c 3123 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
75f1d6fc 3124 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
3125 }
3126
28514dda
YZ
3127 if (Pmode == ptr_mode)
3128 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
3129 else
3130 aarch64_emit_move (temp1,
3131 gen_rtx_SIGN_EXTEND (Pmode,
3132 gen_rtx_MEM (ptr_mode, addr)));
3133
75f1d6fc 3134 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
3135 }
3136
75f1d6fc
SN
3137 /* Generate a tail call to the target function. */
3138 if (!TREE_USED (function))
3139 {
3140 assemble_external (function);
3141 TREE_USED (function) = 1;
3142 }
3143 funexp = XEXP (DECL_RTL (function), 0);
3144 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
3145 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
3146 SIBLING_CALL_P (insn) = 1;
3147
3148 insn = get_insns ();
3149 shorten_branches (insn);
3150 final_start_function (insn, file, 1);
3151 final (insn, file, 1);
43e9d192 3152 final_end_function ();
75f1d6fc
SN
3153
3154 /* Stop pretending to be a post-reload pass. */
3155 reload_completed = 0;
43e9d192
IB
3156}
3157
43e9d192
IB
3158static bool
3159aarch64_tls_referenced_p (rtx x)
3160{
3161 if (!TARGET_HAVE_TLS)
3162 return false;
e7de8563
RS
3163 subrtx_iterator::array_type array;
3164 FOR_EACH_SUBRTX (iter, array, x, ALL)
3165 {
3166 const_rtx x = *iter;
3167 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
3168 return true;
3169 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3170 TLS offsets, not real symbol references. */
3171 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3172 iter.skip_subrtxes ();
3173 }
3174 return false;
43e9d192
IB
3175}
3176
3177
3178static int
3179aarch64_bitmasks_cmp (const void *i1, const void *i2)
3180{
3181 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
3182 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
3183
3184 if (*imm1 < *imm2)
3185 return -1;
3186 if (*imm1 > *imm2)
3187 return +1;
3188 return 0;
3189}
3190
3191
3192static void
3193aarch64_build_bitmask_table (void)
3194{
3195 unsigned HOST_WIDE_INT mask, imm;
3196 unsigned int log_e, e, s, r;
3197 unsigned int nimms = 0;
3198
3199 for (log_e = 1; log_e <= 6; log_e++)
3200 {
3201 e = 1 << log_e;
3202 if (e == 64)
3203 mask = ~(HOST_WIDE_INT) 0;
3204 else
3205 mask = ((HOST_WIDE_INT) 1 << e) - 1;
3206 for (s = 1; s < e; s++)
3207 {
3208 for (r = 0; r < e; r++)
3209 {
3210 /* Set s consecutive bits to 1 (s < 64). */
3211 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
3212 /* Rotate right by r. */
3213 if (r != 0)
3214 imm = ((imm >> r) | (imm << (e - r))) & mask;
3215 /* Replicate the constant depending on SIMD size. */
3216 switch (log_e) {
3217 case 1: imm |= (imm << 2);
3218 case 2: imm |= (imm << 4);
3219 case 3: imm |= (imm << 8);
3220 case 4: imm |= (imm << 16);
3221 case 5: imm |= (imm << 32);
3222 case 6:
3223 break;
3224 default:
3225 gcc_unreachable ();
3226 }
3227 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
3228 aarch64_bitmasks[nimms++] = imm;
3229 }
3230 }
3231 }
3232
3233 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
3234 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
3235 aarch64_bitmasks_cmp);
3236}
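/* Illustrative note (added commentary): the table built above holds exactly
   the values encodable as AArch64 bitmask immediates - a run of consecutive
   ones within a 2/4/8/16/32/64-bit element, rotated and then replicated
   across the register. For example 0x00ff00ff00ff00ff and
   0x0000ffff0000ffff are encodable, while 0x1234 is not, and neither 0 nor
   all-ones is representable. */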
3237
3238
3239/* Return true if val can be encoded as a 12-bit unsigned immediate with
3240 a left shift of 0 or 12 bits. */
3241bool
3242aarch64_uimm12_shift (HOST_WIDE_INT val)
3243{
3244 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3245 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3246 );
3247}
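/* Illustrative note (added commentary): 0xabc and 0xabc000 both satisfy this
   predicate (a 12-bit immediate with LSL #0 or LSL #12), whereas 0x1001
   does not, because its set bits straddle the two halves. */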
3248
3249
3250/* Return true if val is an immediate that can be loaded into a
3251 register by a MOVZ instruction. */
3252static bool
ef4bddc2 3253aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3254{
3255 if (GET_MODE_SIZE (mode) > 4)
3256 {
3257 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3258 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3259 return 1;
3260 }
3261 else
3262 {
3263 /* Ignore sign extension. */
3264 val &= (HOST_WIDE_INT) 0xffffffff;
3265 }
3266 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3267 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3268}
3269
3270
3271/* Return true if val is a valid bitmask immediate. */
3272bool
ef4bddc2 3273aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3274{
3275 if (GET_MODE_SIZE (mode) < 8)
3276 {
3277 /* Replicate bit pattern. */
3278 val &= (HOST_WIDE_INT) 0xffffffff;
3279 val |= val << 32;
3280 }
3281 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
3282 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
3283}
3284
3285
3286/* Return true if val is an immediate that can be loaded into a
3287 register in a single instruction. */
3288bool
ef4bddc2 3289aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3290{
3291 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
3292 return 1;
3293 return aarch64_bitmask_imm (val, mode);
3294}
3295
3296static bool
ef4bddc2 3297aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
3298{
3299 rtx base, offset;
7eda14e1 3300
43e9d192
IB
3301 if (GET_CODE (x) == HIGH)
3302 return true;
3303
3304 split_const (x, &base, &offset);
3305 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 3306 {
f8b756b7 3307 if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
28514dda
YZ
3308 != SYMBOL_FORCE_TO_MEM)
3309 return true;
3310 else
3311 /* Avoid generating a 64-bit relocation in ILP32; leave
3312 to aarch64_expand_mov_immediate to handle it properly. */
3313 return mode != ptr_mode;
3314 }
43e9d192
IB
3315
3316 return aarch64_tls_referenced_p (x);
3317}
3318
3319/* Return true if register REGNO is a valid index register.
3320 STRICT_P is true if REG_OK_STRICT is in effect. */
3321
3322bool
3323aarch64_regno_ok_for_index_p (int regno, bool strict_p)
3324{
3325 if (!HARD_REGISTER_NUM_P (regno))
3326 {
3327 if (!strict_p)
3328 return true;
3329
3330 if (!reg_renumber)
3331 return false;
3332
3333 regno = reg_renumber[regno];
3334 }
3335 return GP_REGNUM_P (regno);
3336}
3337
3338/* Return true if register REGNO is a valid base register for mode MODE.
3339 STRICT_P is true if REG_OK_STRICT is in effect. */
3340
3341bool
3342aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3343{
3344 if (!HARD_REGISTER_NUM_P (regno))
3345 {
3346 if (!strict_p)
3347 return true;
3348
3349 if (!reg_renumber)
3350 return false;
3351
3352 regno = reg_renumber[regno];
3353 }
3354
3355 /* The fake registers will be eliminated to either the stack or
3356 hard frame pointer, both of which are usually valid base registers.
3357 Reload deals with the cases where the eliminated form isn't valid. */
3358 return (GP_REGNUM_P (regno)
3359 || regno == SP_REGNUM
3360 || regno == FRAME_POINTER_REGNUM
3361 || regno == ARG_POINTER_REGNUM);
3362}
3363
3364/* Return true if X is a valid base register for mode MODE.
3365 STRICT_P is true if REG_OK_STRICT is in effect. */
3366
3367static bool
3368aarch64_base_register_rtx_p (rtx x, bool strict_p)
3369{
3370 if (!strict_p && GET_CODE (x) == SUBREG)
3371 x = SUBREG_REG (x);
3372
3373 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3374}
3375
3376/* Return true if address offset is a valid index. If it is, fill in INFO
3377 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3378
3379static bool
3380aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 3381 machine_mode mode, bool strict_p)
43e9d192
IB
3382{
3383 enum aarch64_address_type type;
3384 rtx index;
3385 int shift;
3386
3387 /* (reg:P) */
3388 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3389 && GET_MODE (x) == Pmode)
3390 {
3391 type = ADDRESS_REG_REG;
3392 index = x;
3393 shift = 0;
3394 }
3395 /* (sign_extend:DI (reg:SI)) */
3396 else if ((GET_CODE (x) == SIGN_EXTEND
3397 || GET_CODE (x) == ZERO_EXTEND)
3398 && GET_MODE (x) == DImode
3399 && GET_MODE (XEXP (x, 0)) == SImode)
3400 {
3401 type = (GET_CODE (x) == SIGN_EXTEND)
3402 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3403 index = XEXP (x, 0);
3404 shift = 0;
3405 }
3406 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3407 else if (GET_CODE (x) == MULT
3408 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3409 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3410 && GET_MODE (XEXP (x, 0)) == DImode
3411 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3412 && CONST_INT_P (XEXP (x, 1)))
3413 {
3414 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3415 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3416 index = XEXP (XEXP (x, 0), 0);
3417 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3418 }
3419 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3420 else if (GET_CODE (x) == ASHIFT
3421 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3422 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3423 && GET_MODE (XEXP (x, 0)) == DImode
3424 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3425 && CONST_INT_P (XEXP (x, 1)))
3426 {
3427 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3428 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3429 index = XEXP (XEXP (x, 0), 0);
3430 shift = INTVAL (XEXP (x, 1));
3431 }
3432 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3433 else if ((GET_CODE (x) == SIGN_EXTRACT
3434 || GET_CODE (x) == ZERO_EXTRACT)
3435 && GET_MODE (x) == DImode
3436 && GET_CODE (XEXP (x, 0)) == MULT
3437 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3438 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3439 {
3440 type = (GET_CODE (x) == SIGN_EXTRACT)
3441 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3442 index = XEXP (XEXP (x, 0), 0);
3443 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3444 if (INTVAL (XEXP (x, 1)) != 32 + shift
3445 || INTVAL (XEXP (x, 2)) != 0)
3446 shift = -1;
3447 }
3448 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3449 (const_int 0xffffffff<<shift)) */
3450 else if (GET_CODE (x) == AND
3451 && GET_MODE (x) == DImode
3452 && GET_CODE (XEXP (x, 0)) == MULT
3453 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3454 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3455 && CONST_INT_P (XEXP (x, 1)))
3456 {
3457 type = ADDRESS_REG_UXTW;
3458 index = XEXP (XEXP (x, 0), 0);
3459 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3460 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3461 shift = -1;
3462 }
3463 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3464 else if ((GET_CODE (x) == SIGN_EXTRACT
3465 || GET_CODE (x) == ZERO_EXTRACT)
3466 && GET_MODE (x) == DImode
3467 && GET_CODE (XEXP (x, 0)) == ASHIFT
3468 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3469 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3470 {
3471 type = (GET_CODE (x) == SIGN_EXTRACT)
3472 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3473 index = XEXP (XEXP (x, 0), 0);
3474 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3475 if (INTVAL (XEXP (x, 1)) != 32 + shift
3476 || INTVAL (XEXP (x, 2)) != 0)
3477 shift = -1;
3478 }
3479 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3480 (const_int 0xffffffff<<shift)) */
3481 else if (GET_CODE (x) == AND
3482 && GET_MODE (x) == DImode
3483 && GET_CODE (XEXP (x, 0)) == ASHIFT
3484 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3485 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3486 && CONST_INT_P (XEXP (x, 1)))
3487 {
3488 type = ADDRESS_REG_UXTW;
3489 index = XEXP (XEXP (x, 0), 0);
3490 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3491 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3492 shift = -1;
3493 }
3494 /* (mult:P (reg:P) (const_int scale)) */
3495 else if (GET_CODE (x) == MULT
3496 && GET_MODE (x) == Pmode
3497 && GET_MODE (XEXP (x, 0)) == Pmode
3498 && CONST_INT_P (XEXP (x, 1)))
3499 {
3500 type = ADDRESS_REG_REG;
3501 index = XEXP (x, 0);
3502 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3503 }
3504 /* (ashift:P (reg:P) (const_int shift)) */
3505 else if (GET_CODE (x) == ASHIFT
3506 && GET_MODE (x) == Pmode
3507 && GET_MODE (XEXP (x, 0)) == Pmode
3508 && CONST_INT_P (XEXP (x, 1)))
3509 {
3510 type = ADDRESS_REG_REG;
3511 index = XEXP (x, 0);
3512 shift = INTVAL (XEXP (x, 1));
3513 }
3514 else
3515 return false;
3516
3517 if (GET_CODE (index) == SUBREG)
3518 index = SUBREG_REG (index);
3519
3520 if ((shift == 0 ||
3521 (shift > 0 && shift <= 3
3522 && (1 << shift) == GET_MODE_SIZE (mode)))
3523 && REG_P (index)
3524 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3525 {
3526 info->type = type;
3527 info->offset = index;
3528 info->shift = shift;
3529 return true;
3530 }
3531
3532 return false;
3533}
3534
44707478 3535bool
ef4bddc2 3536aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3537{
3538 return (offset >= -64 * GET_MODE_SIZE (mode)
3539 && offset < 64 * GET_MODE_SIZE (mode)
3540 && offset % GET_MODE_SIZE (mode) == 0);
3541}
3542
3543static inline bool
ef4bddc2 3544offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
3545 HOST_WIDE_INT offset)
3546{
3547 return offset >= -256 && offset < 256;
3548}
3549
3550static inline bool
ef4bddc2 3551offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3552{
3553 return (offset >= 0
3554 && offset < 4096 * GET_MODE_SIZE (mode)
3555 && offset % GET_MODE_SIZE (mode) == 0);
3556}
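/* Illustrative examples (not part of the original source), assuming an
   SImode access with a 4-byte size:
     aarch64_offset_7bit_signed_scaled_p (SImode, -256)  -> true
     aarch64_offset_7bit_signed_scaled_p (SImode, 252)   -> true
     aarch64_offset_7bit_signed_scaled_p (SImode, 254)   -> false (not a multiple of 4)
     offset_9bit_signed_unscaled_p (SImode, -256)        -> true
     offset_9bit_signed_unscaled_p (SImode, 256)         -> false (out of range)
     offset_12bit_unsigned_scaled_p (SImode, 16380)      -> true
     offset_12bit_unsigned_scaled_p (SImode, -4)         -> false (must be non-negative)  */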
3557
3558/* Return true if X is a valid address for machine mode MODE. If it is,
3559 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3560 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3561
3562static bool
3563aarch64_classify_address (struct aarch64_address_info *info,
ef4bddc2 3564 rtx x, machine_mode mode,
43e9d192
IB
3565 RTX_CODE outer_code, bool strict_p)
3566{
3567 enum rtx_code code = GET_CODE (x);
3568 rtx op0, op1;
2d8c6dc1
AH
3569
3570 /* On BE, we use load/store pair for all large int mode load/stores. */
3571 bool load_store_pair_p = (outer_code == PARALLEL
3572 || (BYTES_BIG_ENDIAN
3573 && aarch64_vect_struct_mode_p (mode)));
3574
43e9d192 3575 bool allow_reg_index_p =
2d8c6dc1
AH
3576 !load_store_pair_p
3577 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
3578 && !aarch64_vect_struct_mode_p (mode);
3579
3580 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
3581 REG addressing. */
3582 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
43e9d192
IB
3583 && (code != POST_INC && code != REG))
3584 return false;
3585
3586 switch (code)
3587 {
3588 case REG:
3589 case SUBREG:
3590 info->type = ADDRESS_REG_IMM;
3591 info->base = x;
3592 info->offset = const0_rtx;
3593 return aarch64_base_register_rtx_p (x, strict_p);
3594
3595 case PLUS:
3596 op0 = XEXP (x, 0);
3597 op1 = XEXP (x, 1);
15c0c5c9
JW
3598
3599 if (! strict_p
4aa81c2e 3600 && REG_P (op0)
15c0c5c9
JW
3601 && (op0 == virtual_stack_vars_rtx
3602 || op0 == frame_pointer_rtx
3603 || op0 == arg_pointer_rtx)
4aa81c2e 3604 && CONST_INT_P (op1))
15c0c5c9
JW
3605 {
3606 info->type = ADDRESS_REG_IMM;
3607 info->base = op0;
3608 info->offset = op1;
3609
3610 return true;
3611 }
3612
43e9d192
IB
3613 if (GET_MODE_SIZE (mode) != 0
3614 && CONST_INT_P (op1)
3615 && aarch64_base_register_rtx_p (op0, strict_p))
3616 {
3617 HOST_WIDE_INT offset = INTVAL (op1);
3618
3619 info->type = ADDRESS_REG_IMM;
3620 info->base = op0;
3621 info->offset = op1;
3622
3623 /* TImode and TFmode values are allowed in both pairs of X
3624 registers and individual Q registers. The available
3625 address modes are:
3626 X,X: 7-bit signed scaled offset
3627 Q: 9-bit signed offset
3628	 We conservatively require an offset representable in both modes.
3629 */
3630 if (mode == TImode || mode == TFmode)
44707478 3631 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3632 && offset_9bit_signed_unscaled_p (mode, offset));
3633
2d8c6dc1
AH
3634	 /* A 7-bit offset check because OImode will emit an ldp/stp
3635 instruction (only big endian will get here).
3636 For ldp/stp instructions, the offset is scaled for the size of a
3637 single element of the pair. */
3638 if (mode == OImode)
3639 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
3640
3641	 /* Three 9/12-bit offset checks because CImode will emit three
3642 ldr/str instructions (only big endian will get here). */
3643 if (mode == CImode)
3644 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3645 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
3646 || offset_12bit_unsigned_scaled_p (V16QImode,
3647 offset + 32)));
3648
3649	 /* Two 7-bit offset checks because XImode will emit two ldp/stp
3650 instructions (only big endian will get here). */
3651 if (mode == XImode)
3652 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3653 && aarch64_offset_7bit_signed_scaled_p (TImode,
3654 offset + 32));
3655
3656 if (load_store_pair_p)
43e9d192 3657 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3658 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3659 else
3660 return (offset_9bit_signed_unscaled_p (mode, offset)
3661 || offset_12bit_unsigned_scaled_p (mode, offset));
3662 }
3663
3664 if (allow_reg_index_p)
3665 {
3666 /* Look for base + (scaled/extended) index register. */
3667 if (aarch64_base_register_rtx_p (op0, strict_p)
3668 && aarch64_classify_index (info, op1, mode, strict_p))
3669 {
3670 info->base = op0;
3671 return true;
3672 }
3673 if (aarch64_base_register_rtx_p (op1, strict_p)
3674 && aarch64_classify_index (info, op0, mode, strict_p))
3675 {
3676 info->base = op1;
3677 return true;
3678 }
3679 }
3680
3681 return false;
3682
3683 case POST_INC:
3684 case POST_DEC:
3685 case PRE_INC:
3686 case PRE_DEC:
3687 info->type = ADDRESS_REG_WB;
3688 info->base = XEXP (x, 0);
3689 info->offset = NULL_RTX;
3690 return aarch64_base_register_rtx_p (info->base, strict_p);
3691
3692 case POST_MODIFY:
3693 case PRE_MODIFY:
3694 info->type = ADDRESS_REG_WB;
3695 info->base = XEXP (x, 0);
3696 if (GET_CODE (XEXP (x, 1)) == PLUS
3697 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3698 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3699 && aarch64_base_register_rtx_p (info->base, strict_p))
3700 {
3701 HOST_WIDE_INT offset;
3702 info->offset = XEXP (XEXP (x, 1), 1);
3703 offset = INTVAL (info->offset);
3704
3705 /* TImode and TFmode values are allowed in both pairs of X
3706 registers and individual Q registers. The available
3707 address modes are:
3708 X,X: 7-bit signed scaled offset
3709 Q: 9-bit signed offset
3710	 We conservatively require an offset representable in both modes.
3711 */
3712 if (mode == TImode || mode == TFmode)
44707478 3713 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3714 && offset_9bit_signed_unscaled_p (mode, offset));
3715
2d8c6dc1 3716 if (load_store_pair_p)
43e9d192 3717 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3718 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3719 else
3720 return offset_9bit_signed_unscaled_p (mode, offset);
3721 }
3722 return false;
3723
3724 case CONST:
3725 case SYMBOL_REF:
3726 case LABEL_REF:
79517551
SN
3727 /* load literal: pc-relative constant pool entry. Only supported
3728 for SI mode or larger. */
43e9d192 3729 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1
AH
3730
3731 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
3732 {
3733 rtx sym, addend;
3734
3735 split_const (x, &sym, &addend);
3736 return (GET_CODE (sym) == LABEL_REF
3737 || (GET_CODE (sym) == SYMBOL_REF
3738 && CONSTANT_POOL_ADDRESS_P (sym)));
3739 }
3740 return false;
3741
3742 case LO_SUM:
3743 info->type = ADDRESS_LO_SUM;
3744 info->base = XEXP (x, 0);
3745 info->offset = XEXP (x, 1);
3746 if (allow_reg_index_p
3747 && aarch64_base_register_rtx_p (info->base, strict_p))
3748 {
3749 rtx sym, offs;
3750 split_const (info->offset, &sym, &offs);
3751 if (GET_CODE (sym) == SYMBOL_REF
f8b756b7 3752 && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
43e9d192
IB
3753 == SYMBOL_SMALL_ABSOLUTE))
3754 {
3755 /* The symbol and offset must be aligned to the access size. */
3756 unsigned int align;
3757 unsigned int ref_size;
3758
3759 if (CONSTANT_POOL_ADDRESS_P (sym))
3760 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3761 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3762 {
3763 tree exp = SYMBOL_REF_DECL (sym);
3764 align = TYPE_ALIGN (TREE_TYPE (exp));
3765 align = CONSTANT_ALIGNMENT (exp, align);
3766 }
3767 else if (SYMBOL_REF_DECL (sym))
3768 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
3769 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3770 && SYMBOL_REF_BLOCK (sym) != NULL)
3771 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
3772 else
3773 align = BITS_PER_UNIT;
3774
3775 ref_size = GET_MODE_SIZE (mode);
3776 if (ref_size == 0)
3777 ref_size = GET_MODE_SIZE (DImode);
3778
3779 return ((INTVAL (offs) & (ref_size - 1)) == 0
3780 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3781 }
3782 }
3783 return false;
3784
3785 default:
3786 return false;
3787 }
3788}
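/* Illustrative sketch (not from the original source) of how a few common
   address forms are classified for an SImode access, assuming x0/x1 are
   valid base/index hard registers and the symbol below is
   SYMBOL_SMALL_ABSOLUTE and suitably aligned:
     (reg:DI x0)                                    -> ADDRESS_REG_IMM, offset 0
     (plus:DI (reg:DI x0) (const_int 16))           -> ADDRESS_REG_IMM
     (plus:DI (reg:DI x0)
              (mult:DI (reg:DI x1) (const_int 4)))  -> ADDRESS_REG_REG, shift 2
     (post_inc:DI (reg:DI x0))                      -> ADDRESS_REG_WB
     (lo_sum:DI (reg:DI x0) (symbol_ref:DI "var"))  -> ADDRESS_LO_SUM
   Anything that matches none of the cases makes the function return false.  */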
3789
3790bool
3791aarch64_symbolic_address_p (rtx x)
3792{
3793 rtx offset;
3794
3795 split_const (x, &x, &offset);
3796 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3797}
3798
3799/* Classify the base of symbolic expression X, given that X appears in
3800 context CONTEXT. */
da4f13a4
MS
3801
3802enum aarch64_symbol_type
3803aarch64_classify_symbolic_expression (rtx x,
3804 enum aarch64_symbol_context context)
43e9d192
IB
3805{
3806 rtx offset;
da4f13a4 3807
43e9d192 3808 split_const (x, &x, &offset);
f8b756b7 3809 return aarch64_classify_symbol (x, offset, context);
43e9d192
IB
3810}
3811
3812
3813/* Return TRUE if X is a legitimate address for accessing memory in
3814 mode MODE. */
3815static bool
ef4bddc2 3816aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
3817{
3818 struct aarch64_address_info addr;
3819
3820 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3821}
3822
3823/* Return TRUE if X is a legitimate address for accessing memory in
3824 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3825 pair operation. */
3826bool
ef4bddc2 3827aarch64_legitimate_address_p (machine_mode mode, rtx x,
aef66c94 3828 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
3829{
3830 struct aarch64_address_info addr;
3831
3832 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3833}
3834
3835/* Return TRUE if rtx X is immediate constant 0.0 */
3836bool
3520f7cc 3837aarch64_float_const_zero_rtx_p (rtx x)
43e9d192
IB
3838{
3839 REAL_VALUE_TYPE r;
3840
3841 if (GET_MODE (x) == VOIDmode)
3842 return false;
3843
3844 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3845 if (REAL_VALUE_MINUS_ZERO (r))
3846 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3847 return REAL_VALUES_EQUAL (r, dconst0);
3848}
3849
70f09188
AP
3850/* Return the fixed registers used for condition codes. */
3851
3852static bool
3853aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3854{
3855 *p1 = CC_REGNUM;
3856 *p2 = INVALID_REGNUM;
3857 return true;
3858}
3859
78607708
TV
3860/* Emit call insn with PAT and do aarch64-specific handling. */
3861
d07a3fed 3862void
78607708
TV
3863aarch64_emit_call_insn (rtx pat)
3864{
3865 rtx insn = emit_call_insn (pat);
3866
3867 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3868 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3869 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3870}
3871
ef4bddc2 3872machine_mode
43e9d192
IB
3873aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3874{
3875 /* All floating point compares return CCFP if it is an equality
3876 comparison, and CCFPE otherwise. */
3877 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3878 {
3879 switch (code)
3880 {
3881 case EQ:
3882 case NE:
3883 case UNORDERED:
3884 case ORDERED:
3885 case UNLT:
3886 case UNLE:
3887 case UNGT:
3888 case UNGE:
3889 case UNEQ:
3890 case LTGT:
3891 return CCFPmode;
3892
3893 case LT:
3894 case LE:
3895 case GT:
3896 case GE:
3897 return CCFPEmode;
3898
3899 default:
3900 gcc_unreachable ();
3901 }
3902 }
3903
3904 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3905 && y == const0_rtx
3906 && (code == EQ || code == NE || code == LT || code == GE)
b056c910
N
3907 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3908 || GET_CODE (x) == NEG))
43e9d192
IB
3909 return CC_NZmode;
3910
1c992d1e 3911 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
3912 the comparison will have to be swapped when we emit the assembly
3913 code. */
3914 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 3915 && (REG_P (y) || GET_CODE (y) == SUBREG)
43e9d192
IB
3916 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3917 || GET_CODE (x) == LSHIFTRT
1c992d1e 3918 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
3919 return CC_SWPmode;
3920
1c992d1e
RE
3921 /* Similarly for a negated operand, but we can only do this for
3922 equalities. */
3923 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 3924 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
3925 && (code == EQ || code == NE)
3926 && GET_CODE (x) == NEG)
3927 return CC_Zmode;
3928
43e9d192
IB
3929 /* A compare of a mode narrower than SI mode against zero can be done
3930 by extending the value in the comparison. */
3931 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3932 && y == const0_rtx)
3933 /* Only use sign-extension if we really need it. */
3934 return ((code == GT || code == GE || code == LE || code == LT)
3935 ? CC_SESWPmode : CC_ZESWPmode);
3936
3937 /* For everything else, return CCmode. */
3938 return CCmode;
3939}
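/* Illustrative examples (not part of the original source) of the CC mode
   selected above for a few comparison shapes:
     DFmode x == y                       -> CCFPmode   (equality-style FP compare)
     DFmode x <  y                       -> CCFPEmode  (ordered FP compare)
     (plus:SI a b) == 0                  -> CC_NZmode  (flags set by the ADDS itself)
     (ashift:DI a n) compared with a reg -> CC_SWPmode (operands swapped on output)
     QImode x > 0                        -> CC_SESWPmode (sign-extended compare)
     anything else                       -> CCmode  */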
3940
3dfa7055
ZC
3941static int
3942aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
3943
cd5660ab 3944int
43e9d192
IB
3945aarch64_get_condition_code (rtx x)
3946{
ef4bddc2 3947 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
3948 enum rtx_code comp_code = GET_CODE (x);
3949
3950 if (GET_MODE_CLASS (mode) != MODE_CC)
3951 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
3952 return aarch64_get_condition_code_1 (mode, comp_code);
3953}
43e9d192 3954
3dfa7055
ZC
3955static int
3956aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
3957{
3958 int ne = -1, eq = -1;
43e9d192
IB
3959 switch (mode)
3960 {
3961 case CCFPmode:
3962 case CCFPEmode:
3963 switch (comp_code)
3964 {
3965 case GE: return AARCH64_GE;
3966 case GT: return AARCH64_GT;
3967 case LE: return AARCH64_LS;
3968 case LT: return AARCH64_MI;
3969 case NE: return AARCH64_NE;
3970 case EQ: return AARCH64_EQ;
3971 case ORDERED: return AARCH64_VC;
3972 case UNORDERED: return AARCH64_VS;
3973 case UNLT: return AARCH64_LT;
3974 case UNLE: return AARCH64_LE;
3975 case UNGT: return AARCH64_HI;
3976 case UNGE: return AARCH64_PL;
cd5660ab 3977 default: return -1;
43e9d192
IB
3978 }
3979 break;
3980
3dfa7055
ZC
3981 case CC_DNEmode:
3982 ne = AARCH64_NE;
3983 eq = AARCH64_EQ;
3984 break;
3985
3986 case CC_DEQmode:
3987 ne = AARCH64_EQ;
3988 eq = AARCH64_NE;
3989 break;
3990
3991 case CC_DGEmode:
3992 ne = AARCH64_GE;
3993 eq = AARCH64_LT;
3994 break;
3995
3996 case CC_DLTmode:
3997 ne = AARCH64_LT;
3998 eq = AARCH64_GE;
3999 break;
4000
4001 case CC_DGTmode:
4002 ne = AARCH64_GT;
4003 eq = AARCH64_LE;
4004 break;
4005
4006 case CC_DLEmode:
4007 ne = AARCH64_LE;
4008 eq = AARCH64_GT;
4009 break;
4010
4011 case CC_DGEUmode:
4012 ne = AARCH64_CS;
4013 eq = AARCH64_CC;
4014 break;
4015
4016 case CC_DLTUmode:
4017 ne = AARCH64_CC;
4018 eq = AARCH64_CS;
4019 break;
4020
4021 case CC_DGTUmode:
4022 ne = AARCH64_HI;
4023 eq = AARCH64_LS;
4024 break;
4025
4026 case CC_DLEUmode:
4027 ne = AARCH64_LS;
4028 eq = AARCH64_HI;
4029 break;
4030
43e9d192
IB
4031 case CCmode:
4032 switch (comp_code)
4033 {
4034 case NE: return AARCH64_NE;
4035 case EQ: return AARCH64_EQ;
4036 case GE: return AARCH64_GE;
4037 case GT: return AARCH64_GT;
4038 case LE: return AARCH64_LE;
4039 case LT: return AARCH64_LT;
4040 case GEU: return AARCH64_CS;
4041 case GTU: return AARCH64_HI;
4042 case LEU: return AARCH64_LS;
4043 case LTU: return AARCH64_CC;
cd5660ab 4044 default: return -1;
43e9d192
IB
4045 }
4046 break;
4047
4048 case CC_SWPmode:
4049 case CC_ZESWPmode:
4050 case CC_SESWPmode:
4051 switch (comp_code)
4052 {
4053 case NE: return AARCH64_NE;
4054 case EQ: return AARCH64_EQ;
4055 case GE: return AARCH64_LE;
4056 case GT: return AARCH64_LT;
4057 case LE: return AARCH64_GE;
4058 case LT: return AARCH64_GT;
4059 case GEU: return AARCH64_LS;
4060 case GTU: return AARCH64_CC;
4061 case LEU: return AARCH64_CS;
4062 case LTU: return AARCH64_HI;
cd5660ab 4063 default: return -1;
43e9d192
IB
4064 }
4065 break;
4066
4067 case CC_NZmode:
4068 switch (comp_code)
4069 {
4070 case NE: return AARCH64_NE;
4071 case EQ: return AARCH64_EQ;
4072 case GE: return AARCH64_PL;
4073 case LT: return AARCH64_MI;
cd5660ab 4074 default: return -1;
43e9d192
IB
4075 }
4076 break;
4077
1c992d1e
RE
4078 case CC_Zmode:
4079 switch (comp_code)
4080 {
4081 case NE: return AARCH64_NE;
4082 case EQ: return AARCH64_EQ;
cd5660ab 4083 default: return -1;
1c992d1e
RE
4084 }
4085 break;
4086
43e9d192 4087 default:
cd5660ab 4088 return -1;
43e9d192
IB
4089 break;
4090 }
3dfa7055
ZC
4091
4092 if (comp_code == NE)
4093 return ne;
4094
4095 if (comp_code == EQ)
4096 return eq;
4097
4098 return -1;
43e9d192
IB
4099}
4100
ddeabd3e
AL
4101bool
4102aarch64_const_vec_all_same_in_range_p (rtx x,
4103 HOST_WIDE_INT minval,
4104 HOST_WIDE_INT maxval)
4105{
4106 HOST_WIDE_INT firstval;
4107 int count, i;
4108
4109 if (GET_CODE (x) != CONST_VECTOR
4110 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
4111 return false;
4112
4113 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
4114 if (firstval < minval || firstval > maxval)
4115 return false;
4116
4117 count = CONST_VECTOR_NUNITS (x);
4118 for (i = 1; i < count; i++)
4119 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
4120 return false;
4121
4122 return true;
4123}
4124
4125bool
4126aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
4127{
4128 return aarch64_const_vec_all_same_in_range_p (x, val, val);
4129}
4130
43e9d192
IB
4131static unsigned
4132bit_count (unsigned HOST_WIDE_INT value)
4133{
4134 unsigned count = 0;
4135
4136 while (value)
4137 {
4138 count++;
4139 value &= value - 1;
4140 }
4141
4142 return count;
4143}
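/* bit_count is Kernighan's population count: each iteration of
   "value &= value - 1" clears the lowest set bit.  Purely illustrative
   examples: bit_count (0xf0) == 4, bit_count (0x8001) == 2.  */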
4144
cf670503
ZC
4145/* N Z C V. */
4146#define AARCH64_CC_V 1
4147#define AARCH64_CC_C (1 << 1)
4148#define AARCH64_CC_Z (1 << 2)
4149#define AARCH64_CC_N (1 << 3)
4150
4151/* N Z C V flags for ccmp. The first code is for AND op and the other
4152 is for IOR op. Indexed by AARCH64_COND_CODE. */
4153static const int aarch64_nzcv_codes[][2] =
4154{
4155 {AARCH64_CC_Z, 0}, /* EQ, Z == 1. */
4156 {0, AARCH64_CC_Z}, /* NE, Z == 0. */
4157 {AARCH64_CC_C, 0}, /* CS, C == 1. */
4158 {0, AARCH64_CC_C}, /* CC, C == 0. */
4159 {AARCH64_CC_N, 0}, /* MI, N == 1. */
4160 {0, AARCH64_CC_N}, /* PL, N == 0. */
4161 {AARCH64_CC_V, 0}, /* VS, V == 1. */
4162 {0, AARCH64_CC_V}, /* VC, V == 0. */
4163	 {AARCH64_CC_C, 0}, /* HI, C == 1 && Z == 0. */
4164 {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0). */
4165 {0, AARCH64_CC_V}, /* GE, N == V. */
4166 {AARCH64_CC_V, 0}, /* LT, N != V. */
4167 {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V. */
4168 {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V). */
4169 {0, 0}, /* AL, Any. */
4170 {0, 0}, /* NV, Any. */
4171};
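/* Illustrative note (not part of the original source): the '%K' and '%k'
   output modifiers handled later in aarch64_print_operand print column 0 and
   column 1 of this table respectively, so for an EQ comparison they print
   4 (Z set) and 0, while for GE they print 0 and 1 (V clear).  */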
4172
4173int
4174aarch64_ccmp_mode_to_code (enum machine_mode mode)
4175{
4176 switch (mode)
4177 {
4178 case CC_DNEmode:
4179 return NE;
4180
4181 case CC_DEQmode:
4182 return EQ;
4183
4184 case CC_DLEmode:
4185 return LE;
4186
4187 case CC_DGTmode:
4188 return GT;
4189
4190 case CC_DLTmode:
4191 return LT;
4192
4193 case CC_DGEmode:
4194 return GE;
4195
4196 case CC_DLEUmode:
4197 return LEU;
4198
4199 case CC_DGTUmode:
4200 return GTU;
4201
4202 case CC_DLTUmode:
4203 return LTU;
4204
4205 case CC_DGEUmode:
4206 return GEU;
4207
4208 default:
4209 gcc_unreachable ();
4210 }
4211}
4212
4213
43e9d192
IB
4214void
4215aarch64_print_operand (FILE *f, rtx x, char code)
4216{
4217 switch (code)
4218 {
f541a481
KT
4219 /* An integer or symbol address without a preceding # sign. */
4220 case 'c':
4221 switch (GET_CODE (x))
4222 {
4223 case CONST_INT:
4224 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4225 break;
4226
4227 case SYMBOL_REF:
4228 output_addr_const (f, x);
4229 break;
4230
4231 case CONST:
4232 if (GET_CODE (XEXP (x, 0)) == PLUS
4233 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4234 {
4235 output_addr_const (f, x);
4236 break;
4237 }
4238 /* Fall through. */
4239
4240 default:
4241 output_operand_lossage ("Unsupported operand for code '%c'", code);
4242 }
4243 break;
4244
43e9d192
IB
4245 case 'e':
4246 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
4247 {
4248 int n;
4249
4aa81c2e 4250 if (!CONST_INT_P (x)
43e9d192
IB
4251 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
4252 {
4253 output_operand_lossage ("invalid operand for '%%%c'", code);
4254 return;
4255 }
4256
4257 switch (n)
4258 {
4259 case 3:
4260 fputc ('b', f);
4261 break;
4262 case 4:
4263 fputc ('h', f);
4264 break;
4265 case 5:
4266 fputc ('w', f);
4267 break;
4268 default:
4269 output_operand_lossage ("invalid operand for '%%%c'", code);
4270 return;
4271 }
4272 }
4273 break;
4274
4275 case 'p':
4276 {
4277 int n;
4278
4279 /* Print N such that 2^N == X. */
4aa81c2e 4280 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
4281 {
4282 output_operand_lossage ("invalid operand for '%%%c'", code);
4283 return;
4284 }
4285
4286 asm_fprintf (f, "%d", n);
4287 }
4288 break;
4289
4290 case 'P':
4291 /* Print the number of non-zero bits in X (a const_int). */
4aa81c2e 4292 if (!CONST_INT_P (x))
43e9d192
IB
4293 {
4294 output_operand_lossage ("invalid operand for '%%%c'", code);
4295 return;
4296 }
4297
4298 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
4299 break;
4300
4301 case 'H':
4302 /* Print the higher numbered register of a pair (TImode) of regs. */
4aa81c2e 4303 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
4304 {
4305 output_operand_lossage ("invalid operand for '%%%c'", code);
4306 return;
4307 }
4308
01a3a324 4309 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
4310 break;
4311
43e9d192 4312 case 'm':
cd5660ab
KT
4313 {
4314 int cond_code;
4315 /* Print a condition (eq, ne, etc). */
43e9d192 4316
cd5660ab
KT
4317 /* CONST_TRUE_RTX means always -- that's the default. */
4318 if (x == const_true_rtx)
43e9d192 4319 return;
43e9d192 4320
cd5660ab
KT
4321 if (!COMPARISON_P (x))
4322 {
4323 output_operand_lossage ("invalid operand for '%%%c'", code);
4324 return;
4325 }
4326
4327 cond_code = aarch64_get_condition_code (x);
4328 gcc_assert (cond_code >= 0);
4329 fputs (aarch64_condition_codes[cond_code], f);
4330 }
43e9d192
IB
4331 break;
4332
4333 case 'M':
cd5660ab
KT
4334 {
4335 int cond_code;
4336 /* Print the inverse of a condition (eq <-> ne, etc). */
43e9d192 4337
cd5660ab
KT
4338 /* CONST_TRUE_RTX means never -- that's the default. */
4339 if (x == const_true_rtx)
4340 {
4341 fputs ("nv", f);
4342 return;
4343 }
43e9d192 4344
cd5660ab
KT
4345 if (!COMPARISON_P (x))
4346 {
4347 output_operand_lossage ("invalid operand for '%%%c'", code);
4348 return;
4349 }
4350 cond_code = aarch64_get_condition_code (x);
4351 gcc_assert (cond_code >= 0);
4352 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
4353 (cond_code)], f);
4354 }
43e9d192
IB
4355 break;
4356
4357 case 'b':
4358 case 'h':
4359 case 's':
4360 case 'd':
4361 case 'q':
4362 /* Print a scalar FP/SIMD register name. */
4363 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4364 {
4365 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4366 return;
4367 }
50ce6f88 4368 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
4369 break;
4370
4371 case 'S':
4372 case 'T':
4373 case 'U':
4374 case 'V':
4375 /* Print the first FP/SIMD register name in a list. */
4376 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4377 {
4378 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4379 return;
4380 }
50ce6f88 4381 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
4382 break;
4383
2d8c6dc1
AH
4384 case 'R':
4385 /* Print a scalar FP/SIMD register name + 1. */
4386 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4387 {
4388 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4389 return;
4390 }
4391 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
4392 break;
4393
a05c0ddf 4394 case 'X':
50d38551 4395 /* Print bottom 16 bits of integer constant in hex. */
4aa81c2e 4396 if (!CONST_INT_P (x))
a05c0ddf
IB
4397 {
4398 output_operand_lossage ("invalid operand for '%%%c'", code);
4399 return;
4400 }
50d38551 4401 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
4402 break;
4403
43e9d192
IB
4404 case 'w':
4405 case 'x':
4406 /* Print a general register name or the zero register (32-bit or
4407 64-bit). */
3520f7cc
JG
4408 if (x == const0_rtx
4409 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 4410 {
50ce6f88 4411 asm_fprintf (f, "%czr", code);
43e9d192
IB
4412 break;
4413 }
4414
4415 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
4416 {
50ce6f88 4417 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
4418 break;
4419 }
4420
4421 if (REG_P (x) && REGNO (x) == SP_REGNUM)
4422 {
50ce6f88 4423 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
4424 break;
4425 }
4426
4427 /* Fall through */
4428
4429 case 0:
4430 /* Print a normal operand, if it's a general register, then we
4431 assume DImode. */
4432 if (x == NULL)
4433 {
4434 output_operand_lossage ("missing operand");
4435 return;
4436 }
4437
4438 switch (GET_CODE (x))
4439 {
4440 case REG:
01a3a324 4441 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
4442 break;
4443
4444 case MEM:
4445 aarch64_memory_reference_mode = GET_MODE (x);
4446 output_address (XEXP (x, 0));
4447 break;
4448
4449 case LABEL_REF:
4450 case SYMBOL_REF:
4451 output_addr_const (asm_out_file, x);
4452 break;
4453
4454 case CONST_INT:
4455 asm_fprintf (f, "%wd", INTVAL (x));
4456 break;
4457
4458 case CONST_VECTOR:
3520f7cc
JG
4459 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
4460 {
ddeabd3e
AL
4461 gcc_assert (
4462 aarch64_const_vec_all_same_in_range_p (x,
4463 HOST_WIDE_INT_MIN,
4464 HOST_WIDE_INT_MAX));
3520f7cc
JG
4465 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
4466 }
4467 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
4468 {
4469 fputc ('0', f);
4470 }
4471 else
4472 gcc_unreachable ();
43e9d192
IB
4473 break;
4474
3520f7cc
JG
4475 case CONST_DOUBLE:
4476 /* CONST_DOUBLE can represent a double-width integer.
4477 In this case, the mode of x is VOIDmode. */
4478 if (GET_MODE (x) == VOIDmode)
4479 ; /* Do Nothing. */
4480 else if (aarch64_float_const_zero_rtx_p (x))
4481 {
4482 fputc ('0', f);
4483 break;
4484 }
4485 else if (aarch64_float_const_representable_p (x))
4486 {
4487#define buf_size 20
4488 char float_buf[buf_size] = {'\0'};
4489 REAL_VALUE_TYPE r;
4490 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4491 real_to_decimal_for_mode (float_buf, &r,
4492 buf_size, buf_size,
4493 1, GET_MODE (x));
4494 asm_fprintf (asm_out_file, "%s", float_buf);
4495 break;
4496#undef buf_size
4497 }
4498 output_operand_lossage ("invalid constant");
4499 return;
43e9d192
IB
4500 default:
4501 output_operand_lossage ("invalid operand");
4502 return;
4503 }
4504 break;
4505
4506 case 'A':
4507 if (GET_CODE (x) == HIGH)
4508 x = XEXP (x, 0);
4509
4510 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4511 {
6642bdb4 4512 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
4513 asm_fprintf (asm_out_file, ":got:");
4514 break;
4515
4516 case SYMBOL_SMALL_TLSGD:
4517 asm_fprintf (asm_out_file, ":tlsgd:");
4518 break;
4519
4520 case SYMBOL_SMALL_TLSDESC:
4521 asm_fprintf (asm_out_file, ":tlsdesc:");
4522 break;
4523
4524 case SYMBOL_SMALL_GOTTPREL:
4525 asm_fprintf (asm_out_file, ":gottprel:");
4526 break;
4527
8fd17b98 4528 case SYMBOL_TLSLE:
43e9d192
IB
4529 asm_fprintf (asm_out_file, ":tprel:");
4530 break;
4531
87dd8ab0
MS
4532 case SYMBOL_TINY_GOT:
4533 gcc_unreachable ();
4534 break;
4535
43e9d192
IB
4536 default:
4537 break;
4538 }
4539 output_addr_const (asm_out_file, x);
4540 break;
4541
4542 case 'L':
4543 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4544 {
6642bdb4 4545 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
4546 asm_fprintf (asm_out_file, ":lo12:");
4547 break;
4548
4549 case SYMBOL_SMALL_TLSGD:
4550 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4551 break;
4552
4553 case SYMBOL_SMALL_TLSDESC:
4554 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4555 break;
4556
4557 case SYMBOL_SMALL_GOTTPREL:
4558 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4559 break;
4560
8fd17b98 4561 case SYMBOL_TLSLE:
43e9d192
IB
4562 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4563 break;
4564
87dd8ab0
MS
4565 case SYMBOL_TINY_GOT:
4566 asm_fprintf (asm_out_file, ":got:");
4567 break;
4568
43e9d192
IB
4569 default:
4570 break;
4571 }
4572 output_addr_const (asm_out_file, x);
4573 break;
4574
4575 case 'G':
4576
4577 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4578 {
8fd17b98 4579 case SYMBOL_TLSLE:
43e9d192
IB
4580 asm_fprintf (asm_out_file, ":tprel_hi12:");
4581 break;
4582 default:
4583 break;
4584 }
4585 output_addr_const (asm_out_file, x);
4586 break;
4587
cf670503
ZC
4588 case 'K':
4589 {
4590 int cond_code;
4591 /* Print nzcv. */
4592
4593 if (!COMPARISON_P (x))
4594 {
4595 output_operand_lossage ("invalid operand for '%%%c'", code);
4596 return;
4597 }
4598
4599 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4600 gcc_assert (cond_code >= 0);
4601 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
4602 }
4603 break;
4604
4605 case 'k':
4606 {
4607 int cond_code;
4608 /* Print nzcv. */
4609
4610 if (!COMPARISON_P (x))
4611 {
4612 output_operand_lossage ("invalid operand for '%%%c'", code);
4613 return;
4614 }
4615
4616 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4617 gcc_assert (cond_code >= 0);
4618 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
4619 }
4620 break;
4621
43e9d192
IB
4622 default:
4623 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4624 return;
4625 }
4626}
4627
4628void
4629aarch64_print_operand_address (FILE *f, rtx x)
4630{
4631 struct aarch64_address_info addr;
4632
4633 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4634 MEM, true))
4635 switch (addr.type)
4636 {
4637 case ADDRESS_REG_IMM:
4638 if (addr.offset == const0_rtx)
01a3a324 4639 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 4640 else
16a3246f 4641 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
4642 INTVAL (addr.offset));
4643 return;
4644
4645 case ADDRESS_REG_REG:
4646 if (addr.shift == 0)
16a3246f 4647 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 4648 reg_names [REGNO (addr.offset)]);
43e9d192 4649 else
16a3246f 4650 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 4651 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
4652 return;
4653
4654 case ADDRESS_REG_UXTW:
4655 if (addr.shift == 0)
16a3246f 4656 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4657 REGNO (addr.offset) - R0_REGNUM);
4658 else
16a3246f 4659 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4660 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4661 return;
4662
4663 case ADDRESS_REG_SXTW:
4664 if (addr.shift == 0)
16a3246f 4665 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4666 REGNO (addr.offset) - R0_REGNUM);
4667 else
16a3246f 4668 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4669 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4670 return;
4671
4672 case ADDRESS_REG_WB:
4673 switch (GET_CODE (x))
4674 {
4675 case PRE_INC:
16a3246f 4676 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4677 GET_MODE_SIZE (aarch64_memory_reference_mode));
4678 return;
4679 case POST_INC:
16a3246f 4680 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
43e9d192
IB
4681 GET_MODE_SIZE (aarch64_memory_reference_mode));
4682 return;
4683 case PRE_DEC:
16a3246f 4684 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4685 GET_MODE_SIZE (aarch64_memory_reference_mode));
4686 return;
4687 case POST_DEC:
16a3246f 4688 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
43e9d192
IB
4689 GET_MODE_SIZE (aarch64_memory_reference_mode));
4690 return;
4691 case PRE_MODIFY:
16a3246f 4692 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4693 INTVAL (addr.offset));
4694 return;
4695 case POST_MODIFY:
16a3246f 4696 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
4697 INTVAL (addr.offset));
4698 return;
4699 default:
4700 break;
4701 }
4702 break;
4703
4704 case ADDRESS_LO_SUM:
16a3246f 4705 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
4706 output_addr_const (f, addr.offset);
4707 asm_fprintf (f, "]");
4708 return;
4709
4710 case ADDRESS_SYMBOLIC:
4711 break;
4712 }
4713
4714 output_addr_const (f, x);
4715}
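/* Illustrative examples (not from the original source) of the address syntax
   printed above, assuming x0 is the base register and x1/w1 the index:
     ADDRESS_REG_IMM, offset 16              -> [x0, 16]
     ADDRESS_REG_REG, shift 2                -> [x0, x1, lsl 2]
     ADDRESS_REG_UXTW, shift 0               -> [x0, w1, uxtw]
     ADDRESS_REG_WB from a POST_INC (DImode) -> [x0], 8
     ADDRESS_LO_SUM with symbol "var"        -> [x0, #:lo12:var]  */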
4716
43e9d192
IB
4717bool
4718aarch64_label_mentioned_p (rtx x)
4719{
4720 const char *fmt;
4721 int i;
4722
4723 if (GET_CODE (x) == LABEL_REF)
4724 return true;
4725
4726 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4727 referencing instruction, but they are constant offsets, not
4728 symbols. */
4729 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4730 return false;
4731
4732 fmt = GET_RTX_FORMAT (GET_CODE (x));
4733 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4734 {
4735 if (fmt[i] == 'E')
4736 {
4737 int j;
4738
4739 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4740 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4741 return 1;
4742 }
4743 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4744 return 1;
4745 }
4746
4747 return 0;
4748}
4749
4750/* Implement REGNO_REG_CLASS. */
4751
4752enum reg_class
4753aarch64_regno_regclass (unsigned regno)
4754{
4755 if (GP_REGNUM_P (regno))
a4a182c6 4756 return GENERAL_REGS;
43e9d192
IB
4757
4758 if (regno == SP_REGNUM)
4759 return STACK_REG;
4760
4761 if (regno == FRAME_POINTER_REGNUM
4762 || regno == ARG_POINTER_REGNUM)
f24bb080 4763 return POINTER_REGS;
43e9d192
IB
4764
4765 if (FP_REGNUM_P (regno))
4766 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4767
4768 return NO_REGS;
4769}
4770
0c4ec427 4771static rtx
ef4bddc2 4772aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
4773{
4774 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4775 where mask is selected by alignment and size of the offset.
4776 We try to pick as large a range for the offset as possible to
4777 maximize the chance of a CSE. However, for aligned addresses
4778 we limit the range to 4k so that structures with different sized
4779 elements are likely to use the same base. */
4780
4781 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
4782 {
4783 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
4784 HOST_WIDE_INT base_offset;
4785
4786 /* Does it look like we'll need a load/store-pair operation? */
4787 if (GET_MODE_SIZE (mode) > 16
4788 || mode == TImode)
4789 base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
4790 & ~((128 * GET_MODE_SIZE (mode)) - 1));
4791	 /* For offsets that aren't a multiple of the access size, the limit is
4792 -256...255. */
4793 else if (offset & (GET_MODE_SIZE (mode) - 1))
4794 base_offset = (offset + 0x100) & ~0x1ff;
4795 else
4796 base_offset = offset & ~0xfff;
4797
4798 if (base_offset == 0)
4799 return x;
4800
4801 offset -= base_offset;
4802 rtx base_reg = gen_reg_rtx (Pmode);
4803 rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
4804 NULL_RTX);
4805 emit_move_insn (base_reg, val);
4806 x = plus_constant (Pmode, base_reg, offset);
4807 }
4808
4809 return x;
4810}
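/* Worked example (illustrative only): legitimizing (plus (reg X) 0x2008) for
   an SImode access takes the aligned-offset path, so base_offset becomes
   0x2000 and the result is
       tmp = X + 0x2000
       (plus tmp 8)
   which lets nearby accesses off the same base CSE the 0x2000-based tmp.  */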
4811
43e9d192
IB
4812/* Try a machine-dependent way of reloading an illegitimate address
4813 operand. If we find one, push the reload and return the new rtx. */
4814
4815rtx
4816aarch64_legitimize_reload_address (rtx *x_p,
ef4bddc2 4817 machine_mode mode,
43e9d192
IB
4818 int opnum, int type,
4819 int ind_levels ATTRIBUTE_UNUSED)
4820{
4821 rtx x = *x_p;
4822
348d4b0a
BC
4823 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4824 if (aarch64_vect_struct_mode_p (mode)
43e9d192
IB
4825 && GET_CODE (x) == PLUS
4826 && REG_P (XEXP (x, 0))
4827 && CONST_INT_P (XEXP (x, 1)))
4828 {
4829 rtx orig_rtx = x;
4830 x = copy_rtx (x);
4831 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4832 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4833 opnum, (enum reload_type) type);
4834 return x;
4835 }
4836
4837 /* We must recognize output that we have already generated ourselves. */
4838 if (GET_CODE (x) == PLUS
4839 && GET_CODE (XEXP (x, 0)) == PLUS
4840 && REG_P (XEXP (XEXP (x, 0), 0))
4841 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4842 && CONST_INT_P (XEXP (x, 1)))
4843 {
4844 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4845 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4846 opnum, (enum reload_type) type);
4847 return x;
4848 }
4849
4850 /* We wish to handle large displacements off a base register by splitting
4851 the addend across an add and the mem insn. This can cut the number of
4852 extra insns needed from 3 to 1. It is only useful for load/store of a
4853	 single register with a 12-bit offset field. */
4854 if (GET_CODE (x) == PLUS
4855 && REG_P (XEXP (x, 0))
4856 && CONST_INT_P (XEXP (x, 1))
4857 && HARD_REGISTER_P (XEXP (x, 0))
4858 && mode != TImode
4859 && mode != TFmode
4860 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4861 {
4862 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4863 HOST_WIDE_INT low = val & 0xfff;
4864 HOST_WIDE_INT high = val - low;
4865 HOST_WIDE_INT offs;
4866 rtx cst;
ef4bddc2 4867 machine_mode xmode = GET_MODE (x);
28514dda
YZ
4868
4869 /* In ILP32, xmode can be either DImode or SImode. */
4870 gcc_assert (xmode == DImode || xmode == SImode);
43e9d192
IB
4871
4872 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4873 BLKmode alignment. */
4874 if (GET_MODE_SIZE (mode) == 0)
4875 return NULL_RTX;
4876
4877 offs = low % GET_MODE_SIZE (mode);
4878
4879 /* Align misaligned offset by adjusting high part to compensate. */
4880 if (offs != 0)
4881 {
4882 if (aarch64_uimm12_shift (high + offs))
4883 {
4884 /* Align down. */
4885 low = low - offs;
4886 high = high + offs;
4887 }
4888 else
4889 {
4890 /* Align up. */
4891 offs = GET_MODE_SIZE (mode) - offs;
4892 low = low + offs;
4893 high = high + (low & 0x1000) - offs;
4894 low &= 0xfff;
4895 }
4896 }
4897
4898 /* Check for overflow. */
4899 if (high + low != val)
4900 return NULL_RTX;
4901
4902 cst = GEN_INT (high);
4903 if (!aarch64_uimm12_shift (high))
28514dda 4904 cst = force_const_mem (xmode, cst);
43e9d192
IB
4905
4906 /* Reload high part into base reg, leaving the low part
acf67b8e
KT
4907 in the mem instruction.
4908 Note that replacing this gen_rtx_PLUS with plus_constant is
4909 wrong in this case because we rely on the
4910 (plus (plus reg c1) c2) structure being preserved so that
4911 XEXP (*p, 0) in push_reload below uses the correct term. */
4d0e5b54
JG
4912 x = gen_rtx_PLUS (xmode,
4913 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4914 GEN_INT (low));
43e9d192
IB
4915
4916 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
28514dda 4917 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
43e9d192
IB
4918 opnum, (enum reload_type) type);
4919 return x;
4920 }
4921
4922 return NULL_RTX;
4923}
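/* Worked example (illustrative only) of the large-displacement split above:
   reloading (plus (reg X) 0x3008) for an SImode access gives low = 0x008 and
   high = 0x3000, so the address is rewritten as
       (plus (plus (reg X) 0x3000) 8)
   and push_reload moves the inner ADD into a scratch register, leaving the
   small offset 8 for the load/store instruction itself.  */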
4924
4925
4926static reg_class_t
4927aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4928 reg_class_t rclass,
ef4bddc2 4929 machine_mode mode,
43e9d192
IB
4930 secondary_reload_info *sri)
4931{
43e9d192
IB
4932 /* Without the TARGET_SIMD instructions we cannot move a Q register
4933 to a Q register directly. We need a scratch. */
4934 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4935 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4936 && reg_class_subset_p (rclass, FP_REGS))
4937 {
4938 if (mode == TFmode)
4939 sri->icode = CODE_FOR_aarch64_reload_movtf;
4940 else if (mode == TImode)
4941 sri->icode = CODE_FOR_aarch64_reload_movti;
4942 return NO_REGS;
4943 }
4944
4948	 /* A TFmode or TImode memory access should be handled via FP_REGS
4946 because AArch64 has richer addressing modes for LDR/STR instructions
4947 than LDP/STP instructions. */
d5726973 4948 if (TARGET_FLOAT && rclass == GENERAL_REGS
43e9d192
IB
4949 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4950 return FP_REGS;
4951
4952 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 4953 return GENERAL_REGS;
43e9d192
IB
4954
4955 return NO_REGS;
4956}
4957
4958static bool
4959aarch64_can_eliminate (const int from, const int to)
4960{
4961 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4962 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4963
4964 if (frame_pointer_needed)
4965 {
4966 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4967 return true;
4968 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4969 return false;
4970 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4971 && !cfun->calls_alloca)
4972 return true;
4973 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4974 return true;
0b7f8166
MS
4975
4976 return false;
43e9d192 4977 }
1c923b60
JW
4978 else
4979 {
4980 /* If we decided that we didn't need a leaf frame pointer but then used
4981 LR in the function, then we'll want a frame pointer after all, so
4982 prevent this elimination to ensure a frame pointer is used. */
4983 if (to == STACK_POINTER_REGNUM
4984 && flag_omit_leaf_frame_pointer
4985 && df_regs_ever_live_p (LR_REGNUM))
4986 return false;
4987 }
777e6976 4988
43e9d192
IB
4989 return true;
4990}
4991
4992HOST_WIDE_INT
4993aarch64_initial_elimination_offset (unsigned from, unsigned to)
4994{
43e9d192 4995 aarch64_layout_frame ();
78c29983
MS
4996
4997 if (to == HARD_FRAME_POINTER_REGNUM)
4998 {
4999 if (from == ARG_POINTER_REGNUM)
1c960e02 5000 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
78c29983
MS
5001
5002 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
5003 return (cfun->machine->frame.hard_fp_offset
5004 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
5005 }
5006
5007 if (to == STACK_POINTER_REGNUM)
5008 {
5009 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
5010 return (cfun->machine->frame.frame_size
5011 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
5012 }
5013
1c960e02 5014 return cfun->machine->frame.frame_size;
43e9d192
IB
5015}
5016
43e9d192
IB
5017/* Implement RETURN_ADDR_RTX. We do not support moving back to a
5018 previous frame. */
5019
5020rtx
5021aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
5022{
5023 if (count != 0)
5024 return const0_rtx;
5025 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
5026}
5027
5028
5029static void
5030aarch64_asm_trampoline_template (FILE *f)
5031{
28514dda
YZ
5032 if (TARGET_ILP32)
5033 {
5034 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
5035 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
5036 }
5037 else
5038 {
5039 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
5040 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
5041 }
01a3a324 5042 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 5043 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
5044 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
5045 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
5046}
5047
5048static void
5049aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5050{
5051 rtx fnaddr, mem, a_tramp;
28514dda 5052 const int tramp_code_sz = 16;
43e9d192
IB
5053
5054 /* Don't need to copy the trailing D-words, we fill those in below. */
5055 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
5056 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
5057 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 5058 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
5059 if (GET_MODE (fnaddr) != ptr_mode)
5060 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
5061 emit_move_insn (mem, fnaddr);
5062
28514dda 5063 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
5064 emit_move_insn (mem, chain_value);
5065
5066 /* XXX We should really define a "clear_cache" pattern and use
5067 gen_clear_cache(). */
5068 a_tramp = XEXP (m_tramp, 0);
5069 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
5070 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
5071 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
5072 ptr_mode);
43e9d192
IB
5073}
5074
5075static unsigned char
ef4bddc2 5076aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192
IB
5077{
5078 switch (regclass)
5079 {
fee9ba42 5080 case CALLER_SAVE_REGS:
43e9d192
IB
5081 case POINTER_REGS:
5082 case GENERAL_REGS:
5083 case ALL_REGS:
5084 case FP_REGS:
5085 case FP_LO_REGS:
5086 return
7bd11911
KT
5087 aarch64_vector_mode_p (mode)
5088 ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
5089 : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
43e9d192
IB
5090 case STACK_REG:
5091 return 1;
5092
5093 case NO_REGS:
5094 return 0;
5095
5096 default:
5097 break;
5098 }
5099 gcc_unreachable ();
5100}
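/* Illustrative examples (not part of the original source), with
   UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16:
     TImode   in GENERAL_REGS -> 2 (a pair of X registers)
     V4SImode in FP_REGS      -> 1 (a single Q register)  */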
5101
5102static reg_class_t
78d8b9f0 5103aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 5104{
51bb310d 5105 if (regclass == POINTER_REGS)
78d8b9f0
IB
5106 return GENERAL_REGS;
5107
51bb310d
MS
5108 if (regclass == STACK_REG)
5109 {
5110 if (REG_P(x)
5111 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
5112 return regclass;
5113
5114 return NO_REGS;
5115 }
5116
78d8b9f0
IB
5117 /* If it's an integer immediate that MOVI can't handle, then
5118 FP_REGS is not an option, so we return NO_REGS instead. */
5119 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
5120 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
5121 return NO_REGS;
5122
27bd251b
IB
5123	 /* Register elimination can result in a request for
5124	 SP+constant->FP_REGS.  We cannot support such operations, which
5125	 use SP as source and an FP_REG as destination, so reject them
5126	 outright. */
5127 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
5128 {
5129 rtx lhs = XEXP (x, 0);
5130
5131 /* Look through a possible SUBREG introduced by ILP32. */
5132 if (GET_CODE (lhs) == SUBREG)
5133 lhs = SUBREG_REG (lhs);
5134
5135 gcc_assert (REG_P (lhs));
5136 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
5137 POINTER_REGS));
5138 return NO_REGS;
5139 }
5140
78d8b9f0 5141 return regclass;
43e9d192
IB
5142}
5143
5144void
5145aarch64_asm_output_labelref (FILE* f, const char *name)
5146{
5147 asm_fprintf (f, "%U%s", name);
5148}
5149
5150static void
5151aarch64_elf_asm_constructor (rtx symbol, int priority)
5152{
5153 if (priority == DEFAULT_INIT_PRIORITY)
5154 default_ctor_section_asm_out_constructor (symbol, priority);
5155 else
5156 {
5157 section *s;
5158 char buf[18];
5159 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
5160 s = get_section (buf, SECTION_WRITE, NULL);
5161 switch_to_section (s);
5162 assemble_align (POINTER_SIZE);
28514dda 5163 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
5164 }
5165}
5166
5167static void
5168aarch64_elf_asm_destructor (rtx symbol, int priority)
5169{
5170 if (priority == DEFAULT_INIT_PRIORITY)
5171 default_dtor_section_asm_out_destructor (symbol, priority);
5172 else
5173 {
5174 section *s;
5175 char buf[18];
5176 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
5177 s = get_section (buf, SECTION_WRITE, NULL);
5178 switch_to_section (s);
5179 assemble_align (POINTER_SIZE);
28514dda 5180 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
5181 }
5182}
5183
5184const char*
5185aarch64_output_casesi (rtx *operands)
5186{
5187 char buf[100];
5188 char label[100];
b32d5189 5189 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
5190 int index;
5191 static const char *const patterns[4][2] =
5192 {
5193 {
5194 "ldrb\t%w3, [%0,%w1,uxtw]",
5195 "add\t%3, %4, %w3, sxtb #2"
5196 },
5197 {
5198 "ldrh\t%w3, [%0,%w1,uxtw #1]",
5199 "add\t%3, %4, %w3, sxth #2"
5200 },
5201 {
5202 "ldr\t%w3, [%0,%w1,uxtw #2]",
5203 "add\t%3, %4, %w3, sxtw #2"
5204 },
5205 /* We assume that DImode is only generated when not optimizing and
5206 that we don't really need 64-bit address offsets. That would
5207 imply an object file with 8GB of code in a single function! */
5208 {
5209 "ldr\t%w3, [%0,%w1,uxtw #2]",
5210 "add\t%3, %4, %w3, sxtw #2"
5211 }
5212 };
5213
5214 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
5215
5216 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
5217
5218 gcc_assert (index >= 0 && index <= 3);
5219
5220	 /* Need to implement table size reduction, by changing the code below. */
5221 output_asm_insn (patterns[index][0], operands);
5222 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
5223 snprintf (buf, sizeof (buf),
5224 "adr\t%%4, %s", targetm.strip_name_encoding (label));
5225 output_asm_insn (buf, operands);
5226 output_asm_insn (patterns[index][1], operands);
5227 output_asm_insn ("br\t%3", operands);
5228 assemble_label (asm_out_file, label);
5229 return "";
5230}
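/* Illustrative example (not from the original source): for a 4-byte
   ADDR_DIFF_VEC (patterns[2]) with hypothetical operands x0 = table base,
   w1 = index and x3/x4 = scratch registers, the routine emits roughly:
       ldr   w3, [x0, w1, uxtw #2]
       adr   x4, .Lrtx<N>
       add   x3, x4, w3, sxtw #2
       br    x3
     .Lrtx<N>:
   i.e. each table entry is treated as a word offset from the .Lrtx label
   that follows the dispatch sequence.  */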
5231
5232
5233/* Return size in bits of an arithmetic operand which is shifted/scaled and
5234 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
5235 operator. */
5236
5237int
5238aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
5239{
5240 if (shift >= 0 && shift <= 3)
5241 {
5242 int size;
5243 for (size = 8; size <= 32; size *= 2)
5244 {
5245 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
5246 if (mask == bits << shift)
5247 return size;
5248 }
5249 }
5250 return 0;
5251}
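/* Illustrative examples (not part of the original source):
     aarch64_uxt_size (3, 0x7f8)       -> 8   (0xff << 3,   UXTB-style mask)
     aarch64_uxt_size (1, 0x1fffe)     -> 16  (0xffff << 1, UXTH-style mask)
     aarch64_uxt_size (0, 0xffffffff)  -> 32  (UXTW-style mask)
     aarch64_uxt_size (2, 0xff)        -> 0   (mask does not line up with the shift)  */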
5252
5253static bool
ef4bddc2 5254aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
5255 const_rtx x ATTRIBUTE_UNUSED)
5256{
5257 /* We can't use blocks for constants when we're using a per-function
5258 constant pool. */
5259 return false;
5260}
5261
5262static section *
ef4bddc2 5263aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
5264 rtx x ATTRIBUTE_UNUSED,
5265 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
5266{
5267 /* Force all constant pool entries into the current function section. */
5268 return function_section (current_function_decl);
5269}
5270
5271
5272/* Costs. */
5273
5274/* Helper function for rtx cost calculation. Strip a shift expression
5275 from X. Returns the inner operand if successful, or the original
5276 expression on failure. */
5277static rtx
5278aarch64_strip_shift (rtx x)
5279{
5280 rtx op = x;
5281
57b77d46
RE
5282 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
5283 we can convert both to ROR during final output. */
43e9d192
IB
5284 if ((GET_CODE (op) == ASHIFT
5285 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
5286 || GET_CODE (op) == LSHIFTRT
5287 || GET_CODE (op) == ROTATERT
5288 || GET_CODE (op) == ROTATE)
43e9d192
IB
5289 && CONST_INT_P (XEXP (op, 1)))
5290 return XEXP (op, 0);
5291
5292 if (GET_CODE (op) == MULT
5293 && CONST_INT_P (XEXP (op, 1))
5294 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
5295 return XEXP (op, 0);
5296
5297 return x;
5298}
5299
4745e701 5300/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
5301 expression from X. Returns the inner operand if successful, or the
5302 original expression on failure. We deal with a number of possible
5303 canonicalization variations here. */
5304static rtx
4745e701 5305aarch64_strip_extend (rtx x)
43e9d192
IB
5306{
5307 rtx op = x;
5308
5309 /* Zero and sign extraction of a widened value. */
5310 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
5311 && XEXP (op, 2) == const0_rtx
4745e701 5312 && GET_CODE (XEXP (op, 0)) == MULT
43e9d192
IB
5313 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
5314 XEXP (op, 1)))
5315 return XEXP (XEXP (op, 0), 0);
5316
5317 /* It can also be represented (for zero-extend) as an AND with an
5318 immediate. */
5319 if (GET_CODE (op) == AND
5320 && GET_CODE (XEXP (op, 0)) == MULT
5321 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
5322 && CONST_INT_P (XEXP (op, 1))
5323 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
5324 INTVAL (XEXP (op, 1))) != 0)
5325 return XEXP (XEXP (op, 0), 0);
5326
5327 /* Now handle extended register, as this may also have an optional
5328 left shift by 1..4. */
5329 if (GET_CODE (op) == ASHIFT
5330 && CONST_INT_P (XEXP (op, 1))
5331 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
5332 op = XEXP (op, 0);
5333
5334 if (GET_CODE (op) == ZERO_EXTEND
5335 || GET_CODE (op) == SIGN_EXTEND)
5336 op = XEXP (op, 0);
5337
5338 if (op != x)
5339 return op;
5340
4745e701
JG
5341 return x;
5342}
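/* Illustrative example (not from the original source): applied to
       (ashift:DI (zero_extend:DI (reg:SI w1)) (const_int 2))
   aarch64_strip_extend first strips the shift-by-2 and then the zero_extend,
   returning (reg:SI w1), so that callers can cost the inner operand while
   accounting for the extend/shift separately.  */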
5343
0a78ebe4
KT
5344/* Return true iff CODE is a shift supported in combination
5345 with arithmetic instructions. */
4d1919ed 5346
0a78ebe4
KT
5347static bool
5348aarch64_shift_p (enum rtx_code code)
5349{
5350 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
5351}
5352
4745e701 5353/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
5354 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
5355 Return the calculated cost of the expression, recursing manually in to
4745e701
JG
5356 operands where needed. */
5357
5358static int
e548c9df 5359aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
5360{
5361 rtx op0, op1;
5362 const struct cpu_cost_table *extra_cost
b175b679 5363 = aarch64_tune_params.insn_extra_cost;
4745e701 5364 int cost = 0;
0a78ebe4 5365 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 5366 machine_mode mode = GET_MODE (x);
4745e701
JG
5367
5368 gcc_checking_assert (code == MULT);
5369
5370 op0 = XEXP (x, 0);
5371 op1 = XEXP (x, 1);
5372
5373 if (VECTOR_MODE_P (mode))
5374 mode = GET_MODE_INNER (mode);
5375
5376 /* Integer multiply/fma. */
5377 if (GET_MODE_CLASS (mode) == MODE_INT)
5378 {
5379 /* The multiply will be canonicalized as a shift, cost it as such. */
0a78ebe4
KT
5380 if (aarch64_shift_p (GET_CODE (x))
5381 || (CONST_INT_P (op1)
5382 && exact_log2 (INTVAL (op1)) > 0))
4745e701 5383 {
0a78ebe4
KT
5384 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
5385 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
5386 if (speed)
5387 {
0a78ebe4
KT
5388 if (compound_p)
5389 {
5390 if (REG_P (op1))
5391 /* ARITH + shift-by-register. */
5392 cost += extra_cost->alu.arith_shift_reg;
5393 else if (is_extend)
5394 /* ARITH + extended register. We don't have a cost field
5395 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
5396 cost += extra_cost->alu.extend_arith;
5397 else
5398 /* ARITH + shift-by-immediate. */
5399 cost += extra_cost->alu.arith_shift;
5400 }
4745e701
JG
5401 else
5402 /* LSL (immediate). */
0a78ebe4
KT
5403 cost += extra_cost->alu.shift;
5404
4745e701 5405 }
0a78ebe4
KT
5406 /* Strip extends as we will have costed them in the case above. */
5407 if (is_extend)
5408 op0 = aarch64_strip_extend (op0);
4745e701 5409
e548c9df 5410 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
5411
5412 return cost;
5413 }
5414
d2ac256b
KT
5415 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
5416 compound and let the below cases handle it. After all, MNEG is a
5417 special-case alias of MSUB. */
5418 if (GET_CODE (op0) == NEG)
5419 {
5420 op0 = XEXP (op0, 0);
5421 compound_p = true;
5422 }
5423
4745e701
JG
5424 /* Integer multiplies or FMAs have zero/sign extending variants. */
5425 if ((GET_CODE (op0) == ZERO_EXTEND
5426 && GET_CODE (op1) == ZERO_EXTEND)
5427 || (GET_CODE (op0) == SIGN_EXTEND
5428 && GET_CODE (op1) == SIGN_EXTEND))
5429 {
e548c9df
AM
5430 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
5431 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
5432
5433 if (speed)
5434 {
0a78ebe4 5435 if (compound_p)
d2ac256b 5436 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
5437 cost += extra_cost->mult[0].extend_add;
5438 else
5439 /* MUL/SMULL/UMULL. */
5440 cost += extra_cost->mult[0].extend;
5441 }
5442
5443 return cost;
5444 }
5445
d2ac256b 5446 /* This is either an integer multiply or a MADD. In both cases
4745e701 5447 we want to recurse and cost the operands. */
e548c9df
AM
5448 cost += rtx_cost (op0, mode, MULT, 0, speed);
5449 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
5450
5451 if (speed)
5452 {
0a78ebe4 5453 if (compound_p)
d2ac256b 5454 /* MADD/MSUB. */
4745e701
JG
5455 cost += extra_cost->mult[mode == DImode].add;
5456 else
5457 /* MUL. */
5458 cost += extra_cost->mult[mode == DImode].simple;
5459 }
5460
5461 return cost;
5462 }
5463 else
5464 {
5465 if (speed)
5466 {
3d840f7d 5467 /* Floating-point FMA/FMUL can also support negations of the
4745e701
JG
5468 operands. */
5469 if (GET_CODE (op0) == NEG)
3d840f7d 5470 op0 = XEXP (op0, 0);
4745e701 5471 if (GET_CODE (op1) == NEG)
3d840f7d 5472 op1 = XEXP (op1, 0);
4745e701 5473
0a78ebe4 5474 if (compound_p)
4745e701
JG
5475 /* FMADD/FNMADD/FNMSUB/FMSUB. */
5476 cost += extra_cost->fp[mode == DFmode].fma;
5477 else
3d840f7d 5478 /* FMUL/FNMUL. */
4745e701
JG
5479 cost += extra_cost->fp[mode == DFmode].mult;
5480 }
5481
e548c9df
AM
5482 cost += rtx_cost (op0, mode, MULT, 0, speed);
5483 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
5484 return cost;
5485 }
43e9d192
IB
5486}
5487
67747367
JG
5488static int
5489aarch64_address_cost (rtx x,
ef4bddc2 5490 machine_mode mode,
67747367
JG
5491 addr_space_t as ATTRIBUTE_UNUSED,
5492 bool speed)
5493{
5494 enum rtx_code c = GET_CODE (x);
b175b679 5495 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
5496 struct aarch64_address_info info;
5497 int cost = 0;
5498 info.shift = 0;
5499
5500 if (!aarch64_classify_address (&info, x, mode, c, false))
5501 {
5502 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
5503 {
5504 /* This is a CONST or SYMBOL ref which will be split
5505 in a different way depending on the code model in use.
5506 Cost it through the generic infrastructure. */
e548c9df 5507 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
5508 /* Divide through by the cost of one instruction to
5509 bring it to the same units as the address costs. */
5510 cost_symbol_ref /= COSTS_N_INSNS (1);
5511 /* The cost is then the cost of preparing the address,
5512 followed by an immediate (possibly 0) offset. */
5513 return cost_symbol_ref + addr_cost->imm_offset;
5514 }
5515 else
5516 {
5517 /* This is most likely a jump table from a case
5518 statement. */
5519 return addr_cost->register_offset;
5520 }
5521 }
5522
5523 switch (info.type)
5524 {
5525 case ADDRESS_LO_SUM:
5526 case ADDRESS_SYMBOLIC:
5527 case ADDRESS_REG_IMM:
5528 cost += addr_cost->imm_offset;
5529 break;
5530
5531 case ADDRESS_REG_WB:
5532 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
5533 cost += addr_cost->pre_modify;
5534 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
5535 cost += addr_cost->post_modify;
5536 else
5537 gcc_unreachable ();
5538
5539 break;
5540
5541 case ADDRESS_REG_REG:
5542 cost += addr_cost->register_offset;
5543 break;
5544
5545 case ADDRESS_REG_UXTW:
5546 case ADDRESS_REG_SXTW:
5547 cost += addr_cost->register_extend;
5548 break;
5549
5550 default:
5551 gcc_unreachable ();
5552 }
5553
5554
5555 if (info.shift > 0)
5556 {
5557 /* For the sake of calculating the cost of the shifted register
5558 component, we can treat same sized modes in the same way. */
5559 switch (GET_MODE_BITSIZE (mode))
5560 {
5561 case 16:
5562 cost += addr_cost->addr_scale_costs.hi;
5563 break;
5564
5565 case 32:
5566 cost += addr_cost->addr_scale_costs.si;
5567 break;
5568
5569 case 64:
5570 cost += addr_cost->addr_scale_costs.di;
5571 break;
5572
5573 /* We can't tell, or this is a 128-bit vector. */
5574 default:
5575 cost += addr_cost->addr_scale_costs.ti;
5576 break;
5577 }
5578 }
5579
5580 return cost;
5581}
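/* Editorial illustration (not part of the original aarch64.c): how the
   address cost above composes for some common addressing forms, stated
   in terms of the tuning structure's field names.

     [x0, #16]                               -> imm_offset
     [x0, x1]          (no scaling)          -> register_offset
     [x0, w1, sxtw #2] (SImode access)       -> register_extend
                                                + addr_scale_costs.si
     [x0, x1, lsl #3]  (DImode access)       -> register_offset
                                                + addr_scale_costs.di
     [x0, #8]!  /  [x0], #8                  -> pre_modify / post_modify

   The scale component is keyed on the bit size of the access mode, so
   all same-sized modes share the same entry.  */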
5582
b9066f5a
MW
5583/* Return the cost of a branch. If SPEED_P is true then the compiler is
5584 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
5585 to be taken. */
5586
5587int
5588aarch64_branch_cost (bool speed_p, bool predictable_p)
5589{
5590 /* When optimizing for speed, use the cost of unpredictable branches. */
5591 const struct cpu_branch_cost *branch_costs =
b175b679 5592 aarch64_tune_params.branch_costs;
b9066f5a
MW
5593
5594 if (!speed_p || predictable_p)
5595 return branch_costs->predictable;
5596 else
5597 return branch_costs->unpredictable;
5598}
5599
7cc2145f
JG
5600/* Return true if the RTX X in mode MODE is a zero or sign extract
5601 usable in an ADD or SUB (extended register) instruction. */
5602static bool
ef4bddc2 5603aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
7cc2145f
JG
5604{
5605 /* Catch add with a sign extract.
5606 This is add_<optab><mode>_multp2. */
5607 if (GET_CODE (x) == SIGN_EXTRACT
5608 || GET_CODE (x) == ZERO_EXTRACT)
5609 {
5610 rtx op0 = XEXP (x, 0);
5611 rtx op1 = XEXP (x, 1);
5612 rtx op2 = XEXP (x, 2);
5613
5614 if (GET_CODE (op0) == MULT
5615 && CONST_INT_P (op1)
5616 && op2 == const0_rtx
5617 && CONST_INT_P (XEXP (op0, 1))
5618 && aarch64_is_extend_from_extract (mode,
5619 XEXP (op0, 1),
5620 op1))
5621 {
5622 return true;
5623 }
5624 }
e47c4031
KT
5625 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
5626 No shift. */
5627 else if (GET_CODE (x) == SIGN_EXTEND
5628 || GET_CODE (x) == ZERO_EXTEND)
5629 return REG_P (XEXP (x, 0));
7cc2145f
JG
5630
5631 return false;
5632}
5633
61263118
KT
5634static bool
5635aarch64_frint_unspec_p (unsigned int u)
5636{
5637 switch (u)
5638 {
5639 case UNSPEC_FRINTZ:
5640 case UNSPEC_FRINTP:
5641 case UNSPEC_FRINTM:
5642 case UNSPEC_FRINTA:
5643 case UNSPEC_FRINTN:
5644 case UNSPEC_FRINTX:
5645 case UNSPEC_FRINTI:
5646 return true;
5647
5648 default:
5649 return false;
5650 }
5651}
5652
fb0cb7fa
KT
5653/* Return true iff X is an rtx that will match an extr instruction
5654 i.e. as described in the *extr<mode>5_insn family of patterns.
5655 OP0 and OP1 will be set to the operands of the shifts involved
5656 on success and will be NULL_RTX otherwise. */
5657
5658static bool
5659aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
5660{
5661 rtx op0, op1;
5662 machine_mode mode = GET_MODE (x);
5663
5664 *res_op0 = NULL_RTX;
5665 *res_op1 = NULL_RTX;
5666
5667 if (GET_CODE (x) != IOR)
5668 return false;
5669
5670 op0 = XEXP (x, 0);
5671 op1 = XEXP (x, 1);
5672
5673 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
5674 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
5675 {
5676 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
5677 if (GET_CODE (op1) == ASHIFT)
5678 std::swap (op0, op1);
5679
5680 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
5681 return false;
5682
5683 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
5684 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
5685
5686 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
5687 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
5688 {
5689 *res_op0 = XEXP (op0, 0);
5690 *res_op1 = XEXP (op1, 0);
5691 return true;
5692 }
5693 }
5694
5695 return false;
5696}
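/* Editorial illustration (not part of the original aarch64.c): the kind
   of expression aarch64_extr_rtx_p accepts.  For DImode,

     (ior:DI (ashift:DI (reg:DI a) (const_int 48))
             (lshiftrt:DI (reg:DI b) (const_int 16)))

   has constant shift amounts that sum to GET_MODE_BITSIZE (DImode) = 64,
   with the ASHIFT amount below 64, so *res_op0 = a, *res_op1 = b and the
   whole IOR can be matched as a single EXTR instruction.  */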
5697
2d5ffe46
AP
5698/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5699 storing it in *COST. Result is true if the total cost of the operation
5700 has now been calculated. */
5701static bool
5702aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
5703{
b9e3afe9
AP
5704 rtx inner;
5705 rtx comparator;
5706 enum rtx_code cmpcode;
5707
5708 if (COMPARISON_P (op0))
5709 {
5710 inner = XEXP (op0, 0);
5711 comparator = XEXP (op0, 1);
5712 cmpcode = GET_CODE (op0);
5713 }
5714 else
5715 {
5716 inner = op0;
5717 comparator = const0_rtx;
5718 cmpcode = NE;
5719 }
5720
2d5ffe46
AP
5721 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5722 {
5723 /* Conditional branch. */
b9e3afe9 5724 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
5725 return true;
5726 else
5727 {
b9e3afe9 5728 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 5729 {
2d5ffe46
AP
5730 if (comparator == const0_rtx)
5731 {
5732 /* TBZ/TBNZ/CBZ/CBNZ. */
5733 if (GET_CODE (inner) == ZERO_EXTRACT)
5734 /* TBZ/TBNZ. */
e548c9df
AM
5735 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
5736 ZERO_EXTRACT, 0, speed);
5737 else
5738 /* CBZ/CBNZ. */
5739 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
5740
5741 return true;
5742 }
5743 }
b9e3afe9 5744 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 5745 {
2d5ffe46
AP
5746 /* TBZ/TBNZ. */
5747 if (comparator == const0_rtx)
5748 return true;
5749 }
5750 }
5751 }
b9e3afe9 5752 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
5753 {
5754 /* It's a conditional operation based on the status flags,
5755 so it must be some flavor of CSEL. */
5756
5757 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5758 if (GET_CODE (op1) == NEG
5759 || GET_CODE (op1) == NOT
5760 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5761 op1 = XEXP (op1, 0);
5762
e548c9df
AM
5763 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
5764 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
5765 return true;
5766 }
5767
5768 /* We don't know what this is, cost all operands. */
5769 return false;
5770}
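/* Editorial illustration (not part of the original aarch64.c): the main
   shapes recognised by aarch64_if_then_else_costs.

     (if_then_else (ne (reg:DI x) (const_int 0)) (label_ref L) (pc))
       a compare against zero feeding a branch -> CBZ/CBNZ; only the
       register operand is costed.

     (if_then_else (eq (zero_extract (reg:DI x) ...) (const_int 0))
                   (label_ref L) (pc))
       a single-bit test feeding a branch -> TBZ/TBNZ.

     (if_then_else (lt (reg:CC cc) (const_int 0))
                   (plus:DI (reg:DI y) (const_int 1))
                   (reg:DI z))
       a MODE_CC condition selecting between register operands ->
       some flavour of CSEL; the PLUS-1 (CSINC), NEG (CSNEG) or NOT
       (CSINV) wrapper on the first arm is stripped before costing.  */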
5771
43e9d192
IB
5772/* Calculate the cost of calculating X, storing it in *COST. Result
5773 is true if the total cost of the operation has now been calculated. */
5774static bool
e548c9df 5775aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
5776 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5777{
a8eecd00 5778 rtx op0, op1, op2;
73250c4c 5779 const struct cpu_cost_table *extra_cost
b175b679 5780 = aarch64_tune_params.insn_extra_cost;
e548c9df 5781 int code = GET_CODE (x);
43e9d192 5782
7fc5ef02
JG
5783 /* By default, assume that everything has equivalent cost to the
5784 cheapest instruction. Any additional costs are applied as a delta
5785 above this default. */
5786 *cost = COSTS_N_INSNS (1);
5787
43e9d192
IB
5788 switch (code)
5789 {
5790 case SET:
ba123b0d
JG
5791 /* The cost depends entirely on the operands to SET. */
5792 *cost = 0;
43e9d192
IB
5793 op0 = SET_DEST (x);
5794 op1 = SET_SRC (x);
5795
5796 switch (GET_CODE (op0))
5797 {
5798 case MEM:
5799 if (speed)
2961177e
JG
5800 {
5801 rtx address = XEXP (op0, 0);
b6875aac
KV
5802 if (VECTOR_MODE_P (mode))
5803 *cost += extra_cost->ldst.storev;
5804 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
5805 *cost += extra_cost->ldst.store;
5806 else if (mode == SFmode)
5807 *cost += extra_cost->ldst.storef;
5808 else if (mode == DFmode)
5809 *cost += extra_cost->ldst.stored;
5810
5811 *cost +=
5812 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5813 0, speed));
5814 }
43e9d192 5815
e548c9df 5816 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
5817 return true;
5818
5819 case SUBREG:
5820 if (! REG_P (SUBREG_REG (op0)))
e548c9df 5821 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 5822
43e9d192
IB
5823 /* Fall through. */
5824 case REG:
b6875aac
KV
5825 /* The cost is one per vector-register copied. */
5826 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
5827 {
5828 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5829 / GET_MODE_SIZE (V4SImode);
5830 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5831 }
ba123b0d
JG
5832 /* const0_rtx is in general free, but we will use an
5833 instruction to set a register to 0. */
b6875aac
KV
5834 else if (REG_P (op1) || op1 == const0_rtx)
5835 {
5836 /* The cost is 1 per register copied. */
5837 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
ba123b0d 5838 / UNITS_PER_WORD;
b6875aac
KV
5839 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5840 }
ba123b0d
JG
5841 else
5842 /* Cost is just the cost of the RHS of the set. */
e548c9df 5843 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
5844 return true;
5845
ba123b0d 5846 case ZERO_EXTRACT:
43e9d192 5847 case SIGN_EXTRACT:
ba123b0d
JG
5848 /* Bit-field insertion. Strip any redundant widening of
5849 the RHS to meet the width of the target. */
43e9d192
IB
5850 if (GET_CODE (op1) == SUBREG)
5851 op1 = SUBREG_REG (op1);
5852 if ((GET_CODE (op1) == ZERO_EXTEND
5853 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 5854 && CONST_INT_P (XEXP (op0, 1))
43e9d192
IB
5855 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5856 >= INTVAL (XEXP (op0, 1))))
5857 op1 = XEXP (op1, 0);
ba123b0d
JG
5858
5859 if (CONST_INT_P (op1))
5860 {
5861 /* MOV immediate is assumed to always be cheap. */
5862 *cost = COSTS_N_INSNS (1);
5863 }
5864 else
5865 {
5866 /* BFM. */
5867 if (speed)
5868 *cost += extra_cost->alu.bfi;
e548c9df 5869 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
5870 }
5871
43e9d192
IB
5872 return true;
5873
5874 default:
ba123b0d
JG
5875	  /* We can't make sense of this; assume the default cost.  */
5876 *cost = COSTS_N_INSNS (1);
61263118 5877 return false;
43e9d192
IB
5878 }
5879 return false;
5880
9dfc162c
JG
5881 case CONST_INT:
5882 /* If an instruction can incorporate a constant within the
5883 instruction, the instruction's expression avoids calling
5884 rtx_cost() on the constant. If rtx_cost() is called on a
5885 constant, then it is usually because the constant must be
5886 moved into a register by one or more instructions.
5887
5888 The exception is constant 0, which can be expressed
5889 as XZR/WZR and is therefore free. The exception to this is
5890 if we have (set (reg) (const0_rtx)) in which case we must cost
5891 the move. However, we can catch that when we cost the SET, so
5892 we don't need to consider that here. */
5893 if (x == const0_rtx)
5894 *cost = 0;
5895 else
5896 {
5897 /* To an approximation, building any other constant is
5898 proportionally expensive to the number of instructions
5899 required to build that constant. This is true whether we
5900 are compiling for SPEED or otherwise. */
82614948
RR
5901 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
5902 (NULL_RTX, x, false, mode));
9dfc162c
JG
5903 }
5904 return true;
5905
5906 case CONST_DOUBLE:
5907 if (speed)
5908 {
5909 /* mov[df,sf]_aarch64. */
5910 if (aarch64_float_const_representable_p (x))
5911 /* FMOV (scalar immediate). */
5912 *cost += extra_cost->fp[mode == DFmode].fpconst;
5913 else if (!aarch64_float_const_zero_rtx_p (x))
5914 {
5915 /* This will be a load from memory. */
5916 if (mode == DFmode)
5917 *cost += extra_cost->ldst.loadd;
5918 else
5919 *cost += extra_cost->ldst.loadf;
5920 }
5921 else
5922 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5923 or MOV v0.s[0], wzr - neither of which are modeled by the
5924 cost tables. Just use the default cost. */
5925 {
5926 }
5927 }
5928
5929 return true;
5930
43e9d192
IB
5931 case MEM:
5932 if (speed)
2961177e
JG
5933 {
5934 /* For loads we want the base cost of a load, plus an
5935 approximation for the additional cost of the addressing
5936 mode. */
5937 rtx address = XEXP (x, 0);
b6875aac
KV
5938 if (VECTOR_MODE_P (mode))
5939 *cost += extra_cost->ldst.loadv;
5940 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
5941 *cost += extra_cost->ldst.load;
5942 else if (mode == SFmode)
5943 *cost += extra_cost->ldst.loadf;
5944 else if (mode == DFmode)
5945 *cost += extra_cost->ldst.loadd;
5946
5947 *cost +=
5948 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5949 0, speed));
5950 }
43e9d192
IB
5951
5952 return true;
5953
5954 case NEG:
4745e701
JG
5955 op0 = XEXP (x, 0);
5956
b6875aac
KV
5957 if (VECTOR_MODE_P (mode))
5958 {
5959 if (speed)
5960 {
5961 /* FNEG. */
5962 *cost += extra_cost->vect.alu;
5963 }
5964 return false;
5965 }
5966
e548c9df
AM
5967 if (GET_MODE_CLASS (mode) == MODE_INT)
5968 {
4745e701
JG
5969 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5970 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5971 {
5972 /* CSETM. */
e548c9df 5973 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
5974 return true;
5975 }
5976
5977 /* Cost this as SUB wzr, X. */
e548c9df 5978 op0 = CONST0_RTX (mode);
4745e701
JG
5979 op1 = XEXP (x, 0);
5980 goto cost_minus;
5981 }
5982
e548c9df 5983 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
5984 {
5985 /* Support (neg(fma...)) as a single instruction only if
5986 sign of zeros is unimportant. This matches the decision
5987 making in aarch64.md. */
5988 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5989 {
5990 /* FNMADD. */
e548c9df 5991 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
5992 return true;
5993 }
5994 if (speed)
5995 /* FNEG. */
5996 *cost += extra_cost->fp[mode == DFmode].neg;
5997 return false;
5998 }
5999
6000 return false;
43e9d192 6001
781aeb73
KT
6002 case CLRSB:
6003 case CLZ:
6004 if (speed)
b6875aac
KV
6005 {
6006 if (VECTOR_MODE_P (mode))
6007 *cost += extra_cost->vect.alu;
6008 else
6009 *cost += extra_cost->alu.clz;
6010 }
781aeb73
KT
6011
6012 return false;
6013
43e9d192
IB
6014 case COMPARE:
6015 op0 = XEXP (x, 0);
6016 op1 = XEXP (x, 1);
6017
6018 if (op1 == const0_rtx
6019 && GET_CODE (op0) == AND)
6020 {
6021 x = op0;
e548c9df 6022 mode = GET_MODE (op0);
43e9d192
IB
6023 goto cost_logic;
6024 }
6025
a8eecd00
JG
6026 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
6027 {
6028 /* TODO: A write to the CC flags possibly costs extra, this
6029 needs encoding in the cost tables. */
6030
6031 /* CC_ZESWPmode supports zero extend for free. */
e548c9df 6032 if (mode == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
a8eecd00
JG
6033 op0 = XEXP (op0, 0);
6034
e548c9df 6035 mode = GET_MODE (op0);
a8eecd00
JG
6036 /* ANDS. */
6037 if (GET_CODE (op0) == AND)
6038 {
6039 x = op0;
6040 goto cost_logic;
6041 }
6042
6043 if (GET_CODE (op0) == PLUS)
6044 {
6045 /* ADDS (and CMN alias). */
6046 x = op0;
6047 goto cost_plus;
6048 }
6049
6050 if (GET_CODE (op0) == MINUS)
6051 {
6052 /* SUBS. */
6053 x = op0;
6054 goto cost_minus;
6055 }
6056
6057 if (GET_CODE (op1) == NEG)
6058 {
6059 /* CMN. */
6060 if (speed)
6061 *cost += extra_cost->alu.arith;
6062
e548c9df
AM
6063 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
6064 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
6065 return true;
6066 }
6067
6068 /* CMP.
6069
6070 Compare can freely swap the order of operands, and
6071 canonicalization puts the more complex operation first.
6072 But the integer MINUS logic expects the shift/extend
6073 operation in op1. */
6074 if (! (REG_P (op0)
6075 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
6076 {
6077 op0 = XEXP (x, 1);
6078 op1 = XEXP (x, 0);
6079 }
6080 goto cost_minus;
6081 }
6082
6083 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6084 {
6085 /* FCMP. */
6086 if (speed)
6087 *cost += extra_cost->fp[mode == DFmode].compare;
6088
6089 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
6090 {
e548c9df 6091 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
6092 /* FCMP supports constant 0.0 for no extra cost. */
6093 return true;
6094 }
6095 return false;
6096 }
6097
b6875aac
KV
6098 if (VECTOR_MODE_P (mode))
6099 {
6100 /* Vector compare. */
6101 if (speed)
6102 *cost += extra_cost->vect.alu;
6103
6104 if (aarch64_float_const_zero_rtx_p (op1))
6105 {
6106 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
6107 cost. */
6108 return true;
6109 }
6110 return false;
6111 }
a8eecd00 6112 return false;
43e9d192
IB
6113
6114 case MINUS:
4745e701
JG
6115 {
6116 op0 = XEXP (x, 0);
6117 op1 = XEXP (x, 1);
6118
6119cost_minus:
e548c9df 6120 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 6121
4745e701
JG
6122 /* Detect valid immediates. */
6123 if ((GET_MODE_CLASS (mode) == MODE_INT
6124 || (GET_MODE_CLASS (mode) == MODE_CC
6125 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
6126 && CONST_INT_P (op1)
6127 && aarch64_uimm12_shift (INTVAL (op1)))
6128 {
4745e701
JG
6129 if (speed)
6130 /* SUB(S) (immediate). */
6131 *cost += extra_cost->alu.arith;
6132 return true;
4745e701
JG
6133 }
6134
7cc2145f
JG
6135 /* Look for SUB (extended register). */
6136 if (aarch64_rtx_arith_op_extract_p (op1, mode))
6137 {
6138 if (speed)
2533c820 6139 *cost += extra_cost->alu.extend_arith;
7cc2145f 6140
e47c4031
KT
6141 op1 = aarch64_strip_extend (op1);
6142 *cost += rtx_cost (op1, VOIDmode,
e548c9df 6143 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
6144 return true;
6145 }
6146
4745e701
JG
6147 rtx new_op1 = aarch64_strip_extend (op1);
6148
6149 /* Cost this as an FMA-alike operation. */
6150 if ((GET_CODE (new_op1) == MULT
0a78ebe4 6151 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
6152 && code != COMPARE)
6153 {
6154 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
6155 (enum rtx_code) code,
6156 speed);
4745e701
JG
6157 return true;
6158 }
43e9d192 6159
e548c9df 6160 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 6161
4745e701
JG
6162 if (speed)
6163 {
b6875aac
KV
6164 if (VECTOR_MODE_P (mode))
6165 {
6166 /* Vector SUB. */
6167 *cost += extra_cost->vect.alu;
6168 }
6169 else if (GET_MODE_CLASS (mode) == MODE_INT)
6170 {
6171 /* SUB(S). */
6172 *cost += extra_cost->alu.arith;
6173 }
4745e701 6174 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
6175 {
6176 /* FSUB. */
6177 *cost += extra_cost->fp[mode == DFmode].addsub;
6178 }
4745e701
JG
6179 }
6180 return true;
6181 }
43e9d192
IB
6182
6183 case PLUS:
4745e701
JG
6184 {
6185 rtx new_op0;
43e9d192 6186
4745e701
JG
6187 op0 = XEXP (x, 0);
6188 op1 = XEXP (x, 1);
43e9d192 6189
a8eecd00 6190cost_plus:
4745e701
JG
6191 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
6192 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
6193 {
6194 /* CSINC. */
e548c9df
AM
6195 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
6196 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
6197 return true;
6198 }
43e9d192 6199
4745e701
JG
6200 if (GET_MODE_CLASS (mode) == MODE_INT
6201 && CONST_INT_P (op1)
6202 && aarch64_uimm12_shift (INTVAL (op1)))
6203 {
e548c9df 6204 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 6205
4745e701
JG
6206 if (speed)
6207 /* ADD (immediate). */
6208 *cost += extra_cost->alu.arith;
6209 return true;
6210 }
6211
e548c9df 6212 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 6213
7cc2145f
JG
6214 /* Look for ADD (extended register). */
6215 if (aarch64_rtx_arith_op_extract_p (op0, mode))
6216 {
6217 if (speed)
2533c820 6218 *cost += extra_cost->alu.extend_arith;
7cc2145f 6219
e47c4031
KT
6220 op0 = aarch64_strip_extend (op0);
6221 *cost += rtx_cost (op0, VOIDmode,
e548c9df 6222 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
6223 return true;
6224 }
6225
4745e701
JG
6226 /* Strip any extend, leave shifts behind as we will
6227 cost them through mult_cost. */
6228 new_op0 = aarch64_strip_extend (op0);
6229
6230 if (GET_CODE (new_op0) == MULT
0a78ebe4 6231 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
6232 {
6233 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
6234 speed);
4745e701
JG
6235 return true;
6236 }
6237
e548c9df 6238 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
6239
6240 if (speed)
6241 {
b6875aac
KV
6242 if (VECTOR_MODE_P (mode))
6243 {
6244 /* Vector ADD. */
6245 *cost += extra_cost->vect.alu;
6246 }
6247 else if (GET_MODE_CLASS (mode) == MODE_INT)
6248 {
6249 /* ADD. */
6250 *cost += extra_cost->alu.arith;
6251 }
4745e701 6252 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
6253 {
6254 /* FADD. */
6255 *cost += extra_cost->fp[mode == DFmode].addsub;
6256 }
4745e701
JG
6257 }
6258 return true;
6259 }
43e9d192 6260
18b42b2a
KT
6261 case BSWAP:
6262 *cost = COSTS_N_INSNS (1);
6263
6264 if (speed)
b6875aac
KV
6265 {
6266 if (VECTOR_MODE_P (mode))
6267 *cost += extra_cost->vect.alu;
6268 else
6269 *cost += extra_cost->alu.rev;
6270 }
18b42b2a
KT
6271 return false;
6272
43e9d192 6273 case IOR:
f7d5cf8d
KT
6274 if (aarch_rev16_p (x))
6275 {
6276 *cost = COSTS_N_INSNS (1);
6277
b6875aac
KV
6278 if (speed)
6279 {
6280 if (VECTOR_MODE_P (mode))
6281 *cost += extra_cost->vect.alu;
6282 else
6283 *cost += extra_cost->alu.rev;
6284 }
6285 return true;
f7d5cf8d 6286 }
fb0cb7fa
KT
6287
6288 if (aarch64_extr_rtx_p (x, &op0, &op1))
6289 {
e548c9df
AM
6290 *cost += rtx_cost (op0, mode, IOR, 0, speed);
6291 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
6292 if (speed)
6293 *cost += extra_cost->alu.shift;
6294
6295 return true;
6296 }
f7d5cf8d 6297 /* Fall through. */
43e9d192
IB
6298 case XOR:
6299 case AND:
6300 cost_logic:
6301 op0 = XEXP (x, 0);
6302 op1 = XEXP (x, 1);
6303
b6875aac
KV
6304 if (VECTOR_MODE_P (mode))
6305 {
6306 if (speed)
6307 *cost += extra_cost->vect.alu;
6308 return true;
6309 }
6310
268c3b47
JG
6311 if (code == AND
6312 && GET_CODE (op0) == MULT
6313 && CONST_INT_P (XEXP (op0, 1))
6314 && CONST_INT_P (op1)
6315 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
6316 INTVAL (op1)) != 0)
6317 {
6318 /* This is a UBFM/SBFM. */
e548c9df 6319 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
6320 if (speed)
6321 *cost += extra_cost->alu.bfx;
6322 return true;
6323 }
6324
e548c9df 6325 if (GET_MODE_CLASS (mode) == MODE_INT)
43e9d192 6326 {
268c3b47
JG
6327 /* We possibly get the immediate for free, this is not
6328 modelled. */
43e9d192 6329 if (CONST_INT_P (op1)
e548c9df 6330 && aarch64_bitmask_imm (INTVAL (op1), mode))
43e9d192 6331 {
e548c9df 6332 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
268c3b47
JG
6333
6334 if (speed)
6335 *cost += extra_cost->alu.logical;
6336
6337 return true;
43e9d192
IB
6338 }
6339 else
6340 {
268c3b47
JG
6341 rtx new_op0 = op0;
6342
6343 /* Handle ORN, EON, or BIC. */
43e9d192
IB
6344 if (GET_CODE (op0) == NOT)
6345 op0 = XEXP (op0, 0);
268c3b47
JG
6346
6347 new_op0 = aarch64_strip_shift (op0);
6348
6349 /* If we had a shift on op0 then this is a logical-shift-
6350 by-register/immediate operation. Otherwise, this is just
6351 a logical operation. */
6352 if (speed)
6353 {
6354 if (new_op0 != op0)
6355 {
6356 /* Shift by immediate. */
6357 if (CONST_INT_P (XEXP (op0, 1)))
6358 *cost += extra_cost->alu.log_shift;
6359 else
6360 *cost += extra_cost->alu.log_shift_reg;
6361 }
6362 else
6363 *cost += extra_cost->alu.logical;
6364 }
6365
6366 /* In both cases we want to cost both operands. */
e548c9df
AM
6367 *cost += rtx_cost (new_op0, mode, (enum rtx_code) code, 0, speed);
6368 *cost += rtx_cost (op1, mode, (enum rtx_code) code, 1, speed);
268c3b47
JG
6369
6370 return true;
43e9d192 6371 }
43e9d192
IB
6372 }
6373 return false;
6374
268c3b47 6375 case NOT:
6365da9e
KT
6376 x = XEXP (x, 0);
6377 op0 = aarch64_strip_shift (x);
6378
b6875aac
KV
6379 if (VECTOR_MODE_P (mode))
6380 {
6381 /* Vector NOT. */
6382 *cost += extra_cost->vect.alu;
6383 return false;
6384 }
6385
6365da9e
KT
6386 /* MVN-shifted-reg. */
6387 if (op0 != x)
6388 {
e548c9df 6389 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
6390
6391 if (speed)
6392 *cost += extra_cost->alu.log_shift;
6393
6394 return true;
6395 }
6396 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
6397 Handle the second form here taking care that 'a' in the above can
6398 be a shift. */
6399 else if (GET_CODE (op0) == XOR)
6400 {
6401 rtx newop0 = XEXP (op0, 0);
6402 rtx newop1 = XEXP (op0, 1);
6403 rtx op0_stripped = aarch64_strip_shift (newop0);
6404
e548c9df
AM
6405 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
6406 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
6407
6408 if (speed)
6409 {
6410 if (op0_stripped != newop0)
6411 *cost += extra_cost->alu.log_shift;
6412 else
6413 *cost += extra_cost->alu.logical;
6414 }
6415
6416 return true;
6417 }
268c3b47
JG
6418 /* MVN. */
6419 if (speed)
6420 *cost += extra_cost->alu.logical;
6421
268c3b47
JG
6422 return false;
6423
43e9d192 6424 case ZERO_EXTEND:
b1685e62
JG
6425
6426 op0 = XEXP (x, 0);
6427 /* If a value is written in SI mode, then zero extended to DI
6428 mode, the operation will in general be free as a write to
6429 a 'w' register implicitly zeroes the upper bits of an 'x'
6430 register. However, if this is
6431
6432 (set (reg) (zero_extend (reg)))
6433
6434 we must cost the explicit register move. */
6435 if (mode == DImode
6436 && GET_MODE (op0) == SImode
6437 && outer == SET)
6438 {
e548c9df 6439 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62
JG
6440
6441 if (!op_cost && speed)
6442 /* MOV. */
6443 *cost += extra_cost->alu.extend;
6444 else
6445 /* Free, the cost is that of the SI mode operation. */
6446 *cost = op_cost;
6447
6448 return true;
6449 }
e548c9df 6450 else if (MEM_P (op0))
43e9d192 6451 {
b1685e62 6452 /* All loads can zero extend to any size for free. */
e548c9df 6453 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
6454 return true;
6455 }
b1685e62 6456
b1685e62 6457 if (speed)
b6875aac
KV
6458 {
6459 if (VECTOR_MODE_P (mode))
6460 {
6461 /* UMOV. */
6462 *cost += extra_cost->vect.alu;
6463 }
6464 else
6465 {
6466 /* UXTB/UXTH. */
6467 *cost += extra_cost->alu.extend;
6468 }
6469 }
43e9d192
IB
6470 return false;
6471
6472 case SIGN_EXTEND:
b1685e62 6473 if (MEM_P (XEXP (x, 0)))
43e9d192 6474 {
b1685e62
JG
6475 /* LDRSH. */
6476 if (speed)
6477 {
6478 rtx address = XEXP (XEXP (x, 0), 0);
6479 *cost += extra_cost->ldst.load_sign_extend;
6480
6481 *cost +=
6482 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6483 0, speed));
6484 }
43e9d192
IB
6485 return true;
6486 }
b1685e62
JG
6487
6488 if (speed)
b6875aac
KV
6489 {
6490 if (VECTOR_MODE_P (mode))
6491 *cost += extra_cost->vect.alu;
6492 else
6493 *cost += extra_cost->alu.extend;
6494 }
43e9d192
IB
6495 return false;
6496
ba0cfa17
JG
6497 case ASHIFT:
6498 op0 = XEXP (x, 0);
6499 op1 = XEXP (x, 1);
6500
6501 if (CONST_INT_P (op1))
6502 {
ba0cfa17 6503 if (speed)
b6875aac
KV
6504 {
6505 if (VECTOR_MODE_P (mode))
6506 {
6507 /* Vector shift (immediate). */
6508 *cost += extra_cost->vect.alu;
6509 }
6510 else
6511 {
6512		  /* LSL (immediate), UBFM, UBFIZ and friends.  These are all
6513 aliases. */
6514 *cost += extra_cost->alu.shift;
6515 }
6516 }
ba0cfa17
JG
6517
6518 /* We can incorporate zero/sign extend for free. */
6519 if (GET_CODE (op0) == ZERO_EXTEND
6520 || GET_CODE (op0) == SIGN_EXTEND)
6521 op0 = XEXP (op0, 0);
6522
e548c9df 6523 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
6524 return true;
6525 }
6526 else
6527 {
ba0cfa17 6528 if (speed)
b6875aac
KV
6529 {
6530 if (VECTOR_MODE_P (mode))
6531 {
6532 /* Vector shift (register). */
6533 *cost += extra_cost->vect.alu;
6534 }
6535 else
6536 {
6537 /* LSLV. */
6538 *cost += extra_cost->alu.shift_reg;
6539 }
6540 }
ba0cfa17
JG
6541 return false; /* All arguments need to be in registers. */
6542 }
6543
43e9d192 6544 case ROTATE:
43e9d192
IB
6545 case ROTATERT:
6546 case LSHIFTRT:
43e9d192 6547 case ASHIFTRT:
ba0cfa17
JG
6548 op0 = XEXP (x, 0);
6549 op1 = XEXP (x, 1);
43e9d192 6550
ba0cfa17
JG
6551 if (CONST_INT_P (op1))
6552 {
6553 /* ASR (immediate) and friends. */
6554 if (speed)
b6875aac
KV
6555 {
6556 if (VECTOR_MODE_P (mode))
6557 *cost += extra_cost->vect.alu;
6558 else
6559 *cost += extra_cost->alu.shift;
6560 }
43e9d192 6561
e548c9df 6562 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
6563 return true;
6564 }
6565 else
6566 {
6567
6568 /* ASR (register) and friends. */
6569 if (speed)
b6875aac
KV
6570 {
6571 if (VECTOR_MODE_P (mode))
6572 *cost += extra_cost->vect.alu;
6573 else
6574 *cost += extra_cost->alu.shift_reg;
6575 }
ba0cfa17
JG
6576 return false; /* All arguments need to be in registers. */
6577 }
43e9d192 6578
909734be
JG
6579 case SYMBOL_REF:
6580
1b1e81f8
JW
6581 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
6582 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
6583 {
6584 /* LDR. */
6585 if (speed)
6586 *cost += extra_cost->ldst.load;
6587 }
6588 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
6589 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
6590 {
6591 /* ADRP, followed by ADD. */
6592 *cost += COSTS_N_INSNS (1);
6593 if (speed)
6594 *cost += 2 * extra_cost->alu.arith;
6595 }
6596 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
6597 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
6598 {
6599 /* ADR. */
6600 if (speed)
6601 *cost += extra_cost->alu.arith;
6602 }
6603
6604 if (flag_pic)
6605 {
6606 /* One extra load instruction, after accessing the GOT. */
6607 *cost += COSTS_N_INSNS (1);
6608 if (speed)
6609 *cost += extra_cost->ldst.load;
6610 }
43e9d192
IB
6611 return true;
6612
909734be 6613 case HIGH:
43e9d192 6614 case LO_SUM:
909734be
JG
6615 /* ADRP/ADD (immediate). */
6616 if (speed)
6617 *cost += extra_cost->alu.arith;
43e9d192
IB
6618 return true;
6619
6620 case ZERO_EXTRACT:
6621 case SIGN_EXTRACT:
7cc2145f
JG
6622 /* UBFX/SBFX. */
6623 if (speed)
b6875aac
KV
6624 {
6625 if (VECTOR_MODE_P (mode))
6626 *cost += extra_cost->vect.alu;
6627 else
6628 *cost += extra_cost->alu.bfx;
6629 }
7cc2145f
JG
6630
6631 /* We can trust that the immediates used will be correct (there
6632 are no by-register forms), so we need only cost op0. */
e548c9df 6633 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
6634 return true;
6635
6636 case MULT:
4745e701
JG
6637 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
6638 /* aarch64_rtx_mult_cost always handles recursion to its
6639 operands. */
6640 return true;
43e9d192
IB
6641
6642 case MOD:
6643 case UMOD:
43e9d192
IB
6644 if (speed)
6645 {
b6875aac
KV
6646 if (VECTOR_MODE_P (mode))
6647 *cost += extra_cost->vect.alu;
e548c9df
AM
6648 else if (GET_MODE_CLASS (mode) == MODE_INT)
6649 *cost += (extra_cost->mult[mode == DImode].add
6650 + extra_cost->mult[mode == DImode].idiv);
6651 else if (mode == DFmode)
73250c4c
KT
6652 *cost += (extra_cost->fp[1].mult
6653 + extra_cost->fp[1].div);
e548c9df 6654 else if (mode == SFmode)
73250c4c
KT
6655 *cost += (extra_cost->fp[0].mult
6656 + extra_cost->fp[0].div);
43e9d192
IB
6657 }
6658 return false; /* All arguments need to be in registers. */
6659
6660 case DIV:
6661 case UDIV:
4105fe38 6662 case SQRT:
43e9d192
IB
6663 if (speed)
6664 {
b6875aac
KV
6665 if (VECTOR_MODE_P (mode))
6666 *cost += extra_cost->vect.alu;
6667 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
6668 /* There is no integer SQRT, so only DIV and UDIV can get
6669 here. */
6670 *cost += extra_cost->mult[mode == DImode].idiv;
6671 else
6672 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
6673 }
6674 return false; /* All arguments need to be in registers. */
6675
a8eecd00 6676 case IF_THEN_ELSE:
2d5ffe46
AP
6677 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
6678 XEXP (x, 2), cost, speed);
a8eecd00
JG
6679
6680 case EQ:
6681 case NE:
6682 case GT:
6683 case GTU:
6684 case LT:
6685 case LTU:
6686 case GE:
6687 case GEU:
6688 case LE:
6689 case LEU:
6690
6691 return false; /* All arguments must be in registers. */
6692
b292109f
JG
6693 case FMA:
6694 op0 = XEXP (x, 0);
6695 op1 = XEXP (x, 1);
6696 op2 = XEXP (x, 2);
6697
6698 if (speed)
b6875aac
KV
6699 {
6700 if (VECTOR_MODE_P (mode))
6701 *cost += extra_cost->vect.alu;
6702 else
6703 *cost += extra_cost->fp[mode == DFmode].fma;
6704 }
b292109f
JG
6705
6706 /* FMSUB, FNMADD, and FNMSUB are free. */
6707 if (GET_CODE (op0) == NEG)
6708 op0 = XEXP (op0, 0);
6709
6710 if (GET_CODE (op2) == NEG)
6711 op2 = XEXP (op2, 0);
6712
6713 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
6714 and the by-element operand as operand 0. */
6715 if (GET_CODE (op1) == NEG)
6716 op1 = XEXP (op1, 0);
6717
6718 /* Catch vector-by-element operations. The by-element operand can
6719 either be (vec_duplicate (vec_select (x))) or just
6720 (vec_select (x)), depending on whether we are multiplying by
6721 a vector or a scalar.
6722
6723	 Canonicalization is not very good in these cases: FMA4 will put the
6724	 by-element operand as operand 0, while FNMA4 will have it as operand 1.  */
6725 if (GET_CODE (op0) == VEC_DUPLICATE)
6726 op0 = XEXP (op0, 0);
6727 else if (GET_CODE (op1) == VEC_DUPLICATE)
6728 op1 = XEXP (op1, 0);
6729
6730 if (GET_CODE (op0) == VEC_SELECT)
6731 op0 = XEXP (op0, 0);
6732 else if (GET_CODE (op1) == VEC_SELECT)
6733 op1 = XEXP (op1, 0);
6734
6735 /* If the remaining parameters are not registers,
6736 get the cost to put them into registers. */
e548c9df
AM
6737 *cost += rtx_cost (op0, mode, FMA, 0, speed);
6738 *cost += rtx_cost (op1, mode, FMA, 1, speed);
6739 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
6740 return true;
6741
5e2a765b
KT
6742 case FLOAT:
6743 case UNSIGNED_FLOAT:
6744 if (speed)
6745 *cost += extra_cost->fp[mode == DFmode].fromint;
6746 return false;
6747
b292109f
JG
6748 case FLOAT_EXTEND:
6749 if (speed)
b6875aac
KV
6750 {
6751 if (VECTOR_MODE_P (mode))
6752 {
6753	      /* Vector widening conversion.  */
6754 *cost += extra_cost->vect.alu;
6755 }
6756 else
6757 *cost += extra_cost->fp[mode == DFmode].widen;
6758 }
b292109f
JG
6759 return false;
6760
6761 case FLOAT_TRUNCATE:
6762 if (speed)
b6875aac
KV
6763 {
6764 if (VECTOR_MODE_P (mode))
6765 {
6766	      /* Vector narrowing conversion.  */
6767 *cost += extra_cost->vect.alu;
6768 }
6769 else
6770 *cost += extra_cost->fp[mode == DFmode].narrow;
6771 }
b292109f
JG
6772 return false;
6773
61263118
KT
6774 case FIX:
6775 case UNSIGNED_FIX:
6776 x = XEXP (x, 0);
6777 /* Strip the rounding part. They will all be implemented
6778 by the fcvt* family of instructions anyway. */
6779 if (GET_CODE (x) == UNSPEC)
6780 {
6781 unsigned int uns_code = XINT (x, 1);
6782
6783 if (uns_code == UNSPEC_FRINTA
6784 || uns_code == UNSPEC_FRINTM
6785 || uns_code == UNSPEC_FRINTN
6786 || uns_code == UNSPEC_FRINTP
6787 || uns_code == UNSPEC_FRINTZ)
6788 x = XVECEXP (x, 0, 0);
6789 }
6790
6791 if (speed)
b6875aac
KV
6792 {
6793 if (VECTOR_MODE_P (mode))
6794 *cost += extra_cost->vect.alu;
6795 else
6796 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
6797 }
e548c9df 6798 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
6799 return true;
6800
b292109f 6801 case ABS:
b6875aac
KV
6802 if (VECTOR_MODE_P (mode))
6803 {
6804 /* ABS (vector). */
6805 if (speed)
6806 *cost += extra_cost->vect.alu;
6807 }
6808 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 6809 {
19261b99
KT
6810 op0 = XEXP (x, 0);
6811
6812 /* FABD, which is analogous to FADD. */
6813 if (GET_CODE (op0) == MINUS)
6814 {
e548c9df
AM
6815 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
6816 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
6817 if (speed)
6818 *cost += extra_cost->fp[mode == DFmode].addsub;
6819
6820 return true;
6821 }
6822 /* Simple FABS is analogous to FNEG. */
b292109f
JG
6823 if (speed)
6824 *cost += extra_cost->fp[mode == DFmode].neg;
6825 }
6826 else
6827 {
6828	  /* Integer ABS will either be split into
6829 two arithmetic instructions, or will be an ABS
6830 (scalar), which we don't model. */
6831 *cost = COSTS_N_INSNS (2);
6832 if (speed)
6833 *cost += 2 * extra_cost->alu.arith;
6834 }
6835 return false;
6836
6837 case SMAX:
6838 case SMIN:
6839 if (speed)
6840 {
b6875aac
KV
6841 if (VECTOR_MODE_P (mode))
6842 *cost += extra_cost->vect.alu;
6843 else
6844 {
6845 /* FMAXNM/FMINNM/FMAX/FMIN.
6846 TODO: This may not be accurate for all implementations, but
6847 we do not model this in the cost tables. */
6848 *cost += extra_cost->fp[mode == DFmode].addsub;
6849 }
b292109f
JG
6850 }
6851 return false;
6852
61263118
KT
6853 case UNSPEC:
6854 /* The floating point round to integer frint* instructions. */
6855 if (aarch64_frint_unspec_p (XINT (x, 1)))
6856 {
6857 if (speed)
6858 *cost += extra_cost->fp[mode == DFmode].roundint;
6859
6860 return false;
6861 }
781aeb73
KT
6862
6863 if (XINT (x, 1) == UNSPEC_RBIT)
6864 {
6865 if (speed)
6866 *cost += extra_cost->alu.rev;
6867
6868 return false;
6869 }
61263118
KT
6870 break;
6871
fb620c4a
JG
6872 case TRUNCATE:
6873
6874 /* Decompose <su>muldi3_highpart. */
6875 if (/* (truncate:DI */
6876 mode == DImode
6877 /* (lshiftrt:TI */
6878 && GET_MODE (XEXP (x, 0)) == TImode
6879 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6880 /* (mult:TI */
6881 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6882 /* (ANY_EXTEND:TI (reg:DI))
6883 (ANY_EXTEND:TI (reg:DI))) */
6884 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6885 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
6886 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
6887 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
6888 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
6889 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
6890 /* (const_int 64) */
6891 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6892 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
6893 {
6894 /* UMULH/SMULH. */
6895 if (speed)
6896 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
6897 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
6898 mode, MULT, 0, speed);
6899 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
6900 mode, MULT, 1, speed);
fb620c4a
JG
6901 return true;
6902 }
6903
6904 /* Fall through. */
43e9d192 6905 default:
61263118 6906 break;
43e9d192 6907 }
61263118
KT
6908
6909 if (dump_file && (dump_flags & TDF_DETAILS))
6910 fprintf (dump_file,
6911 "\nFailed to cost RTX. Assuming default cost.\n");
6912
6913 return true;
43e9d192
IB
6914}
6915
0ee859b5
JG
6916/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
6917 calculated for X. This cost is stored in *COST. Returns true
6918 if the total cost of X was calculated. */
6919static bool
e548c9df 6920aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
6921 int param, int *cost, bool speed)
6922{
e548c9df 6923 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5
JG
6924
6925 if (dump_file && (dump_flags & TDF_DETAILS))
6926 {
6927 print_rtl_single (dump_file, x);
6928 fprintf (dump_file, "\n%s cost: %d (%s)\n",
6929 speed ? "Hot" : "Cold",
6930 *cost, result ? "final" : "partial");
6931 }
6932
6933 return result;
6934}
6935
43e9d192 6936static int
ef4bddc2 6937aarch64_register_move_cost (machine_mode mode,
8a3a7e67 6938 reg_class_t from_i, reg_class_t to_i)
43e9d192 6939{
8a3a7e67
RH
6940 enum reg_class from = (enum reg_class) from_i;
6941 enum reg_class to = (enum reg_class) to_i;
43e9d192 6942 const struct cpu_regmove_cost *regmove_cost
b175b679 6943 = aarch64_tune_params.regmove_cost;
43e9d192 6944
3be07662
WD
6945 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
6946 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
6947 to = GENERAL_REGS;
6948
6949 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
6950 from = GENERAL_REGS;
6951
6ee70f81
AP
6952 /* Moving between GPR and stack cost is the same as GP2GP. */
6953 if ((from == GENERAL_REGS && to == STACK_REG)
6954 || (to == GENERAL_REGS && from == STACK_REG))
6955 return regmove_cost->GP2GP;
6956
6957 /* To/From the stack register, we move via the gprs. */
6958 if (to == STACK_REG || from == STACK_REG)
6959 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
6960 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
6961
8919453c
WD
6962 if (GET_MODE_SIZE (mode) == 16)
6963 {
6964 /* 128-bit operations on general registers require 2 instructions. */
6965 if (from == GENERAL_REGS && to == GENERAL_REGS)
6966 return regmove_cost->GP2GP * 2;
6967 else if (from == GENERAL_REGS)
6968 return regmove_cost->GP2FP * 2;
6969 else if (to == GENERAL_REGS)
6970 return regmove_cost->FP2GP * 2;
6971
6972 /* When AdvSIMD instructions are disabled it is not possible to move
6973 a 128-bit value directly between Q registers. This is handled in
6974 secondary reload. A general register is used as a scratch to move
6975 the upper DI value and the lower DI value is moved directly,
6976 hence the cost is the sum of three moves. */
6977 if (! TARGET_SIMD)
6978 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6979
6980 return regmove_cost->FP2FP;
6981 }
6982
43e9d192
IB
6983 if (from == GENERAL_REGS && to == GENERAL_REGS)
6984 return regmove_cost->GP2GP;
6985 else if (from == GENERAL_REGS)
6986 return regmove_cost->GP2FP;
6987 else if (to == GENERAL_REGS)
6988 return regmove_cost->FP2GP;
6989
43e9d192
IB
6990 return regmove_cost->FP2FP;
6991}
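/* Editorial illustration (not part of the original aarch64.c): worked
   examples of the move-cost rules above, in units of the tuning
   structure's regmove_cost fields.

     DImode  GENERAL_REGS -> STACK_REG              = GP2GP
     TImode  GENERAL_REGS -> GENERAL_REGS           = 2 * GP2GP
     TImode  GENERAL_REGS -> FP_REGS                = 2 * GP2FP
     TImode  FP_REGS -> FP_REGS without TARGET_SIMD = GP2FP + FP2GP + FP2FP
       (the value is bounced through a general-register scratch)
     DImode  CALLER_SAVE_REGS -> FP_REGS            = GP2FP
       (caller-save and pointer classes are folded into GENERAL_REGS)  */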
6992
6993static int
ef4bddc2 6994aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
6995 reg_class_t rclass ATTRIBUTE_UNUSED,
6996 bool in ATTRIBUTE_UNUSED)
6997{
b175b679 6998 return aarch64_tune_params.memmov_cost;
43e9d192
IB
6999}
7000
d126a4ae
AP
7001/* Return the number of instructions that can be issued per cycle. */
7002static int
7003aarch64_sched_issue_rate (void)
7004{
b175b679 7005 return aarch64_tune_params.issue_rate;
d126a4ae
AP
7006}
7007
d03f7e44
MK
7008static int
7009aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
7010{
7011 int issue_rate = aarch64_sched_issue_rate ();
7012
7013 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
7014}
7015
8990e73a
TB
7016/* Vectorizer cost model target hooks. */
7017
7018/* Implement targetm.vectorize.builtin_vectorization_cost. */
7019static int
7020aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
7021 tree vectype,
7022 int misalign ATTRIBUTE_UNUSED)
7023{
7024 unsigned elements;
7025
7026 switch (type_of_cost)
7027 {
7028 case scalar_stmt:
b175b679 7029 return aarch64_tune_params.vec_costs->scalar_stmt_cost;
8990e73a
TB
7030
7031 case scalar_load:
b175b679 7032 return aarch64_tune_params.vec_costs->scalar_load_cost;
8990e73a
TB
7033
7034 case scalar_store:
b175b679 7035 return aarch64_tune_params.vec_costs->scalar_store_cost;
8990e73a
TB
7036
7037 case vector_stmt:
b175b679 7038 return aarch64_tune_params.vec_costs->vec_stmt_cost;
8990e73a
TB
7039
7040 case vector_load:
b175b679 7041 return aarch64_tune_params.vec_costs->vec_align_load_cost;
8990e73a
TB
7042
7043 case vector_store:
b175b679 7044 return aarch64_tune_params.vec_costs->vec_store_cost;
8990e73a
TB
7045
7046 case vec_to_scalar:
b175b679 7047 return aarch64_tune_params.vec_costs->vec_to_scalar_cost;
8990e73a
TB
7048
7049 case scalar_to_vec:
b175b679 7050 return aarch64_tune_params.vec_costs->scalar_to_vec_cost;
8990e73a
TB
7051
7052 case unaligned_load:
b175b679 7053 return aarch64_tune_params.vec_costs->vec_unalign_load_cost;
8990e73a
TB
7054
7055 case unaligned_store:
b175b679 7056 return aarch64_tune_params.vec_costs->vec_unalign_store_cost;
8990e73a
TB
7057
7058 case cond_branch_taken:
b175b679 7059 return aarch64_tune_params.vec_costs->cond_taken_branch_cost;
8990e73a
TB
7060
7061 case cond_branch_not_taken:
b175b679 7062 return aarch64_tune_params.vec_costs->cond_not_taken_branch_cost;
8990e73a
TB
7063
7064 case vec_perm:
7065 case vec_promote_demote:
b175b679 7066 return aarch64_tune_params.vec_costs->vec_stmt_cost;
8990e73a
TB
7067
7068 case vec_construct:
7069 elements = TYPE_VECTOR_SUBPARTS (vectype);
7070 return elements / 2 + 1;
7071
7072 default:
7073 gcc_unreachable ();
7074 }
7075}
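/* Editorial illustration (not part of the original aarch64.c): for the
   vec_construct case above, a vector type with four elements (for
   example a 4 x 32-bit vector) gives TYPE_VECTOR_SUBPARTS = 4, so the
   reported cost is 4 / 2 + 1 = 3.  */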
7076
7077/* Implement targetm.vectorize.add_stmt_cost. */
7078static unsigned
7079aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
7080 struct _stmt_vec_info *stmt_info, int misalign,
7081 enum vect_cost_model_location where)
7082{
7083 unsigned *cost = (unsigned *) data;
7084 unsigned retval = 0;
7085
7086 if (flag_vect_cost_model)
7087 {
7088 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
7089 int stmt_cost =
7090 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
7091
7092 /* Statements in an inner loop relative to the loop being
7093 vectorized are weighted more heavily. The value here is
7094 a function (linear for now) of the loop nest level. */
7095 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
7096 {
7097 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7098 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
7099 unsigned nest_level = loop_depth (loop);
7100
7101 count *= nest_level;
7102 }
7103
7104 retval = (unsigned) (count * stmt_cost);
7105 cost[where] += retval;
7106 }
7107
7108 return retval;
7109}
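/* Editorial illustration (not part of the original aarch64.c): a worked
   example of the accounting above.  Suppose a vector_stmt kind whose
   per-statement cost from aarch64_builtin_vectorization_cost is 2,
   count = 3, where == vect_body, stmt_in_inner_loop_p holds and
   loop_depth of the vectorized loop is 2.  Then count is scaled to
   3 * 2 = 6, the returned value is 6 * 2 = 12, and cost[vect_body] is
   incremented by 12.  Statements that are not in an inner loop skip the
   scaling step.  */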
7110
0cfff2a1 7111static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 7112
0cfff2a1
KT
7113/* Enum describing the various ways that the
7114 aarch64_parse_{arch,tune,cpu,extension} functions can fail.
7115 This way their callers can choose what kind of error to give. */
43e9d192 7116
0cfff2a1
KT
7117enum aarch64_parse_opt_result
7118{
7119 AARCH64_PARSE_OK, /* Parsing was successful. */
7120 AARCH64_PARSE_MISSING_ARG, /* Missing argument. */
7121 AARCH64_PARSE_INVALID_FEATURE, /* Invalid feature modifier. */
7122 AARCH64_PARSE_INVALID_ARG /* Invalid arch, tune, cpu arg. */
7123};
7124
7125/* Parse the architecture extension string STR and update ISA_FLAGS
7126 with the architecture features turned on or off. Return a
7127 aarch64_parse_opt_result describing the result. */
7128
7129static enum aarch64_parse_opt_result
7130aarch64_parse_extension (char *str, unsigned long *isa_flags)
43e9d192
IB
7131{
7132 /* The extension string is parsed left to right. */
7133 const struct aarch64_option_extension *opt = NULL;
7134
7135 /* Flag to say whether we are adding or removing an extension. */
7136 int adding_ext = -1;
7137
7138 while (str != NULL && *str != 0)
7139 {
7140 char *ext;
7141 size_t len;
7142
7143 str++;
7144 ext = strchr (str, '+');
7145
7146 if (ext != NULL)
7147 len = ext - str;
7148 else
7149 len = strlen (str);
7150
7151 if (len >= 2 && strncmp (str, "no", 2) == 0)
7152 {
7153 adding_ext = 0;
7154 len -= 2;
7155 str += 2;
7156 }
7157 else if (len > 0)
7158 adding_ext = 1;
7159
7160 if (len == 0)
0cfff2a1
KT
7161 return AARCH64_PARSE_MISSING_ARG;
7162
43e9d192
IB
7163
7164 /* Scan over the extensions table trying to find an exact match. */
7165 for (opt = all_extensions; opt->name != NULL; opt++)
7166 {
7167 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
7168 {
7169 /* Add or remove the extension. */
7170 if (adding_ext)
0cfff2a1 7171 *isa_flags |= opt->flags_on;
43e9d192 7172 else
0cfff2a1 7173 *isa_flags &= ~(opt->flags_off);
43e9d192
IB
7174 break;
7175 }
7176 }
7177
7178 if (opt->name == NULL)
7179 {
7180 /* Extension not found in list. */
0cfff2a1 7181 return AARCH64_PARSE_INVALID_FEATURE;
43e9d192
IB
7182 }
7183
7184 str = ext;
7185 };
7186
0cfff2a1 7187 return AARCH64_PARSE_OK;
43e9d192
IB
7188}
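/* Editorial illustration (not part of the original aarch64.c): how the
   extension parser above walks a string such as "+crypto+nofp" (the
   extension names here are only examples; they must appear in
   all_extensions, which is defined elsewhere).

     "+crypto"  -> adding_ext = 1, *isa_flags |= flags_on of "crypto"
     "+nofp"    -> the "no" prefix gives adding_ext = 0,
                   *isa_flags &= ~flags_off of "fp"

   An unrecognised name returns AARCH64_PARSE_INVALID_FEATURE, and an
   empty token (for example a trailing '+') returns
   AARCH64_PARSE_MISSING_ARG, so callers can report the precise error.  */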
7189
0cfff2a1
KT
7190/* Parse the TO_PARSE string and put the architecture struct that it
7191 selects into RES and the architectural features into ISA_FLAGS.
7192 Return an aarch64_parse_opt_result describing the parse result.
7193 If there is an error parsing, RES and ISA_FLAGS are left unchanged. */
43e9d192 7194
0cfff2a1
KT
7195static enum aarch64_parse_opt_result
7196aarch64_parse_arch (const char *to_parse, const struct processor **res,
7197 unsigned long *isa_flags)
43e9d192
IB
7198{
7199 char *ext;
7200 const struct processor *arch;
0cfff2a1 7201 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
7202 size_t len;
7203
0cfff2a1 7204 strcpy (str, to_parse);
43e9d192
IB
7205
7206 ext = strchr (str, '+');
7207
7208 if (ext != NULL)
7209 len = ext - str;
7210 else
7211 len = strlen (str);
7212
7213 if (len == 0)
0cfff2a1
KT
7214 return AARCH64_PARSE_MISSING_ARG;
7215
43e9d192 7216
0cfff2a1 7217 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
7218 for (arch = all_architectures; arch->name != NULL; arch++)
7219 {
7220 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
7221 {
0cfff2a1 7222 unsigned long isa_temp = arch->flags;
43e9d192
IB
7223
7224 if (ext != NULL)
7225 {
0cfff2a1
KT
7226 /* TO_PARSE string contains at least one extension. */
7227 enum aarch64_parse_opt_result ext_res
7228 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 7229
0cfff2a1
KT
7230 if (ext_res != AARCH64_PARSE_OK)
7231 return ext_res;
ffee7aa9 7232 }
0cfff2a1
KT
7233 /* Extension parsing was successful. Confirm the result
7234 arch and ISA flags. */
7235 *res = arch;
7236 *isa_flags = isa_temp;
7237 return AARCH64_PARSE_OK;
43e9d192
IB
7238 }
7239 }
7240
7241 /* ARCH name not found in list. */
0cfff2a1 7242 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
7243}
7244
0cfff2a1
KT
7245/* Parse the TO_PARSE string and put the result tuning in RES and the
7246 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
7247 describing the parse result. If there is an error parsing, RES and
7248 ISA_FLAGS are left unchanged. */
43e9d192 7249
0cfff2a1
KT
7250static enum aarch64_parse_opt_result
7251aarch64_parse_cpu (const char *to_parse, const struct processor **res,
7252 unsigned long *isa_flags)
43e9d192
IB
7253{
7254 char *ext;
7255 const struct processor *cpu;
0cfff2a1 7256 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
7257 size_t len;
7258
0cfff2a1 7259 strcpy (str, to_parse);
43e9d192
IB
7260
7261 ext = strchr (str, '+');
7262
7263 if (ext != NULL)
7264 len = ext - str;
7265 else
7266 len = strlen (str);
7267
7268 if (len == 0)
0cfff2a1
KT
7269 return AARCH64_PARSE_MISSING_ARG;
7270
43e9d192
IB
7271
7272 /* Loop through the list of supported CPUs to find a match. */
7273 for (cpu = all_cores; cpu->name != NULL; cpu++)
7274 {
7275 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
7276 {
0cfff2a1
KT
7277 unsigned long isa_temp = cpu->flags;
7278
43e9d192
IB
7279
7280 if (ext != NULL)
7281 {
0cfff2a1
KT
7282 /* TO_PARSE string contains at least one extension. */
7283 enum aarch64_parse_opt_result ext_res
7284 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 7285
0cfff2a1
KT
7286 if (ext_res != AARCH64_PARSE_OK)
7287 return ext_res;
7288 }
7289	  /* Extension parsing was successful.  Confirm the result
7290 cpu and ISA flags. */
7291 *res = cpu;
7292 *isa_flags = isa_temp;
7293 return AARCH64_PARSE_OK;
43e9d192
IB
7294 }
7295 }
7296
7297 /* CPU name not found in list. */
0cfff2a1 7298 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
7299}
7300
0cfff2a1
KT
7301/* Parse the TO_PARSE string and put the cpu it selects into RES.
7302 Return an aarch64_parse_opt_result describing the parse result.
7303 If the parsing fails the RES does not change. */
43e9d192 7304
0cfff2a1
KT
7305static enum aarch64_parse_opt_result
7306aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
7307{
7308 const struct processor *cpu;
0cfff2a1
KT
7309 char *str = (char *) alloca (strlen (to_parse) + 1);
7310
7311 strcpy (str, to_parse);
43e9d192
IB
7312
7313 /* Loop through the list of supported CPUs to find a match. */
7314 for (cpu = all_cores; cpu->name != NULL; cpu++)
7315 {
7316 if (strcmp (cpu->name, str) == 0)
7317 {
0cfff2a1
KT
7318 *res = cpu;
7319 return AARCH64_PARSE_OK;
43e9d192
IB
7320 }
7321 }
7322
7323 /* CPU name not found in list. */
0cfff2a1 7324 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
7325}
7326
8dec06f2
JG
7327/* Parse TOKEN, which has length LENGTH, to see if it is an option
7328 described in FLAG. If it is, return the index bit for that fusion type.
7329 If not, error (printing OPTION_NAME) and return zero. */
7330
7331static unsigned int
7332aarch64_parse_one_option_token (const char *token,
7333 size_t length,
7334 const struct aarch64_flag_desc *flag,
7335 const char *option_name)
7336{
7337 for (; flag->name != NULL; flag++)
7338 {
7339 if (length == strlen (flag->name)
7340 && !strncmp (flag->name, token, length))
7341 return flag->flag;
7342 }
7343
7344 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
7345 return 0;
7346}
7347
7348/* Parse OPTION which is a comma-separated list of flags to enable.
7349 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
7350 default state we inherit from the CPU tuning structures. OPTION_NAME
7351 gives the top-level option we are parsing in the -moverride string,
7352 for use in error messages. */
7353
7354static unsigned int
7355aarch64_parse_boolean_options (const char *option,
7356 const struct aarch64_flag_desc *flags,
7357 unsigned int initial_state,
7358 const char *option_name)
7359{
7360 const char separator = '.';
7361 const char* specs = option;
7362 const char* ntoken = option;
7363 unsigned int found_flags = initial_state;
7364
7365 while ((ntoken = strchr (specs, separator)))
7366 {
7367 size_t token_length = ntoken - specs;
7368 unsigned token_ops = aarch64_parse_one_option_token (specs,
7369 token_length,
7370 flags,
7371 option_name);
7372 /* If we find "none" (or, for simplicity's sake, an error) anywhere
7373 in the token stream, reset the supported operations. So:
7374
7375 adrp+add.cmp+branch.none.adrp+add
7376
7377 would have the result of turning on only adrp+add fusion. */
7378 if (!token_ops)
7379 found_flags = 0;
7380
7381 found_flags |= token_ops;
7382 specs = ++ntoken;
7383 }
7384
7385  /* The string ended with a separator; report it as ill-formed.  */
7386 if (!(*specs))
7387 {
7388 error ("%s string ill-formed\n", option_name);
7389 return 0;
7390 }
7391
7392 /* We still have one more token to parse. */
7393 size_t token_length = strlen (specs);
7394 unsigned token_ops = aarch64_parse_one_option_token (specs,
7395 token_length,
7396 flags,
7397 option_name);
7398 if (!token_ops)
7399 found_flags = 0;
7400
7401 found_flags |= token_ops;
7402 return found_flags;
7403}
7404
7405/* Support for overriding instruction fusion. */
7406
7407static void
7408aarch64_parse_fuse_string (const char *fuse_string,
7409 struct tune_params *tune)
7410{
7411 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
7412 aarch64_fusible_pairs,
7413 tune->fusible_ops,
7414 "fuse=");
7415}
7416
7417/* Support for overriding other tuning flags. */
7418
7419static void
7420aarch64_parse_tune_string (const char *tune_string,
7421 struct tune_params *tune)
7422{
7423 tune->extra_tuning_flags
7424 = aarch64_parse_boolean_options (tune_string,
7425 aarch64_tuning_flags,
7426 tune->extra_tuning_flags,
7427 "tune=");
7428}
7429
7430/* Parse TOKEN, which has length LENGTH, to see if it is a tuning option
7431   we understand.  If it is, extract the option string and hand it off to
7432 the appropriate function. */
7433
7434void
7435aarch64_parse_one_override_token (const char* token,
7436 size_t length,
7437 struct tune_params *tune)
7438{
7439 const struct aarch64_tuning_override_function *fn
7440 = aarch64_tuning_override_functions;
7441
7442 const char *option_part = strchr (token, '=');
7443 if (!option_part)
7444 {
7445 error ("tuning string missing in option (%s)", token);
7446 return;
7447 }
7448
7449 /* Get the length of the option name. */
7450 length = option_part - token;
7451 /* Skip the '=' to get to the option string. */
7452 option_part++;
7453
7454 for (; fn->name != NULL; fn++)
7455 {
7456 if (!strncmp (fn->name, token, length))
7457 {
7458 fn->parse_override (option_part, tune);
7459 return;
7460 }
7461 }
7462
7463 error ("unknown tuning option (%s)",token);
7464 return;
7465}
7466
7467/* Parse STRING looking for options in the format:
7468 string :: option:string
7469 option :: name=substring
7470 name :: {a-z}
7471 substring :: defined by option. */
7472
7473static void
7474aarch64_parse_override_string (const char* input_string,
7475 struct tune_params* tune)
7476{
7477 const char separator = ':';
7478 size_t string_length = strlen (input_string) + 1;
7479 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
7480 char *string = string_root;
7481 strncpy (string, input_string, string_length);
7482 string[string_length - 1] = '\0';
7483
7484 char* ntoken = string;
7485
7486 while ((ntoken = strchr (string, separator)))
7487 {
7488 size_t token_length = ntoken - string;
7489 /* Make this substring look like a string. */
7490 *ntoken = '\0';
7491 aarch64_parse_one_override_token (string, token_length, tune);
7492 string = ++ntoken;
7493 }
7494
7495 /* One last option to parse. */
7496 aarch64_parse_one_override_token (string, strlen (string), tune);
7497 free (string_root);
7498}
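/* Illustrative sketch, not part of the original source: a -moverride
   string following the grammar documented above could look like

       -moverride=fuse=adrp+add.cmp+branch:tune=<flag-name>

   aarch64_parse_override_string splits it on ':', then
   aarch64_parse_one_override_token matches the name before '=' against
   aarch64_tuning_override_functions and hands the substring after '=' to
   aarch64_parse_fuse_string or aarch64_parse_tune_string.  The fusion pair
   names shown are taken from the example earlier in this file; <flag-name>
   stands for any entry in aarch64_tuning_flags.  */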
43e9d192 7499
43e9d192
IB
7500
7501static void
0cfff2a1 7502aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 7503{
0cfff2a1
KT
7504 if (opts->x_flag_omit_frame_pointer)
7505 opts->x_flag_omit_leaf_frame_pointer = false;
7506 else if (opts->x_flag_omit_leaf_frame_pointer)
7507 opts->x_flag_omit_frame_pointer = true;
43e9d192 7508
0cfff2a1
KT
7509 /* If not optimizing for size, set the default
7510 alignment to what the target wants. */
7511 if (!opts->x_optimize_size)
43e9d192 7512 {
0cfff2a1
KT
7513 if (opts->x_align_loops <= 0)
7514 opts->x_align_loops = aarch64_tune_params.loop_align;
7515 if (opts->x_align_jumps <= 0)
7516 opts->x_align_jumps = aarch64_tune_params.jump_align;
7517 if (opts->x_align_functions <= 0)
7518 opts->x_align_functions = aarch64_tune_params.function_align;
43e9d192 7519 }
0cfff2a1 7520}
43e9d192 7521
0cfff2a1
KT
7522/* 'Unpack' the internal tuning structs and update the options
7523 in OPTS. The caller must have set up selected_tune and selected_arch
7524 as all the other target-specific codegen decisions are
7525 derived from them. */
7526
7527static void
7528aarch64_override_options_internal (struct gcc_options *opts)
7529{
7530 aarch64_tune_flags = selected_tune->flags;
7531 aarch64_tune = selected_tune->sched_core;
7532 /* Make a copy of the tuning parameters attached to the core, which
7533 we may later overwrite. */
7534 aarch64_tune_params = *(selected_tune->tune);
7535 aarch64_architecture_version = selected_arch->architecture_version;
7536
7537 if (opts->x_aarch64_override_tune_string)
7538 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
7539 &aarch64_tune_params);
7540
7541 /* This target defaults to strict volatile bitfields. */
7542 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
7543 opts->x_flag_strict_volatile_bitfields = 1;
7544
7545 if (opts->x_aarch64_fix_a53_err835769 == 2)
43e9d192 7546 {
0cfff2a1
KT
7547#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
7548 opts->x_aarch64_fix_a53_err835769 = 1;
7549#else
7550 opts->x_aarch64_fix_a53_err835769 = 0;
7551#endif
43e9d192
IB
7552 }
7553
0cfff2a1
KT
7554 /* -mgeneral-regs-only sets a mask in target_flags; make sure that
7555 aarch64_isa_flags does not contain the FP/SIMD/Crypto feature flags
7556 in case some code tries reading aarch64_isa_flags directly to check if
7557 FP is available. Reuse the aarch64_parse_extension machinery since it
7558 knows how to disable any other flags that fp implies. */
7559 if (TARGET_GENERAL_REGS_ONLY_P (opts->x_target_flags))
43e9d192 7560 {
0cfff2a1
KT
7561 /* aarch64_parse_extension takes char* rather than const char* because
7562 it is usually called from within other parsing functions. */
7563 char tmp_str[] = "+nofp";
7564 aarch64_parse_extension (tmp_str, &aarch64_isa_flags);
43e9d192
IB
7565 }
7566
0cfff2a1 7567 initialize_aarch64_code_model (opts);
63892fa2 7568
0cfff2a1
KT
7569 aarch64_override_options_after_change_1 (opts);
7570}
43e9d192 7571
0cfff2a1
KT
7572/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
7573 specified in STR and throw errors if appropriate. Put the results,
7574 if they are valid, in RES and ISA_FLAGS. */
43e9d192 7575
0cfff2a1
KT
7576static void
7577aarch64_validate_mcpu (const char *str, const struct processor **res,
7578 unsigned long *isa_flags)
7579{
7580 enum aarch64_parse_opt_result parse_res
7581 = aarch64_parse_cpu (str, res, isa_flags);
7582
7583 if (parse_res == AARCH64_PARSE_OK)
7584 return;
7585
7586 switch (parse_res)
7587 {
7588 case AARCH64_PARSE_MISSING_ARG:
7589 error ("missing cpu name in -mcpu=%qs", str);
7590 break;
7591 case AARCH64_PARSE_INVALID_ARG:
7592 error ("unknown value %qs for -mcpu", str);
7593 break;
7594 case AARCH64_PARSE_INVALID_FEATURE:
7595 error ("invalid feature modifier in -mcpu=%qs", str);
7596 break;
7597 default:
7598 gcc_unreachable ();
7599 }
7600}
7601
7602/* Validate a command-line -march option. Parse the arch and extensions
7603 (if any) specified in STR and throw errors if appropriate. Put the
7604 results, if they are valid, in RES and ISA_FLAGS. */
7605
7606static void
7607aarch64_validate_march (const char *str, const struct processor **res,
7608 unsigned long *isa_flags)
7609{
7610 enum aarch64_parse_opt_result parse_res
7611 = aarch64_parse_arch (str, res, isa_flags);
7612
7613 if (parse_res == AARCH64_PARSE_OK)
7614 return;
7615
7616 switch (parse_res)
7617 {
7618 case AARCH64_PARSE_MISSING_ARG:
7619 error ("missing arch name in -march=%qs", str);
7620 break;
7621 case AARCH64_PARSE_INVALID_ARG:
7622 error ("unknown value %qs for -march", str);
7623 break;
7624 case AARCH64_PARSE_INVALID_FEATURE:
7625 error ("invalid feature modifier in -march=%qs", str);
7626 break;
7627 default:
7628 gcc_unreachable ();
7629 }
7630}
7631
7632/* Validate a command-line -mtune option. Parse the cpu
7633 specified in STR and throw errors if appropriate. Put the
7634 result, if it is valid, in RES. */
7635
7636static void
7637aarch64_validate_mtune (const char *str, const struct processor **res)
7638{
7639 enum aarch64_parse_opt_result parse_res
7640 = aarch64_parse_tune (str, res);
7641
7642 if (parse_res == AARCH64_PARSE_OK)
7643 return;
7644
7645 switch (parse_res)
7646 {
7647 case AARCH64_PARSE_MISSING_ARG:
7648 error ("missing cpu name in -mtune=%qs", str);
7649 break;
7650 case AARCH64_PARSE_INVALID_ARG:
7651 error ("unknown value %qs for -mtune", str);
7652 break;
7653 default:
7654 gcc_unreachable ();
7655 }
7656}
7657
7658/* Implement TARGET_OPTION_OVERRIDE. This is called once at the beginning
7659 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
7660 tuning structs. In particular it must set selected_tune and
7661 aarch64_isa_flags that define the available ISA features and tuning
7662 decisions. It must also set selected_arch as this will be used to
7663 output the .arch asm tags for each function. */
7664
7665static void
7666aarch64_override_options (void)
7667{
7668 unsigned long cpu_isa = 0;
7669 unsigned long arch_isa = 0;
7670 aarch64_isa_flags = 0;
7671
7672 selected_cpu = NULL;
7673 selected_arch = NULL;
7674 selected_tune = NULL;
7675
7676 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
7677 If either of -march or -mtune is given, they override their
7678 respective component of -mcpu. */
7679 if (aarch64_cpu_string)
7680 aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu, &cpu_isa);
7681
7682 if (aarch64_arch_string)
7683 aarch64_validate_march (aarch64_arch_string, &selected_arch, &arch_isa);
7684
7685 if (aarch64_tune_string)
7686 aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192
IB
7687
7688 /* If the user did not specify a processor, choose the default
7689 one for them. This will be the CPU set during configuration using
a3cd0246 7690 --with-cpu, otherwise it is "generic". */
43e9d192
IB
7691 if (!selected_cpu)
7692 {
0cfff2a1
KT
7693 if (selected_arch)
7694 {
7695 selected_cpu = &all_cores[selected_arch->ident];
7696 aarch64_isa_flags = arch_isa;
7697 }
7698 else
7699 {
7700 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
7701 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
7702 }
7703 }
7704 /* If both -mcpu and -march are specified, check that they are architecturally
7705 compatible, warn if they're not and prefer the -march ISA flags. */
7706 else if (selected_arch)
7707 {
7708 if (selected_arch->arch != selected_cpu->arch)
7709 {
7710 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
7711 all_architectures[selected_cpu->arch].name,
7712 selected_arch->name);
7713 }
7714 aarch64_isa_flags = arch_isa;
7715 }
7716 else
7717 {
7718 /* -mcpu but no -march. */
7719 aarch64_isa_flags = cpu_isa;
43e9d192
IB
7720 }
7721
0cfff2a1
KT
7722 /* Set the arch as well, as we will need it when outputting
7723 the .arch directive in assembly. */
7724 if (!selected_arch)
7725 {
7726 gcc_assert (selected_cpu);
7727 selected_arch = &all_architectures[selected_cpu->arch];
7728 }
43e9d192 7729
43e9d192 7730 if (!selected_tune)
3edaf26d 7731 selected_tune = selected_cpu;
43e9d192 7732
0cfff2a1
KT
7733#ifndef HAVE_AS_MABI_OPTION
7734 /* The compiler may have been configured with 2.23.* binutils, which does
7735 not have support for ILP32. */
7736 if (TARGET_ILP32)
7737 error ("Assembler does not support -mabi=ilp32");
7738#endif
43e9d192 7739
0cfff2a1 7740 aarch64_build_bitmask_table ();
8dec06f2 7741
0cfff2a1
KT
7742 aarch64_override_options_internal (&global_options);
7743
7744 /* Save these options as the default ones in case we push and pop them later
7745 while processing functions with potential target attributes. */
7746 target_option_default_node = target_option_current_node
7747 = build_target_option_node (&global_options);
5e396da6 7748
e2fc7193 7749 aarch64_register_fma_steering ();
fde9b31b 7750
43e9d192
IB
7751}
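/* Illustrative sketch, not part of the original source: with
     -mcpu=cortex-a57 -mtune=cortex-a53
   the ISA flags are taken from the -mcpu string, while selected_tune (and
   therefore the tuning tables copied in aarch64_override_options_internal)
   comes from -mtune.  If -march is also given, its ISA flags take
   precedence and a warning is emitted when the named CPU does not
   implement the named architecture.  */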
7752
7753/* Implement targetm.override_options_after_change. */
7754
7755static void
7756aarch64_override_options_after_change (void)
7757{
0cfff2a1 7758 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
7759}
7760
7761static struct machine_function *
7762aarch64_init_machine_status (void)
7763{
7764 struct machine_function *machine;
766090c2 7765 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
7766 return machine;
7767}
7768
7769void
7770aarch64_init_expanders (void)
7771{
7772 init_machine_status = aarch64_init_machine_status;
7773}
7774
7775/* Select the code model to use, checking that it is compatible with the PIC options. */
7776static void
0cfff2a1 7777initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 7778{
0cfff2a1 7779 if (opts->x_flag_pic)
43e9d192 7780 {
0cfff2a1 7781 switch (opts->x_aarch64_cmodel_var)
43e9d192
IB
7782 {
7783 case AARCH64_CMODEL_TINY:
7784 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
7785 break;
7786 case AARCH64_CMODEL_SMALL:
34ecdb0f 7787#ifdef HAVE_AS_SMALL_PIC_RELOCS
1b1e81f8
JW
7788 aarch64_cmodel = (flag_pic == 2
7789 ? AARCH64_CMODEL_SMALL_PIC
7790 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f
JW
7791#else
7792 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
7793#endif
43e9d192
IB
7794 break;
7795 case AARCH64_CMODEL_LARGE:
7796 sorry ("code model %qs with -f%s", "large",
0cfff2a1 7797 opts->x_flag_pic > 1 ? "PIC" : "pic");
43e9d192
IB
7798 default:
7799 gcc_unreachable ();
7800 }
7801 }
7802 else
0cfff2a1 7803 aarch64_cmodel = opts->x_aarch64_cmodel_var;
43e9d192
IB
7804}
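/* Illustrative sketch, not part of the original source: under
   -mcmodel=small, -fpic (flag_pic == 1) selects AARCH64_CMODEL_SMALL_SPIC
   and -fPIC (flag_pic == 2) selects AARCH64_CMODEL_SMALL_PIC, provided GCC
   was configured with an assembler that supports the small PIC relocations
   (HAVE_AS_SMALL_PIC_RELOCS); otherwise both map to
   AARCH64_CMODEL_SMALL_PIC.  The two models differ only in the GOT access
   forms chosen later (SYMBOL_SMALL_GOT_28K vs SYMBOL_SMALL_GOT_4G).  */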
7805
7806/* Return true if SYMBOL_REF X binds locally. */
7807
7808static bool
7809aarch64_symbol_binds_local_p (const_rtx x)
7810{
7811 return (SYMBOL_REF_DECL (x)
7812 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
7813 : SYMBOL_REF_LOCAL_P (x));
7814}
7815
7816/* Return true if SYMBOL_REF X is thread local */
7817static bool
7818aarch64_tls_symbol_p (rtx x)
7819{
7820 if (! TARGET_HAVE_TLS)
7821 return false;
7822
7823 if (GET_CODE (x) != SYMBOL_REF)
7824 return false;
7825
7826 return SYMBOL_REF_TLS_MODEL (x) != 0;
7827}
7828
7829/* Classify a TLS symbol into one of the TLS kinds. */
7830enum aarch64_symbol_type
7831aarch64_classify_tls_symbol (rtx x)
7832{
7833 enum tls_model tls_kind = tls_symbolic_operand_type (x);
7834
7835 switch (tls_kind)
7836 {
7837 case TLS_MODEL_GLOBAL_DYNAMIC:
7838 case TLS_MODEL_LOCAL_DYNAMIC:
7839 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
7840
7841 case TLS_MODEL_INITIAL_EXEC:
7842 return SYMBOL_SMALL_GOTTPREL;
7843
7844 case TLS_MODEL_LOCAL_EXEC:
8fd17b98 7845 return SYMBOL_TLSLE;
43e9d192
IB
7846
7847 case TLS_MODEL_EMULATED:
7848 case TLS_MODEL_NONE:
7849 return SYMBOL_FORCE_TO_MEM;
7850
7851 default:
7852 gcc_unreachable ();
7853 }
7854}
7855
7856/* Return the method that should be used to access SYMBOL_REF or
7857 LABEL_REF X in context CONTEXT. */
17f4d4bf 7858
43e9d192 7859enum aarch64_symbol_type
f8b756b7 7860aarch64_classify_symbol (rtx x, rtx offset,
43e9d192
IB
7861 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
7862{
7863 if (GET_CODE (x) == LABEL_REF)
7864 {
7865 switch (aarch64_cmodel)
7866 {
7867 case AARCH64_CMODEL_LARGE:
7868 return SYMBOL_FORCE_TO_MEM;
7869
7870 case AARCH64_CMODEL_TINY_PIC:
7871 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
7872 return SYMBOL_TINY_ABSOLUTE;
7873
1b1e81f8 7874 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
7875 case AARCH64_CMODEL_SMALL_PIC:
7876 case AARCH64_CMODEL_SMALL:
7877 return SYMBOL_SMALL_ABSOLUTE;
7878
7879 default:
7880 gcc_unreachable ();
7881 }
7882 }
7883
17f4d4bf 7884 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 7885 {
4a985a37
MS
7886 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
7887 return SYMBOL_FORCE_TO_MEM;
43e9d192
IB
7888
7889 if (aarch64_tls_symbol_p (x))
7890 return aarch64_classify_tls_symbol (x);
7891
17f4d4bf
CSS
7892 switch (aarch64_cmodel)
7893 {
7894 case AARCH64_CMODEL_TINY:
f8b756b7
TB
7895 /* When we retrieve symbol + offset address, we have to make sure
7896 the offset does not cause overflow of the final address. But
7897 we have no way of knowing the address of symbol at compile time
7898 so we can't accurately say if the distance between the PC and
7899 symbol + offset is outside the addressable range of +/-1M in the
7900 TINY code model. So we rely on images not being greater than
7901 1M and cap the offset at 1M and anything beyond 1M will have to
7902 be loaded using an alternative mechanism. */
7903 if (SYMBOL_REF_WEAK (x)
7904 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
a5350ddc
CSS
7905 return SYMBOL_FORCE_TO_MEM;
7906 return SYMBOL_TINY_ABSOLUTE;
7907
17f4d4bf 7908 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
7909 /* Same reasoning as the tiny code model, but the offset cap here is
7910 4G. */
7911 if (SYMBOL_REF_WEAK (x)
3ff5d1f0
TB
7912 || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
7913 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
7914 return SYMBOL_FORCE_TO_MEM;
7915 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 7916
17f4d4bf 7917 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 7918 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 7919 return SYMBOL_TINY_GOT;
38e6c9a6
MS
7920 return SYMBOL_TINY_ABSOLUTE;
7921
1b1e81f8 7922 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
7923 case AARCH64_CMODEL_SMALL_PIC:
7924 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
7925 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
7926 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 7927 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 7928
17f4d4bf
CSS
7929 default:
7930 gcc_unreachable ();
7931 }
43e9d192 7932 }
17f4d4bf 7933
43e9d192
IB
7934 /* By default push everything into the constant pool. */
7935 return SYMBOL_FORCE_TO_MEM;
7936}
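/* Illustrative sketch, not part of the original source: under
   -mcmodel=tiny a reference such as  array + 0x1000  stays within the
   +/-1M cap above and is classified SYMBOL_TINY_ABSOLUTE, so the address
   can be formed with a single adr.  A reference to  array + 0x200000
   (2M beyond the symbol) exceeds the cap and is classified
   SYMBOL_FORCE_TO_MEM, so the address is loaded from the literal pool
   instead.  */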
7937
43e9d192
IB
7938bool
7939aarch64_constant_address_p (rtx x)
7940{
7941 return (CONSTANT_P (x) && memory_address_p (DImode, x));
7942}
7943
7944bool
7945aarch64_legitimate_pic_operand_p (rtx x)
7946{
7947 if (GET_CODE (x) == SYMBOL_REF
7948 || (GET_CODE (x) == CONST
7949 && GET_CODE (XEXP (x, 0)) == PLUS
7950 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7951 return false;
7952
7953 return true;
7954}
7955
3520f7cc
JG
7956/* Return true if X holds either a quarter-precision floating-point
7957 constant or floating-point +0.0. */
7958static bool
ef4bddc2 7959aarch64_valid_floating_const (machine_mode mode, rtx x)
3520f7cc
JG
7960{
7961 if (!CONST_DOUBLE_P (x))
7962 return false;
7963
6a0f8c01
JW
7964 if (aarch64_float_const_zero_rtx_p (x))
7965 return true;
7966
7967 /* We only handle moving 0.0 to a TFmode register. */
3520f7cc
JG
7968 if (!(mode == SFmode || mode == DFmode))
7969 return false;
7970
3520f7cc
JG
7971 return aarch64_float_const_representable_p (x);
7972}
7973
43e9d192 7974static bool
ef4bddc2 7975aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192
IB
7976{
7977 /* Do not allow vector struct mode constants. We could support
7978 0 and -1 easily, but they need support in aarch64-simd.md. */
7979 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
7980 return false;
7981
7982 /* This could probably go away because
7983 we now decompose CONST_INTs according to expand_mov_immediate. */
7984 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 7985 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
7986 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
7987 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
7988
7989 if (GET_CODE (x) == HIGH
7990 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7991 return true;
7992
7993 return aarch64_constant_address_p (x);
7994}
7995
a5bc806c 7996rtx
43e9d192
IB
7997aarch64_load_tp (rtx target)
7998{
7999 if (!target
8000 || GET_MODE (target) != Pmode
8001 || !register_operand (target, Pmode))
8002 target = gen_reg_rtx (Pmode);
8003
8004 /* Can return in any reg. */
8005 emit_insn (gen_aarch64_load_tp_hard (target));
8006 return target;
8007}
8008
43e9d192
IB
8009/* On AAPCS systems, this is the "struct __va_list". */
8010static GTY(()) tree va_list_type;
8011
8012/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
8013 Return the type to use as __builtin_va_list.
8014
8015 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
8016
8017 struct __va_list
8018 {
8019 void *__stack;
8020 void *__gr_top;
8021 void *__vr_top;
8022 int __gr_offs;
8023 int __vr_offs;
8024 }; */
8025
8026static tree
8027aarch64_build_builtin_va_list (void)
8028{
8029 tree va_list_name;
8030 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
8031
8032 /* Create the type. */
8033 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
8034 /* Give it the required name. */
8035 va_list_name = build_decl (BUILTINS_LOCATION,
8036 TYPE_DECL,
8037 get_identifier ("__va_list"),
8038 va_list_type);
8039 DECL_ARTIFICIAL (va_list_name) = 1;
8040 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 8041 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
8042
8043 /* Create the fields. */
8044 f_stack = build_decl (BUILTINS_LOCATION,
8045 FIELD_DECL, get_identifier ("__stack"),
8046 ptr_type_node);
8047 f_grtop = build_decl (BUILTINS_LOCATION,
8048 FIELD_DECL, get_identifier ("__gr_top"),
8049 ptr_type_node);
8050 f_vrtop = build_decl (BUILTINS_LOCATION,
8051 FIELD_DECL, get_identifier ("__vr_top"),
8052 ptr_type_node);
8053 f_groff = build_decl (BUILTINS_LOCATION,
8054 FIELD_DECL, get_identifier ("__gr_offs"),
8055 integer_type_node);
8056 f_vroff = build_decl (BUILTINS_LOCATION,
8057 FIELD_DECL, get_identifier ("__vr_offs"),
8058 integer_type_node);
8059
8060 DECL_ARTIFICIAL (f_stack) = 1;
8061 DECL_ARTIFICIAL (f_grtop) = 1;
8062 DECL_ARTIFICIAL (f_vrtop) = 1;
8063 DECL_ARTIFICIAL (f_groff) = 1;
8064 DECL_ARTIFICIAL (f_vroff) = 1;
8065
8066 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
8067 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
8068 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
8069 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
8070 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
8071
8072 TYPE_FIELDS (va_list_type) = f_stack;
8073 DECL_CHAIN (f_stack) = f_grtop;
8074 DECL_CHAIN (f_grtop) = f_vrtop;
8075 DECL_CHAIN (f_vrtop) = f_groff;
8076 DECL_CHAIN (f_groff) = f_vroff;
8077
8078 /* Compute its layout. */
8079 layout_type (va_list_type);
8080
8081 return va_list_type;
8082}
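/* Illustrative sketch, not part of the original source: the layout
   produced above on LP64 is

     offsetof (__va_list, __stack)   ==  0
     offsetof (__va_list, __gr_top)  ==  8
     offsetof (__va_list, __vr_top)  == 16
     offsetof (__va_list, __gr_offs) == 24
     offsetof (__va_list, __vr_offs) == 28
     sizeof   (__va_list)            == 32

   matching the AAPCS64 definition quoted in the comment above.  */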
8083
8084/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
8085static void
8086aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
8087{
8088 const CUMULATIVE_ARGS *cum;
8089 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
8090 tree stack, grtop, vrtop, groff, vroff;
8091 tree t;
8092 int gr_save_area_size;
8093 int vr_save_area_size;
8094 int vr_offset;
8095
8096 cum = &crtl->args.info;
8097 gr_save_area_size
8098 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
8099 vr_save_area_size
8100 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
8101
d5726973 8102 if (!TARGET_FLOAT)
43e9d192 8103 {
261fb553 8104 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
8105 vr_save_area_size = 0;
8106 }
8107
8108 f_stack = TYPE_FIELDS (va_list_type_node);
8109 f_grtop = DECL_CHAIN (f_stack);
8110 f_vrtop = DECL_CHAIN (f_grtop);
8111 f_groff = DECL_CHAIN (f_vrtop);
8112 f_vroff = DECL_CHAIN (f_groff);
8113
8114 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
8115 NULL_TREE);
8116 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
8117 NULL_TREE);
8118 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
8119 NULL_TREE);
8120 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
8121 NULL_TREE);
8122 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
8123 NULL_TREE);
8124
8125 /* Emit code to initialize STACK, which points to the next varargs stack
8126 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
8127 by named arguments. STACK is 8-byte aligned. */
8128 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
8129 if (cum->aapcs_stack_size > 0)
8130 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
8131 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
8132 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8133
8134 /* Emit code to initialize GRTOP, the top of the GR save area.
8135 virtual_incoming_args_rtx should have been 16 byte aligned. */
8136 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
8137 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
8138 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8139
8140 /* Emit code to initialize VRTOP, the top of the VR save area.
8141 This address is gr_save_area_bytes below GRTOP, rounded
8142 down to the next 16-byte boundary. */
8143 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
8144 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
8145 STACK_BOUNDARY / BITS_PER_UNIT);
8146
8147 if (vr_offset)
8148 t = fold_build_pointer_plus_hwi (t, -vr_offset);
8149 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
8150 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8151
8152 /* Emit code to initialize GROFF, the offset from GRTOP of the
8153 next GPR argument. */
8154 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
8155 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
8156 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8157
8158 /* Likewise emit code to initialize VROFF, the offset from FTOP
8159 of the next VR argument. */
8160 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
8161 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
8162 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8163}
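/* Illustrative sketch, not part of the original source: for
     int f (int n, ...);
   the named argument occupies x0, so at this point cum->aapcs_ncrn == 1
   and (with TARGET_FLOAT) cum->aapcs_nvrn == 0.  The expansion above then
   initializes
     __gr_offs = -(8 - 1) * 8  = -56
     __vr_offs = -8 * 16       = -128
     __gr_top  = virtual_incoming_args_rtx
     __vr_top  = __gr_top - 64   (56 rounded up to the 16-byte boundary)
     __stack   = virtual_incoming_args_rtx   (no named stack words).  */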
8164
8165/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
8166
8167static tree
8168aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8169 gimple_seq *post_p ATTRIBUTE_UNUSED)
8170{
8171 tree addr;
8172 bool indirect_p;
8173 bool is_ha; /* is HFA or HVA. */
8174 bool dw_align; /* double-word align. */
ef4bddc2 8175 machine_mode ag_mode = VOIDmode;
43e9d192 8176 int nregs;
ef4bddc2 8177 machine_mode mode;
43e9d192
IB
8178
8179 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
8180 tree stack, f_top, f_off, off, arg, roundup, on_stack;
8181 HOST_WIDE_INT size, rsize, adjust, align;
8182 tree t, u, cond1, cond2;
8183
8184 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8185 if (indirect_p)
8186 type = build_pointer_type (type);
8187
8188 mode = TYPE_MODE (type);
8189
8190 f_stack = TYPE_FIELDS (va_list_type_node);
8191 f_grtop = DECL_CHAIN (f_stack);
8192 f_vrtop = DECL_CHAIN (f_grtop);
8193 f_groff = DECL_CHAIN (f_vrtop);
8194 f_vroff = DECL_CHAIN (f_groff);
8195
8196 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
8197 f_stack, NULL_TREE);
8198 size = int_size_in_bytes (type);
8199 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
8200
8201 dw_align = false;
8202 adjust = 0;
8203 if (aarch64_vfp_is_call_or_return_candidate (mode,
8204 type,
8205 &ag_mode,
8206 &nregs,
8207 &is_ha))
8208 {
8209 /* TYPE passed in fp/simd registers. */
d5726973 8210 if (!TARGET_FLOAT)
261fb553 8211 aarch64_err_no_fpadvsimd (mode, "varargs");
43e9d192
IB
8212
8213 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
8214 unshare_expr (valist), f_vrtop, NULL_TREE);
8215 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
8216 unshare_expr (valist), f_vroff, NULL_TREE);
8217
8218 rsize = nregs * UNITS_PER_VREG;
8219
8220 if (is_ha)
8221 {
8222 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
8223 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
8224 }
8225 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
8226 && size < UNITS_PER_VREG)
8227 {
8228 adjust = UNITS_PER_VREG - size;
8229 }
8230 }
8231 else
8232 {
8233 /* TYPE passed in general registers. */
8234 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
8235 unshare_expr (valist), f_grtop, NULL_TREE);
8236 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
8237 unshare_expr (valist), f_groff, NULL_TREE);
8238 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8239 nregs = rsize / UNITS_PER_WORD;
8240
8241 if (align > 8)
8242 dw_align = true;
8243
8244 if (BLOCK_REG_PADDING (mode, type, 1) == downward
8245 && size < UNITS_PER_WORD)
8246 {
8247 adjust = UNITS_PER_WORD - size;
8248 }
8249 }
8250
8251 /* Get a local temporary for the field value. */
8252 off = get_initialized_tmp_var (f_off, pre_p, NULL);
8253
8254 /* Emit code to branch if off >= 0. */
8255 t = build2 (GE_EXPR, boolean_type_node, off,
8256 build_int_cst (TREE_TYPE (off), 0));
8257 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
8258
8259 if (dw_align)
8260 {
8261 /* Emit: offs = (offs + 15) & -16. */
8262 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
8263 build_int_cst (TREE_TYPE (off), 15));
8264 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
8265 build_int_cst (TREE_TYPE (off), -16));
8266 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
8267 }
8268 else
8269 roundup = NULL;
8270
8271 /* Update ap.__[g|v]r_offs */
8272 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
8273 build_int_cst (TREE_TYPE (off), rsize));
8274 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
8275
8276 /* String up. */
8277 if (roundup)
8278 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
8279
8280 /* [cond2] if (ap.__[g|v]r_offs > 0) */
8281 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
8282 build_int_cst (TREE_TYPE (f_off), 0));
8283 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
8284
8285 /* String up: make sure the assignment happens before the use. */
8286 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
8287 COND_EXPR_ELSE (cond1) = t;
8288
8289 /* Prepare the trees handling the argument that is passed on the stack;
8290 the top-level node is stored in ON_STACK. */
8291 arg = get_initialized_tmp_var (stack, pre_p, NULL);
8292 if (align > 8)
8293 {
8294 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
8295 t = fold_convert (intDI_type_node, arg);
8296 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
8297 build_int_cst (TREE_TYPE (t), 15));
8298 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8299 build_int_cst (TREE_TYPE (t), -16));
8300 t = fold_convert (TREE_TYPE (arg), t);
8301 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
8302 }
8303 else
8304 roundup = NULL;
8305 /* Advance ap.__stack */
8306 t = fold_convert (intDI_type_node, arg);
8307 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
8308 build_int_cst (TREE_TYPE (t), size + 7));
8309 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8310 build_int_cst (TREE_TYPE (t), -8));
8311 t = fold_convert (TREE_TYPE (arg), t);
8312 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
8313 /* String up roundup and advance. */
8314 if (roundup)
8315 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
8316 /* String up with arg */
8317 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
8318 /* Big-endianness related address adjustment. */
8319 if (BLOCK_REG_PADDING (mode, type, 1) == downward
8320 && size < UNITS_PER_WORD)
8321 {
8322 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
8323 size_int (UNITS_PER_WORD - size));
8324 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
8325 }
8326
8327 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
8328 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
8329
8330 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
8331 t = off;
8332 if (adjust)
8333 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
8334 build_int_cst (TREE_TYPE (off), adjust));
8335
8336 t = fold_convert (sizetype, t);
8337 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
8338
8339 if (is_ha)
8340 {
8341 /* type ha; // treat as "struct {ftype field[n];}"
8342 ... [computing offs]
8343 for (i = 0; i <nregs; ++i, offs += 16)
8344 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
8345 return ha; */
8346 int i;
8347 tree tmp_ha, field_t, field_ptr_t;
8348
8349 /* Declare a local variable. */
8350 tmp_ha = create_tmp_var_raw (type, "ha");
8351 gimple_add_tmp_var (tmp_ha);
8352
8353 /* Establish the base type. */
8354 switch (ag_mode)
8355 {
8356 case SFmode:
8357 field_t = float_type_node;
8358 field_ptr_t = float_ptr_type_node;
8359 break;
8360 case DFmode:
8361 field_t = double_type_node;
8362 field_ptr_t = double_ptr_type_node;
8363 break;
8364 case TFmode:
8365 field_t = long_double_type_node;
8366 field_ptr_t = long_double_ptr_type_node;
8367 break;
8368/* The half precision and quad precision are not fully supported yet. Enable
8369 the following code after the support is complete. Need to find the correct
8370 type node for __fp16 *. */
8371#if 0
8372 case HFmode:
8373 field_t = float_type_node;
8374 field_ptr_t = float_ptr_type_node;
8375 break;
8376#endif
8377 case V2SImode:
8378 case V4SImode:
8379 {
8380 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
8381 field_t = build_vector_type_for_mode (innertype, ag_mode);
8382 field_ptr_t = build_pointer_type (field_t);
8383 }
8384 break;
8385 default:
8386 gcc_assert (0);
8387 }
8388
8389 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
8390 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
8391 addr = t;
8392 t = fold_convert (field_ptr_t, addr);
8393 t = build2 (MODIFY_EXPR, field_t,
8394 build1 (INDIRECT_REF, field_t, tmp_ha),
8395 build1 (INDIRECT_REF, field_t, t));
8396
8397 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
8398 for (i = 1; i < nregs; ++i)
8399 {
8400 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
8401 u = fold_convert (field_ptr_t, addr);
8402 u = build2 (MODIFY_EXPR, field_t,
8403 build2 (MEM_REF, field_t, tmp_ha,
8404 build_int_cst (field_ptr_t,
8405 (i *
8406 int_size_in_bytes (field_t)))),
8407 build1 (INDIRECT_REF, field_t, u));
8408 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
8409 }
8410
8411 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
8412 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
8413 }
8414
8415 COND_EXPR_ELSE (cond2) = t;
8416 addr = fold_convert (build_pointer_type (type), cond1);
8417 addr = build_va_arg_indirect_ref (addr);
8418
8419 if (indirect_p)
8420 addr = build_va_arg_indirect_ref (addr);
8421
8422 return addr;
8423}
8424
8425/* Implement TARGET_SETUP_INCOMING_VARARGS. */
8426
8427static void
ef4bddc2 8428aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
8429 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8430 int no_rtl)
8431{
8432 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8433 CUMULATIVE_ARGS local_cum;
8434 int gr_saved, vr_saved;
8435
8436 /* The caller has advanced CUM up to, but not beyond, the last named
8437 argument. Advance a local copy of CUM past the last "real" named
8438 argument, to find out how many registers are left over. */
8439 local_cum = *cum;
8440 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
8441
8442 /* Find out how many registers we need to save. */
8443 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
8444 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
8445
d5726973 8446 if (!TARGET_FLOAT)
43e9d192 8447 {
261fb553 8448 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
8449 vr_saved = 0;
8450 }
8451
8452 if (!no_rtl)
8453 {
8454 if (gr_saved > 0)
8455 {
8456 rtx ptr, mem;
8457
8458 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
8459 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
8460 - gr_saved * UNITS_PER_WORD);
8461 mem = gen_frame_mem (BLKmode, ptr);
8462 set_mem_alias_set (mem, get_varargs_alias_set ());
8463
8464 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
8465 mem, gr_saved);
8466 }
8467 if (vr_saved > 0)
8468 {
8469 /* We can't use move_block_from_reg, because it will use
8470 the wrong mode, storing D regs only. */
ef4bddc2 8471 machine_mode mode = TImode;
43e9d192
IB
8472 int off, i;
8473
8474 /* Set OFF to the offset from virtual_incoming_args_rtx of
8475 the first vector register. The VR save area lies below
8476 the GR one, and is aligned to 16 bytes. */
8477 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
8478 STACK_BOUNDARY / BITS_PER_UNIT);
8479 off -= vr_saved * UNITS_PER_VREG;
8480
8481 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
8482 {
8483 rtx ptr, mem;
8484
8485 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
8486 mem = gen_frame_mem (mode, ptr);
8487 set_mem_alias_set (mem, get_varargs_alias_set ());
8488 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
8489 off += UNITS_PER_VREG;
8490 }
8491 }
8492 }
8493
8494 /* We don't save the size into *PRETEND_SIZE because we want to avoid
8495 any complication of having crtl->args.pretend_args_size changed. */
8799637a 8496 cfun->machine->frame.saved_varargs_size
43e9d192
IB
8497 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
8498 STACK_BOUNDARY / BITS_PER_UNIT)
8499 + vr_saved * UNITS_PER_VREG);
8500}
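/* Illustrative sketch, not part of the original source, continuing the
   int f (int n, ...)  example: gr_saved == 7 and vr_saved == 8, so the
   code above stores x1-x7 in a block just below virtual_incoming_args_rtx
   and q0-q7 (as TImode values) below that, and records
     saved_varargs_size = ROUND_UP (7 * 8, 16) + 8 * 16 = 64 + 128 = 192.  */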
8501
8502static void
8503aarch64_conditional_register_usage (void)
8504{
8505 int i;
8506 if (!TARGET_FLOAT)
8507 {
8508 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
8509 {
8510 fixed_regs[i] = 1;
8511 call_used_regs[i] = 1;
8512 }
8513 }
8514}
8515
8516/* Walk down the type tree of TYPE counting consecutive base elements.
8517 If *MODEP is VOIDmode, then set it to the first valid floating point
8518 type. If a non-floating point type is found, or if a floating point
8519 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
8520 otherwise return the count in the sub-tree. */
8521static int
ef4bddc2 8522aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 8523{
ef4bddc2 8524 machine_mode mode;
43e9d192
IB
8525 HOST_WIDE_INT size;
8526
8527 switch (TREE_CODE (type))
8528 {
8529 case REAL_TYPE:
8530 mode = TYPE_MODE (type);
8531 if (mode != DFmode && mode != SFmode && mode != TFmode)
8532 return -1;
8533
8534 if (*modep == VOIDmode)
8535 *modep = mode;
8536
8537 if (*modep == mode)
8538 return 1;
8539
8540 break;
8541
8542 case COMPLEX_TYPE:
8543 mode = TYPE_MODE (TREE_TYPE (type));
8544 if (mode != DFmode && mode != SFmode && mode != TFmode)
8545 return -1;
8546
8547 if (*modep == VOIDmode)
8548 *modep = mode;
8549
8550 if (*modep == mode)
8551 return 2;
8552
8553 break;
8554
8555 case VECTOR_TYPE:
8556 /* Use V2SImode and V4SImode as representatives of all 64-bit
8557 and 128-bit vector types. */
8558 size = int_size_in_bytes (type);
8559 switch (size)
8560 {
8561 case 8:
8562 mode = V2SImode;
8563 break;
8564 case 16:
8565 mode = V4SImode;
8566 break;
8567 default:
8568 return -1;
8569 }
8570
8571 if (*modep == VOIDmode)
8572 *modep = mode;
8573
8574 /* Vector modes are considered to be opaque: two vectors are
8575 equivalent for the purposes of being homogeneous aggregates
8576 if they are the same size. */
8577 if (*modep == mode)
8578 return 1;
8579
8580 break;
8581
8582 case ARRAY_TYPE:
8583 {
8584 int count;
8585 tree index = TYPE_DOMAIN (type);
8586
807e902e
KZ
8587 /* Can't handle incomplete types nor sizes that are not
8588 fixed. */
8589 if (!COMPLETE_TYPE_P (type)
8590 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
8591 return -1;
8592
8593 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
8594 if (count == -1
8595 || !index
8596 || !TYPE_MAX_VALUE (index)
cc269bb6 8597 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 8598 || !TYPE_MIN_VALUE (index)
cc269bb6 8599 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
8600 || count < 0)
8601 return -1;
8602
ae7e9ddd
RS
8603 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
8604 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
8605
8606 /* There must be no padding. */
807e902e 8607 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
8608 return -1;
8609
8610 return count;
8611 }
8612
8613 case RECORD_TYPE:
8614 {
8615 int count = 0;
8616 int sub_count;
8617 tree field;
8618
807e902e
KZ
8619 /* Can't handle incomplete types nor sizes that are not
8620 fixed. */
8621 if (!COMPLETE_TYPE_P (type)
8622 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
8623 return -1;
8624
8625 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
8626 {
8627 if (TREE_CODE (field) != FIELD_DECL)
8628 continue;
8629
8630 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
8631 if (sub_count < 0)
8632 return -1;
8633 count += sub_count;
8634 }
8635
8636 /* There must be no padding. */
807e902e 8637 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
8638 return -1;
8639
8640 return count;
8641 }
8642
8643 case UNION_TYPE:
8644 case QUAL_UNION_TYPE:
8645 {
8646 /* These aren't very interesting except in a degenerate case. */
8647 int count = 0;
8648 int sub_count;
8649 tree field;
8650
807e902e
KZ
8651 /* Can't handle incomplete types nor sizes that are not
8652 fixed. */
8653 if (!COMPLETE_TYPE_P (type)
8654 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
8655 return -1;
8656
8657 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
8658 {
8659 if (TREE_CODE (field) != FIELD_DECL)
8660 continue;
8661
8662 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
8663 if (sub_count < 0)
8664 return -1;
8665 count = count > sub_count ? count : sub_count;
8666 }
8667
8668 /* There must be no padding. */
807e902e 8669 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
8670 return -1;
8671
8672 return count;
8673 }
8674
8675 default:
8676 break;
8677 }
8678
8679 return -1;
8680}
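/* Illustrative sketch, not part of the original source: for
     struct hfa { float x, y, z; };
   the walk above finds three consecutive SFmode elements, so it returns 3
   with *MODEP == SFmode.  Since 3 <= HA_MAX_NUM_FLDS, the caller treats the
   struct as a homogeneous floating-point aggregate and, when enough
   registers are free, passes it in s0-s2.  A struct mixing float and
   double members returns -1 and follows the ordinary composite rules.  */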
8681
b6ec6215
KT
8682/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
8683 type as described in AAPCS64 \S 4.1.2.
8684
8685 See the comment above aarch64_composite_type_p for the notes on MODE. */
8686
8687static bool
8688aarch64_short_vector_p (const_tree type,
8689 machine_mode mode)
8690{
8691 HOST_WIDE_INT size = -1;
8692
8693 if (type && TREE_CODE (type) == VECTOR_TYPE)
8694 size = int_size_in_bytes (type);
8695 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8696 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8697 size = GET_MODE_SIZE (mode);
8698
8699 return (size == 8 || size == 16);
8700}
8701
43e9d192
IB
8702/* Return TRUE if the type, as described by TYPE and MODE, is a composite
8703 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
8704 array types. The C99 floating-point complex types are also considered
8705 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
8706 types, which are GCC extensions and out of the scope of AAPCS64, are
8707 treated as composite types here as well.
8708
8709 Note that MODE itself is not sufficient in determining whether a type
8710 is such a composite type or not. This is because
8711 stor-layout.c:compute_record_mode may have already changed the MODE
8712 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
8713 structure with only one field may have its MODE set to the mode of the
8714 field. Also an integer mode whose size matches the size of the
8715 RECORD_TYPE type may be used to substitute the original mode
8716 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
8717 solely relied on. */
8718
8719static bool
8720aarch64_composite_type_p (const_tree type,
ef4bddc2 8721 machine_mode mode)
43e9d192 8722{
b6ec6215
KT
8723 if (aarch64_short_vector_p (type, mode))
8724 return false;
8725
43e9d192
IB
8726 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
8727 return true;
8728
8729 if (mode == BLKmode
8730 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
8731 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
8732 return true;
8733
8734 return false;
8735}
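/* Illustrative sketch, not part of the original source: a single-field
   struct such as
     struct wrap { double d; };
   can have its TYPE_MODE changed from BLKmode to DFmode by
   stor-layout.c:compute_record_mode, which is why the check above looks at
   the tree (AGGREGATE_TYPE_P) rather than relying on MODE alone before
   treating the argument as a composite type.  */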
8736
43e9d192
IB
8737/* Return TRUE if an argument, whose type is described by TYPE and MODE,
8738 shall be passed or returned in simd/fp register(s) (providing these
8739 parameter passing registers are available).
8740
8741 Upon successful return, *COUNT returns the number of needed registers,
8742 *BASE_MODE returns the mode of the individual register and when IS_HA
8743 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
8744 floating-point aggregate or a homogeneous short-vector aggregate. */
8745
8746static bool
ef4bddc2 8747aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 8748 const_tree type,
ef4bddc2 8749 machine_mode *base_mode,
43e9d192
IB
8750 int *count,
8751 bool *is_ha)
8752{
ef4bddc2 8753 machine_mode new_mode = VOIDmode;
43e9d192
IB
8754 bool composite_p = aarch64_composite_type_p (type, mode);
8755
8756 if (is_ha != NULL) *is_ha = false;
8757
8758 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
8759 || aarch64_short_vector_p (type, mode))
8760 {
8761 *count = 1;
8762 new_mode = mode;
8763 }
8764 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
8765 {
8766 if (is_ha != NULL) *is_ha = true;
8767 *count = 2;
8768 new_mode = GET_MODE_INNER (mode);
8769 }
8770 else if (type && composite_p)
8771 {
8772 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
8773
8774 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
8775 {
8776 if (is_ha != NULL) *is_ha = true;
8777 *count = ag_count;
8778 }
8779 else
8780 return false;
8781 }
8782 else
8783 return false;
8784
8785 *base_mode = new_mode;
8786 return true;
8787}
8788
8789/* Implement TARGET_STRUCT_VALUE_RTX. */
8790
8791static rtx
8792aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
8793 int incoming ATTRIBUTE_UNUSED)
8794{
8795 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
8796}
8797
8798/* Implements target hook vector_mode_supported_p. */
8799static bool
ef4bddc2 8800aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192
IB
8801{
8802 if (TARGET_SIMD
8803 && (mode == V4SImode || mode == V8HImode
8804 || mode == V16QImode || mode == V2DImode
8805 || mode == V2SImode || mode == V4HImode
8806 || mode == V8QImode || mode == V2SFmode
ad7d90cc
AL
8807 || mode == V4SFmode || mode == V2DFmode
8808 || mode == V1DFmode))
43e9d192
IB
8809 return true;
8810
8811 return false;
8812}
8813
b7342d25
IB
8814/* Return appropriate SIMD container
8815 for MODE within a vector of WIDTH bits. */
ef4bddc2
RS
8816static machine_mode
8817aarch64_simd_container_mode (machine_mode mode, unsigned width)
43e9d192 8818{
b7342d25 8819 gcc_assert (width == 64 || width == 128);
43e9d192 8820 if (TARGET_SIMD)
b7342d25
IB
8821 {
8822 if (width == 128)
8823 switch (mode)
8824 {
8825 case DFmode:
8826 return V2DFmode;
8827 case SFmode:
8828 return V4SFmode;
8829 case SImode:
8830 return V4SImode;
8831 case HImode:
8832 return V8HImode;
8833 case QImode:
8834 return V16QImode;
8835 case DImode:
8836 return V2DImode;
8837 default:
8838 break;
8839 }
8840 else
8841 switch (mode)
8842 {
8843 case SFmode:
8844 return V2SFmode;
8845 case SImode:
8846 return V2SImode;
8847 case HImode:
8848 return V4HImode;
8849 case QImode:
8850 return V8QImode;
8851 default:
8852 break;
8853 }
8854 }
43e9d192
IB
8855 return word_mode;
8856}
8857
b7342d25 8858/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2
RS
8859static machine_mode
8860aarch64_preferred_simd_mode (machine_mode mode)
b7342d25
IB
8861{
8862 return aarch64_simd_container_mode (mode, 128);
8863}
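/* Illustrative sketch, not part of the original source: with TARGET_SIMD,
   aarch64_preferred_simd_mode (SImode) yields V4SImode (the 128-bit
   container with four 32-bit lanes), while aarch64_simd_container_mode
   (SImode, 64) would yield V2SImode; without SIMD both fall back to
   word_mode.  */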
8864
3b357264
JG
8865/* Return the bitmask of possible vector sizes for the vectorizer
8866 to iterate over. */
8867static unsigned int
8868aarch64_autovectorize_vector_sizes (void)
8869{
8870 return (16 | 8);
8871}
8872
ac2b960f
YZ
8873/* Implement TARGET_MANGLE_TYPE. */
8874
6f549691 8875static const char *
ac2b960f
YZ
8876aarch64_mangle_type (const_tree type)
8877{
8878 /* The AArch64 ABI documents say that "__va_list" has to be
8879 mangled as if it is in the "std" namespace. */
8880 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
8881 return "St9__va_list";
8882
c2ec330c
AL
8883 /* Half-precision float. */
8884 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
8885 return "Dh";
8886
f9d53c27
TB
8887 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
8888 builtin types. */
8889 if (TYPE_NAME (type) != NULL)
8890 return aarch64_mangle_builtin_type (type);
c6fc9e43 8891
ac2b960f
YZ
8892 /* Use the default mangling. */
8893 return NULL;
8894}
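/* Illustrative sketch, not part of the original source: for a C++
   declaration such as
     void g (__builtin_va_list);
   the hook above makes the parameter mangle as "St9__va_list", so g is
   emitted as _Z1gSt9__va_list, exactly as if __va_list were declared in
   namespace std.  */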
8895
8baff86e
KT
8896
8897/* Return true if the rtx_insn contains a MEM RTX somewhere
8898 in it. */
75cf1494
KT
8899
8900static bool
8baff86e 8901has_memory_op (rtx_insn *mem_insn)
75cf1494 8902{
8baff86e
KT
8903 subrtx_iterator::array_type array;
8904 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
8905 if (MEM_P (*iter))
8906 return true;
8907
8908 return false;
75cf1494
KT
8909}
8910
8911/* Find the first rtx_insn before insn that will generate an assembly
8912 instruction. */
8913
8914static rtx_insn *
8915aarch64_prev_real_insn (rtx_insn *insn)
8916{
8917 if (!insn)
8918 return NULL;
8919
8920 do
8921 {
8922 insn = prev_real_insn (insn);
8923 }
8924 while (insn && recog_memoized (insn) < 0);
8925
8926 return insn;
8927}
8928
8929static bool
8930is_madd_op (enum attr_type t1)
8931{
8932 unsigned int i;
8933 /* A number of these may be AArch32 only. */
8934 enum attr_type mlatypes[] = {
8935 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
8936 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
8937 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
8938 };
8939
8940 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
8941 {
8942 if (t1 == mlatypes[i])
8943 return true;
8944 }
8945
8946 return false;
8947}
8948
8949/* Check if there is a register dependency between a load and the insn
8950 for which we hold recog_data. */
8951
8952static bool
8953dep_between_memop_and_curr (rtx memop)
8954{
8955 rtx load_reg;
8956 int opno;
8957
8baff86e 8958 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
8959
8960 if (!REG_P (SET_DEST (memop)))
8961 return false;
8962
8963 load_reg = SET_DEST (memop);
8baff86e 8964 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
8965 {
8966 rtx operand = recog_data.operand[opno];
8967 if (REG_P (operand)
8968 && reg_overlap_mentioned_p (load_reg, operand))
8969 return true;
8970
8971 }
8972 return false;
8973}
8974
8baff86e
KT
8975
8976/* When working around the Cortex-A53 erratum 835769,
8977 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
8978 instruction and has a preceding memory instruction such that a NOP
8979 should be inserted between them. */
8980
75cf1494
KT
8981bool
8982aarch64_madd_needs_nop (rtx_insn* insn)
8983{
8984 enum attr_type attr_type;
8985 rtx_insn *prev;
8986 rtx body;
8987
8988 if (!aarch64_fix_a53_err835769)
8989 return false;
8990
8991 if (recog_memoized (insn) < 0)
8992 return false;
8993
8994 attr_type = get_attr_type (insn);
8995 if (!is_madd_op (attr_type))
8996 return false;
8997
8998 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
8999 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
9000 Restore recog state to INSN to avoid state corruption. */
9001 extract_constrain_insn_cached (insn);
9002
8baff86e 9003 if (!prev || !has_memory_op (prev))
75cf1494
KT
9004 return false;
9005
9006 body = single_set (prev);
9007
9008 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
9009 it and the DImode madd, emit a NOP between them. If body is NULL then we
9010 have a complex memory operation, probably a load/store pair.
9011 Be conservative for now and emit a NOP. */
9012 if (GET_MODE (recog_data.operand[0]) == DImode
9013 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
9014 return true;
9015
9016 return false;
9017
9018}
9019
8baff86e
KT
9020
9021/* Implement FINAL_PRESCAN_INSN. */
9022
75cf1494
KT
9023void
9024aarch64_final_prescan_insn (rtx_insn *insn)
9025{
9026 if (aarch64_madd_needs_nop (insn))
9027 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
9028}
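/* Illustrative sketch, not part of the original source: with
   -mfix-cortex-a53-835769, a 64-bit multiply-accumulate that follows a
   memory access it does not depend on gets a NOP inserted by the
   FINAL_PRESCAN_INSN hook above, e.g.

     ldr  x1, [x2]
     nop          // between mem op and mult-accumulate
     madd x0, x3, x4, x5

   If the madd instead consumed x1, dep_between_memop_and_curr would detect
   the dependency and no NOP would be emitted.  */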
9029
9030
43e9d192 9031/* Return the equivalent letter for size. */
81c2dfb9 9032static char
43e9d192
IB
9033sizetochar (int size)
9034{
9035 switch (size)
9036 {
9037 case 64: return 'd';
9038 case 32: return 's';
9039 case 16: return 'h';
9040 case 8 : return 'b';
9041 default: gcc_unreachable ();
9042 }
9043}
9044
3520f7cc
JG
9045/* Return true iff x is a uniform vector of floating-point
9046 constants, and the constant can be represented in
9047 quarter-precision form. Note, as aarch64_float_const_representable
9048 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
9049static bool
9050aarch64_vect_float_const_representable_p (rtx x)
9051{
9052 int i = 0;
9053 REAL_VALUE_TYPE r0, ri;
9054 rtx x0, xi;
9055
9056 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
9057 return false;
9058
9059 x0 = CONST_VECTOR_ELT (x, 0);
9060 if (!CONST_DOUBLE_P (x0))
9061 return false;
9062
9063 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
9064
9065 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
9066 {
9067 xi = CONST_VECTOR_ELT (x, i);
9068 if (!CONST_DOUBLE_P (xi))
9069 return false;
9070
9071 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
9072 if (!REAL_VALUES_EQUAL (r0, ri))
9073 return false;
9074 }
9075
9076 return aarch64_float_const_representable_p (x0);
9077}
9078
d8edd899 9079/* Return true for valid and false for invalid. */
3ea63f60 9080bool
ef4bddc2 9081aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
48063b9d 9082 struct simd_immediate_info *info)
43e9d192
IB
9083{
9084#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
9085 matches = 1; \
9086 for (i = 0; i < idx; i += (STRIDE)) \
9087 if (!(TEST)) \
9088 matches = 0; \
9089 if (matches) \
9090 { \
9091 immtype = (CLASS); \
9092 elsize = (ELSIZE); \
43e9d192
IB
9093 eshift = (SHIFT); \
9094 emvn = (NEG); \
9095 break; \
9096 }
9097
9098 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
cb5ca315 9099 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
43e9d192 9100 unsigned char bytes[16];
43e9d192
IB
9101 int immtype = -1, matches;
9102 unsigned int invmask = inverse ? 0xff : 0;
9103 int eshift, emvn;
9104
43e9d192 9105 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 9106 {
81c2dfb9
IB
9107 if (! (aarch64_simd_imm_zero_p (op, mode)
9108 || aarch64_vect_float_const_representable_p (op)))
d8edd899 9109 return false;
3520f7cc 9110
48063b9d
IB
9111 if (info)
9112 {
9113 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 9114 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
9115 info->mvn = false;
9116 info->shift = 0;
9117 }
3520f7cc 9118
d8edd899 9119 return true;
3520f7cc 9120 }
43e9d192
IB
9121
9122 /* Splat vector constant out into a byte vector. */
9123 for (i = 0; i < n_elts; i++)
9124 {
4b1e108c
AL
9125 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
9126 it must be laid out in the vector register in reverse order. */
9127 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192
IB
9128 unsigned HOST_WIDE_INT elpart;
9129 unsigned int part, parts;
9130
4aa81c2e 9131 if (CONST_INT_P (el))
43e9d192
IB
9132 {
9133 elpart = INTVAL (el);
9134 parts = 1;
9135 }
9136 else if (GET_CODE (el) == CONST_DOUBLE)
9137 {
9138 elpart = CONST_DOUBLE_LOW (el);
9139 parts = 2;
9140 }
9141 else
9142 gcc_unreachable ();
9143
9144 for (part = 0; part < parts; part++)
9145 {
9146 unsigned int byte;
9147 for (byte = 0; byte < innersize; byte++)
9148 {
9149 bytes[idx++] = (elpart & 0xff) ^ invmask;
9150 elpart >>= BITS_PER_UNIT;
9151 }
9152 if (GET_CODE (el) == CONST_DOUBLE)
9153 elpart = CONST_DOUBLE_HIGH (el);
9154 }
9155 }
9156
9157 /* Sanity check. */
9158 gcc_assert (idx == GET_MODE_SIZE (mode));
9159
9160 do
9161 {
9162 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
9163 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
9164
9165 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9166 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
9167
9168 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
9169 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
9170
9171 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
9172 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
9173
9174 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
9175
9176 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
9177
9178 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
9179 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
9180
9181 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9182 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
9183
9184 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
9185 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
9186
9187 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
9188 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
9189
9190 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
9191
9192 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
9193
9194 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 9195 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
9196
9197 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 9198 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
9199
9200 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 9201 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
9202
9203 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 9204 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
9205
9206 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
9207
9208 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
9209 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
9210 }
9211 while (0);
9212
e4f0f84d 9213 if (immtype == -1)
d8edd899 9214 return false;
43e9d192 9215
48063b9d 9216 if (info)
43e9d192 9217 {
48063b9d 9218 info->element_width = elsize;
48063b9d
IB
9219 info->mvn = emvn != 0;
9220 info->shift = eshift;
9221
43e9d192
IB
9222 unsigned HOST_WIDE_INT imm = 0;
9223
e4f0f84d
TB
9224 if (immtype >= 12 && immtype <= 15)
9225 info->msl = true;
9226
43e9d192
IB
9227 /* Un-invert bytes of recognized vector, if necessary. */
9228 if (invmask != 0)
9229 for (i = 0; i < idx; i++)
9230 bytes[i] ^= invmask;
9231
9232 if (immtype == 17)
9233 {
9234 /* FIXME: Broken on 32-bit H_W_I hosts. */
9235 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9236
9237 for (i = 0; i < 8; i++)
9238 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9239 << (i * BITS_PER_UNIT);
9240
43e9d192 9241
48063b9d
IB
9242 info->value = GEN_INT (imm);
9243 }
9244 else
9245 {
9246 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9247 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
9248
9249 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
9250 generic constants. */
9251 if (info->mvn)
43e9d192 9252 imm = ~imm;
48063b9d
IB
9253 imm = (imm >> info->shift) & 0xff;
9254 info->value = GEN_INT (imm);
9255 }
43e9d192
IB
9256 }
9257
48063b9d 9258 return true;
43e9d192
IB
9259#undef CHECK
9260}
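/* Illustrative example (added for exposition, not part of the original sources):
   the all-lanes V4SI constant 0x00ab0000 decomposes into the repeating byte
   pattern { 0x00, 0x00, 0xab, 0x00 }, so the CHECK (4, 32, 2, ...) case above
   matches with element width 32, shift 16 and no inversion, and the constant
   can be emitted as

	movi	v0.4s, 0xab, lsl 16

   The complemented pattern 0xffabffff instead matches the 0xff-based
   CHECK (4, 32, 8, ...) case and is emitted with MVNI.  */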
9261
43e9d192
IB
9262/* Check if immediate shift constants are within range.  */
9263bool
ef4bddc2 9264aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
9265{
9266 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
9267 if (left)
ddeabd3e 9268 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 9269 else
ddeabd3e 9270 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
9271}
9272
3520f7cc
JG
9273/* Return true if X is a uniform vector where all elements
9274 are either the floating-point constant 0.0 or the
9275 integer constant 0. */
43e9d192 9276bool
ef4bddc2 9277aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
43e9d192 9278{
3520f7cc 9279 return x == CONST0_RTX (mode);
43e9d192
IB
9280}
9281
9282bool
ef4bddc2 9283aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
43e9d192
IB
9284{
9285 HOST_WIDE_INT imm = INTVAL (x);
9286 int i;
9287
9288 for (i = 0; i < 8; i++)
9289 {
9290 unsigned int byte = imm & 0xff;
9291 if (byte != 0xff && byte != 0)
9292 return false;
9293 imm >>= 8;
9294 }
9295
9296 return true;
9297}
9298
83f8c414
CSS
9299bool
9300aarch64_mov_operand_p (rtx x,
a5350ddc 9301 enum aarch64_symbol_context context,
ef4bddc2 9302 machine_mode mode)
83f8c414 9303{
83f8c414
CSS
9304 if (GET_CODE (x) == HIGH
9305 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
9306 return true;
9307
82614948 9308 if (CONST_INT_P (x))
83f8c414
CSS
9309 return true;
9310
9311 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
9312 return true;
9313
a5350ddc
CSS
9314 return aarch64_classify_symbolic_expression (x, context)
9315 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
9316}
9317
43e9d192
IB
9318/* Return a const_int vector of VAL. */
9319rtx
ef4bddc2 9320aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
43e9d192
IB
9321{
9322 int nunits = GET_MODE_NUNITS (mode);
9323 rtvec v = rtvec_alloc (nunits);
9324 int i;
9325
9326 for (i=0; i < nunits; i++)
9327 RTVEC_ELT (v, i) = GEN_INT (val);
9328
9329 return gen_rtx_CONST_VECTOR (mode, v);
9330}
9331
051d0e2f
SN
9332/* Check OP is a legal scalar immediate for the MOVI instruction. */
9333
9334bool
ef4bddc2 9335aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
051d0e2f 9336{
ef4bddc2 9337 machine_mode vmode;
051d0e2f
SN
9338
9339 gcc_assert (!VECTOR_MODE_P (mode));
9340 vmode = aarch64_preferred_simd_mode (mode);
9341 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 9342 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
9343}
9344
988fa693
JG
9345/* Construct and return a PARALLEL RTX vector with elements numbering the
9346 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
9347 the vector - from the perspective of the architecture. This does not
9348 line up with GCC's perspective on lane numbers, so we end up with
9349 different masks depending on our target endian-ness. The diagram
9350 below may help. We must draw the distinction when building masks
9351 which select one half of the vector. An instruction selecting
9352   architectural low-lanes for a big-endian target must be described using
9353 a mask selecting GCC high-lanes.
9354
9355 Big-Endian Little-Endian
9356
9357GCC 0 1 2 3 3 2 1 0
9358 | x | x | x | x | | x | x | x | x |
9359Architecture 3 2 1 0 3 2 1 0
9360
9361Low Mask: { 2, 3 } { 0, 1 }
9362High Mask: { 0, 1 } { 2, 3 }
9363*/
9364
43e9d192 9365rtx
ef4bddc2 9366aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
43e9d192
IB
9367{
9368 int nunits = GET_MODE_NUNITS (mode);
9369 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
9370 int high_base = nunits / 2;
9371 int low_base = 0;
9372 int base;
43e9d192
IB
9373 rtx t1;
9374 int i;
9375
988fa693
JG
9376 if (BYTES_BIG_ENDIAN)
9377 base = high ? low_base : high_base;
9378 else
9379 base = high ? high_base : low_base;
9380
9381 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
9382 RTVEC_ELT (v, i) = GEN_INT (base + i);
9383
9384 t1 = gen_rtx_PARALLEL (mode, v);
9385 return t1;
9386}
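/* Worked example (for exposition only): for V4SImode, HIGH == true yields
   (parallel [2 3]) on little-endian but (parallel [0 1]) on big-endian,
   matching the diagram above; HIGH == false selects the other half in each
   case.  */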
9387
988fa693
JG
9388/* Check OP for validity as a PARALLEL RTX vector with elements
9389 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
9390 from the perspective of the architecture. See the diagram above
9391 aarch64_simd_vect_par_cnst_half for more details. */
9392
9393bool
ef4bddc2 9394aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
9395 bool high)
9396{
9397 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
9398 HOST_WIDE_INT count_op = XVECLEN (op, 0);
9399 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
9400 int i = 0;
9401
9402 if (!VECTOR_MODE_P (mode))
9403 return false;
9404
9405 if (count_op != count_ideal)
9406 return false;
9407
9408 for (i = 0; i < count_ideal; i++)
9409 {
9410 rtx elt_op = XVECEXP (op, 0, i);
9411 rtx elt_ideal = XVECEXP (ideal, 0, i);
9412
4aa81c2e 9413 if (!CONST_INT_P (elt_op)
988fa693
JG
9414 || INTVAL (elt_ideal) != INTVAL (elt_op))
9415 return false;
9416 }
9417 return true;
9418}
9419
43e9d192
IB
9420/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
9421 HIGH (exclusive). */
9422void
46ed6024
CB
9423aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9424 const_tree exp)
43e9d192
IB
9425{
9426 HOST_WIDE_INT lane;
4aa81c2e 9427 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
9428 lane = INTVAL (operand);
9429
9430 if (lane < low || lane >= high)
46ed6024
CB
9431 {
9432 if (exp)
cf0c27ef 9433 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 9434 else
cf0c27ef 9435 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 9436 }
43e9d192
IB
9437}
9438
43e9d192
IB
9439/* Return TRUE if OP is a valid vector addressing mode. */
9440bool
9441aarch64_simd_mem_operand_p (rtx op)
9442{
9443 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 9444 || REG_P (XEXP (op, 0)));
43e9d192
IB
9445}
9446
2d8c6dc1
AH
9447/* Emit a register copy from operand to operand, taking care not to
9448 early-clobber source registers in the process.
43e9d192 9449
2d8c6dc1
AH
9450 COUNT is the number of components into which the copy needs to be
9451 decomposed. */
43e9d192 9452void
2d8c6dc1
AH
9453aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
9454 unsigned int count)
43e9d192
IB
9455{
9456 unsigned int i;
2d8c6dc1
AH
9457 int rdest = REGNO (operands[0]);
9458 int rsrc = REGNO (operands[1]);
43e9d192
IB
9459
9460 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
9461 || rdest < rsrc)
9462 for (i = 0; i < count; i++)
9463 emit_move_insn (gen_rtx_REG (mode, rdest + i),
9464 gen_rtx_REG (mode, rsrc + i));
43e9d192 9465 else
2d8c6dc1
AH
9466 for (i = 0; i < count; i++)
9467 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
9468 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
9469}
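/* Illustrative example (register numbers invented): copying an OImode value
   (two Q registers) from v1-v2 to v2-v3 overlaps with rdest > rsrc, so the
   loop above copies the highest component first:

	mov	v3.16b, v2.16b
	mov	v2.16b, v1.16b

   Copying v2-v3 to v1-v2 instead goes lowest-first, so the overlapping
   register is never clobbered before it has been read.  */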
9470
9471/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
9472 one of VSTRUCT modes: OI, CI or XI. */
9473int
647d790d 9474aarch64_simd_attr_length_move (rtx_insn *insn)
43e9d192 9475{
ef4bddc2 9476 machine_mode mode;
43e9d192
IB
9477
9478 extract_insn_cached (insn);
9479
9480 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
9481 {
9482 mode = GET_MODE (recog_data.operand[0]);
9483 switch (mode)
9484 {
9485 case OImode:
9486 return 8;
9487 case CImode:
9488 return 12;
9489 case XImode:
9490 return 16;
9491 default:
9492 gcc_unreachable ();
9493 }
9494 }
9495 return 4;
9496}
9497
668046d1
DS
9498/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
9499 one of VSTRUCT modes: OI, CI, EI, or XI. */
9500int
9501aarch64_simd_attr_length_rglist (enum machine_mode mode)
9502{
9503 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
9504}
9505
db0253a4
TB
9506/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
9507 alignment of a vector to 128 bits. */
9508static HOST_WIDE_INT
9509aarch64_simd_vector_alignment (const_tree type)
9510{
9439e9a1 9511 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
9512 return MIN (align, 128);
9513}
9514
9515/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
9516static bool
9517aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
9518{
9519 if (is_packed)
9520 return false;
9521
9522 /* We guarantee alignment for vectors up to 128-bits. */
9523 if (tree_int_cst_compare (TYPE_SIZE (type),
9524 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
9525 return false;
9526
9527 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
9528 return true;
9529}
9530
4369c11e
TB
9531/* If VALS is a vector constant that can be loaded into a register
9532 using DUP, generate instructions to do so and return an RTX to
9533 assign to the register. Otherwise return NULL_RTX. */
9534static rtx
9535aarch64_simd_dup_constant (rtx vals)
9536{
ef4bddc2
RS
9537 machine_mode mode = GET_MODE (vals);
9538 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e
TB
9539 int n_elts = GET_MODE_NUNITS (mode);
9540 bool all_same = true;
9541 rtx x;
9542 int i;
9543
9544 if (GET_CODE (vals) != CONST_VECTOR)
9545 return NULL_RTX;
9546
9547 for (i = 1; i < n_elts; ++i)
9548 {
9549 x = CONST_VECTOR_ELT (vals, i);
9550 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
9551 all_same = false;
9552 }
9553
9554 if (!all_same)
9555 return NULL_RTX;
9556
9557 /* We can load this constant by using DUP and a constant in a
9558 single ARM register. This will be cheaper than a vector
9559 load. */
9560 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
9561 return gen_rtx_VEC_DUPLICATE (mode, x);
9562}
9563
9564
9565/* Generate code to load VALS, which is a PARALLEL containing only
9566 constants (for vec_init) or CONST_VECTOR, efficiently into a
9567 register. Returns an RTX to copy into the register, or NULL_RTX
9568   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
1df3f464 9569static rtx
4369c11e
TB
9570aarch64_simd_make_constant (rtx vals)
9571{
ef4bddc2 9572 machine_mode mode = GET_MODE (vals);
4369c11e
TB
9573 rtx const_dup;
9574 rtx const_vec = NULL_RTX;
9575 int n_elts = GET_MODE_NUNITS (mode);
9576 int n_const = 0;
9577 int i;
9578
9579 if (GET_CODE (vals) == CONST_VECTOR)
9580 const_vec = vals;
9581 else if (GET_CODE (vals) == PARALLEL)
9582 {
9583 /* A CONST_VECTOR must contain only CONST_INTs and
9584 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9585 Only store valid constants in a CONST_VECTOR. */
9586 for (i = 0; i < n_elts; ++i)
9587 {
9588 rtx x = XVECEXP (vals, 0, i);
9589 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
9590 n_const++;
9591 }
9592 if (n_const == n_elts)
9593 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9594 }
9595 else
9596 gcc_unreachable ();
9597
9598 if (const_vec != NULL_RTX
48063b9d 9599 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
9600 /* Load using MOVI/MVNI. */
9601 return const_vec;
9602 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
9603 /* Loaded using DUP. */
9604 return const_dup;
9605 else if (const_vec != NULL_RTX)
9606     /* Load from constant pool.  We cannot take advantage of single-cycle
9607 LD1 because we need a PC-relative addressing mode. */
9608 return const_vec;
9609 else
9610 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9611       We cannot construct an initializer.  */
9612 return NULL_RTX;
9613}
9614
9615void
9616aarch64_expand_vector_init (rtx target, rtx vals)
9617{
ef4bddc2
RS
9618 machine_mode mode = GET_MODE (target);
9619 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 9620 int n_elts = GET_MODE_NUNITS (mode);
8b66a2d4
AL
9621 int n_var = 0;
9622 rtx any_const = NULL_RTX;
4369c11e 9623 bool all_same = true;
4369c11e 9624
8b66a2d4 9625 for (int i = 0; i < n_elts; ++i)
4369c11e 9626 {
8b66a2d4 9627 rtx x = XVECEXP (vals, 0, i);
4369c11e 9628 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8b66a2d4
AL
9629 ++n_var;
9630 else
9631 any_const = x;
4369c11e 9632
8b66a2d4 9633 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
4369c11e
TB
9634 all_same = false;
9635 }
9636
9637 if (n_var == 0)
9638 {
9639 rtx constant = aarch64_simd_make_constant (vals);
9640 if (constant != NULL_RTX)
9641 {
9642 emit_move_insn (target, constant);
9643 return;
9644 }
9645 }
9646
9647 /* Splat a single non-constant element if we can. */
9648 if (all_same)
9649 {
8b66a2d4 9650 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
4369c11e
TB
9651 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
9652 return;
9653 }
9654
8b66a2d4
AL
9655   /* Half the fields (or fewer) are non-constant.  Load the constant part first,
9656      then overwrite the varying fields; hope that this is more efficient than using the stack.  */
9657 if (n_var <= n_elts/2)
4369c11e
TB
9658 {
9659 rtx copy = copy_rtx (vals);
4369c11e 9660
8b66a2d4
AL
9661 /* Load constant part of vector. We really don't care what goes into the
9662 parts we will overwrite, but we're more likely to be able to load the
9663 constant efficiently if it has fewer, larger, repeating parts
9664 (see aarch64_simd_valid_immediate). */
9665 for (int i = 0; i < n_elts; i++)
9666 {
9667 rtx x = XVECEXP (vals, 0, i);
9668 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
9669 continue;
9670 rtx subst = any_const;
9671 for (int bit = n_elts / 2; bit > 0; bit /= 2)
9672 {
9673 /* Look in the copied vector, as more elements are const. */
9674 rtx test = XVECEXP (copy, 0, i ^ bit);
9675 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
9676 {
9677 subst = test;
9678 break;
9679 }
9680 }
9681 XVECEXP (copy, 0, i) = subst;
9682 }
4369c11e
TB
9683 aarch64_expand_vector_init (target, copy);
9684
8b66a2d4
AL
9685 /* Insert variables. */
9686 enum insn_code icode = optab_handler (vec_set_optab, mode);
4369c11e 9687 gcc_assert (icode != CODE_FOR_nothing);
8b66a2d4
AL
9688
9689 for (int i = 0; i < n_elts; i++)
9690 {
9691 rtx x = XVECEXP (vals, 0, i);
9692 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
9693 continue;
9694 x = copy_to_mode_reg (inner_mode, x);
9695 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
9696 }
4369c11e
TB
9697 return;
9698 }
9699
9700 /* Construct the vector in memory one field at a time
9701 and load the whole vector. */
8b66a2d4
AL
9702 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
9703 for (int i = 0; i < n_elts; i++)
4369c11e
TB
9704 emit_move_insn (adjust_address_nv (mem, inner_mode,
9705 i * GET_MODE_SIZE (inner_mode)),
9706 XVECEXP (vals, 0, i));
9707 emit_move_insn (target, mem);
9708
9709}
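/* Illustrative sketch of the strategy above (not emitted verbatim): for a
   V4SI initializer { x, 1, 2, 3 } only one element is variable, so the
   constant part is loaded first (with the variable lane temporarily filled
   with a nearby constant from the copy) and the variable lane inserted
   afterwards, roughly

	ldr	q0, .LC0	// constant { 2, 1, 2, 3 } from the literal pool
	ins	v0.s[0], w0	// overwrite lane 0 with x

   An initializer with mostly variable elements instead goes through the
   memory path at the end of the function.  */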
9710
43e9d192 9711static unsigned HOST_WIDE_INT
ef4bddc2 9712aarch64_shift_truncation_mask (machine_mode mode)
43e9d192
IB
9713{
9714 return
9715 (aarch64_vector_mode_supported_p (mode)
9716 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
9717}
9718
9719#ifndef TLS_SECTION_ASM_FLAG
9720#define TLS_SECTION_ASM_FLAG 'T'
9721#endif
9722
9723void
9724aarch64_elf_asm_named_section (const char *name, unsigned int flags,
9725 tree decl ATTRIBUTE_UNUSED)
9726{
9727 char flagchars[10], *f = flagchars;
9728
9729 /* If we have already declared this section, we can use an
9730 abbreviated form to switch back to it -- unless this section is
9731     part of a COMDAT group, in which case GAS requires the full
9732 declaration every time. */
9733 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
9734 && (flags & SECTION_DECLARED))
9735 {
9736 fprintf (asm_out_file, "\t.section\t%s\n", name);
9737 return;
9738 }
9739
9740 if (!(flags & SECTION_DEBUG))
9741 *f++ = 'a';
9742 if (flags & SECTION_WRITE)
9743 *f++ = 'w';
9744 if (flags & SECTION_CODE)
9745 *f++ = 'x';
9746 if (flags & SECTION_SMALL)
9747 *f++ = 's';
9748 if (flags & SECTION_MERGE)
9749 *f++ = 'M';
9750 if (flags & SECTION_STRINGS)
9751 *f++ = 'S';
9752 if (flags & SECTION_TLS)
9753 *f++ = TLS_SECTION_ASM_FLAG;
9754 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
9755 *f++ = 'G';
9756 *f = '\0';
9757
9758 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
9759
9760 if (!(flags & SECTION_NOTYPE))
9761 {
9762 const char *type;
9763 const char *format;
9764
9765 if (flags & SECTION_BSS)
9766 type = "nobits";
9767 else
9768 type = "progbits";
9769
9770#ifdef TYPE_OPERAND_FMT
9771 format = "," TYPE_OPERAND_FMT;
9772#else
9773 format = ",@%s";
9774#endif
9775
9776 fprintf (asm_out_file, format, type);
9777
9778 if (flags & SECTION_ENTSIZE)
9779 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
9780 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
9781 {
9782 if (TREE_CODE (decl) == IDENTIFIER_NODE)
9783 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
9784 else
9785 fprintf (asm_out_file, ",%s,comdat",
9786 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
9787 }
9788 }
9789
9790 putc ('\n', asm_out_file);
9791}
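/* Example output (illustrative): a plain text section comes out as

	.section	.text.hot,"ax",@progbits

   while a COMDAT group section gets the 'G' flag plus the group name, e.g.

	.section	.text._Z3foov,"axG",@progbits,_Z3foov,comdat

   and a previously declared non-COMDAT section is re-entered with just
   ".section <name>".  */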
9792
9793/* Select a format to encode pointers in exception handling data. */
9794int
9795aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
9796{
9797 int type;
9798 switch (aarch64_cmodel)
9799 {
9800 case AARCH64_CMODEL_TINY:
9801 case AARCH64_CMODEL_TINY_PIC:
9802 case AARCH64_CMODEL_SMALL:
9803 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 9804 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
9805 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
9806 for everything. */
9807 type = DW_EH_PE_sdata4;
9808 break;
9809 default:
9810 /* No assumptions here. 8-byte relocs required. */
9811 type = DW_EH_PE_sdata8;
9812 break;
9813 }
9814 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
9815}
9816
0462169c
SN
9817/* Emit load exclusive. */
9818
9819static void
ef4bddc2 9820aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
9821 rtx mem, rtx model_rtx)
9822{
9823 rtx (*gen) (rtx, rtx, rtx);
9824
9825 switch (mode)
9826 {
9827 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
9828 case HImode: gen = gen_aarch64_load_exclusivehi; break;
9829 case SImode: gen = gen_aarch64_load_exclusivesi; break;
9830 case DImode: gen = gen_aarch64_load_exclusivedi; break;
9831 default:
9832 gcc_unreachable ();
9833 }
9834
9835 emit_insn (gen (rval, mem, model_rtx));
9836}
9837
9838/* Emit store exclusive. */
9839
9840static void
ef4bddc2 9841aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
9842 rtx rval, rtx mem, rtx model_rtx)
9843{
9844 rtx (*gen) (rtx, rtx, rtx, rtx);
9845
9846 switch (mode)
9847 {
9848 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
9849 case HImode: gen = gen_aarch64_store_exclusivehi; break;
9850 case SImode: gen = gen_aarch64_store_exclusivesi; break;
9851 case DImode: gen = gen_aarch64_store_exclusivedi; break;
9852 default:
9853 gcc_unreachable ();
9854 }
9855
9856 emit_insn (gen (bval, rval, mem, model_rtx));
9857}
9858
9859/* Emit the jump insn pattern INSN and mark the resulting jump as unlikely to be taken.  */
9860
9861static void
9862aarch64_emit_unlikely_jump (rtx insn)
9863{
e5af9ddd 9864 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c
SN
9865
9866 insn = emit_jump_insn (insn);
e5af9ddd 9867 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
0462169c
SN
9868}
9869
9870/* Expand a compare and swap pattern. */
9871
9872void
9873aarch64_expand_compare_and_swap (rtx operands[])
9874{
9875 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
ef4bddc2 9876 machine_mode mode, cmp_mode;
0462169c
SN
9877 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
9878
9879 bval = operands[0];
9880 rval = operands[1];
9881 mem = operands[2];
9882 oldval = operands[3];
9883 newval = operands[4];
9884 is_weak = operands[5];
9885 mod_s = operands[6];
9886 mod_f = operands[7];
9887 mode = GET_MODE (mem);
9888 cmp_mode = mode;
9889
9890 /* Normally the succ memory model must be stronger than fail, but in the
9891 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
9892 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
9893
46b35980
AM
9894 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
9895 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
9896 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
9897
9898 switch (mode)
9899 {
9900 case QImode:
9901 case HImode:
9902 /* For short modes, we're going to perform the comparison in SImode,
9903 so do the zero-extension now. */
9904 cmp_mode = SImode;
9905 rval = gen_reg_rtx (SImode);
9906 oldval = convert_modes (SImode, mode, oldval, true);
9907 /* Fall through. */
9908
9909 case SImode:
9910 case DImode:
9911 /* Force the value into a register if needed. */
9912 if (!aarch64_plus_operand (oldval, mode))
9913 oldval = force_reg (cmp_mode, oldval);
9914 break;
9915
9916 default:
9917 gcc_unreachable ();
9918 }
9919
9920 switch (mode)
9921 {
9922 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
9923 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
9924 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
9925 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
9926 default:
9927 gcc_unreachable ();
9928 }
9929
9930 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
9931
9932 if (mode == QImode || mode == HImode)
9933 emit_move_insn (operands[1], gen_lowpart (mode, rval));
9934
9935 x = gen_rtx_REG (CCmode, CC_REGNUM);
9936 x = gen_rtx_EQ (SImode, x, const0_rtx);
f7df4a84 9937 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
9938}
9939
f70fb3b6
MW
9940/* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
9941 sequence implementing an atomic operation. */
9942
9943static void
9944aarch64_emit_post_barrier (enum memmodel model)
9945{
9946 const enum memmodel base_model = memmodel_base (model);
9947
9948 if (is_mm_sync (model)
9949 && (base_model == MEMMODEL_ACQUIRE
9950 || base_model == MEMMODEL_ACQ_REL
9951 || base_model == MEMMODEL_SEQ_CST))
9952 {
9953 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
9954 }
9955}
9956
0462169c
SN
9957/* Split a compare and swap pattern. */
9958
9959void
9960aarch64_split_compare_and_swap (rtx operands[])
9961{
9962 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 9963 machine_mode mode;
0462169c 9964 bool is_weak;
5d8a22a5
DM
9965 rtx_code_label *label1, *label2;
9966 rtx x, cond;
ab876106
MW
9967 enum memmodel model;
9968 rtx model_rtx;
0462169c
SN
9969
9970 rval = operands[0];
9971 mem = operands[1];
9972 oldval = operands[2];
9973 newval = operands[3];
9974 is_weak = (operands[4] != const0_rtx);
ab876106 9975 model_rtx = operands[5];
0462169c
SN
9976 scratch = operands[7];
9977 mode = GET_MODE (mem);
ab876106 9978 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 9979
5d8a22a5 9980 label1 = NULL;
0462169c
SN
9981 if (!is_weak)
9982 {
9983 label1 = gen_label_rtx ();
9984 emit_label (label1);
9985 }
9986 label2 = gen_label_rtx ();
9987
ab876106
MW
9988 /* The initial load can be relaxed for a __sync operation since a final
9989 barrier will be emitted to stop code hoisting. */
9990 if (is_mm_sync (model))
9991 aarch64_emit_load_exclusive (mode, rval, mem,
9992 GEN_INT (MEMMODEL_RELAXED));
9993 else
9994 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c
SN
9995
9996 cond = aarch64_gen_compare_reg (NE, rval, oldval);
9997 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9998 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9999 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
f7df4a84 10000 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c 10001
ab876106 10002 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
10003
10004 if (!is_weak)
10005 {
10006 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
10007 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
10008 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 10009 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
10010 }
10011 else
10012 {
10013 cond = gen_rtx_REG (CCmode, CC_REGNUM);
10014 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
f7df4a84 10015 emit_insn (gen_rtx_SET (cond, x));
0462169c
SN
10016 }
10017
10018 emit_label (label2);
ab876106
MW
10019
10020 /* Emit any final barrier needed for a __sync operation. */
10021 if (is_mm_sync (model))
10022 aarch64_emit_post_barrier (model);
0462169c
SN
10023}
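/* Rough shape of the sequence emitted above for a strong SImode
   compare-and-swap with acquire/release semantics (illustrative only,
   register numbers invented):

   .L1:	ldaxr	w1, [x0]
	cmp	w1, w2
	bne	.L2
	stlxr	w3, w4, [x0]
	cbnz	w3, .L1
   .L2:

   A weak compare-and-swap omits the retry branch back to .L1.  */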
10024
10025/* Split an atomic operation. */
10026
10027void
10028aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
10029 rtx value, rtx model_rtx, rtx cond)
10030{
ef4bddc2
RS
10031 machine_mode mode = GET_MODE (mem);
10032 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
10033 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
10034 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
10035 rtx_code_label *label;
10036 rtx x;
0462169c
SN
10037
10038 label = gen_label_rtx ();
10039 emit_label (label);
10040
10041 if (new_out)
10042 new_out = gen_lowpart (wmode, new_out);
10043 if (old_out)
10044 old_out = gen_lowpart (wmode, old_out);
10045 else
10046 old_out = new_out;
10047 value = simplify_gen_subreg (wmode, value, mode, 0);
10048
f70fb3b6
MW
10049 /* The initial load can be relaxed for a __sync operation since a final
10050 barrier will be emitted to stop code hoisting. */
10051 if (is_sync)
10052 aarch64_emit_load_exclusive (mode, old_out, mem,
10053 GEN_INT (MEMMODEL_RELAXED));
10054 else
10055 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
10056
10057 switch (code)
10058 {
10059 case SET:
10060 new_out = value;
10061 break;
10062
10063 case NOT:
10064 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 10065 emit_insn (gen_rtx_SET (new_out, x));
0462169c 10066 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 10067 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
10068 break;
10069
10070 case MINUS:
10071 if (CONST_INT_P (value))
10072 {
10073 value = GEN_INT (-INTVAL (value));
10074 code = PLUS;
10075 }
10076 /* Fall through. */
10077
10078 default:
10079 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 10080 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
10081 break;
10082 }
10083
10084 aarch64_emit_store_exclusive (mode, cond, mem,
10085 gen_lowpart (mode, new_out), model_rtx);
10086
10087 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
10088 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
10089 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 10090 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
10091
10092 /* Emit any final barrier needed for a __sync operation. */
10093 if (is_sync)
10094 aarch64_emit_post_barrier (model);
0462169c
SN
10095}
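/* Illustrative expansion (register numbers invented): a relaxed SImode
   fetch-and-add becomes the usual load-/store-exclusive retry loop:

   .L1:	ldxr	w0, [x1]
	add	w2, w0, w3
	stxr	w4, w2, [x1]
	cbnz	w4, .L1

   The NOT and MINUS cases above only change the arithmetic in the middle
   of the loop.  */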
10096
95ca411e
YZ
10097static void
10098aarch64_print_extension (void)
10099{
10100 const struct aarch64_option_extension *opt = NULL;
10101
10102 for (opt = all_extensions; opt->name != NULL; opt++)
10103 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
10104 asm_fprintf (asm_out_file, "+%s", opt->name);
10105
10106 asm_fprintf (asm_out_file, "\n");
10107}
10108
43e9d192
IB
10109static void
10110aarch64_start_file (void)
10111{
10112 if (selected_arch)
95ca411e
YZ
10113 {
10114 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
10115 aarch64_print_extension ();
10116 }
43e9d192 10117 else if (selected_cpu)
95ca411e 10118 {
682287fb
JG
10119 const char *truncated_name
10120 = aarch64_rewrite_selected_cpu (selected_cpu->name);
10121 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
95ca411e
YZ
10122 aarch64_print_extension ();
10123 }
43e9d192
IB
10124 default_file_start();
10125}
10126
c2ec330c
AL
10127static void
10128aarch64_init_libfuncs (void)
10129{
10130 /* Half-precision float operations. The compiler handles all operations
10131 with NULL libfuncs by converting to SFmode. */
10132
10133 /* Conversions. */
10134 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
10135 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
10136
10137 /* Arithmetic. */
10138 set_optab_libfunc (add_optab, HFmode, NULL);
10139 set_optab_libfunc (sdiv_optab, HFmode, NULL);
10140 set_optab_libfunc (smul_optab, HFmode, NULL);
10141 set_optab_libfunc (neg_optab, HFmode, NULL);
10142 set_optab_libfunc (sub_optab, HFmode, NULL);
10143
10144 /* Comparisons. */
10145 set_optab_libfunc (eq_optab, HFmode, NULL);
10146 set_optab_libfunc (ne_optab, HFmode, NULL);
10147 set_optab_libfunc (lt_optab, HFmode, NULL);
10148 set_optab_libfunc (le_optab, HFmode, NULL);
10149 set_optab_libfunc (ge_optab, HFmode, NULL);
10150 set_optab_libfunc (gt_optab, HFmode, NULL);
10151 set_optab_libfunc (unord_optab, HFmode, NULL);
10152}
10153
43e9d192 10154/* Target hook for c_mode_for_suffix. */
ef4bddc2 10155static machine_mode
43e9d192
IB
10156aarch64_c_mode_for_suffix (char suffix)
10157{
10158 if (suffix == 'q')
10159 return TFmode;
10160
10161 return VOIDmode;
10162}
10163
3520f7cc
JG
10164/* We can only represent floating point constants which will fit in
10165 "quarter-precision" values. These values are characterised by
10166   a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
10167 by:
10168
10169 (-1)^s * (n/16) * 2^r
10170
10171 Where:
10172 's' is the sign bit.
10173 'n' is an integer in the range 16 <= n <= 31.
10174 'r' is an integer in the range -3 <= r <= 4. */
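/* Worked example (added for exposition): 1.5 = (24/16) * 2^0 and
   0.125 = (16/16) * 2^-3 are both of this form and can be loaded with a
   single FMOV immediate, whereas 0.1 has no such n and r and must be loaded
   from the constant pool instead.  */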
10175
10176/* Return true iff X can be represented as a quarter-precision
10177   floating point immediate operand.  Note, we cannot represent 0.0.  */
10178bool
10179aarch64_float_const_representable_p (rtx x)
10180{
10181 /* This represents our current view of how many bits
10182 make up the mantissa. */
10183 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 10184 int exponent;
3520f7cc 10185 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 10186 REAL_VALUE_TYPE r, m;
807e902e 10187 bool fail;
3520f7cc
JG
10188
10189 if (!CONST_DOUBLE_P (x))
10190 return false;
10191
c2ec330c
AL
10192 /* We don't support HFmode constants yet. */
10193 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
94bfa2da
TV
10194 return false;
10195
3520f7cc
JG
10196 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10197
10198 /* We cannot represent infinities, NaNs or +/-zero. We won't
10199 know if we have +zero until we analyse the mantissa, but we
10200 can reject the other invalid values. */
10201 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
10202 || REAL_VALUE_MINUS_ZERO (r))
10203 return false;
10204
ba96cdfb 10205 /* Extract exponent. */
3520f7cc
JG
10206 r = real_value_abs (&r);
10207 exponent = REAL_EXP (&r);
10208
10209 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
10210 highest (sign) bit, with a fixed binary point at bit point_pos.
10211 m1 holds the low part of the mantissa, m2 the high part.
10212 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
10213 bits for the mantissa, this can fail (low bits will be lost). */
10214 real_ldexp (&m, &r, point_pos - exponent);
807e902e 10215 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
10216
10217 /* If the low part of the mantissa has bits set we cannot represent
10218 the value. */
807e902e 10219 if (w.elt (0) != 0)
3520f7cc
JG
10220 return false;
10221 /* We have rejected the lower HOST_WIDE_INT, so update our
10222 understanding of how many bits lie in the mantissa and
10223 look only at the high HOST_WIDE_INT. */
807e902e 10224 mantissa = w.elt (1);
3520f7cc
JG
10225 point_pos -= HOST_BITS_PER_WIDE_INT;
10226
10227 /* We can only represent values with a mantissa of the form 1.xxxx. */
10228 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
10229 if ((mantissa & mask) != 0)
10230 return false;
10231
10232 /* Having filtered unrepresentable values, we may now remove all
10233 but the highest 5 bits. */
10234 mantissa >>= point_pos - 5;
10235
10236 /* We cannot represent the value 0.0, so reject it. This is handled
10237 elsewhere. */
10238 if (mantissa == 0)
10239 return false;
10240
10241 /* Then, as bit 4 is always set, we can mask it off, leaving
10242 the mantissa in the range [0, 15]. */
10243 mantissa &= ~(1 << 4);
10244 gcc_assert (mantissa <= 15);
10245
10246 /* GCC internally does not use IEEE754-like encoding (where normalized
10247 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
10248 Our mantissa values are shifted 4 places to the left relative to
10249 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
10250 by 5 places to correct for GCC's representation. */
10251 exponent = 5 - exponent;
10252
10253 return (exponent >= 0 && exponent <= 7);
10254}
10255
10256char*
81c2dfb9 10257aarch64_output_simd_mov_immediate (rtx const_vector,
ef4bddc2 10258 machine_mode mode,
3520f7cc
JG
10259 unsigned width)
10260{
3ea63f60 10261 bool is_valid;
3520f7cc 10262 static char templ[40];
3520f7cc 10263 const char *mnemonic;
e4f0f84d 10264 const char *shift_op;
3520f7cc 10265 unsigned int lane_count = 0;
81c2dfb9 10266 char element_char;
3520f7cc 10267
e4f0f84d 10268 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
10269
10270 /* This will return true to show const_vector is legal for use as either
10271     an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
10272 also update INFO to show how the immediate should be generated. */
81c2dfb9 10273 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
10274 gcc_assert (is_valid);
10275
81c2dfb9 10276 element_char = sizetochar (info.element_width);
48063b9d
IB
10277 lane_count = width / info.element_width;
10278
3520f7cc
JG
10279 mode = GET_MODE_INNER (mode);
10280 if (mode == SFmode || mode == DFmode)
10281 {
48063b9d
IB
10282 gcc_assert (info.shift == 0 && ! info.mvn);
10283 if (aarch64_float_const_zero_rtx_p (info.value))
10284 info.value = GEN_INT (0);
10285 else
10286 {
10287#define buf_size 20
10288 REAL_VALUE_TYPE r;
10289 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
10290 char float_buf[buf_size] = {'\0'};
10291 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
10292#undef buf_size
10293
10294 if (lane_count == 1)
10295 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
10296 else
10297 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 10298 lane_count, element_char, float_buf);
48063b9d
IB
10299 return templ;
10300 }
3520f7cc 10301 }
3520f7cc 10302
48063b9d 10303 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 10304 shift_op = info.msl ? "msl" : "lsl";
3520f7cc
JG
10305
10306 if (lane_count == 1)
48063b9d
IB
10307 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
10308 mnemonic, UINTVAL (info.value));
10309 else if (info.shift)
10310 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
10311 ", %s %d", mnemonic, lane_count, element_char,
10312 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 10313 else
48063b9d 10314 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 10315 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
10316 return templ;
10317}
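/* Example templates produced above (illustrative only): the splatted V4SI
   integer 0x00ab0000 yields "movi\t%0.4s, 0xab, lsl 16", its complement
   0xffabffff yields "mvni\t%0.4s, 0x54, lsl 16", and a representable
   floating-point splat such as 1.5 goes through the FMOV branch instead.  */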
10318
b7342d25
IB
10319char*
10320aarch64_output_scalar_simd_mov_immediate (rtx immediate,
ef4bddc2 10321 machine_mode mode)
b7342d25 10322{
ef4bddc2 10323 machine_mode vmode;
b7342d25
IB
10324
10325 gcc_assert (!VECTOR_MODE_P (mode));
10326 vmode = aarch64_simd_container_mode (mode, 64);
10327 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
10328 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
10329}
10330
88b08073
JG
10331/* Split operands into moves from op[1] + op[2] into op[0]. */
10332
10333void
10334aarch64_split_combinev16qi (rtx operands[3])
10335{
10336 unsigned int dest = REGNO (operands[0]);
10337 unsigned int src1 = REGNO (operands[1]);
10338 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 10339 machine_mode halfmode = GET_MODE (operands[1]);
88b08073
JG
10340 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
10341 rtx destlo, desthi;
10342
10343 gcc_assert (halfmode == V16QImode);
10344
10345 if (src1 == dest && src2 == dest + halfregs)
10346 {
10347 /* No-op move. Can't split to nothing; emit something. */
10348 emit_note (NOTE_INSN_DELETED);
10349 return;
10350 }
10351
10352 /* Preserve register attributes for variable tracking. */
10353 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
10354 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
10355 GET_MODE_SIZE (halfmode));
10356
10357 /* Special case of reversed high/low parts. */
10358 if (reg_overlap_mentioned_p (operands[2], destlo)
10359 && reg_overlap_mentioned_p (operands[1], desthi))
10360 {
10361 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
10362 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
10363 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
10364 }
10365 else if (!reg_overlap_mentioned_p (operands[2], destlo))
10366 {
10367 /* Try to avoid unnecessary moves if part of the result
10368 is in the right place already. */
10369 if (src1 != dest)
10370 emit_move_insn (destlo, operands[1]);
10371 if (src2 != dest + halfregs)
10372 emit_move_insn (desthi, operands[2]);
10373 }
10374 else
10375 {
10376 if (src2 != dest + halfregs)
10377 emit_move_insn (desthi, operands[2]);
10378 if (src1 != dest)
10379 emit_move_insn (destlo, operands[1]);
10380 }
10381}
10382
10383/* vec_perm support. */
10384
10385#define MAX_VECT_LEN 16
10386
10387struct expand_vec_perm_d
10388{
10389 rtx target, op0, op1;
10390 unsigned char perm[MAX_VECT_LEN];
ef4bddc2 10391 machine_mode vmode;
88b08073
JG
10392 unsigned char nelt;
10393 bool one_vector_p;
10394 bool testing_p;
10395};
10396
10397/* Generate a variable permutation. */
10398
10399static void
10400aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
10401{
ef4bddc2 10402 machine_mode vmode = GET_MODE (target);
88b08073
JG
10403 bool one_vector_p = rtx_equal_p (op0, op1);
10404
10405 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
10406 gcc_checking_assert (GET_MODE (op0) == vmode);
10407 gcc_checking_assert (GET_MODE (op1) == vmode);
10408 gcc_checking_assert (GET_MODE (sel) == vmode);
10409 gcc_checking_assert (TARGET_SIMD);
10410
10411 if (one_vector_p)
10412 {
10413 if (vmode == V8QImode)
10414 {
10415 /* Expand the argument to a V16QI mode by duplicating it. */
10416 rtx pair = gen_reg_rtx (V16QImode);
10417 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
10418 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
10419 }
10420 else
10421 {
10422 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
10423 }
10424 }
10425 else
10426 {
10427 rtx pair;
10428
10429 if (vmode == V8QImode)
10430 {
10431 pair = gen_reg_rtx (V16QImode);
10432 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
10433 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
10434 }
10435 else
10436 {
10437 pair = gen_reg_rtx (OImode);
10438 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
10439 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
10440 }
10441 }
10442}
10443
10444void
10445aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
10446{
ef4bddc2 10447 machine_mode vmode = GET_MODE (target);
c9d1a16a 10448 unsigned int nelt = GET_MODE_NUNITS (vmode);
88b08073 10449 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 10450 rtx mask;
88b08073
JG
10451
10452 /* The TBL instruction does not use a modulo index, so we must take care
10453 of that ourselves. */
f7c4e5b8
AL
10454 mask = aarch64_simd_gen_const_vector_dup (vmode,
10455 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
10456 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
10457
f7c4e5b8
AL
10458 /* For big-endian, we also need to reverse the index within the vector
10459 (but not which vector). */
10460 if (BYTES_BIG_ENDIAN)
10461 {
10462 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
10463 if (!one_vector_p)
10464 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
10465 sel = expand_simple_binop (vmode, XOR, sel, mask,
10466 NULL, 0, OPTAB_LIB_WIDEN);
10467 }
88b08073
JG
10468 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
10469}
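/* Example (for exposition only): with two V16QI inputs the selector is ANDed
   with 31 so out-of-range indices wrap, and on big-endian each index is then
   XORed with 15, which reverses the byte position within a vector while
   leaving the "which input vector" bit untouched.  */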
10470
cc4d934f
JG
10471/* Recognize patterns suitable for the TRN instructions. */
10472static bool
10473aarch64_evpc_trn (struct expand_vec_perm_d *d)
10474{
10475 unsigned int i, odd, mask, nelt = d->nelt;
10476 rtx out, in0, in1, x;
10477 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 10478 machine_mode vmode = d->vmode;
cc4d934f
JG
10479
10480 if (GET_MODE_UNIT_SIZE (vmode) > 8)
10481 return false;
10482
10483 /* Note that these are little-endian tests.
10484 We correct for big-endian later. */
10485 if (d->perm[0] == 0)
10486 odd = 0;
10487 else if (d->perm[0] == 1)
10488 odd = 1;
10489 else
10490 return false;
10491 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
10492
10493 for (i = 0; i < nelt; i += 2)
10494 {
10495 if (d->perm[i] != i + odd)
10496 return false;
10497 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
10498 return false;
10499 }
10500
10501 /* Success! */
10502 if (d->testing_p)
10503 return true;
10504
10505 in0 = d->op0;
10506 in1 = d->op1;
10507 if (BYTES_BIG_ENDIAN)
10508 {
10509 x = in0, in0 = in1, in1 = x;
10510 odd = !odd;
10511 }
10512 out = d->target;
10513
10514 if (odd)
10515 {
10516 switch (vmode)
10517 {
10518 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
10519 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
10520 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
10521 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
10522 case V4SImode: gen = gen_aarch64_trn2v4si; break;
10523 case V2SImode: gen = gen_aarch64_trn2v2si; break;
10524 case V2DImode: gen = gen_aarch64_trn2v2di; break;
10525 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
10526 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
10527 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
10528 default:
10529 return false;
10530 }
10531 }
10532 else
10533 {
10534 switch (vmode)
10535 {
10536 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
10537 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
10538 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
10539 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
10540 case V4SImode: gen = gen_aarch64_trn1v4si; break;
10541 case V2SImode: gen = gen_aarch64_trn1v2si; break;
10542 case V2DImode: gen = gen_aarch64_trn1v2di; break;
10543 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
10544 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
10545 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
10546 default:
10547 return false;
10548 }
10549 }
10550
10551 emit_insn (gen (out, in0, in1));
10552 return true;
10553}
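/* Example (illustrative): for two V4SI operands the permutation
   { 0, 4, 2, 6 } is recognised above as TRN1 and { 1, 5, 3, 7 } as TRN2,
   before the big-endian operand swap is applied.  */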
10554
10555/* Recognize patterns suitable for the UZP instructions. */
10556static bool
10557aarch64_evpc_uzp (struct expand_vec_perm_d *d)
10558{
10559 unsigned int i, odd, mask, nelt = d->nelt;
10560 rtx out, in0, in1, x;
10561 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 10562 machine_mode vmode = d->vmode;
cc4d934f
JG
10563
10564 if (GET_MODE_UNIT_SIZE (vmode) > 8)
10565 return false;
10566
10567 /* Note that these are little-endian tests.
10568 We correct for big-endian later. */
10569 if (d->perm[0] == 0)
10570 odd = 0;
10571 else if (d->perm[0] == 1)
10572 odd = 1;
10573 else
10574 return false;
10575 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
10576
10577 for (i = 0; i < nelt; i++)
10578 {
10579 unsigned elt = (i * 2 + odd) & mask;
10580 if (d->perm[i] != elt)
10581 return false;
10582 }
10583
10584 /* Success! */
10585 if (d->testing_p)
10586 return true;
10587
10588 in0 = d->op0;
10589 in1 = d->op1;
10590 if (BYTES_BIG_ENDIAN)
10591 {
10592 x = in0, in0 = in1, in1 = x;
10593 odd = !odd;
10594 }
10595 out = d->target;
10596
10597 if (odd)
10598 {
10599 switch (vmode)
10600 {
10601 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
10602 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
10603 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
10604 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
10605 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
10606 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
10607 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
10608 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
10609 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
10610 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
10611 default:
10612 return false;
10613 }
10614 }
10615 else
10616 {
10617 switch (vmode)
10618 {
10619 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
10620 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
10621 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
10622 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
10623 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
10624 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
10625 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
10626 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
10627 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
10628 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
10629 default:
10630 return false;
10631 }
10632 }
10633
10634 emit_insn (gen (out, in0, in1));
10635 return true;
10636}
10637
10638/* Recognize patterns suitable for the ZIP instructions. */
10639static bool
10640aarch64_evpc_zip (struct expand_vec_perm_d *d)
10641{
10642 unsigned int i, high, mask, nelt = d->nelt;
10643 rtx out, in0, in1, x;
10644 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 10645 machine_mode vmode = d->vmode;
cc4d934f
JG
10646
10647 if (GET_MODE_UNIT_SIZE (vmode) > 8)
10648 return false;
10649
10650 /* Note that these are little-endian tests.
10651 We correct for big-endian later. */
10652 high = nelt / 2;
10653 if (d->perm[0] == high)
10654 /* Do Nothing. */
10655 ;
10656 else if (d->perm[0] == 0)
10657 high = 0;
10658 else
10659 return false;
10660 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
10661
10662 for (i = 0; i < nelt / 2; i++)
10663 {
10664 unsigned elt = (i + high) & mask;
10665 if (d->perm[i * 2] != elt)
10666 return false;
10667 elt = (elt + nelt) & mask;
10668 if (d->perm[i * 2 + 1] != elt)
10669 return false;
10670 }
10671
10672 /* Success! */
10673 if (d->testing_p)
10674 return true;
10675
10676 in0 = d->op0;
10677 in1 = d->op1;
10678 if (BYTES_BIG_ENDIAN)
10679 {
10680 x = in0, in0 = in1, in1 = x;
10681 high = !high;
10682 }
10683 out = d->target;
10684
10685 if (high)
10686 {
10687 switch (vmode)
10688 {
10689 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
10690 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
10691 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
10692 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
10693 case V4SImode: gen = gen_aarch64_zip2v4si; break;
10694 case V2SImode: gen = gen_aarch64_zip2v2si; break;
10695 case V2DImode: gen = gen_aarch64_zip2v2di; break;
10696 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
10697 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
10698 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
10699 default:
10700 return false;
10701 }
10702 }
10703 else
10704 {
10705 switch (vmode)
10706 {
10707 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
10708 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
10709 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
10710 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
10711 case V4SImode: gen = gen_aarch64_zip1v4si; break;
10712 case V2SImode: gen = gen_aarch64_zip1v2si; break;
10713 case V2DImode: gen = gen_aarch64_zip1v2di; break;
10714 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
10715 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
10716 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
10717 default:
10718 return false;
10719 }
10720 }
10721
10722 emit_insn (gen (out, in0, in1));
10723 return true;
10724}
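/* Example (illustrative): for two V4SI operands, { 0, 2, 4, 6 } and
   { 1, 3, 5, 7 } are handled as UZP1/UZP2 by aarch64_evpc_uzp above, while
   { 0, 4, 1, 5 } and { 2, 6, 3, 7 } map to ZIP1/ZIP2 here.  */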
10725
ae0533da
AL
10726/* Recognize patterns for the EXT insn. */
10727
10728static bool
10729aarch64_evpc_ext (struct expand_vec_perm_d *d)
10730{
10731 unsigned int i, nelt = d->nelt;
10732 rtx (*gen) (rtx, rtx, rtx, rtx);
10733 rtx offset;
10734
10735 unsigned int location = d->perm[0]; /* Always < nelt. */
10736
10737 /* Check if the extracted indices are increasing by one. */
10738 for (i = 1; i < nelt; i++)
10739 {
10740 unsigned int required = location + i;
10741 if (d->one_vector_p)
10742 {
10743 /* We'll pass the same vector in twice, so allow indices to wrap. */
10744 required &= (nelt - 1);
10745 }
10746 if (d->perm[i] != required)
10747 return false;
10748 }
10749
ae0533da
AL
10750 switch (d->vmode)
10751 {
10752 case V16QImode: gen = gen_aarch64_extv16qi; break;
10753 case V8QImode: gen = gen_aarch64_extv8qi; break;
10754 case V4HImode: gen = gen_aarch64_extv4hi; break;
10755 case V8HImode: gen = gen_aarch64_extv8hi; break;
10756 case V2SImode: gen = gen_aarch64_extv2si; break;
10757 case V4SImode: gen = gen_aarch64_extv4si; break;
10758 case V2SFmode: gen = gen_aarch64_extv2sf; break;
10759 case V4SFmode: gen = gen_aarch64_extv4sf; break;
10760 case V2DImode: gen = gen_aarch64_extv2di; break;
10761 case V2DFmode: gen = gen_aarch64_extv2df; break;
10762 default:
10763 return false;
10764 }
10765
10766 /* Success! */
10767 if (d->testing_p)
10768 return true;
10769
b31e65bb
AL
10770 /* The case where (location == 0) is a no-op for both big- and little-endian,
10771 and is removed by the mid-end at optimization levels -O1 and higher. */
10772
10773 if (BYTES_BIG_ENDIAN && (location != 0))
ae0533da
AL
10774 {
10775 /* After setup, we want the high elements of the first vector (stored
10776 at the LSB end of the register), and the low elements of the second
10777 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 10778 std::swap (d->op0, d->op1);
ae0533da
AL
10779 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
10780 location = nelt - location;
10781 }
10782
10783 offset = GEN_INT (location);
10784 emit_insn (gen (d->target, d->op0, d->op1, offset));
10785 return true;
10786}
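/* Example (illustrative): for two V4SI operands the permutation
   { 1, 2, 3, 4 } selects a contiguous window starting at lane 1 and is
   emitted as a single EXT instruction; with one operand, { 3, 0, 1, 2 }
   wraps around and is also accepted.  */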
10787
923fcec3
AL
10788/* Recognize patterns for the REV insns. */
10789
10790static bool
10791aarch64_evpc_rev (struct expand_vec_perm_d *d)
10792{
10793 unsigned int i, j, diff, nelt = d->nelt;
10794 rtx (*gen) (rtx, rtx);
10795
10796 if (!d->one_vector_p)
10797 return false;
10798
10799 diff = d->perm[0];
10800 switch (diff)
10801 {
10802 case 7:
10803 switch (d->vmode)
10804 {
10805 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
10806 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
10807 default:
10808 return false;
10809 }
10810 break;
10811 case 3:
10812 switch (d->vmode)
10813 {
10814 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
10815 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
10816 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
10817 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
10818 default:
10819 return false;
10820 }
10821 break;
10822 case 1:
10823 switch (d->vmode)
10824 {
10825 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
10826 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
10827 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
10828 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
10829 case V4SImode: gen = gen_aarch64_rev64v4si; break;
10830 case V2SImode: gen = gen_aarch64_rev64v2si; break;
10831 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
10832 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
10833 default:
10834 return false;
10835 }
10836 break;
10837 default:
10838 return false;
10839 }
10840
10841 for (i = 0; i < nelt ; i += diff + 1)
10842 for (j = 0; j <= diff; j += 1)
10843 {
10844 /* This is guaranteed to be true as the value of diff
10845	   is 7, 3 or 1 and we should have enough elements in the
10846 queue to generate this. Getting a vector mask with a
10847 value of diff other than these values implies that
10848 something is wrong by the time we get here. */
10849 gcc_assert (i + j < nelt);
10850 if (d->perm[i + j] != i + diff - j)
10851 return false;
10852 }
10853
10854 /* Success! */
10855 if (d->testing_p)
10856 return true;
10857
10858 emit_insn (gen (d->target, d->op0));
10859 return true;
10860}
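/* Example (illustrative): the single-operand V8HI permutation
   { 3, 2, 1, 0, 7, 6, 5, 4 } has diff == 3 and is matched above as REV64,
   reversing the 16-bit elements within each 64-bit chunk.  */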
10861
91bd4114
JG
10862static bool
10863aarch64_evpc_dup (struct expand_vec_perm_d *d)
10864{
10865 rtx (*gen) (rtx, rtx, rtx);
10866 rtx out = d->target;
10867 rtx in0;
ef4bddc2 10868 machine_mode vmode = d->vmode;
91bd4114
JG
10869 unsigned int i, elt, nelt = d->nelt;
10870 rtx lane;
10871
91bd4114
JG
10872 elt = d->perm[0];
10873 for (i = 1; i < nelt; i++)
10874 {
10875 if (elt != d->perm[i])
10876 return false;
10877 }
10878
10879 /* The generic preparation in aarch64_expand_vec_perm_const_1
10880 swaps the operand order and the permute indices if it finds
10881 d->perm[0] to be in the second operand. Thus, we can always
10882 use d->op0 and need not do any extra arithmetic to get the
10883 correct lane number. */
10884 in0 = d->op0;
f901401e 10885 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114
JG
10886
10887 switch (vmode)
10888 {
10889 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
10890 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
10891 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
10892 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
10893 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
10894 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
10895 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
10896 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
10897 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
10898 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
10899 default:
10900 return false;
10901 }
10902
10903 emit_insn (gen (out, in0, lane));
10904 return true;
10905}
10906
88b08073
JG
10907static bool
10908aarch64_evpc_tbl (struct expand_vec_perm_d *d)
10909{
10910 rtx rperm[MAX_VECT_LEN], sel;
ef4bddc2 10911 machine_mode vmode = d->vmode;
88b08073
JG
10912 unsigned int i, nelt = d->nelt;
10913
88b08073
JG
10914 if (d->testing_p)
10915 return true;
10916
10917   /* Generic code will try constant permutation twice: once with the
10918 original mode and again with the elements lowered to QImode.
10919 So wait and don't do the selector expansion ourselves. */
10920 if (vmode != V8QImode && vmode != V16QImode)
10921 return false;
10922
10923 for (i = 0; i < nelt; ++i)
bbcc9c00
TB
10924 {
10925 int nunits = GET_MODE_NUNITS (vmode);
10926
10927 /* If big-endian and two vectors we end up with a weird mixed-endian
10928 mode on NEON. Reverse the index within each word but not the word
10929 itself. */
10930 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
10931 : d->perm[i]);
10932 }
88b08073
JG
10933 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
10934 sel = force_reg (vmode, sel);
10935
10936 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
10937 return true;
10938}
10939
10940static bool
10941aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
10942{
10943 /* The pattern matching functions above are written to look for a small
10944 number to begin the sequence (0, 1, N/2). If we begin with an index
10945 from the second operand, we can swap the operands. */
10946 if (d->perm[0] >= d->nelt)
10947 {
10948 unsigned i, nelt = d->nelt;
88b08073 10949
0696116a 10950 gcc_assert (nelt == (nelt & -nelt));
88b08073 10951 for (i = 0; i < nelt; ++i)
0696116a 10952 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
88b08073 10953
cb5c6c29 10954 std::swap (d->op0, d->op1);
88b08073
JG
10955 }
10956
10957 if (TARGET_SIMD)
cc4d934f 10958 {
923fcec3
AL
10959 if (aarch64_evpc_rev (d))
10960 return true;
10961 else if (aarch64_evpc_ext (d))
ae0533da 10962 return true;
f901401e
AL
10963 else if (aarch64_evpc_dup (d))
10964 return true;
ae0533da 10965 else if (aarch64_evpc_zip (d))
cc4d934f
JG
10966 return true;
10967 else if (aarch64_evpc_uzp (d))
10968 return true;
10969 else if (aarch64_evpc_trn (d))
10970 return true;
10971 return aarch64_evpc_tbl (d);
10972 }
88b08073
JG
10973 return false;
10974}
10975
10976/* Expand a vec_perm_const pattern. */
10977
10978bool
10979aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
10980{
10981 struct expand_vec_perm_d d;
10982 int i, nelt, which;
10983
10984 d.target = target;
10985 d.op0 = op0;
10986 d.op1 = op1;
10987
10988 d.vmode = GET_MODE (target);
10989 gcc_assert (VECTOR_MODE_P (d.vmode));
10990 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
10991 d.testing_p = false;
10992
10993 for (i = which = 0; i < nelt; ++i)
10994 {
10995 rtx e = XVECEXP (sel, 0, i);
10996 int ei = INTVAL (e) & (2 * nelt - 1);
10997 which |= (ei < nelt ? 1 : 2);
10998 d.perm[i] = ei;
10999 }
11000
11001 switch (which)
11002 {
11003 default:
11004 gcc_unreachable ();
11005
11006 case 3:
11007 d.one_vector_p = false;
11008 if (!rtx_equal_p (op0, op1))
11009 break;
11010
11011 /* The elements of PERM do not suggest that only the first operand
11012 is used, but both operands are identical. Allow easier matching
11013 of the permutation by folding the permutation into the single
11014 input vector. */
11015 /* Fall Through. */
11016 case 2:
11017 for (i = 0; i < nelt; ++i)
11018 d.perm[i] &= nelt - 1;
11019 d.op0 = op1;
11020 d.one_vector_p = true;
11021 break;
11022
11023 case 1:
11024 d.op1 = op0;
11025 d.one_vector_p = true;
11026 break;
11027 }
11028
11029 return aarch64_expand_vec_perm_const_1 (&d);
11030}
11031
11032static bool
ef4bddc2 11033aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
88b08073
JG
11034 const unsigned char *sel)
11035{
11036 struct expand_vec_perm_d d;
11037 unsigned int i, nelt, which;
11038 bool ret;
11039
11040 d.vmode = vmode;
11041 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11042 d.testing_p = true;
11043 memcpy (d.perm, sel, nelt);
11044
11045 /* Calculate whether all elements are in one vector. */
11046 for (i = which = 0; i < nelt; ++i)
11047 {
11048 unsigned char e = d.perm[i];
11049 gcc_assert (e < 2 * nelt);
11050 which |= (e < nelt ? 1 : 2);
11051 }
11052
11053 /* If all elements are from the second vector, reindex as if from the
11054 first vector. */
11055 if (which == 2)
11056 for (i = 0; i < nelt; ++i)
11057 d.perm[i] -= nelt;
11058
11059 /* Check whether the mask can be applied to a single vector. */
11060 d.one_vector_p = (which != 3);
11061
11062 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
11063 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
11064 if (!d.one_vector_p)
11065 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
11066
11067 start_sequence ();
11068 ret = aarch64_expand_vec_perm_const_1 (&d);
11069 end_sequence ();
11070
11071 return ret;
11072}
11073
668046d1
DS
11074rtx
11075aarch64_reverse_mask (enum machine_mode mode)
11076{
 11077 /* We have to reverse each vector because we don't have
 11078 a permuted load that can reverse-load according to ABI rules. */
11079 rtx mask;
11080 rtvec v = rtvec_alloc (16);
11081 int i, j;
11082 int nunits = GET_MODE_NUNITS (mode);
11083 int usize = GET_MODE_UNIT_SIZE (mode);
11084
11085 gcc_assert (BYTES_BIG_ENDIAN);
11086 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
11087
11088 for (i = 0; i < nunits; i++)
11089 for (j = 0; j < usize; j++)
11090 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
11091 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
11092 return force_reg (V16QImode, mask);
11093}
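/* Illustrative example (not part of the original file): for V4SImode the
   mask built above is the byte vector
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
   i.e. each 4-byte element is byte-reversed in place.  */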
11094
97e1ad78
JG
11095/* Implement MODES_TIEABLE_P. */
11096
11097bool
ef4bddc2 11098aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
11099{
11100 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
11101 return true;
11102
11103 /* We specifically want to allow elements of "structure" modes to
11104 be tieable to the structure. This more general condition allows
11105 other rarer situations too. */
11106 if (TARGET_SIMD
11107 && aarch64_vector_mode_p (mode1)
11108 && aarch64_vector_mode_p (mode2))
11109 return true;
11110
11111 return false;
11112}
11113
e2c75eea
JG
11114/* Return a new RTX holding the result of moving POINTER forward by
11115 AMOUNT bytes. */
11116
11117static rtx
11118aarch64_move_pointer (rtx pointer, int amount)
11119{
11120 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
11121
11122 return adjust_automodify_address (pointer, GET_MODE (pointer),
11123 next, amount);
11124}
11125
11126/* Return a new RTX holding the result of moving POINTER forward by the
11127 size of the mode it points to. */
11128
11129static rtx
11130aarch64_progress_pointer (rtx pointer)
11131{
11132 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
11133
11134 return aarch64_move_pointer (pointer, amount);
11135}
11136
11137/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
11138 MODE bytes. */
11139
11140static void
11141aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 11142 machine_mode mode)
e2c75eea
JG
11143{
11144 rtx reg = gen_reg_rtx (mode);
11145
11146 /* "Cast" the pointers to the correct mode. */
11147 *src = adjust_address (*src, mode, 0);
11148 *dst = adjust_address (*dst, mode, 0);
11149 /* Emit the memcpy. */
11150 emit_move_insn (reg, *src);
11151 emit_move_insn (*dst, reg);
11152 /* Move the pointers forward. */
11153 *src = aarch64_progress_pointer (*src);
11154 *dst = aarch64_progress_pointer (*dst);
11155}
11156
11157/* Expand movmem, as if from a __builtin_memcpy. Return true if
11158 we succeed, otherwise return false. */
11159
11160bool
11161aarch64_expand_movmem (rtx *operands)
11162{
11163 unsigned int n;
11164 rtx dst = operands[0];
11165 rtx src = operands[1];
11166 rtx base;
11167 bool speed_p = !optimize_function_for_size_p (cfun);
11168
11169 /* When optimizing for size, give a better estimate of the length of a
11170 memcpy call, but use the default otherwise. */
11171 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
11172
11173 /* We can't do anything smart if the amount to copy is not constant. */
11174 if (!CONST_INT_P (operands[2]))
11175 return false;
11176
11177 n = UINTVAL (operands[2]);
11178
11179 /* Try to keep the number of instructions low. For cases below 16 bytes we
11180 need to make at most two moves. For cases above 16 bytes it will be one
11181 move for each 16 byte chunk, then at most two additional moves. */
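  /* A worked example, for illustration only: n == 35 needs two 16-byte
     chunks plus (because 35 % 16 != 0) two extra moves, 4 in total, which
     is compared against max_instructions (15 / 2 == 7 when optimizing for
     speed).  */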
11182 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
11183 return false;
11184
11185 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11186 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
11187
11188 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
11189 src = adjust_automodify_address (src, VOIDmode, base, 0);
11190
11191 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
11192 1-byte chunk. */
11193 if (n < 4)
11194 {
11195 if (n >= 2)
11196 {
11197 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
11198 n -= 2;
11199 }
11200
11201 if (n == 1)
11202 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
11203
11204 return true;
11205 }
11206
11207 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
11208 4-byte chunk, partially overlapping with the previously copied chunk. */
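  /* For instance (illustrative only): n == 7 copies bytes 0-3 with the first
     SImode move, then steps both pointers back by one (move == -1) so the
     second SImode move covers bytes 3-6, overlapping on byte 3.  */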
11209 if (n < 8)
11210 {
11211 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
11212 n -= 4;
11213 if (n > 0)
11214 {
11215 int move = n - 4;
11216
11217 src = aarch64_move_pointer (src, move);
11218 dst = aarch64_move_pointer (dst, move);
11219 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
11220 }
11221 return true;
11222 }
11223
11224 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
11225 them, then (if applicable) an 8-byte chunk. */
11226 while (n >= 8)
11227 {
11228 if (n / 16)
11229 {
11230 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
11231 n -= 16;
11232 }
11233 else
11234 {
11235 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
11236 n -= 8;
11237 }
11238 }
11239
11240 /* Finish the final bytes of the copy. We can always do this in one
11241 instruction. We either copy the exact amount we need, or partially
 11242 overlap with the previous chunk we copied and copy 8 bytes. */
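  /* For example (illustrative only): with n == 5 left, the final branch backs
     both pointers up by three bytes (move == 5 - 8 == -3) and issues a single
     DImode move, so the last eight bytes are rewritten and the copy ends at
     exactly the right place.  */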
11243 if (n == 0)
11244 return true;
11245 else if (n == 1)
11246 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
11247 else if (n == 2)
11248 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
11249 else if (n == 4)
11250 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
11251 else
11252 {
11253 if (n == 3)
11254 {
11255 src = aarch64_move_pointer (src, -1);
11256 dst = aarch64_move_pointer (dst, -1);
11257 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
11258 }
11259 else
11260 {
11261 int move = n - 8;
11262
11263 src = aarch64_move_pointer (src, move);
11264 dst = aarch64_move_pointer (dst, move);
11265 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
11266 }
11267 }
11268
11269 return true;
11270}
11271
a3125fc2
CL
11272/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
11273
11274static unsigned HOST_WIDE_INT
11275aarch64_asan_shadow_offset (void)
11276{
11277 return (HOST_WIDE_INT_1 << 36);
11278}
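/* Illustrative note (the 8-to-1 shadow encoding is the usual ASan mapping,
   an assumption not stated in this file): with this offset, the shadow byte
   for an application address ADDR lives at
     shadow = (ADDR >> 3) + (1ULL << 36).  */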
11279
d3006da6 11280static bool
445d7826 11281aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
d3006da6
JG
11282 unsigned int align,
11283 enum by_pieces_operation op,
11284 bool speed_p)
11285{
11286 /* STORE_BY_PIECES can be used when copying a constant string, but
11287 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
11288 For now we always fail this and let the move_by_pieces code copy
11289 the string from read-only memory. */
11290 if (op == STORE_BY_PIECES)
11291 return false;
11292
11293 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
11294}
11295
5f3bc026
ZC
11296static enum machine_mode
11297aarch64_code_to_ccmode (enum rtx_code code)
11298{
11299 switch (code)
11300 {
11301 case NE:
11302 return CC_DNEmode;
11303
11304 case EQ:
11305 return CC_DEQmode;
11306
11307 case LE:
11308 return CC_DLEmode;
11309
11310 case LT:
11311 return CC_DLTmode;
11312
11313 case GE:
11314 return CC_DGEmode;
11315
11316 case GT:
11317 return CC_DGTmode;
11318
11319 case LEU:
11320 return CC_DLEUmode;
11321
11322 case LTU:
11323 return CC_DLTUmode;
11324
11325 case GEU:
11326 return CC_DGEUmode;
11327
11328 case GTU:
11329 return CC_DGTUmode;
11330
11331 default:
11332 return CCmode;
11333 }
11334}
11335
11336static rtx
11337aarch64_gen_ccmp_first (rtx *prep_seq, rtx *gen_seq,
11338 int code, tree treeop0, tree treeop1)
11339{
11340 enum machine_mode op_mode, cmp_mode, cc_mode;
11341 rtx op0, op1, cmp, target;
11342 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
11343 enum insn_code icode;
11344 struct expand_operand ops[4];
11345
11346 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) code);
11347 if (cc_mode == CCmode)
11348 return NULL_RTX;
11349
11350 start_sequence ();
11351 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
11352
11353 op_mode = GET_MODE (op0);
11354 if (op_mode == VOIDmode)
11355 op_mode = GET_MODE (op1);
11356
11357 switch (op_mode)
11358 {
11359 case QImode:
11360 case HImode:
11361 case SImode:
11362 cmp_mode = SImode;
11363 icode = CODE_FOR_cmpsi;
11364 break;
11365
11366 case DImode:
11367 cmp_mode = DImode;
11368 icode = CODE_FOR_cmpdi;
11369 break;
11370
11371 default:
11372 end_sequence ();
11373 return NULL_RTX;
11374 }
11375
11376 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
11377 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
11378 if (!op0 || !op1)
11379 {
11380 end_sequence ();
11381 return NULL_RTX;
11382 }
11383 *prep_seq = get_insns ();
11384 end_sequence ();
11385
11386 cmp = gen_rtx_fmt_ee ((enum rtx_code) code, cmp_mode, op0, op1);
11387 target = gen_rtx_REG (CCmode, CC_REGNUM);
11388
11389 create_output_operand (&ops[0], target, CCmode);
11390 create_fixed_operand (&ops[1], cmp);
11391 create_fixed_operand (&ops[2], op0);
11392 create_fixed_operand (&ops[3], op1);
11393
11394 start_sequence ();
11395 if (!maybe_expand_insn (icode, 4, ops))
11396 {
11397 end_sequence ();
11398 return NULL_RTX;
11399 }
11400 *gen_seq = get_insns ();
11401 end_sequence ();
11402
11403 return gen_rtx_REG (cc_mode, CC_REGNUM);
11404}
11405
11406static rtx
11407aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code,
11408 tree treeop0, tree treeop1, int bit_code)
11409{
11410 rtx op0, op1, cmp0, cmp1, target;
11411 enum machine_mode op_mode, cmp_mode, cc_mode;
11412 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
11413 enum insn_code icode = CODE_FOR_ccmp_andsi;
11414 struct expand_operand ops[6];
11415
11416 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) cmp_code);
11417 if (cc_mode == CCmode)
11418 return NULL_RTX;
11419
11420 push_to_sequence ((rtx_insn*) *prep_seq);
11421 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
11422
11423 op_mode = GET_MODE (op0);
11424 if (op_mode == VOIDmode)
11425 op_mode = GET_MODE (op1);
11426
11427 switch (op_mode)
11428 {
11429 case QImode:
11430 case HImode:
11431 case SImode:
11432 cmp_mode = SImode;
11433 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_andsi
11434 : CODE_FOR_ccmp_iorsi;
11435 break;
11436
11437 case DImode:
11438 cmp_mode = DImode;
11439 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_anddi
11440 : CODE_FOR_ccmp_iordi;
11441 break;
11442
11443 default:
11444 end_sequence ();
11445 return NULL_RTX;
11446 }
11447
11448 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
11449 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
11450 if (!op0 || !op1)
11451 {
11452 end_sequence ();
11453 return NULL_RTX;
11454 }
11455 *prep_seq = get_insns ();
11456 end_sequence ();
11457
11458 target = gen_rtx_REG (cc_mode, CC_REGNUM);
11459 cmp1 = gen_rtx_fmt_ee ((enum rtx_code) cmp_code, cmp_mode, op0, op1);
11460 cmp0 = gen_rtx_fmt_ee (NE, cmp_mode, prev, const0_rtx);
11461
11462 create_fixed_operand (&ops[0], prev);
11463 create_fixed_operand (&ops[1], target);
11464 create_fixed_operand (&ops[2], op0);
11465 create_fixed_operand (&ops[3], op1);
11466 create_fixed_operand (&ops[4], cmp0);
11467 create_fixed_operand (&ops[5], cmp1);
11468
11469 push_to_sequence ((rtx_insn*) *gen_seq);
11470 if (!maybe_expand_insn (icode, 6, ops))
11471 {
11472 end_sequence ();
11473 return NULL_RTX;
11474 }
11475
11476 *gen_seq = get_insns ();
11477 end_sequence ();
11478
11479 return target;
11480}
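/* Illustrative sketch (not in the original file): for a condition such as
   "a == 0 && b > 5" the two hooks above typically cooperate to produce
      cmp	w0, 0
      ccmp	w1, 5, <nzcv>, eq
   followed by a conditional use of the flags, where <nzcv> stands for the
   flag constant chosen by the ccmp pattern so that the combined condition
   fails whenever the first comparison does.  */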
11481
11482#undef TARGET_GEN_CCMP_FIRST
11483#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
11484
11485#undef TARGET_GEN_CCMP_NEXT
11486#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
11487
6a569cdd
KT
11488/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
11489 instruction fusion of some sort. */
11490
11491static bool
11492aarch64_macro_fusion_p (void)
11493{
b175b679 11494 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
11495}
11496
11497
11498/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
11499 should be kept together during scheduling. */
11500
11501static bool
11502aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
11503{
11504 rtx set_dest;
11505 rtx prev_set = single_set (prev);
11506 rtx curr_set = single_set (curr);
11507 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
11508 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
11509
11510 if (!aarch64_macro_fusion_p ())
11511 return false;
11512
11513 if (simple_sets_p
b175b679 11514 && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
11515 {
11516 /* We are trying to match:
11517 prev (mov) == (set (reg r0) (const_int imm16))
11518 curr (movk) == (set (zero_extract (reg r0)
11519 (const_int 16)
11520 (const_int 16))
11521 (const_int imm16_1)) */
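      /* For illustration (not from the original source), a fusible pair here
	 looks like:
	    mov	 w0, 0x1234
	    movk w0, 0x5678, lsl 16
	 i.e. both instructions write the same register and the MOVK fills
	 bits [31:16].  */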
11522
11523 set_dest = SET_DEST (curr_set);
11524
11525 if (GET_CODE (set_dest) == ZERO_EXTRACT
11526 && CONST_INT_P (SET_SRC (curr_set))
11527 && CONST_INT_P (SET_SRC (prev_set))
11528 && CONST_INT_P (XEXP (set_dest, 2))
11529 && INTVAL (XEXP (set_dest, 2)) == 16
11530 && REG_P (XEXP (set_dest, 0))
11531 && REG_P (SET_DEST (prev_set))
11532 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
11533 {
11534 return true;
11535 }
11536 }
11537
9bbe08fe 11538 if (simple_sets_p
b175b679 11539 && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
11540 {
11541
11542 /* We're trying to match:
11543 prev (adrp) == (set (reg r1)
11544 (high (symbol_ref ("SYM"))))
11545 curr (add) == (set (reg r0)
11546 (lo_sum (reg r1)
11547 (symbol_ref ("SYM"))))
11548 Note that r0 need not necessarily be the same as r1, especially
11549 during pre-regalloc scheduling. */
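      /* For illustration (not from the original source), the fused pair is
	 the usual small-model address computation:
	    adrp x1, sym
	    add	 x0, x1, :lo12:sym
	 where x0 and x1 may differ before register allocation.  */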
11550
11551 if (satisfies_constraint_Ush (SET_SRC (prev_set))
11552 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
11553 {
11554 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
11555 && REG_P (XEXP (SET_SRC (curr_set), 0))
11556 && REGNO (XEXP (SET_SRC (curr_set), 0))
11557 == REGNO (SET_DEST (prev_set))
11558 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
11559 XEXP (SET_SRC (curr_set), 1)))
11560 return true;
11561 }
11562 }
11563
cd0cb232 11564 if (simple_sets_p
b175b679 11565 && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
11566 {
11567
11568 /* We're trying to match:
11569 prev (movk) == (set (zero_extract (reg r0)
11570 (const_int 16)
11571 (const_int 32))
11572 (const_int imm16_1))
11573 curr (movk) == (set (zero_extract (reg r0)
11574 (const_int 16)
11575 (const_int 48))
11576 (const_int imm16_2)) */
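      /* For illustration (not from the original source):
	    movk x0, 0x1234, lsl 32
	    movk x0, 0x5678, lsl 48
	 i.e. the two MOVKs fill the top two 16-bit chunks of the same
	 register.  */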
11577
11578 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
11579 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
11580 && REG_P (XEXP (SET_DEST (prev_set), 0))
11581 && REG_P (XEXP (SET_DEST (curr_set), 0))
11582 && REGNO (XEXP (SET_DEST (prev_set), 0))
11583 == REGNO (XEXP (SET_DEST (curr_set), 0))
11584 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
11585 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
11586 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
11587 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
11588 && CONST_INT_P (SET_SRC (prev_set))
11589 && CONST_INT_P (SET_SRC (curr_set)))
11590 return true;
11591
11592 }
d8354ad7 11593 if (simple_sets_p
b175b679 11594 && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
11595 {
11596 /* We're trying to match:
11597 prev (adrp) == (set (reg r0)
11598 (high (symbol_ref ("SYM"))))
11599 curr (ldr) == (set (reg r1)
11600 (mem (lo_sum (reg r0)
11601 (symbol_ref ("SYM")))))
11602 or
11603 curr (ldr) == (set (reg r1)
11604 (zero_extend (mem
11605 (lo_sum (reg r0)
11606 (symbol_ref ("SYM")))))) */
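      /* For illustration (not from the original source):
	    adrp x0, sym
	    ldr	 w1, [x0, #:lo12:sym]
	 optionally with a zero-extending load as the second instruction.  */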
11607 if (satisfies_constraint_Ush (SET_SRC (prev_set))
11608 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
11609 {
11610 rtx curr_src = SET_SRC (curr_set);
11611
11612 if (GET_CODE (curr_src) == ZERO_EXTEND)
11613 curr_src = XEXP (curr_src, 0);
11614
11615 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
11616 && REG_P (XEXP (XEXP (curr_src, 0), 0))
11617 && REGNO (XEXP (XEXP (curr_src, 0), 0))
11618 == REGNO (SET_DEST (prev_set))
11619 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
11620 XEXP (SET_SRC (prev_set), 0)))
11621 return true;
11622 }
11623 }
cd0cb232 11624
b175b679 11625 if ((aarch64_tune_params.fusible_ops & AARCH64_FUSE_CMP_BRANCH)
3759108f
AP
11626 && any_condjump_p (curr))
11627 {
11628 enum attr_type prev_type = get_attr_type (prev);
11629
 11630 /* FIXME: this misses some instructions that are considered simple
 11631 arithmetic instructions for ThunderX. Simple shifts are missed here. */
11632 if (prev_type == TYPE_ALUS_SREG
11633 || prev_type == TYPE_ALUS_IMM
11634 || prev_type == TYPE_LOGICS_REG
11635 || prev_type == TYPE_LOGICS_IMM)
11636 return true;
11637 }
11638
6a569cdd
KT
11639 return false;
11640}
11641
350013bc
BC
 11642/* If MEM is in the form of [base+offset], extract the two parts of the
 11643 address into BASE and OFFSET and return true; otherwise return false
 11644 after clearing BASE and OFFSET. */
11645
11646bool
11647extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
11648{
11649 rtx addr;
11650
11651 gcc_assert (MEM_P (mem));
11652
11653 addr = XEXP (mem, 0);
11654
11655 if (REG_P (addr))
11656 {
11657 *base = addr;
11658 *offset = const0_rtx;
11659 return true;
11660 }
11661
11662 if (GET_CODE (addr) == PLUS
11663 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
11664 {
11665 *base = XEXP (addr, 0);
11666 *offset = XEXP (addr, 1);
11667 return true;
11668 }
11669
11670 *base = NULL_RTX;
11671 *offset = NULL_RTX;
11672
11673 return false;
11674}
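/* For example (illustrative only): a MEM whose address is
   (plus:DI (reg:DI x1) (const_int 16)) yields *base == x1 and
   *offset == (const_int 16), while a bare (reg:DI x1) yields an offset of
   const0_rtx.  */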
11675
11676/* Types for scheduling fusion. */
11677enum sched_fusion_type
11678{
11679 SCHED_FUSION_NONE = 0,
11680 SCHED_FUSION_LD_SIGN_EXTEND,
11681 SCHED_FUSION_LD_ZERO_EXTEND,
11682 SCHED_FUSION_LD,
11683 SCHED_FUSION_ST,
11684 SCHED_FUSION_NUM
11685};
11686
 11687/* If INSN is a load or store whose address is in the form [base+offset],
 11688 extract the two parts into BASE and OFFSET. Return the scheduling
 11689 fusion type of this INSN. */
11690
11691static enum sched_fusion_type
11692fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
11693{
11694 rtx x, dest, src;
11695 enum sched_fusion_type fusion = SCHED_FUSION_LD;
11696
11697 gcc_assert (INSN_P (insn));
11698 x = PATTERN (insn);
11699 if (GET_CODE (x) != SET)
11700 return SCHED_FUSION_NONE;
11701
11702 src = SET_SRC (x);
11703 dest = SET_DEST (x);
11704
1f46bd52
AP
11705 if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
11706 && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
350013bc
BC
11707 return SCHED_FUSION_NONE;
11708
11709 if (GET_CODE (src) == SIGN_EXTEND)
11710 {
11711 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
11712 src = XEXP (src, 0);
11713 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
11714 return SCHED_FUSION_NONE;
11715 }
11716 else if (GET_CODE (src) == ZERO_EXTEND)
11717 {
11718 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
11719 src = XEXP (src, 0);
11720 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
11721 return SCHED_FUSION_NONE;
11722 }
11723
11724 if (GET_CODE (src) == MEM && REG_P (dest))
11725 extract_base_offset_in_addr (src, base, offset);
11726 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
11727 {
11728 fusion = SCHED_FUSION_ST;
11729 extract_base_offset_in_addr (dest, base, offset);
11730 }
11731 else
11732 return SCHED_FUSION_NONE;
11733
11734 if (*base == NULL_RTX || *offset == NULL_RTX)
11735 fusion = SCHED_FUSION_NONE;
11736
11737 return fusion;
11738}
11739
11740/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
11741
 11742 Currently we only support fusing ldr or str instructions, so FUSION_PRI
 11743 and PRI are only calculated for these instructions. For other instructions,
 11744 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
 11745 types of instruction fusion can be added by returning different priorities.
11746
11747 It's important that irrelevant instructions get the largest FUSION_PRI. */
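/* A worked example, for illustration only: two SImode loads from [x3, 4] and
   [x3, 8] receive the same FUSION_PRI (same fusion type and base register)
   and PRI values that favour the smaller offset, so the scheduler tends to
   keep them adjacent where the ldp peephole can pair them.  */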
11748
11749static void
11750aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
11751 int *fusion_pri, int *pri)
11752{
11753 int tmp, off_val;
11754 rtx base, offset;
11755 enum sched_fusion_type fusion;
11756
11757 gcc_assert (INSN_P (insn));
11758
11759 tmp = max_pri - 1;
11760 fusion = fusion_load_store (insn, &base, &offset);
11761 if (fusion == SCHED_FUSION_NONE)
11762 {
11763 *pri = tmp;
11764 *fusion_pri = tmp;
11765 return;
11766 }
11767
11768 /* Set FUSION_PRI according to fusion type and base register. */
11769 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
11770
11771 /* Calculate PRI. */
11772 tmp /= 2;
11773
11774 /* INSN with smaller offset goes first. */
11775 off_val = (int)(INTVAL (offset));
11776 if (off_val >= 0)
11777 tmp -= (off_val & 0xfffff);
11778 else
11779 tmp += ((- off_val) & 0xfffff);
11780
11781 *pri = tmp;
11782 return;
11783}
11784
11785/* Given OPERANDS of consecutive load/store, check if we can merge
11786 them into ldp/stp. LOAD is true if they are load instructions.
11787 MODE is the mode of memory operands. */
11788
11789bool
11790aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
11791 enum machine_mode mode)
11792{
11793 HOST_WIDE_INT offval_1, offval_2, msize;
11794 enum reg_class rclass_1, rclass_2;
11795 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
11796
11797 if (load)
11798 {
11799 mem_1 = operands[1];
11800 mem_2 = operands[3];
11801 reg_1 = operands[0];
11802 reg_2 = operands[2];
11803 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
11804 if (REGNO (reg_1) == REGNO (reg_2))
11805 return false;
11806 }
11807 else
11808 {
11809 mem_1 = operands[0];
11810 mem_2 = operands[2];
11811 reg_1 = operands[1];
11812 reg_2 = operands[3];
11813 }
11814
bf84ac44
AP
11815 /* The mems cannot be volatile. */
11816 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
11817 return false;
11818
350013bc
BC
11819 /* Check if the addresses are in the form of [base+offset]. */
11820 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
11821 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
11822 return false;
11823 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
11824 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
11825 return false;
11826
11827 /* Check if the bases are same. */
11828 if (!rtx_equal_p (base_1, base_2))
11829 return false;
11830
11831 offval_1 = INTVAL (offset_1);
11832 offval_2 = INTVAL (offset_2);
11833 msize = GET_MODE_SIZE (mode);
11834 /* Check if the offsets are consecutive. */
11835 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
11836 return false;
11837
11838 /* Check if the addresses are clobbered by load. */
11839 if (load)
11840 {
11841 if (reg_mentioned_p (reg_1, mem_1))
11842 return false;
11843
11844 /* In increasing order, the last load can clobber the address. */
11845 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
11846 return false;
11847 }
11848
11849 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
11850 rclass_1 = FP_REGS;
11851 else
11852 rclass_1 = GENERAL_REGS;
11853
11854 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
11855 rclass_2 = FP_REGS;
11856 else
11857 rclass_2 = GENERAL_REGS;
11858
11859 /* Check if the registers are of same class. */
11860 if (rclass_1 != rclass_2)
11861 return false;
11862
11863 return true;
11864}
11865
11866/* Given OPERANDS of consecutive load/store, check if we can merge
11867 them into ldp/stp by adjusting the offset. LOAD is true if they
11868 are load instructions. MODE is the mode of memory operands.
11869
11870 Given below consecutive stores:
11871
11872 str w1, [xb, 0x100]
11873 str w1, [xb, 0x104]
11874 str w1, [xb, 0x108]
11875 str w1, [xb, 0x10c]
11876
11877 Though the offsets are out of the range supported by stp, we can
11878 still pair them after adjusting the offset, like:
11879
11880 add scratch, xb, 0x100
11881 stp w1, w1, [scratch]
11882 stp w1, w1, [scratch, 0x8]
11883
11884 The peephole patterns detecting this opportunity should guarantee
11885 the scratch register is avaliable. */
11886
11887bool
11888aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
11889 enum machine_mode mode)
11890{
11891 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
11892 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
11893 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
11894 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
11895
11896 if (load)
11897 {
11898 reg_1 = operands[0];
11899 mem_1 = operands[1];
11900 reg_2 = operands[2];
11901 mem_2 = operands[3];
11902 reg_3 = operands[4];
11903 mem_3 = operands[5];
11904 reg_4 = operands[6];
11905 mem_4 = operands[7];
11906 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
11907 && REG_P (reg_3) && REG_P (reg_4));
11908 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
11909 return false;
11910 }
11911 else
11912 {
11913 mem_1 = operands[0];
11914 reg_1 = operands[1];
11915 mem_2 = operands[2];
11916 reg_2 = operands[3];
11917 mem_3 = operands[4];
11918 reg_3 = operands[5];
11919 mem_4 = operands[6];
11920 reg_4 = operands[7];
11921 }
 11922 /* Skip if the memory operand is by itself valid for ldp/stp. */
11923 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
11924 return false;
11925
bf84ac44
AP
11926 /* The mems cannot be volatile. */
11927 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
11928 || MEM_VOLATILE_P (mem_3) ||MEM_VOLATILE_P (mem_4))
11929 return false;
11930
350013bc
BC
11931 /* Check if the addresses are in the form of [base+offset]. */
11932 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
11933 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
11934 return false;
11935 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
11936 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
11937 return false;
11938 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
11939 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
11940 return false;
11941 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
11942 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
11943 return false;
11944
11945 /* Check if the bases are same. */
11946 if (!rtx_equal_p (base_1, base_2)
11947 || !rtx_equal_p (base_2, base_3)
11948 || !rtx_equal_p (base_3, base_4))
11949 return false;
11950
11951 offval_1 = INTVAL (offset_1);
11952 offval_2 = INTVAL (offset_2);
11953 offval_3 = INTVAL (offset_3);
11954 offval_4 = INTVAL (offset_4);
11955 msize = GET_MODE_SIZE (mode);
11956 /* Check if the offsets are consecutive. */
11957 if ((offval_1 != (offval_2 + msize)
11958 || offval_1 != (offval_3 + msize * 2)
11959 || offval_1 != (offval_4 + msize * 3))
11960 && (offval_4 != (offval_3 + msize)
11961 || offval_4 != (offval_2 + msize * 2)
11962 || offval_4 != (offval_1 + msize * 3)))
11963 return false;
11964
11965 /* Check if the addresses are clobbered by load. */
11966 if (load)
11967 {
11968 if (reg_mentioned_p (reg_1, mem_1)
11969 || reg_mentioned_p (reg_2, mem_2)
11970 || reg_mentioned_p (reg_3, mem_3))
11971 return false;
11972
11973 /* In increasing order, the last load can clobber the address. */
11974 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
11975 return false;
11976 }
11977
11978 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
11979 rclass_1 = FP_REGS;
11980 else
11981 rclass_1 = GENERAL_REGS;
11982
11983 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
11984 rclass_2 = FP_REGS;
11985 else
11986 rclass_2 = GENERAL_REGS;
11987
11988 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
11989 rclass_3 = FP_REGS;
11990 else
11991 rclass_3 = GENERAL_REGS;
11992
11993 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
11994 rclass_4 = FP_REGS;
11995 else
11996 rclass_4 = GENERAL_REGS;
11997
11998 /* Check if the registers are of same class. */
11999 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
12000 return false;
12001
12002 return true;
12003}
12004
12005/* Given OPERANDS of consecutive load/store, this function pairs them
12006 into ldp/stp after adjusting the offset. It depends on the fact
12007 that addresses of load/store instructions are in increasing order.
 12008 MODE is the mode of the memory operands. CODE is the rtl operator
 12009 which should be applied to all memory operands; it is SIGN_EXTEND,
 12010 ZERO_EXTEND or UNKNOWN. */
12011
12012bool
12013aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
12014 enum machine_mode mode, RTX_CODE code)
12015{
12016 rtx base, offset, t1, t2;
12017 rtx mem_1, mem_2, mem_3, mem_4;
12018 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
12019
12020 if (load)
12021 {
12022 mem_1 = operands[1];
12023 mem_2 = operands[3];
12024 mem_3 = operands[5];
12025 mem_4 = operands[7];
12026 }
12027 else
12028 {
12029 mem_1 = operands[0];
12030 mem_2 = operands[2];
12031 mem_3 = operands[4];
12032 mem_4 = operands[6];
12033 gcc_assert (code == UNKNOWN);
12034 }
12035
12036 extract_base_offset_in_addr (mem_1, &base, &offset);
12037 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
12038
 12039 /* Adjust the offset so that it can fit in an ldp/stp instruction. */
12040 msize = GET_MODE_SIZE (mode);
12041 stp_off_limit = msize * 0x40;
12042 off_val = INTVAL (offset);
12043 abs_off = (off_val < 0) ? -off_val : off_val;
12044 new_off = abs_off % stp_off_limit;
12045 adj_off = abs_off - new_off;
12046
12047 /* Further adjust to make sure all offsets are OK. */
12048 if ((new_off + msize * 2) >= stp_off_limit)
12049 {
12050 adj_off += stp_off_limit;
12051 new_off -= stp_off_limit;
12052 }
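  /* Worked numbers, for illustration only: for the SImode stores at
     xb+0x100 .. xb+0x10c shown in the comment above
     aarch64_operands_adjust_ok_for_ldpstp, msize == 4 and
     stp_off_limit == 0x100, so off_val == 0x100 gives new_off == 0 and
     adj_off == 0x100; the code below then emits "add scratch, xb, 0x100"
     and the stores pair up at offsets 0 and 8 from the scratch register.  */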
12053
12054 /* Make sure the adjustment can be done with ADD/SUB instructions. */
12055 if (adj_off >= 0x1000)
12056 return false;
12057
12058 if (off_val < 0)
12059 {
12060 adj_off = -adj_off;
12061 new_off = -new_off;
12062 }
12063
12064 /* Create new memory references. */
12065 mem_1 = change_address (mem_1, VOIDmode,
12066 plus_constant (DImode, operands[8], new_off));
12067
12068 /* Check if the adjusted address is OK for ldp/stp. */
12069 if (!aarch64_mem_pair_operand (mem_1, mode))
12070 return false;
12071
12072 msize = GET_MODE_SIZE (mode);
12073 mem_2 = change_address (mem_2, VOIDmode,
12074 plus_constant (DImode,
12075 operands[8],
12076 new_off + msize));
12077 mem_3 = change_address (mem_3, VOIDmode,
12078 plus_constant (DImode,
12079 operands[8],
12080 new_off + msize * 2));
12081 mem_4 = change_address (mem_4, VOIDmode,
12082 plus_constant (DImode,
12083 operands[8],
12084 new_off + msize * 3));
12085
12086 if (code == ZERO_EXTEND)
12087 {
12088 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
12089 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
12090 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
12091 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
12092 }
12093 else if (code == SIGN_EXTEND)
12094 {
12095 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
12096 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
12097 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
12098 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
12099 }
12100
12101 if (load)
12102 {
12103 operands[1] = mem_1;
12104 operands[3] = mem_2;
12105 operands[5] = mem_3;
12106 operands[7] = mem_4;
12107 }
12108 else
12109 {
12110 operands[0] = mem_1;
12111 operands[2] = mem_2;
12112 operands[4] = mem_3;
12113 operands[6] = mem_4;
12114 }
12115
12116 /* Emit adjusting instruction. */
f7df4a84 12117 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, adj_off)));
350013bc 12118 /* Emit ldp/stp instructions. */
f7df4a84
RS
12119 t1 = gen_rtx_SET (operands[0], operands[1]);
12120 t2 = gen_rtx_SET (operands[2], operands[3]);
350013bc 12121 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
f7df4a84
RS
12122 t1 = gen_rtx_SET (operands[4], operands[5]);
12123 t2 = gen_rtx_SET (operands[6], operands[7]);
350013bc
BC
12124 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
12125 return true;
12126}
12127
1b1e81f8
JW
 12128/* Return 1 if a pseudo register should be created and used to hold
 12129 the GOT address for PIC code. */
12130
12131bool
12132aarch64_use_pseudo_pic_reg (void)
12133{
12134 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
12135}
12136
7b841a12
JW
12137/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
12138
12139static int
12140aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
12141{
12142 switch (XINT (x, 1))
12143 {
12144 case UNSPEC_GOTSMALLPIC:
12145 case UNSPEC_GOTSMALLPIC28K:
12146 case UNSPEC_GOTTINYPIC:
12147 return 0;
12148 default:
12149 break;
12150 }
12151
12152 return default_unspec_may_trap_p (x, flags);
12153}
12154
c2ec330c
AL
12155/* Implement TARGET_PROMOTED_TYPE to promote __fp16 to float. */
12156static tree
12157aarch64_promoted_type (const_tree t)
12158{
12159 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
12160 return float_type_node;
12161 return NULL_TREE;
12162}
43e9d192
IB
12163#undef TARGET_ADDRESS_COST
12164#define TARGET_ADDRESS_COST aarch64_address_cost
12165
 12166/* This hook determines whether unnamed bitfields affect the alignment
12167 of the containing structure. The hook returns true if the structure
12168 should inherit the alignment requirements of an unnamed bitfield's
12169 type. */
12170#undef TARGET_ALIGN_ANON_BITFIELD
12171#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
12172
12173#undef TARGET_ASM_ALIGNED_DI_OP
12174#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
12175
12176#undef TARGET_ASM_ALIGNED_HI_OP
12177#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
12178
12179#undef TARGET_ASM_ALIGNED_SI_OP
12180#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
12181
12182#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
12183#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
12184 hook_bool_const_tree_hwi_hwi_const_tree_true
12185
12186#undef TARGET_ASM_FILE_START
12187#define TARGET_ASM_FILE_START aarch64_start_file
12188
12189#undef TARGET_ASM_OUTPUT_MI_THUNK
12190#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
12191
12192#undef TARGET_ASM_SELECT_RTX_SECTION
12193#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
12194
12195#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
12196#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
12197
12198#undef TARGET_BUILD_BUILTIN_VA_LIST
12199#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
12200
12201#undef TARGET_CALLEE_COPIES
12202#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
12203
12204#undef TARGET_CAN_ELIMINATE
12205#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
12206
12207#undef TARGET_CANNOT_FORCE_CONST_MEM
12208#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
12209
12210#undef TARGET_CONDITIONAL_REGISTER_USAGE
12211#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
12212
12213/* Only the least significant bit is used for initialization guard
12214 variables. */
12215#undef TARGET_CXX_GUARD_MASK_BIT
12216#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
12217
12218#undef TARGET_C_MODE_FOR_SUFFIX
12219#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
12220
12221#ifdef TARGET_BIG_ENDIAN_DEFAULT
12222#undef TARGET_DEFAULT_TARGET_FLAGS
12223#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
12224#endif
12225
12226#undef TARGET_CLASS_MAX_NREGS
12227#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
12228
119103ca
JG
12229#undef TARGET_BUILTIN_DECL
12230#define TARGET_BUILTIN_DECL aarch64_builtin_decl
12231
43e9d192
IB
12232#undef TARGET_EXPAND_BUILTIN
12233#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
12234
12235#undef TARGET_EXPAND_BUILTIN_VA_START
12236#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
12237
9697e620
JG
12238#undef TARGET_FOLD_BUILTIN
12239#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
12240
43e9d192
IB
12241#undef TARGET_FUNCTION_ARG
12242#define TARGET_FUNCTION_ARG aarch64_function_arg
12243
12244#undef TARGET_FUNCTION_ARG_ADVANCE
12245#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
12246
12247#undef TARGET_FUNCTION_ARG_BOUNDARY
12248#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
12249
12250#undef TARGET_FUNCTION_OK_FOR_SIBCALL
12251#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
12252
12253#undef TARGET_FUNCTION_VALUE
12254#define TARGET_FUNCTION_VALUE aarch64_function_value
12255
12256#undef TARGET_FUNCTION_VALUE_REGNO_P
12257#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
12258
12259#undef TARGET_FRAME_POINTER_REQUIRED
12260#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
12261
fc72cba7
AL
12262#undef TARGET_GIMPLE_FOLD_BUILTIN
12263#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 12264
43e9d192
IB
12265#undef TARGET_GIMPLIFY_VA_ARG_EXPR
12266#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
12267
12268#undef TARGET_INIT_BUILTINS
12269#define TARGET_INIT_BUILTINS aarch64_init_builtins
12270
12271#undef TARGET_LEGITIMATE_ADDRESS_P
12272#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
12273
12274#undef TARGET_LEGITIMATE_CONSTANT_P
12275#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
12276
12277#undef TARGET_LIBGCC_CMP_RETURN_MODE
12278#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
12279
38e8f663 12280#undef TARGET_LRA_P
98d404be 12281#define TARGET_LRA_P hook_bool_void_true
38e8f663 12282
ac2b960f
YZ
12283#undef TARGET_MANGLE_TYPE
12284#define TARGET_MANGLE_TYPE aarch64_mangle_type
12285
43e9d192
IB
12286#undef TARGET_MEMORY_MOVE_COST
12287#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
12288
26e0ff94
WD
12289#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
12290#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
12291
43e9d192
IB
12292#undef TARGET_MUST_PASS_IN_STACK
12293#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
12294
12295/* This target hook should return true if accesses to volatile bitfields
12296 should use the narrowest mode possible. It should return false if these
12297 accesses should use the bitfield container type. */
12298#undef TARGET_NARROW_VOLATILE_BITFIELD
12299#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
12300
12301#undef TARGET_OPTION_OVERRIDE
12302#define TARGET_OPTION_OVERRIDE aarch64_override_options
12303
12304#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
12305#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
12306 aarch64_override_options_after_change
12307
12308#undef TARGET_PASS_BY_REFERENCE
12309#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
12310
12311#undef TARGET_PREFERRED_RELOAD_CLASS
12312#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
12313
cee66c68
WD
12314#undef TARGET_SCHED_REASSOCIATION_WIDTH
12315#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
12316
c2ec330c
AL
12317#undef TARGET_PROMOTED_TYPE
12318#define TARGET_PROMOTED_TYPE aarch64_promoted_type
12319
43e9d192
IB
12320#undef TARGET_SECONDARY_RELOAD
12321#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
12322
12323#undef TARGET_SHIFT_TRUNCATION_MASK
12324#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
12325
12326#undef TARGET_SETUP_INCOMING_VARARGS
12327#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
12328
12329#undef TARGET_STRUCT_VALUE_RTX
12330#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
12331
12332#undef TARGET_REGISTER_MOVE_COST
12333#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
12334
12335#undef TARGET_RETURN_IN_MEMORY
12336#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
12337
12338#undef TARGET_RETURN_IN_MSB
12339#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
12340
12341#undef TARGET_RTX_COSTS
7cc2145f 12342#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 12343
d126a4ae
AP
12344#undef TARGET_SCHED_ISSUE_RATE
12345#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
12346
d03f7e44
MK
12347#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
12348#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
12349 aarch64_sched_first_cycle_multipass_dfa_lookahead
12350
43e9d192
IB
12351#undef TARGET_TRAMPOLINE_INIT
12352#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
12353
12354#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
12355#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
12356
12357#undef TARGET_VECTOR_MODE_SUPPORTED_P
12358#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
12359
12360#undef TARGET_ARRAY_MODE_SUPPORTED_P
12361#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
12362
8990e73a
TB
12363#undef TARGET_VECTORIZE_ADD_STMT_COST
12364#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
12365
12366#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
12367#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
12368 aarch64_builtin_vectorization_cost
12369
43e9d192
IB
12370#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
12371#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
12372
42fc9a7f
JG
12373#undef TARGET_VECTORIZE_BUILTINS
12374#define TARGET_VECTORIZE_BUILTINS
12375
12376#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
12377#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
12378 aarch64_builtin_vectorized_function
12379
3b357264
JG
12380#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
12381#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
12382 aarch64_autovectorize_vector_sizes
12383
aa87aced
KV
12384#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
12385#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
12386 aarch64_atomic_assign_expand_fenv
12387
43e9d192
IB
12388/* Section anchor support. */
12389
12390#undef TARGET_MIN_ANCHOR_OFFSET
12391#define TARGET_MIN_ANCHOR_OFFSET -256
12392
12393/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
12394 byte offset; we can do much more for larger data types, but have no way
12395 to determine the size of the access. We assume accesses are aligned. */
12396#undef TARGET_MAX_ANCHOR_OFFSET
12397#define TARGET_MAX_ANCHOR_OFFSET 4095
12398
db0253a4
TB
12399#undef TARGET_VECTOR_ALIGNMENT
12400#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
12401
12402#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
12403#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
12404 aarch64_simd_vector_alignment_reachable
12405
88b08073
JG
12406/* vec_perm support. */
12407
12408#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
12409#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
12410 aarch64_vectorize_vec_perm_const_ok
12411
c2ec330c
AL
12412#undef TARGET_INIT_LIBFUNCS
12413#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
70f09188 12414
706b2314 12415#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
12416#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
12417
5cb74e90
RR
12418#undef TARGET_FLAGS_REGNUM
12419#define TARGET_FLAGS_REGNUM CC_REGNUM
12420
78607708
TV
12421#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
12422#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
12423
a3125fc2
CL
12424#undef TARGET_ASAN_SHADOW_OFFSET
12425#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
12426
0c4ec427
RE
12427#undef TARGET_LEGITIMIZE_ADDRESS
12428#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
12429
d3006da6
JG
12430#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
12431#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
12432 aarch64_use_by_pieces_infrastructure_p
12433
594bdd53
FY
12434#undef TARGET_CAN_USE_DOLOOP_P
12435#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
12436
6a569cdd
KT
12437#undef TARGET_SCHED_MACRO_FUSION_P
12438#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
12439
12440#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
12441#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
12442
350013bc
BC
12443#undef TARGET_SCHED_FUSION_PRIORITY
12444#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
12445
7b841a12
JW
12446#undef TARGET_UNSPEC_MAY_TRAP_P
12447#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p
12448
1b1e81f8
JW
12449#undef TARGET_USE_PSEUDO_PIC_REG
12450#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
12451
43e9d192
IB
12452struct gcc_target targetm = TARGET_INITIALIZER;
12453
12454#include "gt-aarch64.h"