/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2015 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "cfghooks.h"
#include "tree.h"
#include "gimple.h"
#include "rtl.h"
#include "df.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "regs.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "output.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "reload.h"
#include "toplev.h"
#include "target.h"
#include "targhooks.h"
#include "tm_p.h"
#include "recog.h"
#include "langhooks.h"
#include "opts.h"
#include "diagnostic.h"
#include "diagnostic-core.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "optabs.h"
#include "dwarf2.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "cortex-a57-fma-steering.h"
#include "target-globals.h"

/* This file should be included last.  */
#include "target-def.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC:
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
                                                     const_tree,
                                                     machine_mode *, int *,
                                                     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                                 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};
#undef AARCH64_FUSION_PAIR

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};
#undef AARCH64_EXTRA_TUNING_OPTION
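
/* The two tables above hold the names accepted by the "fuse=" and "tune="
   clauses of the -moverride option string; they are consulted by
   aarch64_parse_fuse_string and aarch64_parse_tune_string below.  The
   "none" and "all" entries bracket whatever entries the .def files
   contribute.  */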

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
    {
      0, /* hi  */
      0, /* si  */
      0, /* di  */
      0, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_extend  */
  0 /* imm_offset  */
};

static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
    {
      1, /* hi  */
      0, /* si  */
      0, /* di  */
      1, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_extend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
    {
      1, /* hi  */
      0, /* si  */
      0, /* di  */
      1, /* ti  */
    },
  1, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  1, /* register_extend  */
  0, /* imm_offset  */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP  */
  2, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  2 /* FP2FP  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_stmt_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_stmt_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_stmt_cost  */
  4, /* vec_to_scalar_cost  */
  4, /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2, /* vec_unalign_store_cost  */
  2, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  2, /* Predictable.  */
  2 /* Unpredictable.  */
};

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags.  */
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { NULL, NULL }
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor ident;
  enum aarch64_processor sched_core;
  enum aarch64_arch arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
  {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
  {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
   all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
   FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
   AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};


/* Target specification.  These are populated by the -march, -mtune, -mcpu
   handling code or by target attributes.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};

/* Used to track the size of an address when generating a pre/post
   increment address.  */
static machine_mode aarch64_memory_reference_mode;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
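
/* Note that the codes above are listed in complementary pairs, so
   AARCH64_INVERSE_CONDITION_CODE only has to flip the low bit, e.g.
   AARCH64_EQ <-> AARCH64_NE and AARCH64_CS <-> AARCH64_CC.  */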

/* Generate code to enable conditional branches in functions over 1 MiB.  */
const char *
aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
                        const char * branch_format)
{
  rtx_code_label * tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];
  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
                               CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = targetm.strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);
  return "";
}
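
/* In other words, the emitted sequence is

     <branch_format> .Ltmp
     b       <original destination>
   .Ltmp:

   where the caller is expected to supply BRANCH_FORMAT with the condition
   sense already inverted, so the conditional branch only has to reach the
   next instruction while the unconditional b covers the full range.  */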

void
aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
{
  const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
  if (TARGET_GENERAL_REGS_ONLY)
    error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
  else
    error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg);
}

static unsigned int
aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}

static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
                             enum machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params.vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params.int_reassoc_width;
  if (FLOAT_MODE_P (mode))
    return aarch64_tune_params.fp_reassoc_width;
  return 1;
}

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
         || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
                                unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
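
/* For example, a 32-byte OImode value occupies two FP/SIMD registers
   (UNITS_PER_VREG is 16) but four general registers (UNITS_PER_WORD is 8);
   the rounding-up divisions above also handle modes whose size is not a
   multiple of the register size.  */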

/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
        return
          (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
        return 1;
    }

  return 0;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
                                     machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
        return mode;
      else
        return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}

/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if calls to symbol-ref SYM should not go through
   plt stubs.  */

bool
aarch64_is_noplt_call_p (rtx sym)
{
  const_tree decl = SYMBOL_REF_DECL (sym);

  if (flag_pic
      && decl
      && (!flag_plt
          || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
      && !targetm.binds_local_p (decl))
    return true;

  return false;
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
                                rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
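
/* As an illustration, EXTRACT_IMM == 34 and MULT_IMM == 4 satisfy the test
   above in DImode: the extend width is 34 & ~7 == 32 bits and the implied
   shift is 34 & 7 == 2, i.e. the extract behaves roughly like an extend of
   the low 32 bits of the register shifted left by two.  */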

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as :
   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo
                                     nop

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                     bl   __tls_get_addr
                                     nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp
   mrs  tp, tpidr_el0
   add  dest, dest, tp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm, lsl #12
   add  t0, t0, #:tprel_lo12_nc:imm
*/

static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
                                   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
        /* In ILP32, the mode of dest can be either SImode or DImode.  */
        rtx tmp_reg = dest;
        machine_mode mode = GET_MODE (dest);

        gcc_assert (mode == Pmode || mode == ptr_mode);

        if (can_create_pseudo_p ())
          tmp_reg = gen_reg_rtx (mode);

        emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
        emit_insn (gen_add_losym (dest, tmp_reg, imm));
        return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (dest, imm));
      return;

    case SYMBOL_SMALL_GOT_28K:
      {
        machine_mode mode = GET_MODE (dest);
        rtx gp_rtx = pic_offset_table_rtx;
        rtx insn;
        rtx mem;

        /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
           here before rtl expand.  Tree IVOPT will generate rtl pattern to
           decide rtx costs, in which case pic_offset_table_rtx is not
           initialized.  For that case no need to generate the first adrp
           instruction as the final cost for global variable access is
           one instruction.  */
        if (gp_rtx != NULL)
          {
            /* -fpic for -mcmodel=small allows a 32K GOT table size (but we
               are using the page base as GOT base, the first page may be
               wasted, in the worst scenario, there is only 28K space for
               the GOT).

               The generated instruction sequence for accessing a global
               variable is:

                 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]

               Only one instruction is needed.  But we must initialize
               pic_offset_table_rtx properly.  We generate an initialization
               insn for every global access, and allow CSE to remove all
               redundant ones.

               The final instruction sequence will look like the following
               for multiple global variable accesses:

                 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_

                 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
                 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
                 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
                 ...  */

            rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
            crtl->uses_pic_offset_table = 1;
            emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));

            if (mode != GET_MODE (gp_rtx))
              gp_rtx = simplify_gen_subreg (mode, gp_rtx, GET_MODE (gp_rtx), 0);
          }

        if (mode == ptr_mode)
          {
            if (mode == DImode)
              insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
            else
              insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);

            mem = XVECEXP (SET_SRC (insn), 0, 0);
          }
        else
          {
            gcc_assert (mode == Pmode);

            insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
            mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
          }

        /* The operand is expected to be a MEM.  Whenever the related insn
           pattern changes, the code above which calculates MEM should be
           updated.  */
        gcc_assert (GET_CODE (mem) == MEM);
        MEM_READONLY_P (mem) = 1;
        MEM_NOTRAP_P (mem) = 1;
        emit_insn (insn);
        return;
      }

    case SYMBOL_SMALL_GOT_4G:
      {
        /* In ILP32, the mode of dest can be either SImode or DImode,
           while the got entry is always of SImode size.  The mode of
           dest depends on how dest is used: if dest is assigned to a
           pointer (e.g. in the memory), it has SImode; it may have
           DImode if dest is dereferenced to access the memory.
           This is why we have to handle three different ldr_got_small
           patterns here (two patterns for ILP32).  */

        rtx insn;
        rtx mem;
        rtx tmp_reg = dest;
        machine_mode mode = GET_MODE (dest);

        if (can_create_pseudo_p ())
          tmp_reg = gen_reg_rtx (mode);

        emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
        if (mode == ptr_mode)
          {
            if (mode == DImode)
              insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
            else
              insn = gen_ldr_got_small_si (dest, tmp_reg, imm);

            mem = XVECEXP (SET_SRC (insn), 0, 0);
          }
        else
          {
            gcc_assert (mode == Pmode);

            insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
            mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
          }

        gcc_assert (GET_CODE (mem) == MEM);
        MEM_READONLY_P (mem) = 1;
        MEM_NOTRAP_P (mem) = 1;
        emit_insn (insn);
        return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
        rtx_insn *insns;
        rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

        start_sequence ();
        aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
        insns = get_insns ();
        end_sequence ();

        RTL_CONST_CALL_P (insns) = 1;
        emit_libcall_block (insns, dest, result, imm);
        return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
        machine_mode mode = GET_MODE (dest);
        rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
        rtx offset;
        rtx tp;

        gcc_assert (mode == Pmode || mode == ptr_mode);

        if (can_create_pseudo_p ())
          {
            rtx reg = gen_reg_rtx (mode);

            if (TARGET_ILP32)
              emit_insn (gen_tlsdesc_small_pseudo_si (reg, imm));
            else
              emit_insn (gen_tlsdesc_small_pseudo_di (reg, imm));

            offset = reg;
          }
        else
          {
            /* In ILP32, the got entry is always of SImode size.  Unlike
               small GOT, the dest is fixed at reg 0.  */
            if (TARGET_ILP32)
              emit_insn (gen_tlsdesc_small_si (imm));
            else
              emit_insn (gen_tlsdesc_small_di (imm));

            offset = x0;
          }
        tp = aarch64_load_tp (NULL);

        if (mode != Pmode)
          tp = gen_lowpart (mode, tp);

        emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, offset)));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_SMALL_TLSIE:
      {
        /* In ILP32, the mode of dest can be either SImode or DImode,
           while the got entry is always of SImode size.  The mode of
           dest depends on how dest is used: if dest is assigned to a
           pointer (e.g. in the memory), it has SImode; it may have
           DImode if dest is dereferenced to access the memory.
           This is why we have to handle three different tlsie_small
           patterns here (two patterns for ILP32).  */
        machine_mode mode = GET_MODE (dest);
        rtx tmp_reg = gen_reg_rtx (mode);
        rtx tp = aarch64_load_tp (NULL);

        if (mode == ptr_mode)
          {
            if (mode == DImode)
              emit_insn (gen_tlsie_small_di (tmp_reg, imm));
            else
              {
                emit_insn (gen_tlsie_small_si (tmp_reg, imm));
                tp = gen_lowpart (mode, tp);
              }
          }
        else
          {
            gcc_assert (mode == Pmode);
            emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
          }

        emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_TLSLE12:
    case SYMBOL_TLSLE24:
    case SYMBOL_TLSLE32:
    case SYMBOL_TLSLE48:
      {
        machine_mode mode = GET_MODE (dest);
        rtx tp = aarch64_load_tp (NULL);

        if (mode != Pmode)
          tp = gen_lowpart (mode, tp);

        switch (type)
          {
          case SYMBOL_TLSLE12:
            emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
                        (dest, tp, imm));
            break;
          case SYMBOL_TLSLE24:
            emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
                        (dest, tp, imm));
            break;
          case SYMBOL_TLSLE32:
            emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
                        (dest, imm));
            emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
                        (dest, dest, tp));
            break;
          case SYMBOL_TLSLE48:
            emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
                        (dest, imm));
            emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
                        (dest, dest, tp));
            break;
          default:
            gcc_unreachable ();
          }

        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    case SYMBOL_TINY_TLSIE:
      {
        machine_mode mode = GET_MODE (dest);
        rtx tp = aarch64_load_tp (NULL);

        if (mode == ptr_mode)
          {
            if (mode == DImode)
              emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
            else
              {
                tp = gen_lowpart (mode, tp);
                emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
              }
          }
        else
          {
            gcc_assert (mode == Pmode);
            emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
          }

        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    default:
      gcc_unreachable ();
    }
}

/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
          ? emit_move_insn (dest, src)
          : emit_move_insn_1 (dest, src));
}

/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
        {
          src_lo = gen_lowpart (word_mode, src);
          src_hi = gen_highpart (word_mode, src);

          if (mode == TImode)
            {
              emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
              emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
            }
          else
            {
              emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
              emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
            }
          return;
        }
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
        {
          dst_lo = gen_lowpart (word_mode, dst);
          dst_hi = gen_highpart (word_mode, dst);

          if (mode == TImode)
            {
              emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
              emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
            }
          else
            {
              emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
              emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
            }
          return;
        }
    }

  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
}

bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
          || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}

/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  machine_mode src_mode = GET_MODE (src1);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
        {
        case V8QImode:
          gen = gen_aarch64_simd_combinev8qi;
          break;
        case V4HImode:
          gen = gen_aarch64_simd_combinev4hi;
          break;
        case V2SImode:
          gen = gen_aarch64_simd_combinev2si;
          break;
        case V4HFmode:
          gen = gen_aarch64_simd_combinev4hf;
          break;
        case V2SFmode:
          gen = gen_aarch64_simd_combinev2sf;
          break;
        case DImode:
          gen = gen_aarch64_simd_combinedi;
          break;
        case DFmode:
          gen = gen_aarch64_simd_combinedf;
          break;
        default:
          gcc_unreachable ();
        }

      emit_insn (gen (dst, src1, src2));
      return;
    }
}

/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  machine_mode src_mode = GET_MODE (src);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
        {
        case V16QImode:
          gen = gen_aarch64_split_simd_movv16qi;
          break;
        case V8HImode:
          gen = gen_aarch64_split_simd_movv8hi;
          break;
        case V4SImode:
          gen = gen_aarch64_split_simd_movv4si;
          break;
        case V2DImode:
          gen = gen_aarch64_split_simd_movv2di;
          break;
        case V8HFmode:
          gen = gen_aarch64_split_simd_movv8hf;
          break;
        case V4SFmode:
          gen = gen_aarch64_split_simd_movv4sf;
          break;
        case V2DFmode:
          gen = gen_aarch64_split_simd_movv2df;
          break;
        default:
          gcc_unreachable ();
        }

      emit_insn (gen (dst, src));
      return;
    }
}

static rtx
aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}


static rtx
aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
         might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
                                     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}

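/* Return the number of instructions needed to synthesize the constant IMM
   of mode MODE into register DEST.  When GENERATE is true, also emit those
   instructions; when it is false, nothing is emitted and the function acts
   purely as a length/cost query.  */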
static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
                                machine_mode mode)
{
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match, first_not_ffff_match;
  int num_insns = 0;

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      if (generate)
        emit_insn (gen_rtx_SET (dest, imm));
      num_insns++;
      return num_insns;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
         in two; so don't mess around looking for sequences that don't buy
         us anything.  */
      if (generate)
        {
          emit_insn (gen_rtx_SET (dest, GEN_INT (INTVAL (imm) & 0xffff)));
          emit_insn (gen_insv_immsi (dest, GEN_INT (16),
                                     GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
        }
      num_insns += 2;
      return num_insns;
    }

  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;
  first_not_ffff_match = -1;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == mask)
        one_match++;
      else
        {
          if (first_not_ffff_match < 0)
            first_not_ffff_match = i;
          if ((val & mask) == 0)
            zero_match++;
        }
    }

  if (one_match == 2)
    {
      /* Set one of the quarters and then insert back into result.  */
      mask = 0xffffll << first_not_ffff_match;
      if (generate)
        {
          emit_insn (gen_rtx_SET (dest, GEN_INT (val | mask)));
          emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
                                     GEN_INT ((val >> first_not_ffff_match)
                                              & 0xffff)));
        }
      num_insns += 2;
      return num_insns;
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
        {
          if (generate)
            {
              subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
              emit_insn (gen_rtx_SET (subtarget, GEN_INT (val & mask)));
              emit_insn (gen_adddi3 (dest, subtarget,
                                     GEN_INT (val - (val & mask))));
            }
          num_insns += 2;
          return num_insns;
        }
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
        {
          if (generate)
            {
              subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
              emit_insn (gen_rtx_SET (subtarget,
                                      GEN_INT ((val + comp) & mask)));
              emit_insn (gen_adddi3 (dest, subtarget,
                                     GEN_INT (val - ((val + comp) & mask))));
            }
          num_insns += 2;
          return num_insns;
        }
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
        {
          if (generate)
            {
              subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
              emit_insn (gen_rtx_SET (subtarget,
                                      GEN_INT ((val - comp) | ~mask)));
              emit_insn (gen_adddi3 (dest, subtarget,
                                     GEN_INT (val - ((val - comp) | ~mask))));
            }
          num_insns += 2;
          return num_insns;
        }
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
        {
          if (generate)
            {
              subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
              emit_insn (gen_rtx_SET (subtarget, GEN_INT (val | ~mask)));
              emit_insn (gen_adddi3 (dest, subtarget,
                                     GEN_INT (val - (val | ~mask))));
            }
          num_insns += 2;
          return num_insns;
        }
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
          || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
        {
          if (generate)
            {
              subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
              emit_insn (gen_rtx_SET (subtarget,
                                      GEN_INT (aarch64_bitmasks[i])));
              emit_insn (gen_adddi3 (dest, subtarget,
                                     GEN_INT (val - aarch64_bitmasks[i])));
            }
          num_insns += 2;
          return num_insns;
        }

      for (j = 0; j < 64; j += 16, mask <<= 16)
        {
          if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
            {
              if (generate)
                {
                  emit_insn (gen_rtx_SET (dest,
                                          GEN_INT (aarch64_bitmasks[i])));
                  emit_insn (gen_insv_immdi (dest, GEN_INT (j),
                                             GEN_INT ((val >> j) & 0xffff)));
                }
              num_insns += 2;
              return num_insns;
            }
        }
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
        {
          int j;

          for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
            if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
              {
                if (generate)
                  {
                    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
                    emit_insn (gen_rtx_SET (subtarget,
                                            GEN_INT (aarch64_bitmasks[i])));
                    emit_insn (gen_iordi3 (dest, subtarget,
                                           GEN_INT (aarch64_bitmasks[j])));
                  }
                num_insns += 2;
                return num_insns;
              }
        }
      else if ((val & aarch64_bitmasks[i]) == val)
        {
          int j;

          for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
            if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
              {
                if (generate)
                  {
                    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
                    emit_insn (gen_rtx_SET (subtarget,
                                            GEN_INT (aarch64_bitmasks[j])));
                    emit_insn (gen_anddi3 (dest, subtarget,
                                           GEN_INT (aarch64_bitmasks[i])));
                  }
                num_insns += 2;
                return num_insns;
              }
        }
    }

  if (one_match > zero_match)
    {
      /* Set either first three quarters or all but the third.  */
      mask = 0xffffll << (16 - first_not_ffff_match);
      if (generate)
        emit_insn (gen_rtx_SET (dest,
                                GEN_INT (val | mask | 0xffffffff00000000ull)));
      num_insns ++;

      /* Now insert other two quarters.  */
      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
           i < 64; i += 16, mask <<= 16)
        {
          if ((val & mask) != mask)
            {
              if (generate)
                emit_insn (gen_insv_immdi (dest, GEN_INT (i),
                                           GEN_INT ((val >> i) & 0xffff)));
              num_insns ++;
            }
        }
      return num_insns;
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
        {
          if (first)
            {
              if (generate)
                emit_insn (gen_rtx_SET (dest, GEN_INT (val & mask)));
              num_insns ++;
              first = false;
            }
          else
            {
              if (generate)
                emit_insn (gen_insv_immdi (dest, GEN_INT (i),
                                           GEN_INT ((val >> i) & 0xffff)));
              num_insns ++;
            }
        }
    }

  return num_insns;
}


1705void
1706aarch64_expand_mov_immediate (rtx dest, rtx imm)
1707{
1708 machine_mode mode = GET_MODE (dest);
1709
1710 gcc_assert (mode == SImode || mode == DImode);
1711
1712 /* Check on what type of symbol it is. */
1713 if (GET_CODE (imm) == SYMBOL_REF
1714 || GET_CODE (imm) == LABEL_REF
1715 || GET_CODE (imm) == CONST)
1716 {
1717 rtx mem, base, offset;
1718 enum aarch64_symbol_type sty;
1719
1720 /* If we have (const (plus symbol offset)), separate out the offset
1721 before we start classifying the symbol. */
1722 split_const (imm, &base, &offset);
1723
f8b756b7 1724 sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
82614948
RR
1725 switch (sty)
1726 {
1727 case SYMBOL_FORCE_TO_MEM:
1728 if (offset != const0_rtx
1729 && targetm.cannot_force_const_mem (mode, imm))
1730 {
1731 gcc_assert (can_create_pseudo_p ());
1732 base = aarch64_force_temporary (mode, dest, base);
1733 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1734 aarch64_emit_move (dest, base);
1735 return;
1736 }
b4f50fd4 1737
82614948
RR
1738 mem = force_const_mem (ptr_mode, imm);
1739 gcc_assert (mem);
b4f50fd4
RR
1740
1741 /* If we aren't generating PC relative literals, then
1742 we need to expand the literal pool access carefully.
1743 This is something that needs to be done in a number
1744 of places, so could well live as a separate function. */
1745 if (nopcrelative_literal_loads)
1746 {
1747 gcc_assert (can_create_pseudo_p ());
1748 base = gen_reg_rtx (ptr_mode);
1749 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
1750 mem = gen_rtx_MEM (ptr_mode, base);
1751 }
1752
82614948
RR
1753 if (mode != ptr_mode)
1754 mem = gen_rtx_ZERO_EXTEND (mode, mem);
b4f50fd4 1755
f7df4a84 1756 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 1757
82614948
RR
1758 return;
1759
1760 case SYMBOL_SMALL_TLSGD:
1761 case SYMBOL_SMALL_TLSDESC:
79496620 1762 case SYMBOL_SMALL_TLSIE:
1b1e81f8 1763 case SYMBOL_SMALL_GOT_28K:
6642bdb4 1764 case SYMBOL_SMALL_GOT_4G:
82614948 1765 case SYMBOL_TINY_GOT:
5ae7caad 1766 case SYMBOL_TINY_TLSIE:
82614948
RR
1767 if (offset != const0_rtx)
1768 {
1769 gcc_assert(can_create_pseudo_p ());
1770 base = aarch64_force_temporary (mode, dest, base);
1771 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1772 aarch64_emit_move (dest, base);
1773 return;
1774 }
1775 /* FALLTHRU */
1776
82614948
RR
1777 case SYMBOL_SMALL_ABSOLUTE:
1778 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 1779 case SYMBOL_TLSLE12:
d18ba284 1780 case SYMBOL_TLSLE24:
cbf5629e
JW
1781 case SYMBOL_TLSLE32:
1782 case SYMBOL_TLSLE48:
82614948
RR
1783 aarch64_load_symref_appropriately (dest, imm, sty);
1784 return;
1785
1786 default:
1787 gcc_unreachable ();
1788 }
1789 }
1790
1791 if (!CONST_INT_P (imm))
1792 {
1793 if (GET_CODE (imm) == HIGH)
f7df4a84 1794 emit_insn (gen_rtx_SET (dest, imm));
82614948
RR
1795 else
1796 {
1797 rtx mem = force_const_mem (mode, imm);
1798 gcc_assert (mem);
f7df4a84 1799 emit_insn (gen_rtx_SET (dest, mem));
43e9d192 1800 }
82614948
RR
1801
1802 return;
43e9d192 1803 }
82614948
RR
1804
1805 aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
43e9d192
IB
1806}
1807
1808static bool
fee9ba42
JW
1809aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1810 tree exp ATTRIBUTE_UNUSED)
43e9d192 1811{
fee9ba42 1812 /* Currently, always true. */
43e9d192
IB
1813 return true;
1814}
1815
1816/* Implement TARGET_PASS_BY_REFERENCE. */
1817
1818static bool
1819aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 1820 machine_mode mode,
43e9d192
IB
1821 const_tree type,
1822 bool named ATTRIBUTE_UNUSED)
1823{
1824 HOST_WIDE_INT size;
ef4bddc2 1825 machine_mode dummymode;
43e9d192
IB
1826 int nregs;
1827
1828 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1829 size = (mode == BLKmode && type)
1830 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1831
aadc1c43
MHD
1832 /* Aggregates are passed by reference based on their size. */
1833 if (type && AGGREGATE_TYPE_P (type))
43e9d192 1834 {
aadc1c43 1835 size = int_size_in_bytes (type);
43e9d192
IB
1836 }
1837
1838 /* Variable sized arguments are always returned by reference. */
1839 if (size < 0)
1840 return true;
1841
1842 /* Can this be a candidate to be passed in fp/simd register(s)? */
1843 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1844 &dummymode, &nregs,
1845 NULL))
1846 return false;
1847
1848 /* Arguments which are variable sized or larger than 2 registers are
 1849	     passed by reference unless they are a homogeneous floating-point
1850 aggregate. */
1851 return size > 2 * UNITS_PER_WORD;
1852}
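/* As an illustration of the rule above (values per AAPCS64, not tied to a
   particular caller): a plain 24-byte struct of three 64-bit integers
   exceeds two registers and is passed by reference, whereas a 32-byte
   homogeneous aggregate of four doubles is caught by the fp/simd check
   first and is still passed by value in vector registers.  */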
1853
1854/* Return TRUE if VALTYPE is padded to its least significant bits. */
1855static bool
1856aarch64_return_in_msb (const_tree valtype)
1857{
ef4bddc2 1858 machine_mode dummy_mode;
43e9d192
IB
1859 int dummy_int;
1860
1861 /* Never happens in little-endian mode. */
1862 if (!BYTES_BIG_ENDIAN)
1863 return false;
1864
1865 /* Only composite types smaller than or equal to 16 bytes can
1866 be potentially returned in registers. */
1867 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1868 || int_size_in_bytes (valtype) <= 0
1869 || int_size_in_bytes (valtype) > 16)
1870 return false;
1871
1872 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1873 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1874 is always passed/returned in the least significant bits of fp/simd
1875 register(s). */
1876 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1877 &dummy_mode, &dummy_int, NULL))
1878 return false;
1879
1880 return true;
1881}
1882
1883/* Implement TARGET_FUNCTION_VALUE.
1884 Define how to find the value returned by a function. */
1885
1886static rtx
1887aarch64_function_value (const_tree type, const_tree func,
1888 bool outgoing ATTRIBUTE_UNUSED)
1889{
ef4bddc2 1890 machine_mode mode;
43e9d192
IB
1891 int unsignedp;
1892 int count;
ef4bddc2 1893 machine_mode ag_mode;
43e9d192
IB
1894
1895 mode = TYPE_MODE (type);
1896 if (INTEGRAL_TYPE_P (type))
1897 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1898
1899 if (aarch64_return_in_msb (type))
1900 {
1901 HOST_WIDE_INT size = int_size_in_bytes (type);
1902
1903 if (size % UNITS_PER_WORD != 0)
1904 {
1905 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1906 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1907 }
1908 }
1909
1910 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1911 &ag_mode, &count, NULL))
1912 {
1913 if (!aarch64_composite_type_p (type, mode))
1914 {
1915 gcc_assert (count == 1 && mode == ag_mode);
1916 return gen_rtx_REG (mode, V0_REGNUM);
1917 }
1918 else
1919 {
1920 int i;
1921 rtx par;
1922
1923 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1924 for (i = 0; i < count; i++)
1925 {
1926 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1927 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1928 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1929 XVECEXP (par, 0, i) = tmp;
1930 }
1931 return par;
1932 }
1933 }
1934 else
1935 return gen_rtx_REG (mode, R0_REGNUM);
1936}
1937
1938/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1939 Return true if REGNO is the number of a hard register in which the values
 1940	   of a called function may come back.  */
1941
1942static bool
1943aarch64_function_value_regno_p (const unsigned int regno)
1944{
1945 /* Maximum of 16 bytes can be returned in the general registers. Examples
1946 of 16-byte return values are: 128-bit integers and 16-byte small
1947 structures (excluding homogeneous floating-point aggregates). */
1948 if (regno == R0_REGNUM || regno == R1_REGNUM)
1949 return true;
1950
1951 /* Up to four fp/simd registers can return a function value, e.g. a
1952 homogeneous floating-point aggregate having four members. */
1953 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 1954 return TARGET_FLOAT;
43e9d192
IB
1955
1956 return false;
1957}
1958
1959/* Implement TARGET_RETURN_IN_MEMORY.
1960
1961 If the type T of the result of a function is such that
1962 void func (T arg)
1963 would require that arg be passed as a value in a register (or set of
1964 registers) according to the parameter passing rules, then the result
1965 is returned in the same registers as would be used for such an
1966 argument. */
1967
1968static bool
1969aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1970{
1971 HOST_WIDE_INT size;
ef4bddc2 1972 machine_mode ag_mode;
43e9d192
IB
1973 int count;
1974
1975 if (!AGGREGATE_TYPE_P (type)
1976 && TREE_CODE (type) != COMPLEX_TYPE
1977 && TREE_CODE (type) != VECTOR_TYPE)
 1978	    /* Simple scalar types are always returned in registers.  */
1979 return false;
1980
1981 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1982 type,
1983 &ag_mode,
1984 &count,
1985 NULL))
1986 return false;
1987
 1988	  /* Types larger than 2 registers are returned in memory.  */
1989 size = int_size_in_bytes (type);
1990 return (size < 0 || size > 2 * UNITS_PER_WORD);
1991}
1992
1993static bool
ef4bddc2 1994aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
1995 const_tree type, int *nregs)
1996{
1997 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1998 return aarch64_vfp_is_call_or_return_candidate (mode,
1999 type,
2000 &pcum->aapcs_vfp_rmode,
2001 nregs,
2002 NULL);
2003}
2004
2005/* Given MODE and TYPE of a function argument, return the alignment in
2006 bits. The idea is to suppress any stronger alignment requested by
2007 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
2008 This is a helper function for local use only. */
2009
2010static unsigned int
ef4bddc2 2011aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192
IB
2012{
2013 unsigned int alignment;
2014
2015 if (type)
2016 {
2017 if (!integer_zerop (TYPE_SIZE (type)))
2018 {
2019 if (TYPE_MODE (type) == mode)
2020 alignment = TYPE_ALIGN (type);
2021 else
2022 alignment = GET_MODE_ALIGNMENT (mode);
2023 }
2024 else
2025 alignment = 0;
2026 }
2027 else
2028 alignment = GET_MODE_ALIGNMENT (mode);
2029
2030 return alignment;
2031}
2032
 2033	/* Lay out a function argument according to the AAPCS64 rules.  The rule
2034 numbers refer to the rule numbers in the AAPCS64. */
2035
2036static void
ef4bddc2 2037aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2038 const_tree type,
2039 bool named ATTRIBUTE_UNUSED)
2040{
2041 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2042 int ncrn, nvrn, nregs;
2043 bool allocate_ncrn, allocate_nvrn;
3abf17cf 2044 HOST_WIDE_INT size;
43e9d192
IB
2045
2046 /* We need to do this once per argument. */
2047 if (pcum->aapcs_arg_processed)
2048 return;
2049
2050 pcum->aapcs_arg_processed = true;
2051
3abf17cf
YZ
 2052	  /* Size in bytes, rounded up to the nearest multiple of 8 bytes.  */
2053 size
2054 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
2055 UNITS_PER_WORD);
2056
43e9d192
IB
2057 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
2058 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
2059 mode,
2060 type,
2061 &nregs);
2062
 2063	  /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
2064 The following code thus handles passing by SIMD/FP registers first. */
2065
2066 nvrn = pcum->aapcs_nvrn;
2067
 2068	  /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
 2069	     and homogeneous short-vector aggregates (HVA).  */
2070 if (allocate_nvrn)
2071 {
261fb553
AL
2072 if (!TARGET_FLOAT)
2073 aarch64_err_no_fpadvsimd (mode, "argument");
2074
43e9d192
IB
2075 if (nvrn + nregs <= NUM_FP_ARG_REGS)
2076 {
2077 pcum->aapcs_nextnvrn = nvrn + nregs;
2078 if (!aarch64_composite_type_p (type, mode))
2079 {
2080 gcc_assert (nregs == 1);
2081 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
2082 }
2083 else
2084 {
2085 rtx par;
2086 int i;
2087 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2088 for (i = 0; i < nregs; i++)
2089 {
2090 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
2091 V0_REGNUM + nvrn + i);
2092 tmp = gen_rtx_EXPR_LIST
2093 (VOIDmode, tmp,
2094 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
2095 XVECEXP (par, 0, i) = tmp;
2096 }
2097 pcum->aapcs_reg = par;
2098 }
2099 return;
2100 }
2101 else
2102 {
2103 /* C.3 NSRN is set to 8. */
2104 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
2105 goto on_stack;
2106 }
2107 }
2108
2109 ncrn = pcum->aapcs_ncrn;
3abf17cf 2110 nregs = size / UNITS_PER_WORD;
43e9d192
IB
2111
 2112	  /* C6 - C9, though the sign and zero extension semantics are
2113 handled elsewhere. This is the case where the argument fits
 2114	     entirely in general registers.  */
2115 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
2116 {
2117 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2118
2119 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
2120
2121 /* C.8 if the argument has an alignment of 16 then the NGRN is
2122 rounded up to the next even number. */
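	  /* For example, an __int128 argument (two registers, 16-byte
	     alignment) arriving when NGRN is 1 is allocated to the X2/X3
	     pair rather than being split across X1/X2.  */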
2123 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
2124 {
2125 ++ncrn;
2126 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
2127 }
2128 /* NREGS can be 0 when e.g. an empty structure is to be passed.
2129 A reg is still generated for it, but the caller should be smart
2130 enough not to use it. */
2131 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2132 {
2133 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
2134 }
2135 else
2136 {
2137 rtx par;
2138 int i;
2139
2140 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2141 for (i = 0; i < nregs; i++)
2142 {
2143 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
2144 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2145 GEN_INT (i * UNITS_PER_WORD));
2146 XVECEXP (par, 0, i) = tmp;
2147 }
2148 pcum->aapcs_reg = par;
2149 }
2150
2151 pcum->aapcs_nextncrn = ncrn + nregs;
2152 return;
2153 }
2154
2155 /* C.11 */
2156 pcum->aapcs_nextncrn = NUM_ARG_REGS;
2157
2158 /* The argument is passed on stack; record the needed number of words for
3abf17cf 2159 this argument and align the total size if necessary. */
43e9d192 2160on_stack:
3abf17cf 2161 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
43e9d192
IB
2162 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
2163 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
3abf17cf 2164 16 / UNITS_PER_WORD);
43e9d192
IB
2165 return;
2166}
2167
2168/* Implement TARGET_FUNCTION_ARG. */
2169
2170static rtx
ef4bddc2 2171aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2172 const_tree type, bool named)
2173{
2174 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2175 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
2176
2177 if (mode == VOIDmode)
2178 return NULL_RTX;
2179
2180 aarch64_layout_arg (pcum_v, mode, type, named);
2181 return pcum->aapcs_reg;
2182}
2183
2184void
2185aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
2186 const_tree fntype ATTRIBUTE_UNUSED,
2187 rtx libname ATTRIBUTE_UNUSED,
2188 const_tree fndecl ATTRIBUTE_UNUSED,
2189 unsigned n_named ATTRIBUTE_UNUSED)
2190{
2191 pcum->aapcs_ncrn = 0;
2192 pcum->aapcs_nvrn = 0;
2193 pcum->aapcs_nextncrn = 0;
2194 pcum->aapcs_nextnvrn = 0;
2195 pcum->pcs_variant = ARM_PCS_AAPCS64;
2196 pcum->aapcs_reg = NULL_RTX;
2197 pcum->aapcs_arg_processed = false;
2198 pcum->aapcs_stack_words = 0;
2199 pcum->aapcs_stack_size = 0;
2200
261fb553
AL
2201 if (!TARGET_FLOAT
2202 && fndecl && TREE_PUBLIC (fndecl)
2203 && fntype && fntype != error_mark_node)
2204 {
2205 const_tree type = TREE_TYPE (fntype);
2206 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
2207 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
2208 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
2209 &mode, &nregs, NULL))
2210 aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type");
2211 }
43e9d192
IB
2212 return;
2213}
2214
2215static void
2216aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 2217 machine_mode mode,
43e9d192
IB
2218 const_tree type,
2219 bool named)
2220{
2221 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2222 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
2223 {
2224 aarch64_layout_arg (pcum_v, mode, type, named);
2225 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
2226 != (pcum->aapcs_stack_words != 0));
2227 pcum->aapcs_arg_processed = false;
2228 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
2229 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
2230 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
2231 pcum->aapcs_stack_words = 0;
2232 pcum->aapcs_reg = NULL_RTX;
2233 }
2234}
2235
2236bool
2237aarch64_function_arg_regno_p (unsigned regno)
2238{
2239 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
2240 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
2241}
2242
2243/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
2244 PARM_BOUNDARY bits of alignment, but will be given anything up
2245 to STACK_BOUNDARY bits if the type requires it. This makes sure
2246 that both before and after the layout of each argument, the Next
2247 Stacked Argument Address (NSAA) will have a minimum alignment of
2248 8 bytes. */
2249
2250static unsigned int
ef4bddc2 2251aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192
IB
2252{
2253 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2254
2255 if (alignment < PARM_BOUNDARY)
2256 alignment = PARM_BOUNDARY;
2257 if (alignment > STACK_BOUNDARY)
2258 alignment = STACK_BOUNDARY;
2259 return alignment;
2260}
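/* With the aarch64.h values of 64 for PARM_BOUNDARY and 128 for
   STACK_BOUNDARY, the clamp above gives, for example, a lone char argument
   8-byte stack alignment, while a 16-byte aligned __int128 keeps 16 bytes,
   preserving the 8-byte NSAA minimum described above.  */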
2261
2262/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
2263
2264 Return true if an argument passed on the stack should be padded upwards,
2265 i.e. if the least-significant byte of the stack slot has useful data.
2266
2267 Small aggregate types are placed in the lowest memory address.
2268
2269 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
2270
2271bool
ef4bddc2 2272aarch64_pad_arg_upward (machine_mode mode, const_tree type)
43e9d192
IB
2273{
2274 /* On little-endian targets, the least significant byte of every stack
2275 argument is passed at the lowest byte address of the stack slot. */
2276 if (!BYTES_BIG_ENDIAN)
2277 return true;
2278
00edcfbe 2279 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
2280 the least significant byte of a stack argument is passed at the highest
2281 byte address of the stack slot. */
2282 if (type
00edcfbe
YZ
2283 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
2284 || POINTER_TYPE_P (type))
43e9d192
IB
2285 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
2286 return false;
2287
2288 /* Everything else padded upward, i.e. data in first byte of stack slot. */
2289 return true;
2290}
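/* Concretely, on a big-endian target a short passed on the stack ends up
   in the highest-addressed bytes of its 8-byte slot (padded downward),
   while a two-byte structure is padded upward and starts at the slot's
   lowest address.  */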
2291
2292/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
2293
 2294	   It specifies padding for the last (and possibly the only)
 2295	   element of a block move between registers and memory.  Assuming
 2296	   the block is in memory, padding upward means that the last
 2297	   element is padded after its most significant byte, while with
 2298	   downward padding the last element is padded at its least
 2299	   significant byte side.
2300
2301 Small aggregates and small complex types are always padded
2302 upwards.
2303
2304 We don't need to worry about homogeneous floating-point or
2305 short-vector aggregates; their move is not affected by the
2306 padding direction determined here. Regardless of endianness,
2307 each element of such an aggregate is put in the least
2308 significant bits of a fp/simd register.
2309
2310 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2311 register has useful data, and return the opposite if the most
2312 significant byte does. */
2313
2314bool
ef4bddc2 2315aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
2316 bool first ATTRIBUTE_UNUSED)
2317{
2318
2319 /* Small composite types are always padded upward. */
2320 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2321 {
2322 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2323 : GET_MODE_SIZE (mode));
2324 if (size < 2 * UNITS_PER_WORD)
2325 return true;
2326 }
2327
2328 /* Otherwise, use the default padding. */
2329 return !BYTES_BIG_ENDIAN;
2330}
2331
ef4bddc2 2332static machine_mode
43e9d192
IB
2333aarch64_libgcc_cmp_return_mode (void)
2334{
2335 return SImode;
2336}
2337
2338static bool
2339aarch64_frame_pointer_required (void)
2340{
0b7f8166
MS
2341 /* In aarch64_override_options_after_change
2342 flag_omit_leaf_frame_pointer turns off the frame pointer by
2343 default. Turn it back on now if we've not got a leaf
2344 function. */
2345 if (flag_omit_leaf_frame_pointer
2346 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2347 return true;
43e9d192 2348
0b7f8166 2349 return false;
43e9d192
IB
2350}
2351
2352/* Mark the registers that need to be saved by the callee and calculate
2353 the size of the callee-saved registers area and frame record (both FP
2354 and LR may be omitted). */
2355static void
2356aarch64_layout_frame (void)
2357{
2358 HOST_WIDE_INT offset = 0;
2359 int regno;
2360
2361 if (reload_completed && cfun->machine->frame.laid_out)
2362 return;
2363
97826595
MS
2364#define SLOT_NOT_REQUIRED (-2)
2365#define SLOT_REQUIRED (-1)
2366
363ffa50
JW
2367 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
2368 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
2369
43e9d192
IB
2370 /* First mark all the registers that really need to be saved... */
2371 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2372 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2373
2374 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2375 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2376
2377 /* ... that includes the eh data registers (if needed)... */
2378 if (crtl->calls_eh_return)
2379 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
2380 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2381 = SLOT_REQUIRED;
43e9d192
IB
2382
2383 /* ... and any callee saved register that dataflow says is live. */
2384 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2385 if (df_regs_ever_live_p (regno)
1c923b60
JW
2386 && (regno == R30_REGNUM
2387 || !call_used_regs[regno]))
97826595 2388 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2389
2390 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2391 if (df_regs_ever_live_p (regno)
2392 && !call_used_regs[regno])
97826595 2393 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2394
2395 if (frame_pointer_needed)
2396 {
2e1cdae5 2397 /* FP and LR are placed in the linkage record. */
43e9d192 2398 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 2399 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 2400 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 2401 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
43e9d192 2402 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
2e1cdae5 2403 offset += 2 * UNITS_PER_WORD;
43e9d192
IB
2404 }
2405
2406 /* Now assign stack slots for them. */
2e1cdae5 2407 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2408 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2409 {
2410 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
2411 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2412 cfun->machine->frame.wb_candidate1 = regno;
2413 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
2414 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2415 offset += UNITS_PER_WORD;
2416 }
2417
2418 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2419 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2420 {
2421 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
2422 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2423 cfun->machine->frame.wb_candidate1 = regno;
2424 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
2425 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2426 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2427 offset += UNITS_PER_WORD;
2428 }
2429
43e9d192
IB
2430 cfun->machine->frame.padding0 =
2431 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
2432 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2433
2434 cfun->machine->frame.saved_regs_size = offset;
1c960e02
MS
2435
2436 cfun->machine->frame.hard_fp_offset
2437 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
2438 + get_frame_size ()
2439 + cfun->machine->frame.saved_regs_size,
2440 STACK_BOUNDARY / BITS_PER_UNIT);
2441
2442 cfun->machine->frame.frame_size
2443 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
2444 + crtl->outgoing_args_size,
2445 STACK_BOUNDARY / BITS_PER_UNIT);
2446
43e9d192
IB
2447 cfun->machine->frame.laid_out = true;
2448}
2449
43e9d192
IB
2450static bool
2451aarch64_register_saved_on_entry (int regno)
2452{
97826595 2453 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
2454}
2455
64dedd72
JW
2456static unsigned
2457aarch64_next_callee_save (unsigned regno, unsigned limit)
2458{
2459 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2460 regno ++;
2461 return regno;
2462}
43e9d192 2463
c5e1f66e 2464static void
ef4bddc2 2465aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
2466 HOST_WIDE_INT adjustment)
2467 {
2468 rtx base_rtx = stack_pointer_rtx;
2469 rtx insn, reg, mem;
2470
2471 reg = gen_rtx_REG (mode, regno);
2472 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2473 plus_constant (Pmode, base_rtx, -adjustment));
2474 mem = gen_rtx_MEM (mode, mem);
2475
2476 insn = emit_move_insn (mem, reg);
2477 RTX_FRAME_RELATED_P (insn) = 1;
2478}
2479
80c11907 2480static rtx
ef4bddc2 2481aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
2482 HOST_WIDE_INT adjustment)
2483{
2484 switch (mode)
2485 {
2486 case DImode:
2487 return gen_storewb_pairdi_di (base, base, reg, reg2,
2488 GEN_INT (-adjustment),
2489 GEN_INT (UNITS_PER_WORD - adjustment));
2490 case DFmode:
2491 return gen_storewb_pairdf_di (base, base, reg, reg2,
2492 GEN_INT (-adjustment),
2493 GEN_INT (UNITS_PER_WORD - adjustment));
2494 default:
2495 gcc_unreachable ();
2496 }
2497}
2498
2499static void
ef4bddc2 2500aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
80c11907
JW
2501 unsigned regno2, HOST_WIDE_INT adjustment)
2502{
5d8a22a5 2503 rtx_insn *insn;
80c11907
JW
2504 rtx reg1 = gen_rtx_REG (mode, regno1);
2505 rtx reg2 = gen_rtx_REG (mode, regno2);
2506
2507 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2508 reg2, adjustment));
2509 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
2510 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2511 RTX_FRAME_RELATED_P (insn) = 1;
2512}
2513
159313d9 2514static rtx
ef4bddc2 2515aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
2516 HOST_WIDE_INT adjustment)
2517{
2518 switch (mode)
2519 {
2520 case DImode:
2521 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2522 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2523 case DFmode:
2524 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2525 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2526 default:
2527 gcc_unreachable ();
2528 }
2529}
2530
72df5c1f 2531static rtx
ef4bddc2 2532aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
2533 rtx reg2)
2534{
2535 switch (mode)
2536 {
2537 case DImode:
2538 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2539
2540 case DFmode:
2541 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2542
2543 default:
2544 gcc_unreachable ();
2545 }
2546}
2547
2548static rtx
ef4bddc2 2549aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
2550 rtx mem2)
2551{
2552 switch (mode)
2553 {
2554 case DImode:
2555 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2556
2557 case DFmode:
2558 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2559
2560 default:
2561 gcc_unreachable ();
2562 }
2563}
2564
43e9d192 2565
43e9d192 2566static void
ef4bddc2 2567aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
ae13fce3 2568 unsigned start, unsigned limit, bool skip_wb)
43e9d192 2569{
5d8a22a5 2570 rtx_insn *insn;
ef4bddc2 2571 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
a007a21c 2572 ? gen_frame_mem : gen_rtx_MEM);
43e9d192
IB
2573 unsigned regno;
2574 unsigned regno2;
2575
0ec74a1e 2576 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
2577 regno <= limit;
2578 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 2579 {
ae13fce3
JW
2580 rtx reg, mem;
2581 HOST_WIDE_INT offset;
64dedd72 2582
ae13fce3
JW
2583 if (skip_wb
2584 && (regno == cfun->machine->frame.wb_candidate1
2585 || regno == cfun->machine->frame.wb_candidate2))
2586 continue;
2587
2588 reg = gen_rtx_REG (mode, regno);
2589 offset = start_offset + cfun->machine->frame.reg_offset[regno];
0ec74a1e
JW
2590 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2591 offset));
64dedd72
JW
2592
2593 regno2 = aarch64_next_callee_save (regno + 1, limit);
2594
2595 if (regno2 <= limit
2596 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2597 == cfun->machine->frame.reg_offset[regno2]))
2598
43e9d192 2599 {
0ec74a1e 2600 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
2601 rtx mem2;
2602
2603 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
8ed2fc62
JW
2604 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2605 offset));
2606 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2607 reg2));
0b4a9743 2608
64dedd72
JW
2609 /* The first part of a frame-related parallel insn is
2610 always assumed to be relevant to the frame
 2611	     calculations; subsequent parts are only
2612 frame-related if explicitly marked. */
2613 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2614 regno = regno2;
2615 }
2616 else
8ed2fc62
JW
2617 insn = emit_move_insn (mem, reg);
2618
2619 RTX_FRAME_RELATED_P (insn) = 1;
2620 }
2621}
2622
2623static void
ef4bddc2 2624aarch64_restore_callee_saves (machine_mode mode,
8ed2fc62 2625 HOST_WIDE_INT start_offset, unsigned start,
dd991abb 2626 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 2627{
8ed2fc62 2628 rtx base_rtx = stack_pointer_rtx;
ef4bddc2 2629 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
8ed2fc62
JW
2630 ? gen_frame_mem : gen_rtx_MEM);
2631 unsigned regno;
2632 unsigned regno2;
2633 HOST_WIDE_INT offset;
2634
2635 for (regno = aarch64_next_callee_save (start, limit);
2636 regno <= limit;
2637 regno = aarch64_next_callee_save (regno + 1, limit))
2638 {
ae13fce3 2639 rtx reg, mem;
8ed2fc62 2640
ae13fce3
JW
2641 if (skip_wb
2642 && (regno == cfun->machine->frame.wb_candidate1
2643 || regno == cfun->machine->frame.wb_candidate2))
2644 continue;
2645
2646 reg = gen_rtx_REG (mode, regno);
8ed2fc62
JW
2647 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2648 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2649
2650 regno2 = aarch64_next_callee_save (regno + 1, limit);
2651
2652 if (regno2 <= limit
2653 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2654 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 2655 {
8ed2fc62
JW
2656 rtx reg2 = gen_rtx_REG (mode, regno2);
2657 rtx mem2;
2658
2659 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2660 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 2661 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 2662
dd991abb 2663 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 2664 regno = regno2;
43e9d192 2665 }
8ed2fc62 2666 else
dd991abb
RH
2667 emit_move_insn (reg, mem);
2668 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 2669 }
43e9d192
IB
2670}
2671
2672/* AArch64 stack frames generated by this compiler look like:
2673
2674 +-------------------------------+
2675 | |
2676 | incoming stack arguments |
2677 | |
34834420
MS
2678 +-------------------------------+
2679 | | <-- incoming stack pointer (aligned)
43e9d192
IB
2680 | callee-allocated save area |
2681 | for register varargs |
2682 | |
34834420
MS
2683 +-------------------------------+
2684 | local variables | <-- frame_pointer_rtx
43e9d192
IB
2685 | |
2686 +-------------------------------+
454fdba9
RL
2687 | padding0 | \
2688 +-------------------------------+ |
454fdba9 2689 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
2690 +-------------------------------+ |
2691 | LR' | |
2692 +-------------------------------+ |
34834420
MS
2693 | FP' | / <- hard_frame_pointer_rtx (aligned)
2694 +-------------------------------+
43e9d192
IB
2695 | dynamic allocation |
2696 +-------------------------------+
34834420
MS
2697 | padding |
2698 +-------------------------------+
2699 | outgoing stack arguments | <-- arg_pointer
2700 | |
2701 +-------------------------------+
2702 | | <-- stack_pointer_rtx (aligned)
43e9d192 2703
34834420
MS
2704 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2705 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2706 unchanged. */
43e9d192
IB
2707
2708/* Generate the prologue instructions for entry into a function.
2709 Establish the stack frame by decreasing the stack pointer with a
2710 properly calculated size and, if necessary, create a frame record
2711 filled with the values of LR and previous frame pointer. The
6991c977 2712 current FP is also set up if it is in use. */
43e9d192
IB
2713
2714void
2715aarch64_expand_prologue (void)
2716{
2717 /* sub sp, sp, #<frame_size>
2718 stp {fp, lr}, [sp, #<frame_size> - 16]
2719 add fp, sp, #<frame_size> - hardfp_offset
2720 stp {cs_reg}, [fp, #-16] etc.
2721
2722 sub sp, sp, <final_adjustment_if_any>
2723 */
43e9d192 2724 HOST_WIDE_INT frame_size, offset;
1c960e02 2725 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
dd991abb 2726 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2727 rtx_insn *insn;
43e9d192
IB
2728
2729 aarch64_layout_frame ();
43e9d192 2730
dd991abb
RH
2731 offset = frame_size = cfun->machine->frame.frame_size;
2732 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2733 fp_offset = frame_size - hard_fp_offset;
43e9d192 2734
dd991abb
RH
2735 if (flag_stack_usage_info)
2736 current_function_static_stack_size = frame_size;
43e9d192 2737
44c0e7b9 2738 /* Store pairs and load pairs have a range only -512 to 504. */
43e9d192
IB
2739 if (offset >= 512)
2740 {
2741 /* When the frame has a large size, an initial decrease is done on
2742 the stack pointer to jump over the callee-allocated save area for
2743 register varargs, the local variable area and/or the callee-saved
2744 register area. This will allow the pre-index write-back
2745 store pair instructions to be used for setting up the stack frame
2746 efficiently. */
dd991abb 2747 offset = hard_fp_offset;
43e9d192
IB
2748 if (offset >= 512)
2749 offset = cfun->machine->frame.saved_regs_size;
2750
2751 frame_size -= (offset + crtl->outgoing_args_size);
2752 fp_offset = 0;
2753
2754 if (frame_size >= 0x1000000)
2755 {
2756 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2757 emit_move_insn (op0, GEN_INT (-frame_size));
dd991abb
RH
2758 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2759
2760 add_reg_note (insn, REG_CFA_ADJUST_CFA,
f7df4a84 2761 gen_rtx_SET (stack_pointer_rtx,
dd991abb
RH
2762 plus_constant (Pmode, stack_pointer_rtx,
2763 -frame_size)));
2764 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
2765 }
2766 else if (frame_size > 0)
2767 {
dd991abb
RH
2768 int hi_ofs = frame_size & 0xfff000;
2769 int lo_ofs = frame_size & 0x000fff;
2770
2771 if (hi_ofs)
43e9d192
IB
2772 {
2773 insn = emit_insn (gen_add2_insn
dd991abb 2774 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
43e9d192
IB
2775 RTX_FRAME_RELATED_P (insn) = 1;
2776 }
dd991abb 2777 if (lo_ofs)
43e9d192
IB
2778 {
2779 insn = emit_insn (gen_add2_insn
dd991abb 2780 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
43e9d192
IB
2781 RTX_FRAME_RELATED_P (insn) = 1;
2782 }
2783 }
2784 }
2785 else
2786 frame_size = -1;
2787
2788 if (offset > 0)
2789 {
ae13fce3
JW
2790 bool skip_wb = false;
2791
43e9d192
IB
2792 if (frame_pointer_needed)
2793 {
c5e1f66e
JW
2794 skip_wb = true;
2795
43e9d192
IB
2796 if (fp_offset)
2797 {
2798 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2799 GEN_INT (-offset)));
2800 RTX_FRAME_RELATED_P (insn) = 1;
80c11907
JW
2801
2802 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
c5e1f66e 2803 R30_REGNUM, false);
43e9d192
IB
2804 }
2805 else
80c11907 2806 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
43e9d192
IB
2807
2808 /* Set up frame pointer to point to the location of the
2809 previous frame pointer on the stack. */
2810 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2811 stack_pointer_rtx,
2812 GEN_INT (fp_offset)));
43e9d192 2813 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 2814 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192
IB
2815 }
2816 else
2817 {
c5e1f66e
JW
2818 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2819 unsigned reg2 = cfun->machine->frame.wb_candidate2;
80c11907 2820
c5e1f66e
JW
2821 if (fp_offset
2822 || reg1 == FIRST_PSEUDO_REGISTER
2823 || (reg2 == FIRST_PSEUDO_REGISTER
2824 && offset >= 256))
2825 {
2826 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2827 GEN_INT (-offset)));
2828 RTX_FRAME_RELATED_P (insn) = 1;
2829 }
2830 else
2831 {
ef4bddc2 2832 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
c5e1f66e
JW
2833
2834 skip_wb = true;
2835
2836 if (reg2 == FIRST_PSEUDO_REGISTER)
2837 aarch64_pushwb_single_reg (mode1, reg1, offset);
2838 else
2839 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2840 }
43e9d192
IB
2841 }
2842
c5e1f66e
JW
2843 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2844 skip_wb);
ae13fce3
JW
2845 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2846 skip_wb);
43e9d192
IB
2847 }
2848
2849 /* when offset >= 512,
2850 sub sp, sp, #<outgoing_args_size> */
2851 if (frame_size > -1)
2852 {
2853 if (crtl->outgoing_args_size > 0)
2854 {
2855 insn = emit_insn (gen_add2_insn
2856 (stack_pointer_rtx,
2857 GEN_INT (- crtl->outgoing_args_size)));
2858 RTX_FRAME_RELATED_P (insn) = 1;
2859 }
2860 }
2861}
2862
4f942779
RL
2863/* Return TRUE if we can use a simple_return insn.
2864
2865 This function checks whether the callee saved stack is empty, which
 2866   means no restore actions are needed. The pro_and_epilogue will use
2867 this to check whether shrink-wrapping opt is feasible. */
2868
2869bool
2870aarch64_use_return_insn_p (void)
2871{
2872 if (!reload_completed)
2873 return false;
2874
2875 if (crtl->profile)
2876 return false;
2877
2878 aarch64_layout_frame ();
2879
2880 return cfun->machine->frame.frame_size == 0;
2881}
2882
43e9d192
IB
2883/* Generate the epilogue instructions for returning from a function. */
2884void
2885aarch64_expand_epilogue (bool for_sibcall)
2886{
1c960e02 2887 HOST_WIDE_INT frame_size, offset;
43e9d192 2888 HOST_WIDE_INT fp_offset;
dd991abb 2889 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2890 rtx_insn *insn;
7e8c2bd5
JW
 2891	  /* We need to add a memory barrier to prevent reads from the deallocated stack.  */
2892 bool need_barrier_p = (get_frame_size () != 0
2893 || cfun->machine->frame.saved_varargs_size);
43e9d192
IB
2894
2895 aarch64_layout_frame ();
43e9d192 2896
1c960e02 2897 offset = frame_size = cfun->machine->frame.frame_size;
dd991abb
RH
2898 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2899 fp_offset = frame_size - hard_fp_offset;
44c0e7b9
YZ
2900
2901 /* Store pairs and load pairs have a range only -512 to 504. */
43e9d192
IB
2902 if (offset >= 512)
2903 {
dd991abb 2904 offset = hard_fp_offset;
43e9d192
IB
2905 if (offset >= 512)
2906 offset = cfun->machine->frame.saved_regs_size;
2907
2908 frame_size -= (offset + crtl->outgoing_args_size);
2909 fp_offset = 0;
2910 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2911 {
2912 insn = emit_insn (gen_add2_insn
2913 (stack_pointer_rtx,
2914 GEN_INT (crtl->outgoing_args_size)));
2915 RTX_FRAME_RELATED_P (insn) = 1;
2916 }
2917 }
2918 else
2919 frame_size = -1;
2920
2921 /* If there were outgoing arguments or we've done dynamic stack
2922 allocation, then restore the stack pointer from the frame
2923 pointer. This is at most one insn and more efficient than using
2924 GCC's internal mechanism. */
2925 if (frame_pointer_needed
2926 && (crtl->outgoing_args_size || cfun->calls_alloca))
2927 {
7e8c2bd5
JW
2928 if (cfun->calls_alloca)
2929 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2930
43e9d192
IB
2931 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2932 hard_frame_pointer_rtx,
8f454e9f
JW
2933 GEN_INT (0)));
2934 offset = offset - fp_offset;
43e9d192
IB
2935 }
2936
43e9d192
IB
2937 if (offset > 0)
2938 {
4b92caa1
JW
2939 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2940 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2941 bool skip_wb = true;
dd991abb 2942 rtx cfi_ops = NULL;
4b92caa1 2943
43e9d192 2944 if (frame_pointer_needed)
4b92caa1
JW
2945 fp_offset = 0;
2946 else if (fp_offset
2947 || reg1 == FIRST_PSEUDO_REGISTER
2948 || (reg2 == FIRST_PSEUDO_REGISTER
2949 && offset >= 256))
2950 skip_wb = false;
2951
2952 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
dd991abb 2953 skip_wb, &cfi_ops);
4b92caa1 2954 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
dd991abb 2955 skip_wb, &cfi_ops);
4b92caa1 2956
7e8c2bd5
JW
2957 if (need_barrier_p)
2958 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2959
4b92caa1 2960 if (skip_wb)
43e9d192 2961 {
ef4bddc2 2962 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
dd991abb 2963 rtx rreg1 = gen_rtx_REG (mode1, reg1);
4b92caa1 2964
dd991abb 2965 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
4b92caa1 2966 if (reg2 == FIRST_PSEUDO_REGISTER)
dd991abb
RH
2967 {
2968 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2969 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2970 mem = gen_rtx_MEM (mode1, mem);
2971 insn = emit_move_insn (rreg1, mem);
2972 }
4b92caa1
JW
2973 else
2974 {
dd991abb 2975 rtx rreg2 = gen_rtx_REG (mode1, reg2);
4b92caa1 2976
dd991abb
RH
2977 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2978 insn = emit_insn (aarch64_gen_loadwb_pair
2979 (mode1, stack_pointer_rtx, rreg1,
2980 rreg2, offset));
4b92caa1 2981 }
43e9d192 2982 }
43e9d192
IB
2983 else
2984 {
2985 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2986 GEN_INT (offset)));
43e9d192 2987 }
43e9d192 2988
dd991abb
RH
2989 /* Reset the CFA to be SP + FRAME_SIZE. */
2990 rtx new_cfa = stack_pointer_rtx;
2991 if (frame_size > 0)
2992 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2993 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2994 REG_NOTES (insn) = cfi_ops;
43e9d192 2995 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
2996 }
2997
dd991abb 2998 if (frame_size > 0)
43e9d192 2999 {
7e8c2bd5
JW
3000 if (need_barrier_p)
3001 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3002
43e9d192
IB
3003 if (frame_size >= 0x1000000)
3004 {
3005 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
3006 emit_move_insn (op0, GEN_INT (frame_size));
dd991abb 3007 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
43e9d192 3008 }
dd991abb 3009 else
43e9d192 3010 {
dd991abb
RH
3011 int hi_ofs = frame_size & 0xfff000;
3012 int lo_ofs = frame_size & 0x000fff;
3013
3014 if (hi_ofs && lo_ofs)
43e9d192
IB
3015 {
3016 insn = emit_insn (gen_add2_insn
dd991abb 3017 (stack_pointer_rtx, GEN_INT (hi_ofs)));
43e9d192 3018 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 3019 frame_size = lo_ofs;
43e9d192 3020 }
dd991abb
RH
3021 insn = emit_insn (gen_add2_insn
3022 (stack_pointer_rtx, GEN_INT (frame_size)));
43e9d192
IB
3023 }
3024
dd991abb
RH
3025 /* Reset the CFA to be SP + 0. */
3026 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
3027 RTX_FRAME_RELATED_P (insn) = 1;
3028 }
3029
3030 /* Stack adjustment for exception handler. */
3031 if (crtl->calls_eh_return)
3032 {
3033 /* We need to unwind the stack by the offset computed by
3034 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
3035 to be SP; letting the CFA move during this adjustment
3036 is just as correct as retaining the CFA from the body
3037 of the function. Therefore, do nothing special. */
3038 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
3039 }
3040
3041 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
3042 if (!for_sibcall)
3043 emit_jump_insn (ret_rtx);
3044}
3045
3046/* Return the place to copy the exception unwinding return address to.
 3047	   This will probably be a stack slot, but could (in theory) be the
 3048	   return register.  */
3049rtx
3050aarch64_final_eh_return_addr (void)
3051{
1c960e02
MS
3052 HOST_WIDE_INT fp_offset;
3053
43e9d192 3054 aarch64_layout_frame ();
1c960e02
MS
3055
3056 fp_offset = cfun->machine->frame.frame_size
3057 - cfun->machine->frame.hard_fp_offset;
43e9d192
IB
3058
3059 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
3060 return gen_rtx_REG (DImode, LR_REGNUM);
3061
3062 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
3063 result in a store to save LR introduced by builtin_eh_return () being
3064 incorrectly deleted because the alias is not detected.
3065 So in the calculation of the address to copy the exception unwinding
3066 return address to, we note 2 cases.
3067 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
3068 we return a SP-relative location since all the addresses are SP-relative
3069 in this case. This prevents the store from being optimized away.
3070 If the fp_offset is not 0, then the addresses will be FP-relative and
3071 therefore we return a FP-relative location. */
3072
3073 if (frame_pointer_needed)
3074 {
3075 if (fp_offset)
3076 return gen_frame_mem (DImode,
3077 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
3078 else
3079 return gen_frame_mem (DImode,
3080 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
3081 }
3082
3083 /* If FP is not needed, we calculate the location of LR, which would be
3084 at the top of the saved registers block. */
3085
3086 return gen_frame_mem (DImode,
3087 plus_constant (Pmode,
3088 stack_pointer_rtx,
3089 fp_offset
3090 + cfun->machine->frame.saved_regs_size
3091 - 2 * UNITS_PER_WORD));
3092}
3093
9dfc162c
JG
3094/* Possibly output code to build up a constant in a register. For
3095 the benefit of the costs infrastructure, returns the number of
3096 instructions which would be emitted. GENERATE inhibits or
3097 enables code generation. */
3098
3099static int
3100aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
43e9d192 3101{
9dfc162c
JG
3102 int insns = 0;
3103
43e9d192 3104 if (aarch64_bitmask_imm (val, DImode))
9dfc162c
JG
3105 {
3106 if (generate)
3107 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
3108 insns = 1;
3109 }
43e9d192
IB
3110 else
3111 {
3112 int i;
3113 int ncount = 0;
3114 int zcount = 0;
3115 HOST_WIDE_INT valp = val >> 16;
3116 HOST_WIDE_INT valm;
3117 HOST_WIDE_INT tval;
3118
3119 for (i = 16; i < 64; i += 16)
3120 {
3121 valm = (valp & 0xffff);
3122
3123 if (valm != 0)
3124 ++ zcount;
3125
3126 if (valm != 0xffff)
3127 ++ ncount;
3128
3129 valp >>= 16;
3130 }
3131
3132 /* zcount contains the number of additional MOVK instructions
3133 required if the constant is built up with an initial MOVZ instruction,
3134 while ncount is the number of MOVK instructions required if starting
3135 with a MOVN instruction. Choose the sequence that yields the fewest
3136 number of instructions, preferring MOVZ instructions when they are both
3137 the same. */
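      /* For instance (an illustrative value, not from any caller),
	 0x0000123400005678 ends up as
	     movz  x<regnum>, #0x5678
	     movk  x<regnum>, #0x1234, lsl #32
	 because only one other halfword differs from zero, whereas a value
	 whose halfwords are mostly 0xffff is cheaper to start from MOVN.  */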
3138 if (ncount < zcount)
3139 {
9dfc162c
JG
3140 if (generate)
3141 emit_move_insn (gen_rtx_REG (Pmode, regnum),
3142 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
43e9d192 3143 tval = 0xffff;
9dfc162c 3144 insns++;
43e9d192
IB
3145 }
3146 else
3147 {
9dfc162c
JG
3148 if (generate)
3149 emit_move_insn (gen_rtx_REG (Pmode, regnum),
3150 GEN_INT (val & 0xffff));
43e9d192 3151 tval = 0;
9dfc162c 3152 insns++;
43e9d192
IB
3153 }
3154
3155 val >>= 16;
3156
3157 for (i = 16; i < 64; i += 16)
3158 {
3159 if ((val & 0xffff) != tval)
9dfc162c
JG
3160 {
3161 if (generate)
3162 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
3163 GEN_INT (i),
3164 GEN_INT (val & 0xffff)));
3165 insns++;
3166 }
43e9d192
IB
3167 val >>= 16;
3168 }
3169 }
9dfc162c 3170 return insns;
43e9d192
IB
3171}
3172
3173static void
d9600ae5 3174aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
43e9d192
IB
3175{
3176 HOST_WIDE_INT mdelta = delta;
d9600ae5
SN
3177 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
3178 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
43e9d192
IB
3179
3180 if (mdelta < 0)
3181 mdelta = -mdelta;
3182
3183 if (mdelta >= 4096 * 4096)
3184 {
9dfc162c 3185 (void) aarch64_build_constant (scratchreg, delta, true);
d9600ae5 3186 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
43e9d192
IB
3187 }
3188 else if (mdelta > 0)
3189 {
43e9d192 3190 if (mdelta >= 4096)
d9600ae5 3191 {
f7df4a84 3192 emit_insn (gen_rtx_SET (scratch_rtx, GEN_INT (mdelta / 4096)));
d9600ae5
SN
3193 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
3194 if (delta < 0)
f7df4a84 3195 emit_insn (gen_rtx_SET (this_rtx,
d9600ae5
SN
3196 gen_rtx_MINUS (Pmode, this_rtx, shift)));
3197 else
f7df4a84 3198 emit_insn (gen_rtx_SET (this_rtx,
d9600ae5
SN
3199 gen_rtx_PLUS (Pmode, this_rtx, shift)));
3200 }
43e9d192 3201 if (mdelta % 4096 != 0)
d9600ae5
SN
3202 {
3203 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
f7df4a84 3204 emit_insn (gen_rtx_SET (this_rtx,
d9600ae5
SN
3205 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
3206 }
43e9d192
IB
3207 }
3208}
3209
3210/* Output code to add DELTA to the first argument, and then jump
3211 to FUNCTION. Used for C++ multiple inheritance. */
3212static void
3213aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
3214 HOST_WIDE_INT delta,
3215 HOST_WIDE_INT vcall_offset,
3216 tree function)
3217{
3218 /* The this pointer is always in x0. Note that this differs from
 3219	     Arm where the this pointer may be bumped to r1 if r0 is required
3220 to return a pointer to an aggregate. On AArch64 a result value
3221 pointer will be in x8. */
3222 int this_regno = R0_REGNUM;
5d8a22a5
DM
3223 rtx this_rtx, temp0, temp1, addr, funexp;
3224 rtx_insn *insn;
43e9d192 3225
75f1d6fc
SN
3226 reload_completed = 1;
3227 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
3228
3229 if (vcall_offset == 0)
d9600ae5 3230 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
3231 else
3232 {
28514dda 3233 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 3234
75f1d6fc
SN
3235 this_rtx = gen_rtx_REG (Pmode, this_regno);
3236 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
3237 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 3238
75f1d6fc
SN
3239 addr = this_rtx;
3240 if (delta != 0)
3241 {
3242 if (delta >= -256 && delta < 256)
3243 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
3244 plus_constant (Pmode, this_rtx, delta));
3245 else
d9600ae5 3246 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
3247 }
3248
28514dda
YZ
3249 if (Pmode == ptr_mode)
3250 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
3251 else
3252 aarch64_emit_move (temp0,
3253 gen_rtx_ZERO_EXTEND (Pmode,
3254 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 3255
28514dda 3256 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 3257 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
3258 else
3259 {
9dfc162c 3260 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
75f1d6fc 3261 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
3262 }
3263
28514dda
YZ
3264 if (Pmode == ptr_mode)
3265 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
3266 else
3267 aarch64_emit_move (temp1,
3268 gen_rtx_SIGN_EXTEND (Pmode,
3269 gen_rtx_MEM (ptr_mode, addr)));
3270
75f1d6fc 3271 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
3272 }
3273
75f1d6fc
SN
3274 /* Generate a tail call to the target function. */
3275 if (!TREE_USED (function))
3276 {
3277 assemble_external (function);
3278 TREE_USED (function) = 1;
3279 }
3280 funexp = XEXP (DECL_RTL (function), 0);
3281 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
3282 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
3283 SIBLING_CALL_P (insn) = 1;
3284
3285 insn = get_insns ();
3286 shorten_branches (insn);
3287 final_start_function (insn, file, 1);
3288 final (insn, file, 1);
43e9d192 3289 final_end_function ();
75f1d6fc
SN
3290
3291 /* Stop pretending to be a post-reload pass. */
3292 reload_completed = 0;
43e9d192
IB
3293}
3294
43e9d192
IB
3295static bool
3296aarch64_tls_referenced_p (rtx x)
3297{
3298 if (!TARGET_HAVE_TLS)
3299 return false;
e7de8563
RS
3300 subrtx_iterator::array_type array;
3301 FOR_EACH_SUBRTX (iter, array, x, ALL)
3302 {
3303 const_rtx x = *iter;
3304 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
3305 return true;
3306 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3307 TLS offsets, not real symbol references. */
3308 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3309 iter.skip_subrtxes ();
3310 }
3311 return false;
43e9d192
IB
3312}
3313
3314
3315static int
3316aarch64_bitmasks_cmp (const void *i1, const void *i2)
3317{
3318 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
3319 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
3320
3321 if (*imm1 < *imm2)
3322 return -1;
3323 if (*imm1 > *imm2)
3324 return +1;
3325 return 0;
3326}
3327
3328
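/* Populate aarch64_bitmasks with every value representable as a "bitmask
   immediate" for the logical instructions: a run of S consecutive ones
   (1 <= S < E), rotated right by R and replicated across the register in
   elements of E = 2, 4, ..., 64 bits.  For example, E = 16, S = 8, R = 0
   contributes 0x00ff00ff00ff00ff; neither 0 nor ~0 is representable.
   The table is sorted so that aarch64_bitmask_imm can bsearch it.  */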
3329static void
3330aarch64_build_bitmask_table (void)
3331{
3332 unsigned HOST_WIDE_INT mask, imm;
3333 unsigned int log_e, e, s, r;
3334 unsigned int nimms = 0;
3335
3336 for (log_e = 1; log_e <= 6; log_e++)
3337 {
3338 e = 1 << log_e;
3339 if (e == 64)
3340 mask = ~(HOST_WIDE_INT) 0;
3341 else
3342 mask = ((HOST_WIDE_INT) 1 << e) - 1;
3343 for (s = 1; s < e; s++)
3344 {
3345 for (r = 0; r < e; r++)
3346 {
3347 /* set s consecutive bits to 1 (s < 64) */
3348 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
3349 /* rotate right by r */
3350 if (r != 0)
3351 imm = ((imm >> r) | (imm << (e - r))) & mask;
3352 /* replicate the constant depending on SIMD size */
3353 switch (log_e) {
3354 case 1: imm |= (imm << 2);
3355 case 2: imm |= (imm << 4);
3356 case 3: imm |= (imm << 8);
3357 case 4: imm |= (imm << 16);
3358 case 5: imm |= (imm << 32);
3359 case 6:
3360 break;
3361 default:
3362 gcc_unreachable ();
3363 }
3364 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
3365 aarch64_bitmasks[nimms++] = imm;
3366 }
3367 }
3368 }
3369
3370 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
3371 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
3372 aarch64_bitmasks_cmp);
3373}
3374
3375
3376/* Return true if val can be encoded as a 12-bit unsigned immediate with
3377 a left shift of 0 or 12 bits. */
3378bool
3379aarch64_uimm12_shift (HOST_WIDE_INT val)
3380{
3381 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3382 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3383 );
3384}
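/* For example, 0xabc and 0xabc000 both pass the test above and can be used
   directly as an ADD/SUB immediate (the latter with "lsl #12"), whereas
   0xabc00 straddles the two fields and has to be split or built in a
   register first.  */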
3385
3386
3387/* Return true if val is an immediate that can be loaded into a
3388 register by a MOVZ instruction. */
3389static bool
ef4bddc2 3390aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3391{
3392 if (GET_MODE_SIZE (mode) > 4)
3393 {
3394 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3395 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3396 return 1;
3397 }
3398 else
3399 {
3400 /* Ignore sign extension. */
3401 val &= (HOST_WIDE_INT) 0xffffffff;
3402 }
3403 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3404 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3405}
3406
3407
3408/* Return true if val is a valid bitmask immediate. */
3409bool
ef4bddc2 3410aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3411{
3412 if (GET_MODE_SIZE (mode) < 8)
3413 {
3414 /* Replicate bit pattern. */
3415 val &= (HOST_WIDE_INT) 0xffffffff;
3416 val |= val << 32;
3417 }
3418 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
3419 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
3420}
3421
3422
3423/* Return true if val is an immediate that can be loaded into a
3424 register in a single instruction. */
3425bool
ef4bddc2 3426aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3427{
3428 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
3429 return 1;
3430 return aarch64_bitmask_imm (val, mode);
3431}
3432
3433static bool
ef4bddc2 3434aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
3435{
3436 rtx base, offset;
7eda14e1 3437
43e9d192
IB
3438 if (GET_CODE (x) == HIGH)
3439 return true;
3440
3441 split_const (x, &base, &offset);
3442 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 3443 {
f8b756b7 3444 if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
28514dda
YZ
3445 != SYMBOL_FORCE_TO_MEM)
3446 return true;
3447 else
3448 /* Avoid generating a 64-bit relocation in ILP32; leave
3449 to aarch64_expand_mov_immediate to handle it properly. */
3450 return mode != ptr_mode;
3451 }
43e9d192
IB
3452
3453 return aarch64_tls_referenced_p (x);
3454}
3455
3456/* Return true if register REGNO is a valid index register.
3457 STRICT_P is true if REG_OK_STRICT is in effect. */
3458
3459bool
3460aarch64_regno_ok_for_index_p (int regno, bool strict_p)
3461{
3462 if (!HARD_REGISTER_NUM_P (regno))
3463 {
3464 if (!strict_p)
3465 return true;
3466
3467 if (!reg_renumber)
3468 return false;
3469
3470 regno = reg_renumber[regno];
3471 }
3472 return GP_REGNUM_P (regno);
3473}
3474
3475/* Return true if register REGNO is a valid base register for mode MODE.
3476 STRICT_P is true if REG_OK_STRICT is in effect. */
3477
3478bool
3479aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3480{
3481 if (!HARD_REGISTER_NUM_P (regno))
3482 {
3483 if (!strict_p)
3484 return true;
3485
3486 if (!reg_renumber)
3487 return false;
3488
3489 regno = reg_renumber[regno];
3490 }
3491
3492 /* The fake registers will be eliminated to either the stack or
3493 hard frame pointer, both of which are usually valid base registers.
3494 Reload deals with the cases where the eliminated form isn't valid. */
3495 return (GP_REGNUM_P (regno)
3496 || regno == SP_REGNUM
3497 || regno == FRAME_POINTER_REGNUM
3498 || regno == ARG_POINTER_REGNUM);
3499}
3500
3501/* Return true if X is a valid base register for mode MODE.
3502 STRICT_P is true if REG_OK_STRICT is in effect. */
3503
3504static bool
3505aarch64_base_register_rtx_p (rtx x, bool strict_p)
3506{
3507 if (!strict_p && GET_CODE (x) == SUBREG)
3508 x = SUBREG_REG (x);
3509
3510 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3511}
3512
3513/* Return true if address offset is a valid index. If it is, fill in INFO
3514 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3515
3516static bool
3517aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 3518 machine_mode mode, bool strict_p)
43e9d192
IB
3519{
3520 enum aarch64_address_type type;
3521 rtx index;
3522 int shift;
3523
3524 /* (reg:P) */
3525 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3526 && GET_MODE (x) == Pmode)
3527 {
3528 type = ADDRESS_REG_REG;
3529 index = x;
3530 shift = 0;
3531 }
3532 /* (sign_extend:DI (reg:SI)) */
3533 else if ((GET_CODE (x) == SIGN_EXTEND
3534 || GET_CODE (x) == ZERO_EXTEND)
3535 && GET_MODE (x) == DImode
3536 && GET_MODE (XEXP (x, 0)) == SImode)
3537 {
3538 type = (GET_CODE (x) == SIGN_EXTEND)
3539 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3540 index = XEXP (x, 0);
3541 shift = 0;
3542 }
3543 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3544 else if (GET_CODE (x) == MULT
3545 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3546 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3547 && GET_MODE (XEXP (x, 0)) == DImode
3548 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3549 && CONST_INT_P (XEXP (x, 1)))
3550 {
3551 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3552 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3553 index = XEXP (XEXP (x, 0), 0);
3554 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3555 }
3556 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3557 else if (GET_CODE (x) == ASHIFT
3558 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3559 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3560 && GET_MODE (XEXP (x, 0)) == DImode
3561 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3562 && CONST_INT_P (XEXP (x, 1)))
3563 {
3564 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3565 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3566 index = XEXP (XEXP (x, 0), 0);
3567 shift = INTVAL (XEXP (x, 1));
3568 }
3569 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3570 else if ((GET_CODE (x) == SIGN_EXTRACT
3571 || GET_CODE (x) == ZERO_EXTRACT)
3572 && GET_MODE (x) == DImode
3573 && GET_CODE (XEXP (x, 0)) == MULT
3574 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3575 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3576 {
3577 type = (GET_CODE (x) == SIGN_EXTRACT)
3578 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3579 index = XEXP (XEXP (x, 0), 0);
3580 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3581 if (INTVAL (XEXP (x, 1)) != 32 + shift
3582 || INTVAL (XEXP (x, 2)) != 0)
3583 shift = -1;
3584 }
3585 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3586 (const_int 0xffffffff<<shift)) */
3587 else if (GET_CODE (x) == AND
3588 && GET_MODE (x) == DImode
3589 && GET_CODE (XEXP (x, 0)) == MULT
3590 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3591 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3592 && CONST_INT_P (XEXP (x, 1)))
3593 {
3594 type = ADDRESS_REG_UXTW;
3595 index = XEXP (XEXP (x, 0), 0);
3596 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3597 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3598 shift = -1;
3599 }
3600 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3601 else if ((GET_CODE (x) == SIGN_EXTRACT
3602 || GET_CODE (x) == ZERO_EXTRACT)
3603 && GET_MODE (x) == DImode
3604 && GET_CODE (XEXP (x, 0)) == ASHIFT
3605 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3606 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3607 {
3608 type = (GET_CODE (x) == SIGN_EXTRACT)
3609 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3610 index = XEXP (XEXP (x, 0), 0);
3611 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3612 if (INTVAL (XEXP (x, 1)) != 32 + shift
3613 || INTVAL (XEXP (x, 2)) != 0)
3614 shift = -1;
3615 }
3616 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3617 (const_int 0xffffffff<<shift)) */
3618 else if (GET_CODE (x) == AND
3619 && GET_MODE (x) == DImode
3620 && GET_CODE (XEXP (x, 0)) == ASHIFT
3621 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3622 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3623 && CONST_INT_P (XEXP (x, 1)))
3624 {
3625 type = ADDRESS_REG_UXTW;
3626 index = XEXP (XEXP (x, 0), 0);
3627 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3628 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3629 shift = -1;
3630 }
3631 /* (mult:P (reg:P) (const_int scale)) */
3632 else if (GET_CODE (x) == MULT
3633 && GET_MODE (x) == Pmode
3634 && GET_MODE (XEXP (x, 0)) == Pmode
3635 && CONST_INT_P (XEXP (x, 1)))
3636 {
3637 type = ADDRESS_REG_REG;
3638 index = XEXP (x, 0);
3639 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3640 }
3641 /* (ashift:P (reg:P) (const_int shift)) */
3642 else if (GET_CODE (x) == ASHIFT
3643 && GET_MODE (x) == Pmode
3644 && GET_MODE (XEXP (x, 0)) == Pmode
3645 && CONST_INT_P (XEXP (x, 1)))
3646 {
3647 type = ADDRESS_REG_REG;
3648 index = XEXP (x, 0);
3649 shift = INTVAL (XEXP (x, 1));
3650 }
3651 else
3652 return false;
3653
3654 if (GET_CODE (index) == SUBREG)
3655 index = SUBREG_REG (index);
3656
3657 if ((shift == 0 ||
3658 (shift > 0 && shift <= 3
3659 && (1 << shift) == GET_MODE_SIZE (mode)))
3660 && REG_P (index)
3661 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3662 {
3663 info->type = type;
3664 info->offset = index;
3665 info->shift = shift;
3666 return true;
3667 }
3668
3669 return false;
3670}
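/* As a rough illustration of the checks above: for a DImode (8-byte) access,
   a register index is accepted with shift 0 or shift 3, i.e. addresses such
   as [x0, x1], [x0, x1, lsl #3] or [x0, w1, sxtw #3]; a shift of 2 would be
   rejected because (1 << 2) does not match GET_MODE_SIZE (DImode).  */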
3671
44707478 3672bool
ef4bddc2 3673aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3674{
3675 return (offset >= -64 * GET_MODE_SIZE (mode)
3676 && offset < 64 * GET_MODE_SIZE (mode)
3677 && offset % GET_MODE_SIZE (mode) == 0);
3678}
3679
3680static inline bool
ef4bddc2 3681offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
3682 HOST_WIDE_INT offset)
3683{
3684 return offset >= -256 && offset < 256;
3685}
3686
3687static inline bool
ef4bddc2 3688offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3689{
3690 return (offset >= 0
3691 && offset < 4096 * GET_MODE_SIZE (mode)
3692 && offset % GET_MODE_SIZE (mode) == 0);
3693}
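/* Taken together, and using DImode (8 bytes) as an example, the three
   predicates above correspond roughly to:
     7-bit signed scaled offsets:    -512 .. 504 in steps of 8 (LDP/STP)
     9-bit signed unscaled offsets:  -256 .. 255 (LDUR/STUR)
     12-bit unsigned scaled offsets: 0 .. 32760 in steps of 8 (LDR/STR).  */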
3694
3695/* Return true if X is a valid address for machine mode MODE. If it is,
3696 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3697 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3698
3699static bool
3700aarch64_classify_address (struct aarch64_address_info *info,
ef4bddc2 3701 rtx x, machine_mode mode,
43e9d192
IB
3702 RTX_CODE outer_code, bool strict_p)
3703{
3704 enum rtx_code code = GET_CODE (x);
3705 rtx op0, op1;
2d8c6dc1
AH
3706
3707 /* On BE, we use load/store pair for all large int mode load/stores. */
3708 bool load_store_pair_p = (outer_code == PARALLEL
3709 || (BYTES_BIG_ENDIAN
3710 && aarch64_vect_struct_mode_p (mode)));
3711
43e9d192 3712 bool allow_reg_index_p =
2d8c6dc1
AH
3713 !load_store_pair_p
3714 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
3715 && !aarch64_vect_struct_mode_p (mode);
3716
3717 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
3718 REG addressing. */
3719 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
43e9d192
IB
3720 && (code != POST_INC && code != REG))
3721 return false;
3722
3723 switch (code)
3724 {
3725 case REG:
3726 case SUBREG:
3727 info->type = ADDRESS_REG_IMM;
3728 info->base = x;
3729 info->offset = const0_rtx;
3730 return aarch64_base_register_rtx_p (x, strict_p);
3731
3732 case PLUS:
3733 op0 = XEXP (x, 0);
3734 op1 = XEXP (x, 1);
15c0c5c9
JW
3735
3736 if (! strict_p
4aa81c2e 3737 && REG_P (op0)
15c0c5c9
JW
3738 && (op0 == virtual_stack_vars_rtx
3739 || op0 == frame_pointer_rtx
3740 || op0 == arg_pointer_rtx)
4aa81c2e 3741 && CONST_INT_P (op1))
15c0c5c9
JW
3742 {
3743 info->type = ADDRESS_REG_IMM;
3744 info->base = op0;
3745 info->offset = op1;
3746
3747 return true;
3748 }
3749
43e9d192
IB
3750 if (GET_MODE_SIZE (mode) != 0
3751 && CONST_INT_P (op1)
3752 && aarch64_base_register_rtx_p (op0, strict_p))
3753 {
3754 HOST_WIDE_INT offset = INTVAL (op1);
3755
3756 info->type = ADDRESS_REG_IMM;
3757 info->base = op0;
3758 info->offset = op1;
3759
3760 /* TImode and TFmode values are allowed in both pairs of X
3761 registers and individual Q registers. The available
3762 address modes are:
3763 X,X: 7-bit signed scaled offset
3764 Q: 9-bit signed offset
 3765 We conservatively require an offset representable in both modes.
3766 */
3767 if (mode == TImode || mode == TFmode)
44707478 3768 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3769 && offset_9bit_signed_unscaled_p (mode, offset));
3770
2d8c6dc1
AH
 3771 /* A 7-bit offset check because OImode will emit an ldp/stp
3772 instruction (only big endian will get here).
3773 For ldp/stp instructions, the offset is scaled for the size of a
3774 single element of the pair. */
3775 if (mode == OImode)
3776 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
3777
 3778 /* Three 9/12-bit offset checks because CImode will emit three
3779 ldr/str instructions (only big endian will get here). */
3780 if (mode == CImode)
3781 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3782 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
3783 || offset_12bit_unsigned_scaled_p (V16QImode,
3784 offset + 32)));
3785
 3786 /* Two 7-bit offset checks because XImode will emit two ldp/stp
3787 instructions (only big endian will get here). */
3788 if (mode == XImode)
3789 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3790 && aarch64_offset_7bit_signed_scaled_p (TImode,
3791 offset + 32));
3792
3793 if (load_store_pair_p)
43e9d192 3794 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3795 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3796 else
3797 return (offset_9bit_signed_unscaled_p (mode, offset)
3798 || offset_12bit_unsigned_scaled_p (mode, offset));
3799 }
3800
3801 if (allow_reg_index_p)
3802 {
3803 /* Look for base + (scaled/extended) index register. */
3804 if (aarch64_base_register_rtx_p (op0, strict_p)
3805 && aarch64_classify_index (info, op1, mode, strict_p))
3806 {
3807 info->base = op0;
3808 return true;
3809 }
3810 if (aarch64_base_register_rtx_p (op1, strict_p)
3811 && aarch64_classify_index (info, op0, mode, strict_p))
3812 {
3813 info->base = op1;
3814 return true;
3815 }
3816 }
3817
3818 return false;
3819
3820 case POST_INC:
3821 case POST_DEC:
3822 case PRE_INC:
3823 case PRE_DEC:
3824 info->type = ADDRESS_REG_WB;
3825 info->base = XEXP (x, 0);
3826 info->offset = NULL_RTX;
3827 return aarch64_base_register_rtx_p (info->base, strict_p);
3828
3829 case POST_MODIFY:
3830 case PRE_MODIFY:
3831 info->type = ADDRESS_REG_WB;
3832 info->base = XEXP (x, 0);
3833 if (GET_CODE (XEXP (x, 1)) == PLUS
3834 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3835 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3836 && aarch64_base_register_rtx_p (info->base, strict_p))
3837 {
3838 HOST_WIDE_INT offset;
3839 info->offset = XEXP (XEXP (x, 1), 1);
3840 offset = INTVAL (info->offset);
3841
3842 /* TImode and TFmode values are allowed in both pairs of X
3843 registers and individual Q registers. The available
3844 address modes are:
3845 X,X: 7-bit signed scaled offset
3846 Q: 9-bit signed offset
 3847 We conservatively require an offset representable in both modes.
3848 */
3849 if (mode == TImode || mode == TFmode)
44707478 3850 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3851 && offset_9bit_signed_unscaled_p (mode, offset));
3852
2d8c6dc1 3853 if (load_store_pair_p)
43e9d192 3854 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3855 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3856 else
3857 return offset_9bit_signed_unscaled_p (mode, offset);
3858 }
3859 return false;
3860
3861 case CONST:
3862 case SYMBOL_REF:
3863 case LABEL_REF:
79517551
SN
3864 /* load literal: pc-relative constant pool entry. Only supported
3865 for SI mode or larger. */
43e9d192 3866 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1
AH
3867
3868 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
3869 {
3870 rtx sym, addend;
3871
3872 split_const (x, &sym, &addend);
b4f50fd4
RR
3873 return ((GET_CODE (sym) == LABEL_REF
3874 || (GET_CODE (sym) == SYMBOL_REF
3875 && CONSTANT_POOL_ADDRESS_P (sym)
3876 && !nopcrelative_literal_loads)));
43e9d192
IB
3877 }
3878 return false;
3879
3880 case LO_SUM:
3881 info->type = ADDRESS_LO_SUM;
3882 info->base = XEXP (x, 0);
3883 info->offset = XEXP (x, 1);
3884 if (allow_reg_index_p
3885 && aarch64_base_register_rtx_p (info->base, strict_p))
3886 {
3887 rtx sym, offs;
3888 split_const (info->offset, &sym, &offs);
3889 if (GET_CODE (sym) == SYMBOL_REF
f8b756b7 3890 && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
43e9d192
IB
3891 == SYMBOL_SMALL_ABSOLUTE))
3892 {
3893 /* The symbol and offset must be aligned to the access size. */
3894 unsigned int align;
3895 unsigned int ref_size;
3896
3897 if (CONSTANT_POOL_ADDRESS_P (sym))
3898 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3899 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3900 {
3901 tree exp = SYMBOL_REF_DECL (sym);
3902 align = TYPE_ALIGN (TREE_TYPE (exp));
3903 align = CONSTANT_ALIGNMENT (exp, align);
3904 }
3905 else if (SYMBOL_REF_DECL (sym))
3906 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
3907 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3908 && SYMBOL_REF_BLOCK (sym) != NULL)
3909 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
3910 else
3911 align = BITS_PER_UNIT;
3912
3913 ref_size = GET_MODE_SIZE (mode);
3914 if (ref_size == 0)
3915 ref_size = GET_MODE_SIZE (DImode);
3916
3917 return ((INTVAL (offs) & (ref_size - 1)) == 0
3918 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3919 }
3920 }
3921 return false;
3922
3923 default:
3924 return false;
3925 }
3926}
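/* As a rough illustration of the classification above, for a DImode access:
   (plus (reg x0) (const_int 16)) is ADDRESS_REG_IMM,
   (plus (reg x0) (mult (reg x1) (const_int 8))) is ADDRESS_REG_REG with
   shift 3, and (post_inc (reg x0)) is ADDRESS_REG_WB.  */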
3927
3928bool
3929aarch64_symbolic_address_p (rtx x)
3930{
3931 rtx offset;
3932
3933 split_const (x, &x, &offset);
3934 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3935}
3936
3937/* Classify the base of symbolic expression X, given that X appears in
3938 context CONTEXT. */
da4f13a4
MS
3939
3940enum aarch64_symbol_type
3941aarch64_classify_symbolic_expression (rtx x,
3942 enum aarch64_symbol_context context)
43e9d192
IB
3943{
3944 rtx offset;
da4f13a4 3945
43e9d192 3946 split_const (x, &x, &offset);
f8b756b7 3947 return aarch64_classify_symbol (x, offset, context);
43e9d192
IB
3948}
3949
3950
3951/* Return TRUE if X is a legitimate address for accessing memory in
3952 mode MODE. */
3953static bool
ef4bddc2 3954aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
3955{
3956 struct aarch64_address_info addr;
3957
3958 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3959}
3960
3961/* Return TRUE if X is a legitimate address for accessing memory in
3962 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3963 pair operation. */
3964bool
ef4bddc2 3965aarch64_legitimate_address_p (machine_mode mode, rtx x,
aef66c94 3966 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
3967{
3968 struct aarch64_address_info addr;
3969
3970 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3971}
3972
3973/* Return TRUE if rtx X is immediate constant 0.0 */
3974bool
3520f7cc 3975aarch64_float_const_zero_rtx_p (rtx x)
43e9d192
IB
3976{
3977 REAL_VALUE_TYPE r;
3978
3979 if (GET_MODE (x) == VOIDmode)
3980 return false;
3981
3982 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3983 if (REAL_VALUE_MINUS_ZERO (r))
3984 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3985 return REAL_VALUES_EQUAL (r, dconst0);
3986}
3987
70f09188
AP
3988/* Return the fixed registers used for condition codes. */
3989
3990static bool
3991aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3992{
3993 *p1 = CC_REGNUM;
3994 *p2 = INVALID_REGNUM;
3995 return true;
3996}
3997
78607708
TV
3998/* Emit call insn with PAT and do aarch64-specific handling. */
3999
d07a3fed 4000void
78607708
TV
4001aarch64_emit_call_insn (rtx pat)
4002{
4003 rtx insn = emit_call_insn (pat);
4004
4005 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
4006 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
4007 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
4008}
4009
ef4bddc2 4010machine_mode
43e9d192
IB
4011aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
4012{
4013 /* All floating point compares return CCFP if it is an equality
4014 comparison, and CCFPE otherwise. */
4015 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
4016 {
4017 switch (code)
4018 {
4019 case EQ:
4020 case NE:
4021 case UNORDERED:
4022 case ORDERED:
4023 case UNLT:
4024 case UNLE:
4025 case UNGT:
4026 case UNGE:
4027 case UNEQ:
4028 case LTGT:
4029 return CCFPmode;
4030
4031 case LT:
4032 case LE:
4033 case GT:
4034 case GE:
4035 return CCFPEmode;
4036
4037 default:
4038 gcc_unreachable ();
4039 }
4040 }
4041
4042 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4043 && y == const0_rtx
4044 && (code == EQ || code == NE || code == LT || code == GE)
b056c910
N
4045 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
4046 || GET_CODE (x) == NEG))
43e9d192
IB
4047 return CC_NZmode;
4048
1c992d1e 4049 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
4050 the comparison will have to be swapped when we emit the assembly
4051 code. */
4052 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 4053 && (REG_P (y) || GET_CODE (y) == SUBREG)
43e9d192
IB
4054 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
4055 || GET_CODE (x) == LSHIFTRT
1c992d1e 4056 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
4057 return CC_SWPmode;
4058
1c992d1e
RE
4059 /* Similarly for a negated operand, but we can only do this for
4060 equalities. */
4061 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 4062 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
4063 && (code == EQ || code == NE)
4064 && GET_CODE (x) == NEG)
4065 return CC_Zmode;
4066
43e9d192
IB
4067 /* A compare of a mode narrower than SI mode against zero can be done
4068 by extending the value in the comparison. */
4069 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
4070 && y == const0_rtx)
4071 /* Only use sign-extension if we really need it. */
4072 return ((code == GT || code == GE || code == LE || code == LT)
4073 ? CC_SESWPmode : CC_ZESWPmode);
4074
4075 /* For everything else, return CCmode. */
4076 return CCmode;
4077}
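/* For instance, an EQ or NE test of (plus x y) against zero selects
   CC_NZmode (only the N and Z flags of the ADDS result are meaningful),
   a GT test of two plain registers falls through to CCmode, and comparing
   a shifted value against a register selects CC_SWPmode because the
   operands must be swapped when the assembly is emitted.  */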
4078
3dfa7055
ZC
4079static int
4080aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
4081
cd5660ab 4082int
43e9d192
IB
4083aarch64_get_condition_code (rtx x)
4084{
ef4bddc2 4085 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
4086 enum rtx_code comp_code = GET_CODE (x);
4087
4088 if (GET_MODE_CLASS (mode) != MODE_CC)
4089 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
4090 return aarch64_get_condition_code_1 (mode, comp_code);
4091}
43e9d192 4092
3dfa7055
ZC
4093static int
4094aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
4095{
4096 int ne = -1, eq = -1;
43e9d192
IB
4097 switch (mode)
4098 {
4099 case CCFPmode:
4100 case CCFPEmode:
4101 switch (comp_code)
4102 {
4103 case GE: return AARCH64_GE;
4104 case GT: return AARCH64_GT;
4105 case LE: return AARCH64_LS;
4106 case LT: return AARCH64_MI;
4107 case NE: return AARCH64_NE;
4108 case EQ: return AARCH64_EQ;
4109 case ORDERED: return AARCH64_VC;
4110 case UNORDERED: return AARCH64_VS;
4111 case UNLT: return AARCH64_LT;
4112 case UNLE: return AARCH64_LE;
4113 case UNGT: return AARCH64_HI;
4114 case UNGE: return AARCH64_PL;
cd5660ab 4115 default: return -1;
43e9d192
IB
4116 }
4117 break;
4118
3dfa7055
ZC
4119 case CC_DNEmode:
4120 ne = AARCH64_NE;
4121 eq = AARCH64_EQ;
4122 break;
4123
4124 case CC_DEQmode:
4125 ne = AARCH64_EQ;
4126 eq = AARCH64_NE;
4127 break;
4128
4129 case CC_DGEmode:
4130 ne = AARCH64_GE;
4131 eq = AARCH64_LT;
4132 break;
4133
4134 case CC_DLTmode:
4135 ne = AARCH64_LT;
4136 eq = AARCH64_GE;
4137 break;
4138
4139 case CC_DGTmode:
4140 ne = AARCH64_GT;
4141 eq = AARCH64_LE;
4142 break;
4143
4144 case CC_DLEmode:
4145 ne = AARCH64_LE;
4146 eq = AARCH64_GT;
4147 break;
4148
4149 case CC_DGEUmode:
4150 ne = AARCH64_CS;
4151 eq = AARCH64_CC;
4152 break;
4153
4154 case CC_DLTUmode:
4155 ne = AARCH64_CC;
4156 eq = AARCH64_CS;
4157 break;
4158
4159 case CC_DGTUmode:
4160 ne = AARCH64_HI;
4161 eq = AARCH64_LS;
4162 break;
4163
4164 case CC_DLEUmode:
4165 ne = AARCH64_LS;
4166 eq = AARCH64_HI;
4167 break;
4168
43e9d192
IB
4169 case CCmode:
4170 switch (comp_code)
4171 {
4172 case NE: return AARCH64_NE;
4173 case EQ: return AARCH64_EQ;
4174 case GE: return AARCH64_GE;
4175 case GT: return AARCH64_GT;
4176 case LE: return AARCH64_LE;
4177 case LT: return AARCH64_LT;
4178 case GEU: return AARCH64_CS;
4179 case GTU: return AARCH64_HI;
4180 case LEU: return AARCH64_LS;
4181 case LTU: return AARCH64_CC;
cd5660ab 4182 default: return -1;
43e9d192
IB
4183 }
4184 break;
4185
4186 case CC_SWPmode:
4187 case CC_ZESWPmode:
4188 case CC_SESWPmode:
4189 switch (comp_code)
4190 {
4191 case NE: return AARCH64_NE;
4192 case EQ: return AARCH64_EQ;
4193 case GE: return AARCH64_LE;
4194 case GT: return AARCH64_LT;
4195 case LE: return AARCH64_GE;
4196 case LT: return AARCH64_GT;
4197 case GEU: return AARCH64_LS;
4198 case GTU: return AARCH64_CC;
4199 case LEU: return AARCH64_CS;
4200 case LTU: return AARCH64_HI;
cd5660ab 4201 default: return -1;
43e9d192
IB
4202 }
4203 break;
4204
4205 case CC_NZmode:
4206 switch (comp_code)
4207 {
4208 case NE: return AARCH64_NE;
4209 case EQ: return AARCH64_EQ;
4210 case GE: return AARCH64_PL;
4211 case LT: return AARCH64_MI;
cd5660ab 4212 default: return -1;
43e9d192
IB
4213 }
4214 break;
4215
1c992d1e
RE
4216 case CC_Zmode:
4217 switch (comp_code)
4218 {
4219 case NE: return AARCH64_NE;
4220 case EQ: return AARCH64_EQ;
cd5660ab 4221 default: return -1;
1c992d1e
RE
4222 }
4223 break;
4224
43e9d192 4225 default:
cd5660ab 4226 return -1;
43e9d192
IB
4227 break;
4228 }
3dfa7055
ZC
4229
4230 if (comp_code == NE)
4231 return ne;
4232
4233 if (comp_code == EQ)
4234 return eq;
4235
4236 return -1;
43e9d192
IB
4237}
4238
ddeabd3e
AL
4239bool
4240aarch64_const_vec_all_same_in_range_p (rtx x,
4241 HOST_WIDE_INT minval,
4242 HOST_WIDE_INT maxval)
4243{
4244 HOST_WIDE_INT firstval;
4245 int count, i;
4246
4247 if (GET_CODE (x) != CONST_VECTOR
4248 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
4249 return false;
4250
4251 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
4252 if (firstval < minval || firstval > maxval)
4253 return false;
4254
4255 count = CONST_VECTOR_NUNITS (x);
4256 for (i = 1; i < count; i++)
4257 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
4258 return false;
4259
4260 return true;
4261}
4262
4263bool
4264aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
4265{
4266 return aarch64_const_vec_all_same_in_range_p (x, val, val);
4267}
4268
43e9d192 4269
cf670503
ZC
4270/* N Z C V. */
4271#define AARCH64_CC_V 1
4272#define AARCH64_CC_C (1 << 1)
4273#define AARCH64_CC_Z (1 << 2)
4274#define AARCH64_CC_N (1 << 3)
4275
4276/* N Z C V flags for ccmp. The first code is for AND op and the other
4277 is for IOR op. Indexed by AARCH64_COND_CODE. */
4278static const int aarch64_nzcv_codes[][2] =
4279{
4280 {AARCH64_CC_Z, 0}, /* EQ, Z == 1. */
4281 {0, AARCH64_CC_Z}, /* NE, Z == 0. */
4282 {AARCH64_CC_C, 0}, /* CS, C == 1. */
4283 {0, AARCH64_CC_C}, /* CC, C == 0. */
4284 {AARCH64_CC_N, 0}, /* MI, N == 1. */
4285 {0, AARCH64_CC_N}, /* PL, N == 0. */
4286 {AARCH64_CC_V, 0}, /* VS, V == 1. */
4287 {0, AARCH64_CC_V}, /* VC, V == 0. */
 4288 {AARCH64_CC_C, 0}, /* HI, C == 1 && Z == 0. */
4289 {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0). */
4290 {0, AARCH64_CC_V}, /* GE, N == V. */
4291 {AARCH64_CC_V, 0}, /* LT, N != V. */
4292 {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V. */
4293 {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V). */
4294 {0, 0}, /* AL, Any. */
4295 {0, 0}, /* NV, Any. */
4296};
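/* For example, for an EQ comparison the 'K' operand modifier (handled in
   aarch64_print_operand below) prints aarch64_nzcv_codes[AARCH64_EQ][0],
   i.e. AARCH64_CC_Z == 4, while 'k' prints the second column; these are
   the NZCV immediates used when building conditional compares.  */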
4297
4298int
4299aarch64_ccmp_mode_to_code (enum machine_mode mode)
4300{
4301 switch (mode)
4302 {
4303 case CC_DNEmode:
4304 return NE;
4305
4306 case CC_DEQmode:
4307 return EQ;
4308
4309 case CC_DLEmode:
4310 return LE;
4311
4312 case CC_DGTmode:
4313 return GT;
4314
4315 case CC_DLTmode:
4316 return LT;
4317
4318 case CC_DGEmode:
4319 return GE;
4320
4321 case CC_DLEUmode:
4322 return LEU;
4323
4324 case CC_DGTUmode:
4325 return GTU;
4326
4327 case CC_DLTUmode:
4328 return LTU;
4329
4330 case CC_DGEUmode:
4331 return GEU;
4332
4333 default:
4334 gcc_unreachable ();
4335 }
4336}
4337
4338
43e9d192
IB
4339void
4340aarch64_print_operand (FILE *f, rtx x, char code)
4341{
4342 switch (code)
4343 {
f541a481
KT
4344 /* An integer or symbol address without a preceding # sign. */
4345 case 'c':
4346 switch (GET_CODE (x))
4347 {
4348 case CONST_INT:
4349 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4350 break;
4351
4352 case SYMBOL_REF:
4353 output_addr_const (f, x);
4354 break;
4355
4356 case CONST:
4357 if (GET_CODE (XEXP (x, 0)) == PLUS
4358 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4359 {
4360 output_addr_const (f, x);
4361 break;
4362 }
4363 /* Fall through. */
4364
4365 default:
4366 output_operand_lossage ("Unsupported operand for code '%c'", code);
4367 }
4368 break;
4369
43e9d192
IB
4370 case 'e':
4371 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
4372 {
4373 int n;
4374
4aa81c2e 4375 if (!CONST_INT_P (x)
43e9d192
IB
4376 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
4377 {
4378 output_operand_lossage ("invalid operand for '%%%c'", code);
4379 return;
4380 }
4381
4382 switch (n)
4383 {
4384 case 3:
4385 fputc ('b', f);
4386 break;
4387 case 4:
4388 fputc ('h', f);
4389 break;
4390 case 5:
4391 fputc ('w', f);
4392 break;
4393 default:
4394 output_operand_lossage ("invalid operand for '%%%c'", code);
4395 return;
4396 }
4397 }
4398 break;
4399
4400 case 'p':
4401 {
4402 int n;
4403
4404 /* Print N such that 2^N == X. */
4aa81c2e 4405 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
4406 {
4407 output_operand_lossage ("invalid operand for '%%%c'", code);
4408 return;
4409 }
4410
4411 asm_fprintf (f, "%d", n);
4412 }
4413 break;
4414
4415 case 'P':
4416 /* Print the number of non-zero bits in X (a const_int). */
4aa81c2e 4417 if (!CONST_INT_P (x))
43e9d192
IB
4418 {
4419 output_operand_lossage ("invalid operand for '%%%c'", code);
4420 return;
4421 }
4422
8d55c61b 4423 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
4424 break;
4425
4426 case 'H':
4427 /* Print the higher numbered register of a pair (TImode) of regs. */
4aa81c2e 4428 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
4429 {
4430 output_operand_lossage ("invalid operand for '%%%c'", code);
4431 return;
4432 }
4433
01a3a324 4434 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
4435 break;
4436
43e9d192 4437 case 'm':
cd5660ab
KT
4438 {
4439 int cond_code;
4440 /* Print a condition (eq, ne, etc). */
43e9d192 4441
cd5660ab
KT
4442 /* CONST_TRUE_RTX means always -- that's the default. */
4443 if (x == const_true_rtx)
43e9d192 4444 return;
43e9d192 4445
cd5660ab
KT
4446 if (!COMPARISON_P (x))
4447 {
4448 output_operand_lossage ("invalid operand for '%%%c'", code);
4449 return;
4450 }
4451
4452 cond_code = aarch64_get_condition_code (x);
4453 gcc_assert (cond_code >= 0);
4454 fputs (aarch64_condition_codes[cond_code], f);
4455 }
43e9d192
IB
4456 break;
4457
4458 case 'M':
cd5660ab
KT
4459 {
4460 int cond_code;
4461 /* Print the inverse of a condition (eq <-> ne, etc). */
43e9d192 4462
cd5660ab
KT
4463 /* CONST_TRUE_RTX means never -- that's the default. */
4464 if (x == const_true_rtx)
4465 {
4466 fputs ("nv", f);
4467 return;
4468 }
43e9d192 4469
cd5660ab
KT
4470 if (!COMPARISON_P (x))
4471 {
4472 output_operand_lossage ("invalid operand for '%%%c'", code);
4473 return;
4474 }
4475 cond_code = aarch64_get_condition_code (x);
4476 gcc_assert (cond_code >= 0);
4477 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
4478 (cond_code)], f);
4479 }
43e9d192
IB
4480 break;
4481
4482 case 'b':
4483 case 'h':
4484 case 's':
4485 case 'd':
4486 case 'q':
4487 /* Print a scalar FP/SIMD register name. */
4488 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4489 {
4490 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4491 return;
4492 }
50ce6f88 4493 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
4494 break;
4495
4496 case 'S':
4497 case 'T':
4498 case 'U':
4499 case 'V':
4500 /* Print the first FP/SIMD register name in a list. */
4501 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4502 {
4503 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4504 return;
4505 }
50ce6f88 4506 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
4507 break;
4508
2d8c6dc1
AH
4509 case 'R':
4510 /* Print a scalar FP/SIMD register name + 1. */
4511 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4512 {
4513 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4514 return;
4515 }
4516 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
4517 break;
4518
a05c0ddf 4519 case 'X':
50d38551 4520 /* Print bottom 16 bits of integer constant in hex. */
4aa81c2e 4521 if (!CONST_INT_P (x))
a05c0ddf
IB
4522 {
4523 output_operand_lossage ("invalid operand for '%%%c'", code);
4524 return;
4525 }
50d38551 4526 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
4527 break;
4528
43e9d192
IB
4529 case 'w':
4530 case 'x':
4531 /* Print a general register name or the zero register (32-bit or
4532 64-bit). */
3520f7cc
JG
4533 if (x == const0_rtx
4534 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 4535 {
50ce6f88 4536 asm_fprintf (f, "%czr", code);
43e9d192
IB
4537 break;
4538 }
4539
4540 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
4541 {
50ce6f88 4542 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
4543 break;
4544 }
4545
4546 if (REG_P (x) && REGNO (x) == SP_REGNUM)
4547 {
50ce6f88 4548 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
4549 break;
4550 }
4551
4552 /* Fall through */
4553
4554 case 0:
 4555 /* Print a normal operand; if it's a general register, then we
4556 assume DImode. */
4557 if (x == NULL)
4558 {
4559 output_operand_lossage ("missing operand");
4560 return;
4561 }
4562
4563 switch (GET_CODE (x))
4564 {
4565 case REG:
01a3a324 4566 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
4567 break;
4568
4569 case MEM:
4570 aarch64_memory_reference_mode = GET_MODE (x);
4571 output_address (XEXP (x, 0));
4572 break;
4573
4574 case LABEL_REF:
4575 case SYMBOL_REF:
4576 output_addr_const (asm_out_file, x);
4577 break;
4578
4579 case CONST_INT:
4580 asm_fprintf (f, "%wd", INTVAL (x));
4581 break;
4582
4583 case CONST_VECTOR:
3520f7cc
JG
4584 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
4585 {
ddeabd3e
AL
4586 gcc_assert (
4587 aarch64_const_vec_all_same_in_range_p (x,
4588 HOST_WIDE_INT_MIN,
4589 HOST_WIDE_INT_MAX));
3520f7cc
JG
4590 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
4591 }
4592 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
4593 {
4594 fputc ('0', f);
4595 }
4596 else
4597 gcc_unreachable ();
43e9d192
IB
4598 break;
4599
3520f7cc
JG
4600 case CONST_DOUBLE:
4601 /* CONST_DOUBLE can represent a double-width integer.
4602 In this case, the mode of x is VOIDmode. */
4603 if (GET_MODE (x) == VOIDmode)
4604 ; /* Do Nothing. */
4605 else if (aarch64_float_const_zero_rtx_p (x))
4606 {
4607 fputc ('0', f);
4608 break;
4609 }
4610 else if (aarch64_float_const_representable_p (x))
4611 {
4612#define buf_size 20
4613 char float_buf[buf_size] = {'\0'};
4614 REAL_VALUE_TYPE r;
4615 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4616 real_to_decimal_for_mode (float_buf, &r,
4617 buf_size, buf_size,
4618 1, GET_MODE (x));
4619 asm_fprintf (asm_out_file, "%s", float_buf);
4620 break;
4621#undef buf_size
4622 }
4623 output_operand_lossage ("invalid constant");
4624 return;
43e9d192
IB
4625 default:
4626 output_operand_lossage ("invalid operand");
4627 return;
4628 }
4629 break;
4630
4631 case 'A':
4632 if (GET_CODE (x) == HIGH)
4633 x = XEXP (x, 0);
4634
4635 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4636 {
6642bdb4 4637 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
4638 asm_fprintf (asm_out_file, ":got:");
4639 break;
4640
4641 case SYMBOL_SMALL_TLSGD:
4642 asm_fprintf (asm_out_file, ":tlsgd:");
4643 break;
4644
4645 case SYMBOL_SMALL_TLSDESC:
4646 asm_fprintf (asm_out_file, ":tlsdesc:");
4647 break;
4648
79496620 4649 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
4650 asm_fprintf (asm_out_file, ":gottprel:");
4651 break;
4652
d18ba284 4653 case SYMBOL_TLSLE24:
43e9d192
IB
4654 asm_fprintf (asm_out_file, ":tprel:");
4655 break;
4656
87dd8ab0
MS
4657 case SYMBOL_TINY_GOT:
4658 gcc_unreachable ();
4659 break;
4660
43e9d192
IB
4661 default:
4662 break;
4663 }
4664 output_addr_const (asm_out_file, x);
4665 break;
4666
4667 case 'L':
4668 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4669 {
6642bdb4 4670 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
4671 asm_fprintf (asm_out_file, ":lo12:");
4672 break;
4673
4674 case SYMBOL_SMALL_TLSGD:
4675 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4676 break;
4677
4678 case SYMBOL_SMALL_TLSDESC:
4679 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4680 break;
4681
79496620 4682 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
4683 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4684 break;
4685
cbf5629e
JW
4686 case SYMBOL_TLSLE12:
4687 asm_fprintf (asm_out_file, ":tprel_lo12:");
4688 break;
4689
d18ba284 4690 case SYMBOL_TLSLE24:
43e9d192
IB
4691 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4692 break;
4693
87dd8ab0
MS
4694 case SYMBOL_TINY_GOT:
4695 asm_fprintf (asm_out_file, ":got:");
4696 break;
4697
5ae7caad
JW
4698 case SYMBOL_TINY_TLSIE:
4699 asm_fprintf (asm_out_file, ":gottprel:");
4700 break;
4701
43e9d192
IB
4702 default:
4703 break;
4704 }
4705 output_addr_const (asm_out_file, x);
4706 break;
4707
4708 case 'G':
4709
4710 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4711 {
d18ba284 4712 case SYMBOL_TLSLE24:
43e9d192
IB
4713 asm_fprintf (asm_out_file, ":tprel_hi12:");
4714 break;
4715 default:
4716 break;
4717 }
4718 output_addr_const (asm_out_file, x);
4719 break;
4720
cf670503
ZC
4721 case 'K':
4722 {
4723 int cond_code;
4724 /* Print nzcv. */
4725
4726 if (!COMPARISON_P (x))
4727 {
4728 output_operand_lossage ("invalid operand for '%%%c'", code);
4729 return;
4730 }
4731
4732 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4733 gcc_assert (cond_code >= 0);
4734 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
4735 }
4736 break;
4737
4738 case 'k':
4739 {
4740 int cond_code;
4741 /* Print nzcv. */
4742
4743 if (!COMPARISON_P (x))
4744 {
4745 output_operand_lossage ("invalid operand for '%%%c'", code);
4746 return;
4747 }
4748
4749 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4750 gcc_assert (cond_code >= 0);
4751 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
4752 }
4753 break;
4754
43e9d192
IB
4755 default:
4756 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4757 return;
4758 }
4759}
4760
4761void
4762aarch64_print_operand_address (FILE *f, rtx x)
4763{
4764 struct aarch64_address_info addr;
4765
4766 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4767 MEM, true))
4768 switch (addr.type)
4769 {
4770 case ADDRESS_REG_IMM:
4771 if (addr.offset == const0_rtx)
01a3a324 4772 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 4773 else
16a3246f 4774 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
4775 INTVAL (addr.offset));
4776 return;
4777
4778 case ADDRESS_REG_REG:
4779 if (addr.shift == 0)
16a3246f 4780 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 4781 reg_names [REGNO (addr.offset)]);
43e9d192 4782 else
16a3246f 4783 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 4784 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
4785 return;
4786
4787 case ADDRESS_REG_UXTW:
4788 if (addr.shift == 0)
16a3246f 4789 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4790 REGNO (addr.offset) - R0_REGNUM);
4791 else
16a3246f 4792 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4793 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4794 return;
4795
4796 case ADDRESS_REG_SXTW:
4797 if (addr.shift == 0)
16a3246f 4798 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4799 REGNO (addr.offset) - R0_REGNUM);
4800 else
16a3246f 4801 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4802 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4803 return;
4804
4805 case ADDRESS_REG_WB:
4806 switch (GET_CODE (x))
4807 {
4808 case PRE_INC:
16a3246f 4809 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4810 GET_MODE_SIZE (aarch64_memory_reference_mode));
4811 return;
4812 case POST_INC:
16a3246f 4813 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
43e9d192
IB
4814 GET_MODE_SIZE (aarch64_memory_reference_mode));
4815 return;
4816 case PRE_DEC:
16a3246f 4817 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4818 GET_MODE_SIZE (aarch64_memory_reference_mode));
4819 return;
4820 case POST_DEC:
16a3246f 4821 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
43e9d192
IB
4822 GET_MODE_SIZE (aarch64_memory_reference_mode));
4823 return;
4824 case PRE_MODIFY:
16a3246f 4825 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4826 INTVAL (addr.offset));
4827 return;
4828 case POST_MODIFY:
16a3246f 4829 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
4830 INTVAL (addr.offset));
4831 return;
4832 default:
4833 break;
4834 }
4835 break;
4836
4837 case ADDRESS_LO_SUM:
16a3246f 4838 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
4839 output_addr_const (f, addr.offset);
4840 asm_fprintf (f, "]");
4841 return;
4842
4843 case ADDRESS_SYMBOLIC:
4844 break;
4845 }
4846
4847 output_addr_const (f, x);
4848}
4849
43e9d192
IB
4850bool
4851aarch64_label_mentioned_p (rtx x)
4852{
4853 const char *fmt;
4854 int i;
4855
4856 if (GET_CODE (x) == LABEL_REF)
4857 return true;
4858
4859 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4860 referencing instruction, but they are constant offsets, not
4861 symbols. */
4862 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4863 return false;
4864
4865 fmt = GET_RTX_FORMAT (GET_CODE (x));
4866 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4867 {
4868 if (fmt[i] == 'E')
4869 {
4870 int j;
4871
4872 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4873 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4874 return 1;
4875 }
4876 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4877 return 1;
4878 }
4879
4880 return 0;
4881}
4882
4883/* Implement REGNO_REG_CLASS. */
4884
4885enum reg_class
4886aarch64_regno_regclass (unsigned regno)
4887{
4888 if (GP_REGNUM_P (regno))
a4a182c6 4889 return GENERAL_REGS;
43e9d192
IB
4890
4891 if (regno == SP_REGNUM)
4892 return STACK_REG;
4893
4894 if (regno == FRAME_POINTER_REGNUM
4895 || regno == ARG_POINTER_REGNUM)
f24bb080 4896 return POINTER_REGS;
43e9d192
IB
4897
4898 if (FP_REGNUM_P (regno))
4899 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4900
4901 return NO_REGS;
4902}
4903
0c4ec427 4904static rtx
ef4bddc2 4905aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
4906{
4907 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4908 where mask is selected by alignment and size of the offset.
4909 We try to pick as large a range for the offset as possible to
4910 maximize the chance of a CSE. However, for aligned addresses
4911 we limit the range to 4k so that structures with different sized
4912 elements are likely to use the same base. */
4913
4914 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
4915 {
4916 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
4917 HOST_WIDE_INT base_offset;
4918
4919 /* Does it look like we'll need a load/store-pair operation? */
4920 if (GET_MODE_SIZE (mode) > 16
4921 || mode == TImode)
4922 base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
4923 & ~((128 * GET_MODE_SIZE (mode)) - 1));
 4924 /* For offsets that aren't a multiple of the access size, the limit is
4925 -256...255. */
4926 else if (offset & (GET_MODE_SIZE (mode) - 1))
4927 base_offset = (offset + 0x100) & ~0x1ff;
4928 else
4929 base_offset = offset & ~0xfff;
4930
4931 if (base_offset == 0)
4932 return x;
4933
4934 offset -= base_offset;
4935 rtx base_reg = gen_reg_rtx (Pmode);
4936 rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
4937 NULL_RTX);
4938 emit_move_insn (base_reg, val);
4939 x = plus_constant (Pmode, base_reg, offset);
4940 }
4941
4942 return x;
4943}
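/* For example, an aligned 4-byte access at x0 + 0x10ffc is split as
   base_offset = 0x10000 plus a residual offset of 0xffc, giving roughly:
       add  x1, x0, #0x10, lsl #12
       ldr  w2, [x1, #0xffc]
   so that neighbouring accesses can reuse (CSE) the same base register.
   (Register numbers here are purely illustrative.)  */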
4944
43e9d192
IB
4945/* Try a machine-dependent way of reloading an illegitimate address
4946 operand. If we find one, push the reload and return the new rtx. */
4947
4948rtx
4949aarch64_legitimize_reload_address (rtx *x_p,
ef4bddc2 4950 machine_mode mode,
43e9d192
IB
4951 int opnum, int type,
4952 int ind_levels ATTRIBUTE_UNUSED)
4953{
4954 rtx x = *x_p;
4955
348d4b0a
BC
4956 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4957 if (aarch64_vect_struct_mode_p (mode)
43e9d192
IB
4958 && GET_CODE (x) == PLUS
4959 && REG_P (XEXP (x, 0))
4960 && CONST_INT_P (XEXP (x, 1)))
4961 {
4962 rtx orig_rtx = x;
4963 x = copy_rtx (x);
4964 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4965 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4966 opnum, (enum reload_type) type);
4967 return x;
4968 }
4969
4970 /* We must recognize output that we have already generated ourselves. */
4971 if (GET_CODE (x) == PLUS
4972 && GET_CODE (XEXP (x, 0)) == PLUS
4973 && REG_P (XEXP (XEXP (x, 0), 0))
4974 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4975 && CONST_INT_P (XEXP (x, 1)))
4976 {
4977 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4978 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4979 opnum, (enum reload_type) type);
4980 return x;
4981 }
4982
4983 /* We wish to handle large displacements off a base register by splitting
4984 the addend across an add and the mem insn. This can cut the number of
4985 extra insns needed from 3 to 1. It is only useful for load/store of a
 4986 single register with a 12-bit offset field. */
4987 if (GET_CODE (x) == PLUS
4988 && REG_P (XEXP (x, 0))
4989 && CONST_INT_P (XEXP (x, 1))
4990 && HARD_REGISTER_P (XEXP (x, 0))
4991 && mode != TImode
4992 && mode != TFmode
4993 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4994 {
4995 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4996 HOST_WIDE_INT low = val & 0xfff;
4997 HOST_WIDE_INT high = val - low;
4998 HOST_WIDE_INT offs;
4999 rtx cst;
ef4bddc2 5000 machine_mode xmode = GET_MODE (x);
28514dda
YZ
5001
5002 /* In ILP32, xmode can be either DImode or SImode. */
5003 gcc_assert (xmode == DImode || xmode == SImode);
43e9d192
IB
5004
5005 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
5006 BLKmode alignment. */
5007 if (GET_MODE_SIZE (mode) == 0)
5008 return NULL_RTX;
5009
5010 offs = low % GET_MODE_SIZE (mode);
5011
5012 /* Align misaligned offset by adjusting high part to compensate. */
5013 if (offs != 0)
5014 {
5015 if (aarch64_uimm12_shift (high + offs))
5016 {
5017 /* Align down. */
5018 low = low - offs;
5019 high = high + offs;
5020 }
5021 else
5022 {
5023 /* Align up. */
5024 offs = GET_MODE_SIZE (mode) - offs;
5025 low = low + offs;
5026 high = high + (low & 0x1000) - offs;
5027 low &= 0xfff;
5028 }
5029 }
5030
5031 /* Check for overflow. */
5032 if (high + low != val)
5033 return NULL_RTX;
5034
5035 cst = GEN_INT (high);
5036 if (!aarch64_uimm12_shift (high))
28514dda 5037 cst = force_const_mem (xmode, cst);
43e9d192
IB
5038
5039 /* Reload high part into base reg, leaving the low part
acf67b8e
KT
5040 in the mem instruction.
5041 Note that replacing this gen_rtx_PLUS with plus_constant is
5042 wrong in this case because we rely on the
5043 (plus (plus reg c1) c2) structure being preserved so that
5044 XEXP (*p, 0) in push_reload below uses the correct term. */
4d0e5b54
JG
5045 x = gen_rtx_PLUS (xmode,
5046 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
5047 GEN_INT (low));
43e9d192
IB
5048
5049 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
28514dda 5050 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
43e9d192
IB
5051 opnum, (enum reload_type) type);
5052 return x;
5053 }
5054
5055 return NULL_RTX;
5056}
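/* As an illustrative example of the splitting above, reloading a DImode
   access at x0 + 0x13008 uses low = 0x8 and high = 0x13000, so the final
   code is roughly:
       add  x1, x0, #0x13, lsl #12
       ldr  x2, [x1, #8]
   with the high part reloaded into a base register and the low part kept
   in the memory reference.  (Register numbers are illustrative only.)  */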
5057
5058
b4f50fd4
RR
5059/* Return the reload icode required for a constant pool in mode. */
5060static enum insn_code
5061aarch64_constant_pool_reload_icode (machine_mode mode)
5062{
5063 switch (mode)
5064 {
5065 case SFmode:
5066 return CODE_FOR_aarch64_reload_movcpsfdi;
5067
5068 case DFmode:
5069 return CODE_FOR_aarch64_reload_movcpdfdi;
5070
5071 case TFmode:
5072 return CODE_FOR_aarch64_reload_movcptfdi;
5073
5074 case V8QImode:
5075 return CODE_FOR_aarch64_reload_movcpv8qidi;
5076
5077 case V16QImode:
5078 return CODE_FOR_aarch64_reload_movcpv16qidi;
5079
5080 case V4HImode:
5081 return CODE_FOR_aarch64_reload_movcpv4hidi;
5082
5083 case V8HImode:
5084 return CODE_FOR_aarch64_reload_movcpv8hidi;
5085
5086 case V2SImode:
5087 return CODE_FOR_aarch64_reload_movcpv2sidi;
5088
5089 case V4SImode:
5090 return CODE_FOR_aarch64_reload_movcpv4sidi;
5091
5092 case V2DImode:
5093 return CODE_FOR_aarch64_reload_movcpv2didi;
5094
5095 case V2DFmode:
5096 return CODE_FOR_aarch64_reload_movcpv2dfdi;
5097
5098 default:
5099 gcc_unreachable ();
5100 }
5101
5102 gcc_unreachable ();
5103}
43e9d192
IB
5104static reg_class_t
5105aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
5106 reg_class_t rclass,
ef4bddc2 5107 machine_mode mode,
43e9d192
IB
5108 secondary_reload_info *sri)
5109{
b4f50fd4
RR
5110
5111 /* If we have to disable direct literal pool loads and stores because the
5112 function is too big, then we need a scratch register. */
5113 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
5114 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
5115 || targetm.vector_mode_supported_p (GET_MODE (x)))
5116 && nopcrelative_literal_loads)
5117 {
5118 sri->icode = aarch64_constant_pool_reload_icode (mode);
5119 return NO_REGS;
5120 }
5121
43e9d192
IB
5122 /* Without the TARGET_SIMD instructions we cannot move a Q register
5123 to a Q register directly. We need a scratch. */
5124 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
5125 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
5126 && reg_class_subset_p (rclass, FP_REGS))
5127 {
5128 if (mode == TFmode)
5129 sri->icode = CODE_FOR_aarch64_reload_movtf;
5130 else if (mode == TImode)
5131 sri->icode = CODE_FOR_aarch64_reload_movti;
5132 return NO_REGS;
5133 }
5134
 5135 /* A TFmode or TImode memory access should be handled via an FP_REG
5136 because AArch64 has richer addressing modes for LDR/STR instructions
5137 than LDP/STP instructions. */
d5726973 5138 if (TARGET_FLOAT && rclass == GENERAL_REGS
43e9d192
IB
5139 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
5140 return FP_REGS;
5141
5142 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 5143 return GENERAL_REGS;
43e9d192
IB
5144
5145 return NO_REGS;
5146}
5147
5148static bool
5149aarch64_can_eliminate (const int from, const int to)
5150{
5151 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
5152 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
5153
5154 if (frame_pointer_needed)
5155 {
5156 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5157 return true;
5158 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5159 return false;
5160 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
5161 && !cfun->calls_alloca)
5162 return true;
5163 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5164 return true;
0b7f8166
MS
5165
5166 return false;
43e9d192 5167 }
1c923b60
JW
5168 else
5169 {
5170 /* If we decided that we didn't need a leaf frame pointer but then used
5171 LR in the function, then we'll want a frame pointer after all, so
5172 prevent this elimination to ensure a frame pointer is used. */
5173 if (to == STACK_POINTER_REGNUM
5174 && flag_omit_leaf_frame_pointer
5175 && df_regs_ever_live_p (LR_REGNUM))
5176 return false;
5177 }
777e6976 5178
43e9d192
IB
5179 return true;
5180}
5181
5182HOST_WIDE_INT
5183aarch64_initial_elimination_offset (unsigned from, unsigned to)
5184{
43e9d192 5185 aarch64_layout_frame ();
78c29983
MS
5186
5187 if (to == HARD_FRAME_POINTER_REGNUM)
5188 {
5189 if (from == ARG_POINTER_REGNUM)
1c960e02 5190 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
78c29983
MS
5191
5192 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
5193 return (cfun->machine->frame.hard_fp_offset
5194 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
5195 }
5196
5197 if (to == STACK_POINTER_REGNUM)
5198 {
5199 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
5200 return (cfun->machine->frame.frame_size
5201 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
5202 }
5203
1c960e02 5204 return cfun->machine->frame.frame_size;
43e9d192
IB
5205}
5206
43e9d192
IB
5207/* Implement RETURN_ADDR_RTX. We do not support moving back to a
5208 previous frame. */
5209
5210rtx
5211aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
5212{
5213 if (count != 0)
5214 return const0_rtx;
5215 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
5216}
5217
5218
5219static void
5220aarch64_asm_trampoline_template (FILE *f)
5221{
28514dda
YZ
5222 if (TARGET_ILP32)
5223 {
5224 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
5225 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
5226 }
5227 else
5228 {
5229 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
5230 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
5231 }
01a3a324 5232 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 5233 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
5234 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
5235 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
5236}
5237
5238static void
5239aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5240{
5241 rtx fnaddr, mem, a_tramp;
28514dda 5242 const int tramp_code_sz = 16;
43e9d192
IB
5243
5244 /* Don't need to copy the trailing D-words, we fill those in below. */
5245 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
5246 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
5247 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 5248 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
5249 if (GET_MODE (fnaddr) != ptr_mode)
5250 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
5251 emit_move_insn (mem, fnaddr);
5252
28514dda 5253 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
5254 emit_move_insn (mem, chain_value);
5255
5256 /* XXX We should really define a "clear_cache" pattern and use
5257 gen_clear_cache(). */
5258 a_tramp = XEXP (m_tramp, 0);
5259 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
5260 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
5261 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
5262 ptr_mode);
43e9d192
IB
5263}
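/* To summarise the layout built above: the trampoline is tramp_code_sz
   (16) bytes of code copied from the template, followed by two
   pointer-sized data words holding the target function address and the
   static chain value; __clear_cache is then invoked over the whole
   TRAMPOLINE_SIZE region so the copied code is visible to the I-cache.  */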
5264
5265static unsigned char
ef4bddc2 5266aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192
IB
5267{
5268 switch (regclass)
5269 {
fee9ba42 5270 case CALLER_SAVE_REGS:
43e9d192
IB
5271 case POINTER_REGS:
5272 case GENERAL_REGS:
5273 case ALL_REGS:
5274 case FP_REGS:
5275 case FP_LO_REGS:
5276 return
7bd11911
KT
5277 aarch64_vector_mode_p (mode)
5278 ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
5279 : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
43e06d03 5280 case FIXED_REG0:
43e9d192
IB
5281 case STACK_REG:
5282 return 1;
5283
5284 case NO_REGS:
5285 return 0;
5286
5287 default:
5288 break;
5289 }
5290 gcc_unreachable ();
5291}
5292
5293static reg_class_t
78d8b9f0 5294aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 5295{
51bb310d 5296 if (regclass == POINTER_REGS)
78d8b9f0
IB
5297 return GENERAL_REGS;
5298
51bb310d
MS
5299 if (regclass == STACK_REG)
5300 {
5301 if (REG_P(x)
5302 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
5303 return regclass;
5304
5305 return NO_REGS;
5306 }
5307
78d8b9f0
IB
5308 /* If it's an integer immediate that MOVI can't handle, then
5309 FP_REGS is not an option, so we return NO_REGS instead. */
5310 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
5311 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
5312 return NO_REGS;
5313
27bd251b
IB
 5314 /* Register elimination can result in a request for
5315 SP+constant->FP_REGS. We cannot support such operations which
 5316 use SP as source and an FP_REG as destination, so reject them
5317 right now. */
5318 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
5319 {
5320 rtx lhs = XEXP (x, 0);
5321
5322 /* Look through a possible SUBREG introduced by ILP32. */
5323 if (GET_CODE (lhs) == SUBREG)
5324 lhs = SUBREG_REG (lhs);
5325
5326 gcc_assert (REG_P (lhs));
5327 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
5328 POINTER_REGS));
5329 return NO_REGS;
5330 }
5331
78d8b9f0 5332 return regclass;
43e9d192
IB
5333}
5334
5335void
5336aarch64_asm_output_labelref (FILE* f, const char *name)
5337{
5338 asm_fprintf (f, "%U%s", name);
5339}
5340
5341static void
5342aarch64_elf_asm_constructor (rtx symbol, int priority)
5343{
5344 if (priority == DEFAULT_INIT_PRIORITY)
5345 default_ctor_section_asm_out_constructor (symbol, priority);
5346 else
5347 {
5348 section *s;
5349 char buf[18];
5350 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
5351 s = get_section (buf, SECTION_WRITE, NULL);
5352 switch_to_section (s);
5353 assemble_align (POINTER_SIZE);
28514dda 5354 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
5355 }
5356}
5357
5358static void
5359aarch64_elf_asm_destructor (rtx symbol, int priority)
5360{
5361 if (priority == DEFAULT_INIT_PRIORITY)
5362 default_dtor_section_asm_out_destructor (symbol, priority);
5363 else
5364 {
5365 section *s;
5366 char buf[18];
5367 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
5368 s = get_section (buf, SECTION_WRITE, NULL);
5369 switch_to_section (s);
5370 assemble_align (POINTER_SIZE);
28514dda 5371 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
5372 }
5373}
5374
5375const char*
5376aarch64_output_casesi (rtx *operands)
5377{
5378 char buf[100];
5379 char label[100];
b32d5189 5380 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
5381 int index;
5382 static const char *const patterns[4][2] =
5383 {
5384 {
5385 "ldrb\t%w3, [%0,%w1,uxtw]",
5386 "add\t%3, %4, %w3, sxtb #2"
5387 },
5388 {
5389 "ldrh\t%w3, [%0,%w1,uxtw #1]",
5390 "add\t%3, %4, %w3, sxth #2"
5391 },
5392 {
5393 "ldr\t%w3, [%0,%w1,uxtw #2]",
5394 "add\t%3, %4, %w3, sxtw #2"
5395 },
5396 /* We assume that DImode is only generated when not optimizing and
5397 that we don't really need 64-bit address offsets. That would
5398 imply an object file with 8GB of code in a single function! */
5399 {
5400 "ldr\t%w3, [%0,%w1,uxtw #2]",
5401 "add\t%3, %4, %w3, sxtw #2"
5402 }
5403 };
5404
5405 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
5406
5407 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
5408
5409 gcc_assert (index >= 0 && index <= 3);
5410
 5411 /* Need to implement table size reduction, by changing the code below. */
5412 output_asm_insn (patterns[index][0], operands);
5413 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
5414 snprintf (buf, sizeof (buf),
5415 "adr\t%%4, %s", targetm.strip_name_encoding (label));
5416 output_asm_insn (buf, operands);
5417 output_asm_insn (patterns[index][1], operands);
5418 output_asm_insn ("br\t%3", operands);
5419 assemble_label (asm_out_file, label);
5420 return "";
5421}
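/* With a HImode dispatch table (index == 1), the sequence emitted above
   looks roughly like:
       ldrh w3, [x0, w1, uxtw #1]
       adr  x4, .Lrtx<N>
       add  x3, x4, w3, sxth #2
       br   x3
   where .Lrtx<N> is the internal label generated for operand 2 and the
   register numbers stand in for operands 0, 1, 3 and 4.  */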
5422
5423
5424/* Return size in bits of an arithmetic operand which is shifted/scaled and
5425 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
5426 operator. */
5427
5428int
5429aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
5430{
5431 if (shift >= 0 && shift <= 3)
5432 {
5433 int size;
5434 for (size = 8; size <= 32; size *= 2)
5435 {
5436 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
5437 if (mask == bits << shift)
5438 return size;
5439 }
5440 }
5441 return 0;
5442}
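/* For example, aarch64_uxt_size (1, 0x1fe) returns 8 (a byte mask shifted
   left by one, i.e. a UXTB-style operand), aarch64_uxt_size (2, 0x3fffc)
   returns 16, and aarch64_uxt_size (0, 0x7f) returns 0 because 0x7f is
   not a full 8-, 16- or 32-bit mask.  */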
5443
5444static bool
ef4bddc2 5445aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
5446 const_rtx x ATTRIBUTE_UNUSED)
5447{
5448 /* We can't use blocks for constants when we're using a per-function
5449 constant pool. */
5450 return false;
5451}
5452
5453static section *
ef4bddc2 5454aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
5455 rtx x ATTRIBUTE_UNUSED,
5456 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
5457{
5458 /* Force all constant pool entries into the current function section. */
5459 return function_section (current_function_decl);
5460}
5461
5462
5463/* Costs. */
5464
5465/* Helper function for rtx cost calculation. Strip a shift expression
5466 from X. Returns the inner operand if successful, or the original
5467 expression on failure. */
5468static rtx
5469aarch64_strip_shift (rtx x)
5470{
5471 rtx op = x;
5472
57b77d46
RE
5473 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
5474 we can convert both to ROR during final output. */
43e9d192
IB
5475 if ((GET_CODE (op) == ASHIFT
5476 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
5477 || GET_CODE (op) == LSHIFTRT
5478 || GET_CODE (op) == ROTATERT
5479 || GET_CODE (op) == ROTATE)
43e9d192
IB
5480 && CONST_INT_P (XEXP (op, 1)))
5481 return XEXP (op, 0);
5482
5483 if (GET_CODE (op) == MULT
5484 && CONST_INT_P (XEXP (op, 1))
5485 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
5486 return XEXP (op, 0);
5487
5488 return x;
5489}
5490
4745e701 5491/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
5492 expression from X. Returns the inner operand if successful, or the
5493 original expression on failure. We deal with a number of possible
5494 canonicalization variations here. */
5495static rtx
4745e701 5496aarch64_strip_extend (rtx x)
43e9d192
IB
5497{
5498 rtx op = x;
5499
5500 /* Zero and sign extraction of a widened value. */
5501 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
5502 && XEXP (op, 2) == const0_rtx
4745e701 5503 && GET_CODE (XEXP (op, 0)) == MULT
43e9d192
IB
5504 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
5505 XEXP (op, 1)))
5506 return XEXP (XEXP (op, 0), 0);
5507
5508 /* It can also be represented (for zero-extend) as an AND with an
5509 immediate. */
5510 if (GET_CODE (op) == AND
5511 && GET_CODE (XEXP (op, 0)) == MULT
5512 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
5513 && CONST_INT_P (XEXP (op, 1))
5514 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
5515 INTVAL (XEXP (op, 1))) != 0)
5516 return XEXP (XEXP (op, 0), 0);
5517
5518 /* Now handle extended register, as this may also have an optional
5519 left shift by 1..4. */
5520 if (GET_CODE (op) == ASHIFT
5521 && CONST_INT_P (XEXP (op, 1))
5522 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
5523 op = XEXP (op, 0);
5524
5525 if (GET_CODE (op) == ZERO_EXTEND
5526 || GET_CODE (op) == SIGN_EXTEND)
5527 op = XEXP (op, 0);
5528
5529 if (op != x)
5530 return op;
5531
4745e701
JG
5532 return x;
5533}
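
/* A minimal sketch of the canonicalization forms handled above, using
   hypothetical operands:

     (zero_extend:DI (reg:SI x))                            -> (reg:SI x)
     (ashift:DI (sign_extend:DI (reg:SI x)) (const_int 2))  -> (reg:SI x)
     (and:DI (mult:DI (reg:DI x) (const_int 4))
             (const_int 1020))                              -> (reg:DI x)
     (ashift:DI (reg:DI x) (const_int 5))                   -> unchanged  */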
5534
0a78ebe4
KT
5535/* Return true iff CODE is a shift supported in combination
5536 with arithmetic instructions. */
4d1919ed 5537
0a78ebe4
KT
5538static bool
5539aarch64_shift_p (enum rtx_code code)
5540{
5541 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
5542}
5543
4745e701 5544/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
5545 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
5546 Return the calculated cost of the expression, recursing manually into
4745e701
JG
5547 operands where needed. */
5548
5549static int
e548c9df 5550aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
5551{
5552 rtx op0, op1;
5553 const struct cpu_cost_table *extra_cost
b175b679 5554 = aarch64_tune_params.insn_extra_cost;
4745e701 5555 int cost = 0;
0a78ebe4 5556 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 5557 machine_mode mode = GET_MODE (x);
4745e701
JG
5558
5559 gcc_checking_assert (code == MULT);
5560
5561 op0 = XEXP (x, 0);
5562 op1 = XEXP (x, 1);
5563
5564 if (VECTOR_MODE_P (mode))
5565 mode = GET_MODE_INNER (mode);
5566
5567 /* Integer multiply/fma. */
5568 if (GET_MODE_CLASS (mode) == MODE_INT)
5569 {
5570 /* The multiply will be canonicalized as a shift, cost it as such. */
0a78ebe4
KT
5571 if (aarch64_shift_p (GET_CODE (x))
5572 || (CONST_INT_P (op1)
5573 && exact_log2 (INTVAL (op1)) > 0))
4745e701 5574 {
0a78ebe4
KT
5575 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
5576 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
5577 if (speed)
5578 {
0a78ebe4
KT
5579 if (compound_p)
5580 {
5581 if (REG_P (op1))
5582 /* ARITH + shift-by-register. */
5583 cost += extra_cost->alu.arith_shift_reg;
5584 else if (is_extend)
5585 /* ARITH + extended register. We don't have a cost field
5586 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
5587 cost += extra_cost->alu.extend_arith;
5588 else
5589 /* ARITH + shift-by-immediate. */
5590 cost += extra_cost->alu.arith_shift;
5591 }
4745e701
JG
5592 else
5593 /* LSL (immediate). */
0a78ebe4
KT
5594 cost += extra_cost->alu.shift;
5595
4745e701 5596 }
0a78ebe4
KT
5597 /* Strip extends as we will have costed them in the case above. */
5598 if (is_extend)
5599 op0 = aarch64_strip_extend (op0);
4745e701 5600
e548c9df 5601 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
5602
5603 return cost;
5604 }
5605
d2ac256b
KT
5606 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
5607 compound and let the below cases handle it. After all, MNEG is a
5608 special-case alias of MSUB. */
5609 if (GET_CODE (op0) == NEG)
5610 {
5611 op0 = XEXP (op0, 0);
5612 compound_p = true;
5613 }
5614
4745e701
JG
5615 /* Integer multiplies or FMAs have zero/sign extending variants. */
5616 if ((GET_CODE (op0) == ZERO_EXTEND
5617 && GET_CODE (op1) == ZERO_EXTEND)
5618 || (GET_CODE (op0) == SIGN_EXTEND
5619 && GET_CODE (op1) == SIGN_EXTEND))
5620 {
e548c9df
AM
5621 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
5622 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
5623
5624 if (speed)
5625 {
0a78ebe4 5626 if (compound_p)
d2ac256b 5627 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
5628 cost += extra_cost->mult[0].extend_add;
5629 else
5630 /* MUL/SMULL/UMULL. */
5631 cost += extra_cost->mult[0].extend;
5632 }
5633
5634 return cost;
5635 }
5636
d2ac256b 5637 /* This is either an integer multiply or a MADD. In both cases
4745e701 5638 we want to recurse and cost the operands. */
e548c9df
AM
5639 cost += rtx_cost (op0, mode, MULT, 0, speed);
5640 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
5641
5642 if (speed)
5643 {
0a78ebe4 5644 if (compound_p)
d2ac256b 5645 /* MADD/MSUB. */
4745e701
JG
5646 cost += extra_cost->mult[mode == DImode].add;
5647 else
5648 /* MUL. */
5649 cost += extra_cost->mult[mode == DImode].simple;
5650 }
5651
5652 return cost;
5653 }
5654 else
5655 {
5656 if (speed)
5657 {
3d840f7d 5658 /* Floating-point FMA/FMUL can also support negations of the
d318517d
SN
5659 operands, unless the rounding mode is upward or downward in
5660 which case FNMUL is different from FMUL with operand negation. */
5661 bool neg0 = GET_CODE (op0) == NEG;
5662 bool neg1 = GET_CODE (op1) == NEG;
5663 if (compound_p || !flag_rounding_math || (neg0 && neg1))
5664 {
5665 if (neg0)
5666 op0 = XEXP (op0, 0);
5667 if (neg1)
5668 op1 = XEXP (op1, 0);
5669 }
4745e701 5670
0a78ebe4 5671 if (compound_p)
4745e701
JG
5672 /* FMADD/FNMADD/FNMSUB/FMSUB. */
5673 cost += extra_cost->fp[mode == DFmode].fma;
5674 else
3d840f7d 5675 /* FMUL/FNMUL. */
4745e701
JG
5676 cost += extra_cost->fp[mode == DFmode].mult;
5677 }
5678
e548c9df
AM
5679 cost += rtx_cost (op0, mode, MULT, 0, speed);
5680 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
5681 return cost;
5682 }
43e9d192
IB
5683}
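
/* For illustration, a sketch of how a few common patterns are costed by
   the function above (the named fields come from the per-CPU cost tables):

     (mult:DI (reg:DI x) (const_int 4)) appearing inside a PLUS
        -> cost of x, plus alu.arith_shift      ADD with a shifted operand
     (mult:SI (reg:SI x) (reg:SI y))
        -> costs of x and y, plus mult[0].simple    plain MUL
     (mult:DI (sign_extend:DI (reg:SI x)) (sign_extend:DI (reg:SI y)))
        -> costs of x and y, plus mult[1].extend    SMULL  */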
5684
67747367
JG
5685static int
5686aarch64_address_cost (rtx x,
ef4bddc2 5687 machine_mode mode,
67747367
JG
5688 addr_space_t as ATTRIBUTE_UNUSED,
5689 bool speed)
5690{
5691 enum rtx_code c = GET_CODE (x);
b175b679 5692 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
5693 struct aarch64_address_info info;
5694 int cost = 0;
5695 info.shift = 0;
5696
5697 if (!aarch64_classify_address (&info, x, mode, c, false))
5698 {
5699 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
5700 {
5701 /* This is a CONST or SYMBOL ref which will be split
5702 in a different way depending on the code model in use.
5703 Cost it through the generic infrastructure. */
e548c9df 5704 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
5705 /* Divide through by the cost of one instruction to
5706 bring it to the same units as the address costs. */
5707 cost_symbol_ref /= COSTS_N_INSNS (1);
5708 /* The cost is then the cost of preparing the address,
5709 followed by an immediate (possibly 0) offset. */
5710 return cost_symbol_ref + addr_cost->imm_offset;
5711 }
5712 else
5713 {
5714 /* This is most likely a jump table from a case
5715 statement. */
5716 return addr_cost->register_offset;
5717 }
5718 }
5719
5720 switch (info.type)
5721 {
5722 case ADDRESS_LO_SUM:
5723 case ADDRESS_SYMBOLIC:
5724 case ADDRESS_REG_IMM:
5725 cost += addr_cost->imm_offset;
5726 break;
5727
5728 case ADDRESS_REG_WB:
5729 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
5730 cost += addr_cost->pre_modify;
5731 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
5732 cost += addr_cost->post_modify;
5733 else
5734 gcc_unreachable ();
5735
5736 break;
5737
5738 case ADDRESS_REG_REG:
5739 cost += addr_cost->register_offset;
5740 break;
5741
5742 case ADDRESS_REG_UXTW:
5743 case ADDRESS_REG_SXTW:
5744 cost += addr_cost->register_extend;
5745 break;
5746
5747 default:
5748 gcc_unreachable ();
5749 }
5750
5751
5752 if (info.shift > 0)
5753 {
5754 /* For the sake of calculating the cost of the shifted register
5755 component, we can treat same sized modes in the same way. */
5756 switch (GET_MODE_BITSIZE (mode))
5757 {
5758 case 16:
5759 cost += addr_cost->addr_scale_costs.hi;
5760 break;
5761
5762 case 32:
5763 cost += addr_cost->addr_scale_costs.si;
5764 break;
5765
5766 case 64:
5767 cost += addr_cost->addr_scale_costs.di;
5768 break;
5769
5770 /* We can't tell, or this is a 128-bit vector. */
5771 default:
5772 cost += addr_cost->addr_scale_costs.ti;
5773 break;
5774 }
5775 }
5776
5777 return cost;
5778}
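
/* For illustration, a sketch of the components added for two representative
   DImode addresses, assuming aarch64_classify_address accepts them:

     (plus:DI (reg:DI base) (const_int 16))
        -> addr_cost->imm_offset
     (plus:DI (reg:DI base) (mult:DI (reg:DI index) (const_int 8)))
        -> addr_cost->register_offset + addr_cost->addr_scale_costs.di  */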
5779
b9066f5a
MW
5780/* Return the cost of a branch. If SPEED_P is true then the compiler is
5781 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
5782 to be taken. */
5783
5784int
5785aarch64_branch_cost (bool speed_p, bool predictable_p)
5786{
5787 /* When optimizing for speed, use the cost of unpredictable branches. */
5788 const struct cpu_branch_cost *branch_costs =
b175b679 5789 aarch64_tune_params.branch_costs;
b9066f5a
MW
5790
5791 if (!speed_p || predictable_p)
5792 return branch_costs->predictable;
5793 else
5794 return branch_costs->unpredictable;
5795}
5796
7cc2145f
JG
5797/* Return true if the RTX X in mode MODE is a zero or sign extract
5798 usable in an ADD or SUB (extended register) instruction. */
5799static bool
ef4bddc2 5800aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
7cc2145f
JG
5801{
5802 /* Catch add with a sign extract.
5803 This is add_<optab><mode>_multp2. */
5804 if (GET_CODE (x) == SIGN_EXTRACT
5805 || GET_CODE (x) == ZERO_EXTRACT)
5806 {
5807 rtx op0 = XEXP (x, 0);
5808 rtx op1 = XEXP (x, 1);
5809 rtx op2 = XEXP (x, 2);
5810
5811 if (GET_CODE (op0) == MULT
5812 && CONST_INT_P (op1)
5813 && op2 == const0_rtx
5814 && CONST_INT_P (XEXP (op0, 1))
5815 && aarch64_is_extend_from_extract (mode,
5816 XEXP (op0, 1),
5817 op1))
5818 {
5819 return true;
5820 }
5821 }
e47c4031
KT
5822 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
5823 No shift. */
5824 else if (GET_CODE (x) == SIGN_EXTEND
5825 || GET_CODE (x) == ZERO_EXTEND)
5826 return REG_P (XEXP (x, 0));
7cc2145f
JG
5827
5828 return false;
5829}
5830
61263118
KT
5831static bool
5832aarch64_frint_unspec_p (unsigned int u)
5833{
5834 switch (u)
5835 {
5836 case UNSPEC_FRINTZ:
5837 case UNSPEC_FRINTP:
5838 case UNSPEC_FRINTM:
5839 case UNSPEC_FRINTA:
5840 case UNSPEC_FRINTN:
5841 case UNSPEC_FRINTX:
5842 case UNSPEC_FRINTI:
5843 return true;
5844
5845 default:
5846 return false;
5847 }
5848}
5849
fb0cb7fa
KT
5850/* Return true iff X is an rtx that will match an extr instruction
5851 i.e. as described in the *extr<mode>5_insn family of patterns.
5852 OP0 and OP1 will be set to the operands of the shifts involved
5853 on success and will be NULL_RTX otherwise. */
5854
5855static bool
5856aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
5857{
5858 rtx op0, op1;
5859 machine_mode mode = GET_MODE (x);
5860
5861 *res_op0 = NULL_RTX;
5862 *res_op1 = NULL_RTX;
5863
5864 if (GET_CODE (x) != IOR)
5865 return false;
5866
5867 op0 = XEXP (x, 0);
5868 op1 = XEXP (x, 1);
5869
5870 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
5871 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
5872 {
5873 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
5874 if (GET_CODE (op1) == ASHIFT)
5875 std::swap (op0, op1);
5876
5877 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
5878 return false;
5879
5880 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
5881 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
5882
5883 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
5884 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
5885 {
5886 *res_op0 = XEXP (op0, 0);
5887 *res_op1 = XEXP (op1, 0);
5888 return true;
5889 }
5890 }
5891
5892 return false;
5893}
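
/* For illustration, a pattern the predicate above accepts in SImode:

     (ior:SI (ashift:SI (reg:SI a) (const_int 24))
             (lshiftrt:SI (reg:SI b) (const_int 8)))

   returns true with *res_op0 = (reg:SI a) and *res_op1 = (reg:SI b),
   because the shift amounts sum to the 32-bit mode size; amounts summing
   to anything else are rejected.  */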
5894
2d5ffe46
AP
5895/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5896 storing it in *COST. Result is true if the total cost of the operation
5897 has now been calculated. */
5898static bool
5899aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
5900{
b9e3afe9
AP
5901 rtx inner;
5902 rtx comparator;
5903 enum rtx_code cmpcode;
5904
5905 if (COMPARISON_P (op0))
5906 {
5907 inner = XEXP (op0, 0);
5908 comparator = XEXP (op0, 1);
5909 cmpcode = GET_CODE (op0);
5910 }
5911 else
5912 {
5913 inner = op0;
5914 comparator = const0_rtx;
5915 cmpcode = NE;
5916 }
5917
2d5ffe46
AP
5918 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5919 {
5920 /* Conditional branch. */
b9e3afe9 5921 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
5922 return true;
5923 else
5924 {
b9e3afe9 5925 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 5926 {
2d5ffe46
AP
5927 if (comparator == const0_rtx)
5928 {
5929 /* TBZ/TBNZ/CBZ/CBNZ. */
5930 if (GET_CODE (inner) == ZERO_EXTRACT)
5931 /* TBZ/TBNZ. */
e548c9df
AM
5932 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
5933 ZERO_EXTRACT, 0, speed);
5934 else
5935 /* CBZ/CBNZ. */
5936 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
5937
5938 return true;
5939 }
5940 }
b9e3afe9 5941 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 5942 {
2d5ffe46
AP
5943 /* TBZ/TBNZ. */
5944 if (comparator == const0_rtx)
5945 return true;
5946 }
5947 }
5948 }
b9e3afe9 5949 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
5950 {
5951 /* It's a conditional operation based on the status flags,
5952 so it must be some flavor of CSEL. */
5953
5954 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5955 if (GET_CODE (op1) == NEG
5956 || GET_CODE (op1) == NOT
5957 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5958 op1 = XEXP (op1, 0);
5959
e548c9df
AM
5960 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
5961 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
5962 return true;
5963 }
5964
5965 /* We don't know what this is, cost all operands. */
5966 return false;
5967}
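
/* A sketch of the three classes distinguished above, on hypothetical RTL:

     (if_then_else (ne (reg:DI x) (const_int 0)) (pc) ...)
        -> branch on a register against zero, costed as CBZ/CBNZ
     (if_then_else (eq (zero_extract ...) (const_int 0)) (pc) ...)
        -> single-bit test and branch, costed as TBZ/TBNZ
     (if_then_else (ge (reg:CC cc) (const_int 0)) (reg:DI a) (reg:DI b))
        -> conditional select on the flags, costed as a CSEL; NEG, NOT and
           PLUS-one forms of the first arm are folded in for free
           (CSNEG/CSINV/CSINC)  */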
5968
43e9d192
IB
5969/* Calculate the cost of calculating X, storing it in *COST. Result
5970 is true if the total cost of the operation has now been calculated. */
5971static bool
e548c9df 5972aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
5973 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5974{
a8eecd00 5975 rtx op0, op1, op2;
73250c4c 5976 const struct cpu_cost_table *extra_cost
b175b679 5977 = aarch64_tune_params.insn_extra_cost;
e548c9df 5978 int code = GET_CODE (x);
43e9d192 5979
7fc5ef02
JG
5980 /* By default, assume that everything has equivalent cost to the
5981 cheapest instruction. Any additional costs are applied as a delta
5982 above this default. */
5983 *cost = COSTS_N_INSNS (1);
5984
43e9d192
IB
5985 switch (code)
5986 {
5987 case SET:
ba123b0d
JG
5988 /* The cost depends entirely on the operands to SET. */
5989 *cost = 0;
43e9d192
IB
5990 op0 = SET_DEST (x);
5991 op1 = SET_SRC (x);
5992
5993 switch (GET_CODE (op0))
5994 {
5995 case MEM:
5996 if (speed)
2961177e
JG
5997 {
5998 rtx address = XEXP (op0, 0);
b6875aac
KV
5999 if (VECTOR_MODE_P (mode))
6000 *cost += extra_cost->ldst.storev;
6001 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
6002 *cost += extra_cost->ldst.store;
6003 else if (mode == SFmode)
6004 *cost += extra_cost->ldst.storef;
6005 else if (mode == DFmode)
6006 *cost += extra_cost->ldst.stored;
6007
6008 *cost +=
6009 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6010 0, speed));
6011 }
43e9d192 6012
e548c9df 6013 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
6014 return true;
6015
6016 case SUBREG:
6017 if (! REG_P (SUBREG_REG (op0)))
e548c9df 6018 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 6019
43e9d192
IB
6020 /* Fall through. */
6021 case REG:
b6875aac
KV
6022 /* The cost is one per vector-register copied. */
6023 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
6024 {
6025 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
6026 / GET_MODE_SIZE (V4SImode);
6027 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6028 }
ba123b0d
JG
6029 /* const0_rtx is in general free, but we will use an
6030 instruction to set a register to 0. */
b6875aac
KV
6031 else if (REG_P (op1) || op1 == const0_rtx)
6032 {
6033 /* The cost is 1 per register copied. */
6034 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
ba123b0d 6035 / UNITS_PER_WORD;
b6875aac
KV
6036 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6037 }
ba123b0d
JG
6038 else
6039 /* Cost is just the cost of the RHS of the set. */
e548c9df 6040 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
6041 return true;
6042
ba123b0d 6043 case ZERO_EXTRACT:
43e9d192 6044 case SIGN_EXTRACT:
ba123b0d
JG
6045 /* Bit-field insertion. Strip any redundant widening of
6046 the RHS to meet the width of the target. */
43e9d192
IB
6047 if (GET_CODE (op1) == SUBREG)
6048 op1 = SUBREG_REG (op1);
6049 if ((GET_CODE (op1) == ZERO_EXTEND
6050 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 6051 && CONST_INT_P (XEXP (op0, 1))
43e9d192
IB
6052 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
6053 >= INTVAL (XEXP (op0, 1))))
6054 op1 = XEXP (op1, 0);
ba123b0d
JG
6055
6056 if (CONST_INT_P (op1))
6057 {
6058 /* MOV immediate is assumed to always be cheap. */
6059 *cost = COSTS_N_INSNS (1);
6060 }
6061 else
6062 {
6063 /* BFM. */
6064 if (speed)
6065 *cost += extra_cost->alu.bfi;
e548c9df 6066 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
6067 }
6068
43e9d192
IB
6069 return true;
6070
6071 default:
ba123b0d
JG
6072 /* We can't make sense of this, assume default cost. */
6073 *cost = COSTS_N_INSNS (1);
61263118 6074 return false;
43e9d192
IB
6075 }
6076 return false;
6077
9dfc162c
JG
6078 case CONST_INT:
6079 /* If an instruction can incorporate a constant within the
6080 instruction, the instruction's expression avoids calling
6081 rtx_cost() on the constant. If rtx_cost() is called on a
6082 constant, then it is usually because the constant must be
6083 moved into a register by one or more instructions.
6084
6085 The exception is constant 0, which can be expressed
6086 as XZR/WZR and is therefore free. The exception to this is
6087 if we have (set (reg) (const0_rtx)) in which case we must cost
6088 the move. However, we can catch that when we cost the SET, so
6089 we don't need to consider that here. */
6090 if (x == const0_rtx)
6091 *cost = 0;
6092 else
6093 {
6094 /* To an approximation, the cost of building any other constant is
6095 proportional to the number of instructions
6096 required to build that constant. This is true whether we
6097 are compiling for SPEED or otherwise. */
82614948
RR
6098 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
6099 (NULL_RTX, x, false, mode));
9dfc162c
JG
6100 }
6101 return true;
6102
6103 case CONST_DOUBLE:
6104 if (speed)
6105 {
6106 /* mov[df,sf]_aarch64. */
6107 if (aarch64_float_const_representable_p (x))
6108 /* FMOV (scalar immediate). */
6109 *cost += extra_cost->fp[mode == DFmode].fpconst;
6110 else if (!aarch64_float_const_zero_rtx_p (x))
6111 {
6112 /* This will be a load from memory. */
6113 if (mode == DFmode)
6114 *cost += extra_cost->ldst.loadd;
6115 else
6116 *cost += extra_cost->ldst.loadf;
6117 }
6118 else
6119 /* Otherwise this is +0.0. We get this using MOVI d0, #0
6120 or MOV v0.s[0], wzr - neither of which are modeled by the
6121 cost tables. Just use the default cost. */
6122 {
6123 }
6124 }
6125
6126 return true;
6127
43e9d192
IB
6128 case MEM:
6129 if (speed)
2961177e
JG
6130 {
6131 /* For loads we want the base cost of a load, plus an
6132 approximation for the additional cost of the addressing
6133 mode. */
6134 rtx address = XEXP (x, 0);
b6875aac
KV
6135 if (VECTOR_MODE_P (mode))
6136 *cost += extra_cost->ldst.loadv;
6137 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
6138 *cost += extra_cost->ldst.load;
6139 else if (mode == SFmode)
6140 *cost += extra_cost->ldst.loadf;
6141 else if (mode == DFmode)
6142 *cost += extra_cost->ldst.loadd;
6143
6144 *cost +=
6145 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6146 0, speed));
6147 }
43e9d192
IB
6148
6149 return true;
6150
6151 case NEG:
4745e701
JG
6152 op0 = XEXP (x, 0);
6153
b6875aac
KV
6154 if (VECTOR_MODE_P (mode))
6155 {
6156 if (speed)
6157 {
6158 /* FNEG. */
6159 *cost += extra_cost->vect.alu;
6160 }
6161 return false;
6162 }
6163
e548c9df
AM
6164 if (GET_MODE_CLASS (mode) == MODE_INT)
6165 {
4745e701
JG
6166 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
6167 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
6168 {
6169 /* CSETM. */
e548c9df 6170 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
6171 return true;
6172 }
6173
6174 /* Cost this as SUB wzr, X. */
e548c9df 6175 op0 = CONST0_RTX (mode);
4745e701
JG
6176 op1 = XEXP (x, 0);
6177 goto cost_minus;
6178 }
6179
e548c9df 6180 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
6181 {
6182 /* Support (neg(fma...)) as a single instruction only if
6183 sign of zeros is unimportant. This matches the decision
6184 making in aarch64.md. */
6185 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
6186 {
6187 /* FNMADD. */
e548c9df 6188 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
6189 return true;
6190 }
d318517d
SN
6191 if (GET_CODE (op0) == MULT)
6192 {
6193 /* FNMUL. */
6194 *cost = rtx_cost (op0, mode, NEG, 0, speed);
6195 return true;
6196 }
4745e701
JG
6197 if (speed)
6198 /* FNEG. */
6199 *cost += extra_cost->fp[mode == DFmode].neg;
6200 return false;
6201 }
6202
6203 return false;
43e9d192 6204
781aeb73
KT
6205 case CLRSB:
6206 case CLZ:
6207 if (speed)
b6875aac
KV
6208 {
6209 if (VECTOR_MODE_P (mode))
6210 *cost += extra_cost->vect.alu;
6211 else
6212 *cost += extra_cost->alu.clz;
6213 }
781aeb73
KT
6214
6215 return false;
6216
43e9d192
IB
6217 case COMPARE:
6218 op0 = XEXP (x, 0);
6219 op1 = XEXP (x, 1);
6220
6221 if (op1 == const0_rtx
6222 && GET_CODE (op0) == AND)
6223 {
6224 x = op0;
e548c9df 6225 mode = GET_MODE (op0);
43e9d192
IB
6226 goto cost_logic;
6227 }
6228
a8eecd00
JG
6229 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
6230 {
6231 /* TODO: A write to the CC flags possibly costs extra, this
6232 needs encoding in the cost tables. */
6233
6234 /* CC_ZESWPmode supports zero extend for free. */
e548c9df 6235 if (mode == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
a8eecd00
JG
6236 op0 = XEXP (op0, 0);
6237
e548c9df 6238 mode = GET_MODE (op0);
a8eecd00
JG
6239 /* ANDS. */
6240 if (GET_CODE (op0) == AND)
6241 {
6242 x = op0;
6243 goto cost_logic;
6244 }
6245
6246 if (GET_CODE (op0) == PLUS)
6247 {
6248 /* ADDS (and CMN alias). */
6249 x = op0;
6250 goto cost_plus;
6251 }
6252
6253 if (GET_CODE (op0) == MINUS)
6254 {
6255 /* SUBS. */
6256 x = op0;
6257 goto cost_minus;
6258 }
6259
6260 if (GET_CODE (op1) == NEG)
6261 {
6262 /* CMN. */
6263 if (speed)
6264 *cost += extra_cost->alu.arith;
6265
e548c9df
AM
6266 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
6267 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
6268 return true;
6269 }
6270
6271 /* CMP.
6272
6273 Compare can freely swap the order of operands, and
6274 canonicalization puts the more complex operation first.
6275 But the integer MINUS logic expects the shift/extend
6276 operation in op1. */
6277 if (! (REG_P (op0)
6278 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
6279 {
6280 op0 = XEXP (x, 1);
6281 op1 = XEXP (x, 0);
6282 }
6283 goto cost_minus;
6284 }
6285
6286 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6287 {
6288 /* FCMP. */
6289 if (speed)
6290 *cost += extra_cost->fp[mode == DFmode].compare;
6291
6292 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
6293 {
e548c9df 6294 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
6295 /* FCMP supports constant 0.0 for no extra cost. */
6296 return true;
6297 }
6298 return false;
6299 }
6300
b6875aac
KV
6301 if (VECTOR_MODE_P (mode))
6302 {
6303 /* Vector compare. */
6304 if (speed)
6305 *cost += extra_cost->vect.alu;
6306
6307 if (aarch64_float_const_zero_rtx_p (op1))
6308 {
6309 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
6310 cost. */
6311 return true;
6312 }
6313 return false;
6314 }
a8eecd00 6315 return false;
43e9d192
IB
6316
6317 case MINUS:
4745e701
JG
6318 {
6319 op0 = XEXP (x, 0);
6320 op1 = XEXP (x, 1);
6321
6322cost_minus:
e548c9df 6323 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 6324
4745e701
JG
6325 /* Detect valid immediates. */
6326 if ((GET_MODE_CLASS (mode) == MODE_INT
6327 || (GET_MODE_CLASS (mode) == MODE_CC
6328 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
6329 && CONST_INT_P (op1)
6330 && aarch64_uimm12_shift (INTVAL (op1)))
6331 {
4745e701
JG
6332 if (speed)
6333 /* SUB(S) (immediate). */
6334 *cost += extra_cost->alu.arith;
6335 return true;
4745e701
JG
6336 }
6337
7cc2145f
JG
6338 /* Look for SUB (extended register). */
6339 if (aarch64_rtx_arith_op_extract_p (op1, mode))
6340 {
6341 if (speed)
2533c820 6342 *cost += extra_cost->alu.extend_arith;
7cc2145f 6343
e47c4031
KT
6344 op1 = aarch64_strip_extend (op1);
6345 *cost += rtx_cost (op1, VOIDmode,
e548c9df 6346 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
6347 return true;
6348 }
6349
4745e701
JG
6350 rtx new_op1 = aarch64_strip_extend (op1);
6351
6352 /* Cost this as an FMA-alike operation. */
6353 if ((GET_CODE (new_op1) == MULT
0a78ebe4 6354 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
6355 && code != COMPARE)
6356 {
6357 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
6358 (enum rtx_code) code,
6359 speed);
4745e701
JG
6360 return true;
6361 }
43e9d192 6362
e548c9df 6363 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 6364
4745e701
JG
6365 if (speed)
6366 {
b6875aac
KV
6367 if (VECTOR_MODE_P (mode))
6368 {
6369 /* Vector SUB. */
6370 *cost += extra_cost->vect.alu;
6371 }
6372 else if (GET_MODE_CLASS (mode) == MODE_INT)
6373 {
6374 /* SUB(S). */
6375 *cost += extra_cost->alu.arith;
6376 }
4745e701 6377 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
6378 {
6379 /* FSUB. */
6380 *cost += extra_cost->fp[mode == DFmode].addsub;
6381 }
4745e701
JG
6382 }
6383 return true;
6384 }
43e9d192
IB
6385
6386 case PLUS:
4745e701
JG
6387 {
6388 rtx new_op0;
43e9d192 6389
4745e701
JG
6390 op0 = XEXP (x, 0);
6391 op1 = XEXP (x, 1);
43e9d192 6392
a8eecd00 6393cost_plus:
4745e701
JG
6394 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
6395 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
6396 {
6397 /* CSINC. */
e548c9df
AM
6398 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
6399 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
6400 return true;
6401 }
43e9d192 6402
4745e701
JG
6403 if (GET_MODE_CLASS (mode) == MODE_INT
6404 && CONST_INT_P (op1)
6405 && aarch64_uimm12_shift (INTVAL (op1)))
6406 {
e548c9df 6407 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 6408
4745e701
JG
6409 if (speed)
6410 /* ADD (immediate). */
6411 *cost += extra_cost->alu.arith;
6412 return true;
6413 }
6414
e548c9df 6415 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 6416
7cc2145f
JG
6417 /* Look for ADD (extended register). */
6418 if (aarch64_rtx_arith_op_extract_p (op0, mode))
6419 {
6420 if (speed)
2533c820 6421 *cost += extra_cost->alu.extend_arith;
7cc2145f 6422
e47c4031
KT
6423 op0 = aarch64_strip_extend (op0);
6424 *cost += rtx_cost (op0, VOIDmode,
e548c9df 6425 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
6426 return true;
6427 }
6428
4745e701
JG
6429 /* Strip any extend, leave shifts behind as we will
6430 cost them through mult_cost. */
6431 new_op0 = aarch64_strip_extend (op0);
6432
6433 if (GET_CODE (new_op0) == MULT
0a78ebe4 6434 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
6435 {
6436 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
6437 speed);
4745e701
JG
6438 return true;
6439 }
6440
e548c9df 6441 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
6442
6443 if (speed)
6444 {
b6875aac
KV
6445 if (VECTOR_MODE_P (mode))
6446 {
6447 /* Vector ADD. */
6448 *cost += extra_cost->vect.alu;
6449 }
6450 else if (GET_MODE_CLASS (mode) == MODE_INT)
6451 {
6452 /* ADD. */
6453 *cost += extra_cost->alu.arith;
6454 }
4745e701 6455 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
6456 {
6457 /* FADD. */
6458 *cost += extra_cost->fp[mode == DFmode].addsub;
6459 }
4745e701
JG
6460 }
6461 return true;
6462 }
43e9d192 6463
18b42b2a
KT
6464 case BSWAP:
6465 *cost = COSTS_N_INSNS (1);
6466
6467 if (speed)
b6875aac
KV
6468 {
6469 if (VECTOR_MODE_P (mode))
6470 *cost += extra_cost->vect.alu;
6471 else
6472 *cost += extra_cost->alu.rev;
6473 }
18b42b2a
KT
6474 return false;
6475
43e9d192 6476 case IOR:
f7d5cf8d
KT
6477 if (aarch_rev16_p (x))
6478 {
6479 *cost = COSTS_N_INSNS (1);
6480
b6875aac
KV
6481 if (speed)
6482 {
6483 if (VECTOR_MODE_P (mode))
6484 *cost += extra_cost->vect.alu;
6485 else
6486 *cost += extra_cost->alu.rev;
6487 }
6488 return true;
f7d5cf8d 6489 }
fb0cb7fa
KT
6490
6491 if (aarch64_extr_rtx_p (x, &op0, &op1))
6492 {
e548c9df
AM
6493 *cost += rtx_cost (op0, mode, IOR, 0, speed);
6494 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
6495 if (speed)
6496 *cost += extra_cost->alu.shift;
6497
6498 return true;
6499 }
f7d5cf8d 6500 /* Fall through. */
43e9d192
IB
6501 case XOR:
6502 case AND:
6503 cost_logic:
6504 op0 = XEXP (x, 0);
6505 op1 = XEXP (x, 1);
6506
b6875aac
KV
6507 if (VECTOR_MODE_P (mode))
6508 {
6509 if (speed)
6510 *cost += extra_cost->vect.alu;
6511 return true;
6512 }
6513
268c3b47
JG
6514 if (code == AND
6515 && GET_CODE (op0) == MULT
6516 && CONST_INT_P (XEXP (op0, 1))
6517 && CONST_INT_P (op1)
6518 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
6519 INTVAL (op1)) != 0)
6520 {
6521 /* This is a UBFM/SBFM. */
e548c9df 6522 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
6523 if (speed)
6524 *cost += extra_cost->alu.bfx;
6525 return true;
6526 }
6527
e548c9df 6528 if (GET_MODE_CLASS (mode) == MODE_INT)
43e9d192 6529 {
268c3b47
JG
6530 /* We possibly get the immediate for free, this is not
6531 modelled. */
43e9d192 6532 if (CONST_INT_P (op1)
e548c9df 6533 && aarch64_bitmask_imm (INTVAL (op1), mode))
43e9d192 6534 {
e548c9df 6535 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
268c3b47
JG
6536
6537 if (speed)
6538 *cost += extra_cost->alu.logical;
6539
6540 return true;
43e9d192
IB
6541 }
6542 else
6543 {
268c3b47
JG
6544 rtx new_op0 = op0;
6545
6546 /* Handle ORN, EON, or BIC. */
43e9d192
IB
6547 if (GET_CODE (op0) == NOT)
6548 op0 = XEXP (op0, 0);
268c3b47
JG
6549
6550 new_op0 = aarch64_strip_shift (op0);
6551
6552 /* If we had a shift on op0 then this is a logical-shift-
6553 by-register/immediate operation. Otherwise, this is just
6554 a logical operation. */
6555 if (speed)
6556 {
6557 if (new_op0 != op0)
6558 {
6559 /* Shift by immediate. */
6560 if (CONST_INT_P (XEXP (op0, 1)))
6561 *cost += extra_cost->alu.log_shift;
6562 else
6563 *cost += extra_cost->alu.log_shift_reg;
6564 }
6565 else
6566 *cost += extra_cost->alu.logical;
6567 }
6568
6569 /* In both cases we want to cost both operands. */
e548c9df
AM
6570 *cost += rtx_cost (new_op0, mode, (enum rtx_code) code, 0, speed);
6571 *cost += rtx_cost (op1, mode, (enum rtx_code) code, 1, speed);
268c3b47
JG
6572
6573 return true;
43e9d192 6574 }
43e9d192
IB
6575 }
6576 return false;
6577
268c3b47 6578 case NOT:
6365da9e
KT
6579 x = XEXP (x, 0);
6580 op0 = aarch64_strip_shift (x);
6581
b6875aac
KV
6582 if (VECTOR_MODE_P (mode))
6583 {
6584 /* Vector NOT. */
6585 *cost += extra_cost->vect.alu;
6586 return false;
6587 }
6588
6365da9e
KT
6589 /* MVN-shifted-reg. */
6590 if (op0 != x)
6591 {
e548c9df 6592 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
6593
6594 if (speed)
6595 *cost += extra_cost->alu.log_shift;
6596
6597 return true;
6598 }
6599 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
6600 Handle the second form here taking care that 'a' in the above can
6601 be a shift. */
6602 else if (GET_CODE (op0) == XOR)
6603 {
6604 rtx newop0 = XEXP (op0, 0);
6605 rtx newop1 = XEXP (op0, 1);
6606 rtx op0_stripped = aarch64_strip_shift (newop0);
6607
e548c9df
AM
6608 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
6609 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
6610
6611 if (speed)
6612 {
6613 if (op0_stripped != newop0)
6614 *cost += extra_cost->alu.log_shift;
6615 else
6616 *cost += extra_cost->alu.logical;
6617 }
6618
6619 return true;
6620 }
268c3b47
JG
6621 /* MVN. */
6622 if (speed)
6623 *cost += extra_cost->alu.logical;
6624
268c3b47
JG
6625 return false;
6626
43e9d192 6627 case ZERO_EXTEND:
b1685e62
JG
6628
6629 op0 = XEXP (x, 0);
6630 /* If a value is written in SI mode, then zero extended to DI
6631 mode, the operation will in general be free as a write to
6632 a 'w' register implicitly zeroes the upper bits of an 'x'
6633 register. However, if this is
6634
6635 (set (reg) (zero_extend (reg)))
6636
6637 we must cost the explicit register move. */
6638 if (mode == DImode
6639 && GET_MODE (op0) == SImode
6640 && outer == SET)
6641 {
e548c9df 6642 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62
JG
6643
6644 if (!op_cost && speed)
6645 /* MOV. */
6646 *cost += extra_cost->alu.extend;
6647 else
6648 /* Free, the cost is that of the SI mode operation. */
6649 *cost = op_cost;
6650
6651 return true;
6652 }
e548c9df 6653 else if (MEM_P (op0))
43e9d192 6654 {
b1685e62 6655 /* All loads can zero extend to any size for free. */
e548c9df 6656 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
6657 return true;
6658 }
b1685e62 6659
b1685e62 6660 if (speed)
b6875aac
KV
6661 {
6662 if (VECTOR_MODE_P (mode))
6663 {
6664 /* UMOV. */
6665 *cost += extra_cost->vect.alu;
6666 }
6667 else
6668 {
6669 /* UXTB/UXTH. */
6670 *cost += extra_cost->alu.extend;
6671 }
6672 }
43e9d192
IB
6673 return false;
6674
6675 case SIGN_EXTEND:
b1685e62 6676 if (MEM_P (XEXP (x, 0)))
43e9d192 6677 {
b1685e62
JG
6678 /* LDRSH. */
6679 if (speed)
6680 {
6681 rtx address = XEXP (XEXP (x, 0), 0);
6682 *cost += extra_cost->ldst.load_sign_extend;
6683
6684 *cost +=
6685 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6686 0, speed));
6687 }
43e9d192
IB
6688 return true;
6689 }
b1685e62
JG
6690
6691 if (speed)
b6875aac
KV
6692 {
6693 if (VECTOR_MODE_P (mode))
6694 *cost += extra_cost->vect.alu;
6695 else
6696 *cost += extra_cost->alu.extend;
6697 }
43e9d192
IB
6698 return false;
6699
ba0cfa17
JG
6700 case ASHIFT:
6701 op0 = XEXP (x, 0);
6702 op1 = XEXP (x, 1);
6703
6704 if (CONST_INT_P (op1))
6705 {
ba0cfa17 6706 if (speed)
b6875aac
KV
6707 {
6708 if (VECTOR_MODE_P (mode))
6709 {
6710 /* Vector shift (immediate). */
6711 *cost += extra_cost->vect.alu;
6712 }
6713 else
6714 {
6715 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
6716 aliases. */
6717 *cost += extra_cost->alu.shift;
6718 }
6719 }
ba0cfa17
JG
6720
6721 /* We can incorporate zero/sign extend for free. */
6722 if (GET_CODE (op0) == ZERO_EXTEND
6723 || GET_CODE (op0) == SIGN_EXTEND)
6724 op0 = XEXP (op0, 0);
6725
e548c9df 6726 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
6727 return true;
6728 }
6729 else
6730 {
ba0cfa17 6731 if (speed)
b6875aac
KV
6732 {
6733 if (VECTOR_MODE_P (mode))
6734 {
6735 /* Vector shift (register). */
6736 *cost += extra_cost->vect.alu;
6737 }
6738 else
6739 {
6740 /* LSLV. */
6741 *cost += extra_cost->alu.shift_reg;
6742 }
6743 }
ba0cfa17
JG
6744 return false; /* All arguments need to be in registers. */
6745 }
6746
43e9d192 6747 case ROTATE:
43e9d192
IB
6748 case ROTATERT:
6749 case LSHIFTRT:
43e9d192 6750 case ASHIFTRT:
ba0cfa17
JG
6751 op0 = XEXP (x, 0);
6752 op1 = XEXP (x, 1);
43e9d192 6753
ba0cfa17
JG
6754 if (CONST_INT_P (op1))
6755 {
6756 /* ASR (immediate) and friends. */
6757 if (speed)
b6875aac
KV
6758 {
6759 if (VECTOR_MODE_P (mode))
6760 *cost += extra_cost->vect.alu;
6761 else
6762 *cost += extra_cost->alu.shift;
6763 }
43e9d192 6764
e548c9df 6765 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
6766 return true;
6767 }
6768 else
6769 {
6770
6771 /* ASR (register) and friends. */
6772 if (speed)
b6875aac
KV
6773 {
6774 if (VECTOR_MODE_P (mode))
6775 *cost += extra_cost->vect.alu;
6776 else
6777 *cost += extra_cost->alu.shift_reg;
6778 }
ba0cfa17
JG
6779 return false; /* All arguments need to be in registers. */
6780 }
43e9d192 6781
909734be
JG
6782 case SYMBOL_REF:
6783
1b1e81f8
JW
6784 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
6785 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
6786 {
6787 /* LDR. */
6788 if (speed)
6789 *cost += extra_cost->ldst.load;
6790 }
6791 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
6792 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
6793 {
6794 /* ADRP, followed by ADD. */
6795 *cost += COSTS_N_INSNS (1);
6796 if (speed)
6797 *cost += 2 * extra_cost->alu.arith;
6798 }
6799 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
6800 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
6801 {
6802 /* ADR. */
6803 if (speed)
6804 *cost += extra_cost->alu.arith;
6805 }
6806
6807 if (flag_pic)
6808 {
6809 /* One extra load instruction, after accessing the GOT. */
6810 *cost += COSTS_N_INSNS (1);
6811 if (speed)
6812 *cost += extra_cost->ldst.load;
6813 }
43e9d192
IB
6814 return true;
6815
909734be 6816 case HIGH:
43e9d192 6817 case LO_SUM:
909734be
JG
6818 /* ADRP/ADD (immediate). */
6819 if (speed)
6820 *cost += extra_cost->alu.arith;
43e9d192
IB
6821 return true;
6822
6823 case ZERO_EXTRACT:
6824 case SIGN_EXTRACT:
7cc2145f
JG
6825 /* UBFX/SBFX. */
6826 if (speed)
b6875aac
KV
6827 {
6828 if (VECTOR_MODE_P (mode))
6829 *cost += extra_cost->vect.alu;
6830 else
6831 *cost += extra_cost->alu.bfx;
6832 }
7cc2145f
JG
6833
6834 /* We can trust that the immediates used will be correct (there
6835 are no by-register forms), so we need only cost op0. */
e548c9df 6836 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
6837 return true;
6838
6839 case MULT:
4745e701
JG
6840 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
6841 /* aarch64_rtx_mult_cost always handles recursion to its
6842 operands. */
6843 return true;
43e9d192
IB
6844
6845 case MOD:
4f58fe36
KT
6846 /* We can expand signed mod by power of 2 using a NEGS, two parallel
6847 ANDs and a CSNEG. Assume here that the cost of a CSNEG is the same as
6848 that of an unconditional negate. This case should only ever be reached through
6849 the set_smod_pow2_cheap check in expmed.c. */
6850 if (CONST_INT_P (XEXP (x, 1))
6851 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
6852 && (mode == SImode || mode == DImode))
6853 {
6854 /* We expand to 4 instructions. Reset the baseline. */
6855 *cost = COSTS_N_INSNS (4);
6856
6857 if (speed)
6858 *cost += 2 * extra_cost->alu.logical
6859 + 2 * extra_cost->alu.arith;
6860
6861 return true;
6862 }
6863
6864 /* Fall-through. */
43e9d192 6865 case UMOD:
43e9d192
IB
6866 if (speed)
6867 {
b6875aac
KV
6868 if (VECTOR_MODE_P (mode))
6869 *cost += extra_cost->vect.alu;
e548c9df
AM
6870 else if (GET_MODE_CLASS (mode) == MODE_INT)
6871 *cost += (extra_cost->mult[mode == DImode].add
6872 + extra_cost->mult[mode == DImode].idiv);
6873 else if (mode == DFmode)
73250c4c
KT
6874 *cost += (extra_cost->fp[1].mult
6875 + extra_cost->fp[1].div);
e548c9df 6876 else if (mode == SFmode)
73250c4c
KT
6877 *cost += (extra_cost->fp[0].mult
6878 + extra_cost->fp[0].div);
43e9d192
IB
6879 }
6880 return false; /* All arguments need to be in registers. */
6881
6882 case DIV:
6883 case UDIV:
4105fe38 6884 case SQRT:
43e9d192
IB
6885 if (speed)
6886 {
b6875aac
KV
6887 if (VECTOR_MODE_P (mode))
6888 *cost += extra_cost->vect.alu;
6889 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
6890 /* There is no integer SQRT, so only DIV and UDIV can get
6891 here. */
6892 *cost += extra_cost->mult[mode == DImode].idiv;
6893 else
6894 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
6895 }
6896 return false; /* All arguments need to be in registers. */
6897
a8eecd00 6898 case IF_THEN_ELSE:
2d5ffe46
AP
6899 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
6900 XEXP (x, 2), cost, speed);
a8eecd00
JG
6901
6902 case EQ:
6903 case NE:
6904 case GT:
6905 case GTU:
6906 case LT:
6907 case LTU:
6908 case GE:
6909 case GEU:
6910 case LE:
6911 case LEU:
6912
6913 return false; /* All arguments must be in registers. */
6914
b292109f
JG
6915 case FMA:
6916 op0 = XEXP (x, 0);
6917 op1 = XEXP (x, 1);
6918 op2 = XEXP (x, 2);
6919
6920 if (speed)
b6875aac
KV
6921 {
6922 if (VECTOR_MODE_P (mode))
6923 *cost += extra_cost->vect.alu;
6924 else
6925 *cost += extra_cost->fp[mode == DFmode].fma;
6926 }
b292109f
JG
6927
6928 /* FMSUB, FNMADD, and FNMSUB are free. */
6929 if (GET_CODE (op0) == NEG)
6930 op0 = XEXP (op0, 0);
6931
6932 if (GET_CODE (op2) == NEG)
6933 op2 = XEXP (op2, 0);
6934
6935 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
6936 and the by-element operand as operand 0. */
6937 if (GET_CODE (op1) == NEG)
6938 op1 = XEXP (op1, 0);
6939
6940 /* Catch vector-by-element operations. The by-element operand can
6941 either be (vec_duplicate (vec_select (x))) or just
6942 (vec_select (x)), depending on whether we are multiplying by
6943 a vector or a scalar.
6944
6945 Canonicalization is not very good in these cases, FMA4 will put the
6946 by-element operand as operand 0, FNMA4 will have it as operand 1. */
6947 if (GET_CODE (op0) == VEC_DUPLICATE)
6948 op0 = XEXP (op0, 0);
6949 else if (GET_CODE (op1) == VEC_DUPLICATE)
6950 op1 = XEXP (op1, 0);
6951
6952 if (GET_CODE (op0) == VEC_SELECT)
6953 op0 = XEXP (op0, 0);
6954 else if (GET_CODE (op1) == VEC_SELECT)
6955 op1 = XEXP (op1, 0);
6956
6957 /* If the remaining parameters are not registers,
6958 get the cost to put them into registers. */
e548c9df
AM
6959 *cost += rtx_cost (op0, mode, FMA, 0, speed);
6960 *cost += rtx_cost (op1, mode, FMA, 1, speed);
6961 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
6962 return true;
6963
5e2a765b
KT
6964 case FLOAT:
6965 case UNSIGNED_FLOAT:
6966 if (speed)
6967 *cost += extra_cost->fp[mode == DFmode].fromint;
6968 return false;
6969
b292109f
JG
6970 case FLOAT_EXTEND:
6971 if (speed)
b6875aac
KV
6972 {
6973 if (VECTOR_MODE_P (mode))
6974 {
6975 /* Vector widening conversion. */
6976 *cost += extra_cost->vect.alu;
6977 }
6978 else
6979 *cost += extra_cost->fp[mode == DFmode].widen;
6980 }
b292109f
JG
6981 return false;
6982
6983 case FLOAT_TRUNCATE:
6984 if (speed)
b6875aac
KV
6985 {
6986 if (VECTOR_MODE_P (mode))
6987 {
6988 /* Vector conversion. */
6989 *cost += extra_cost->vect.alu;
6990 }
6991 else
6992 *cost += extra_cost->fp[mode == DFmode].narrow;
6993 }
b292109f
JG
6994 return false;
6995
61263118
KT
6996 case FIX:
6997 case UNSIGNED_FIX:
6998 x = XEXP (x, 0);
6999 /* Strip the rounding part. They will all be implemented
7000 by the fcvt* family of instructions anyway. */
7001 if (GET_CODE (x) == UNSPEC)
7002 {
7003 unsigned int uns_code = XINT (x, 1);
7004
7005 if (uns_code == UNSPEC_FRINTA
7006 || uns_code == UNSPEC_FRINTM
7007 || uns_code == UNSPEC_FRINTN
7008 || uns_code == UNSPEC_FRINTP
7009 || uns_code == UNSPEC_FRINTZ)
7010 x = XVECEXP (x, 0, 0);
7011 }
7012
7013 if (speed)
b6875aac
KV
7014 {
7015 if (VECTOR_MODE_P (mode))
7016 *cost += extra_cost->vect.alu;
7017 else
7018 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
7019 }
e548c9df 7020 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
7021 return true;
7022
b292109f 7023 case ABS:
b6875aac
KV
7024 if (VECTOR_MODE_P (mode))
7025 {
7026 /* ABS (vector). */
7027 if (speed)
7028 *cost += extra_cost->vect.alu;
7029 }
7030 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 7031 {
19261b99
KT
7032 op0 = XEXP (x, 0);
7033
7034 /* FABD, which is analogous to FADD. */
7035 if (GET_CODE (op0) == MINUS)
7036 {
e548c9df
AM
7037 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
7038 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
7039 if (speed)
7040 *cost += extra_cost->fp[mode == DFmode].addsub;
7041
7042 return true;
7043 }
7044 /* Simple FABS is analogous to FNEG. */
b292109f
JG
7045 if (speed)
7046 *cost += extra_cost->fp[mode == DFmode].neg;
7047 }
7048 else
7049 {
7050 /* Integer ABS will either be split to
7051 two arithmetic instructions, or will be an ABS
7052 (scalar), which we don't model. */
7053 *cost = COSTS_N_INSNS (2);
7054 if (speed)
7055 *cost += 2 * extra_cost->alu.arith;
7056 }
7057 return false;
7058
7059 case SMAX:
7060 case SMIN:
7061 if (speed)
7062 {
b6875aac
KV
7063 if (VECTOR_MODE_P (mode))
7064 *cost += extra_cost->vect.alu;
7065 else
7066 {
7067 /* FMAXNM/FMINNM/FMAX/FMIN.
7068 TODO: This may not be accurate for all implementations, but
7069 we do not model this in the cost tables. */
7070 *cost += extra_cost->fp[mode == DFmode].addsub;
7071 }
b292109f
JG
7072 }
7073 return false;
7074
61263118
KT
7075 case UNSPEC:
7076 /* The floating point round to integer frint* instructions. */
7077 if (aarch64_frint_unspec_p (XINT (x, 1)))
7078 {
7079 if (speed)
7080 *cost += extra_cost->fp[mode == DFmode].roundint;
7081
7082 return false;
7083 }
781aeb73
KT
7084
7085 if (XINT (x, 1) == UNSPEC_RBIT)
7086 {
7087 if (speed)
7088 *cost += extra_cost->alu.rev;
7089
7090 return false;
7091 }
61263118
KT
7092 break;
7093
fb620c4a
JG
7094 case TRUNCATE:
7095
7096 /* Decompose <su>muldi3_highpart. */
7097 if (/* (truncate:DI */
7098 mode == DImode
7099 /* (lshiftrt:TI */
7100 && GET_MODE (XEXP (x, 0)) == TImode
7101 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7102 /* (mult:TI */
7103 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7104 /* (ANY_EXTEND:TI (reg:DI))
7105 (ANY_EXTEND:TI (reg:DI))) */
7106 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7107 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
7108 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
7109 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
7110 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
7111 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
7112 /* (const_int 64) */
7113 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7114 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
7115 {
7116 /* UMULH/SMULH. */
7117 if (speed)
7118 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
7119 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
7120 mode, MULT, 0, speed);
7121 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
7122 mode, MULT, 1, speed);
fb620c4a
JG
7123 return true;
7124 }
7125
7126 /* Fall through. */
43e9d192 7127 default:
61263118 7128 break;
43e9d192 7129 }
61263118
KT
7130
7131 if (dump_file && (dump_flags & TDF_DETAILS))
7132 fprintf (dump_file,
7133 "\nFailed to cost RTX. Assuming default cost.\n");
7134
7135 return true;
43e9d192
IB
7136}
7137
0ee859b5
JG
7138/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
7139 calculated for X. This cost is stored in *COST. Returns true
7140 if the total cost of X was calculated. */
7141static bool
e548c9df 7142aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
7143 int param, int *cost, bool speed)
7144{
e548c9df 7145 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5
JG
7146
7147 if (dump_file && (dump_flags & TDF_DETAILS))
7148 {
7149 print_rtl_single (dump_file, x);
7150 fprintf (dump_file, "\n%s cost: %d (%s)\n",
7151 speed ? "Hot" : "Cold",
7152 *cost, result ? "final" : "partial");
7153 }
7154
7155 return result;
7156}
7157
43e9d192 7158static int
ef4bddc2 7159aarch64_register_move_cost (machine_mode mode,
8a3a7e67 7160 reg_class_t from_i, reg_class_t to_i)
43e9d192 7161{
8a3a7e67
RH
7162 enum reg_class from = (enum reg_class) from_i;
7163 enum reg_class to = (enum reg_class) to_i;
43e9d192 7164 const struct cpu_regmove_cost *regmove_cost
b175b679 7165 = aarch64_tune_params.regmove_cost;
43e9d192 7166
3be07662 7167 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
43e06d03 7168 if (to == CALLER_SAVE_REGS || to == POINTER_REGS || to == FIXED_REG0)
3be07662
WD
7169 to = GENERAL_REGS;
7170
43e06d03 7171 if (from == CALLER_SAVE_REGS || from == POINTER_REGS || from == FIXED_REG0)
3be07662
WD
7172 from = GENERAL_REGS;
7173
6ee70f81
AP
7174 /* Moving between GPR and stack cost is the same as GP2GP. */
7175 if ((from == GENERAL_REGS && to == STACK_REG)
7176 || (to == GENERAL_REGS && from == STACK_REG))
7177 return regmove_cost->GP2GP;
7178
7179 /* To/From the stack register, we move via the gprs. */
7180 if (to == STACK_REG || from == STACK_REG)
7181 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
7182 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
7183
8919453c
WD
7184 if (GET_MODE_SIZE (mode) == 16)
7185 {
7186 /* 128-bit operations on general registers require 2 instructions. */
7187 if (from == GENERAL_REGS && to == GENERAL_REGS)
7188 return regmove_cost->GP2GP * 2;
7189 else if (from == GENERAL_REGS)
7190 return regmove_cost->GP2FP * 2;
7191 else if (to == GENERAL_REGS)
7192 return regmove_cost->FP2GP * 2;
7193
7194 /* When AdvSIMD instructions are disabled it is not possible to move
7195 a 128-bit value directly between Q registers. This is handled in
7196 secondary reload. A general register is used as a scratch to move
7197 the upper DI value and the lower DI value is moved directly,
7198 hence the cost is the sum of three moves. */
7199 if (! TARGET_SIMD)
7200 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
7201
7202 return regmove_cost->FP2FP;
7203 }
7204
43e9d192
IB
7205 if (from == GENERAL_REGS && to == GENERAL_REGS)
7206 return regmove_cost->GP2GP;
7207 else if (from == GENERAL_REGS)
7208 return regmove_cost->GP2FP;
7209 else if (to == GENERAL_REGS)
7210 return regmove_cost->FP2GP;
7211
43e9d192
IB
7212 return regmove_cost->FP2FP;
7213}
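
/* For illustration, how the rules above combine for a few cases (the
   regmove_cost fields come from the selected tuning):

     SImode, GENERAL_REGS -> GENERAL_REGS              : GP2GP
     DImode, GENERAL_REGS -> FP_REGS                   : GP2FP
     TImode, GENERAL_REGS -> GENERAL_REGS              : 2 * GP2GP
     TImode, FP_REGS -> FP_REGS without TARGET_SIMD    : GP2FP + FP2GP + FP2FP  */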
7214
7215static int
ef4bddc2 7216aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
7217 reg_class_t rclass ATTRIBUTE_UNUSED,
7218 bool in ATTRIBUTE_UNUSED)
7219{
b175b679 7220 return aarch64_tune_params.memmov_cost;
43e9d192
IB
7221}
7222
d126a4ae
AP
7223/* Return the number of instructions that can be issued per cycle. */
7224static int
7225aarch64_sched_issue_rate (void)
7226{
b175b679 7227 return aarch64_tune_params.issue_rate;
d126a4ae
AP
7228}
7229
d03f7e44
MK
7230static int
7231aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
7232{
7233 int issue_rate = aarch64_sched_issue_rate ();
7234
7235 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
7236}
7237
8990e73a
TB
7238/* Vectorizer cost model target hooks. */
7239
7240/* Implement targetm.vectorize.builtin_vectorization_cost. */
7241static int
7242aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
7243 tree vectype,
7244 int misalign ATTRIBUTE_UNUSED)
7245{
7246 unsigned elements;
7247
7248 switch (type_of_cost)
7249 {
7250 case scalar_stmt:
b175b679 7251 return aarch64_tune_params.vec_costs->scalar_stmt_cost;
8990e73a
TB
7252
7253 case scalar_load:
b175b679 7254 return aarch64_tune_params.vec_costs->scalar_load_cost;
8990e73a
TB
7255
7256 case scalar_store:
b175b679 7257 return aarch64_tune_params.vec_costs->scalar_store_cost;
8990e73a
TB
7258
7259 case vector_stmt:
b175b679 7260 return aarch64_tune_params.vec_costs->vec_stmt_cost;
8990e73a
TB
7261
7262 case vector_load:
b175b679 7263 return aarch64_tune_params.vec_costs->vec_align_load_cost;
8990e73a
TB
7264
7265 case vector_store:
b175b679 7266 return aarch64_tune_params.vec_costs->vec_store_cost;
8990e73a
TB
7267
7268 case vec_to_scalar:
b175b679 7269 return aarch64_tune_params.vec_costs->vec_to_scalar_cost;
8990e73a
TB
7270
7271 case scalar_to_vec:
b175b679 7272 return aarch64_tune_params.vec_costs->scalar_to_vec_cost;
8990e73a
TB
7273
7274 case unaligned_load:
b175b679 7275 return aarch64_tune_params.vec_costs->vec_unalign_load_cost;
8990e73a
TB
7276
7277 case unaligned_store:
b175b679 7278 return aarch64_tune_params.vec_costs->vec_unalign_store_cost;
8990e73a
TB
7279
7280 case cond_branch_taken:
b175b679 7281 return aarch64_tune_params.vec_costs->cond_taken_branch_cost;
8990e73a
TB
7282
7283 case cond_branch_not_taken:
b175b679 7284 return aarch64_tune_params.vec_costs->cond_not_taken_branch_cost;
8990e73a
TB
7285
7286 case vec_perm:
7287 case vec_promote_demote:
b175b679 7288 return aarch64_tune_params.vec_costs->vec_stmt_cost;
8990e73a
TB
7289
7290 case vec_construct:
7291 elements = TYPE_VECTOR_SUBPARTS (vectype);
7292 return elements / 2 + 1;
7293
7294 default:
7295 gcc_unreachable ();
7296 }
7297}
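
/* A worked example for the vec_construct case above, the only entry that is
   computed rather than read straight from the vector cost table: for a
   four-element vector type such as V4SI, TYPE_VECTOR_SUBPARTS is 4, so the
   returned cost is 4 / 2 + 1 == 3.  */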
7298
7299/* Implement targetm.vectorize.add_stmt_cost. */
7300static unsigned
7301aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
7302 struct _stmt_vec_info *stmt_info, int misalign,
7303 enum vect_cost_model_location where)
7304{
7305 unsigned *cost = (unsigned *) data;
7306 unsigned retval = 0;
7307
7308 if (flag_vect_cost_model)
7309 {
7310 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
7311 int stmt_cost =
7312 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
7313
7314 /* Statements in an inner loop relative to the loop being
7315 vectorized are weighted more heavily. The value here is
058e4c71 7316 arbitrary and could potentially be improved with analysis. */
8990e73a 7317 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
058e4c71 7318 count *= 50; /* FIXME */
8990e73a
TB
7319
7320 retval = (unsigned) (count * stmt_cost);
7321 cost[where] += retval;
7322 }
7323
7324 return retval;
7325}
7326
0cfff2a1 7327static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 7328
0cfff2a1
KT
7329/* Enum describing the various ways that the
7330 aarch64_parse_{arch,tune,cpu,extension} functions can fail.
7331 This way their callers can choose what kind of error to give. */
43e9d192 7332
0cfff2a1
KT
7333enum aarch64_parse_opt_result
7334{
7335 AARCH64_PARSE_OK, /* Parsing was successful. */
7336 AARCH64_PARSE_MISSING_ARG, /* Missing argument. */
7337 AARCH64_PARSE_INVALID_FEATURE, /* Invalid feature modifier. */
7338 AARCH64_PARSE_INVALID_ARG /* Invalid arch, tune, cpu arg. */
7339};
7340
7341/* Parse the architecture extension string STR and update ISA_FLAGS
7342 with the architecture features turned on or off. Return a
7343 aarch64_parse_opt_result describing the result. */
7344
7345static enum aarch64_parse_opt_result
7346aarch64_parse_extension (char *str, unsigned long *isa_flags)
43e9d192
IB
7347{
7348 /* The extension string is parsed left to right. */
7349 const struct aarch64_option_extension *opt = NULL;
7350
7351 /* Flag to say whether we are adding or removing an extension. */
7352 int adding_ext = -1;
7353
7354 while (str != NULL && *str != 0)
7355 {
7356 char *ext;
7357 size_t len;
7358
7359 str++;
7360 ext = strchr (str, '+');
7361
7362 if (ext != NULL)
7363 len = ext - str;
7364 else
7365 len = strlen (str);
7366
7367 if (len >= 2 && strncmp (str, "no", 2) == 0)
7368 {
7369 adding_ext = 0;
7370 len -= 2;
7371 str += 2;
7372 }
7373 else if (len > 0)
7374 adding_ext = 1;
7375
7376 if (len == 0)
0cfff2a1
KT
7377 return AARCH64_PARSE_MISSING_ARG;
7378
43e9d192
IB
7379
7380 /* Scan over the extensions table trying to find an exact match. */
7381 for (opt = all_extensions; opt->name != NULL; opt++)
7382 {
7383 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
7384 {
7385 /* Add or remove the extension. */
7386 if (adding_ext)
0cfff2a1 7387 *isa_flags |= opt->flags_on;
43e9d192 7388 else
0cfff2a1 7389 *isa_flags &= ~(opt->flags_off);
43e9d192
IB
7390 break;
7391 }
7392 }
7393
7394 if (opt->name == NULL)
7395 {
7396 /* Extension not found in list. */
0cfff2a1 7397 return AARCH64_PARSE_INVALID_FEATURE;
43e9d192
IB
7398 }
7399
7400 str = ext;
7401 };
7402
0cfff2a1 7403 return AARCH64_PARSE_OK;
43e9d192
IB
7404}
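
/* A sketch of how an extension string is consumed by the loop above,
   assuming the usual "crypto" and "fp" entries in all_extensions:

     aarch64_parse_extension ("+crypto+nofp", &flags)
        "+crypto" -> adding_ext == 1, OR in the crypto flags_on bits
        "+nofp"   -> adding_ext == 0, clear the fp flags_off bits
        result    -> AARCH64_PARSE_OK

   A bare "+" yields AARCH64_PARSE_MISSING_ARG and an unrecognized name
   yields AARCH64_PARSE_INVALID_FEATURE.  */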
7405
0cfff2a1
KT
7406/* Parse the TO_PARSE string and put the architecture struct that it
7407 selects into RES and the architectural features into ISA_FLAGS.
7408 Return an aarch64_parse_opt_result describing the parse result.
7409 If there is an error parsing, RES and ISA_FLAGS are left unchanged. */
43e9d192 7410
0cfff2a1
KT
7411static enum aarch64_parse_opt_result
7412aarch64_parse_arch (const char *to_parse, const struct processor **res,
7413 unsigned long *isa_flags)
43e9d192
IB
7414{
7415 char *ext;
7416 const struct processor *arch;
0cfff2a1 7417 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
7418 size_t len;
7419
0cfff2a1 7420 strcpy (str, to_parse);
43e9d192
IB
7421
7422 ext = strchr (str, '+');
7423
7424 if (ext != NULL)
7425 len = ext - str;
7426 else
7427 len = strlen (str);
7428
7429 if (len == 0)
0cfff2a1
KT
7430 return AARCH64_PARSE_MISSING_ARG;
7431
43e9d192 7432
0cfff2a1 7433 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
7434 for (arch = all_architectures; arch->name != NULL; arch++)
7435 {
7436 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
7437 {
0cfff2a1 7438 unsigned long isa_temp = arch->flags;
43e9d192
IB
7439
7440 if (ext != NULL)
7441 {
0cfff2a1
KT
7442 /* TO_PARSE string contains at least one extension. */
7443 enum aarch64_parse_opt_result ext_res
7444 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 7445
0cfff2a1
KT
7446 if (ext_res != AARCH64_PARSE_OK)
7447 return ext_res;
ffee7aa9 7448 }
0cfff2a1
KT
7449 /* Extension parsing was successful. Confirm the result
7450 arch and ISA flags. */
7451 *res = arch;
7452 *isa_flags = isa_temp;
7453 return AARCH64_PARSE_OK;
43e9d192
IB
7454 }
7455 }
7456
7457 /* ARCH name not found in list. */
0cfff2a1 7458 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
7459}
7460
0cfff2a1
KT
7461/* Parse the TO_PARSE string and put the CPU it selects into RES and the
7462 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
7463 describing the parse result. If there is an error parsing, RES and
7464 ISA_FLAGS are left unchanged. */
43e9d192 7465
0cfff2a1
KT
7466static enum aarch64_parse_opt_result
7467aarch64_parse_cpu (const char *to_parse, const struct processor **res,
7468 unsigned long *isa_flags)
43e9d192
IB
7469{
7470 char *ext;
7471 const struct processor *cpu;
0cfff2a1 7472 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
7473 size_t len;
7474
0cfff2a1 7475 strcpy (str, to_parse);
43e9d192
IB
7476
7477 ext = strchr (str, '+');
7478
7479 if (ext != NULL)
7480 len = ext - str;
7481 else
7482 len = strlen (str);
7483
7484 if (len == 0)
0cfff2a1
KT
7485 return AARCH64_PARSE_MISSING_ARG;
7486
43e9d192
IB
7487
7488 /* Loop through the list of supported CPUs to find a match. */
7489 for (cpu = all_cores; cpu->name != NULL; cpu++)
7490 {
7491 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
7492 {
0cfff2a1
KT
7493 unsigned long isa_temp = cpu->flags;
7494
43e9d192
IB
7495
7496 if (ext != NULL)
7497 {
0cfff2a1
KT
7498 /* TO_PARSE string contains at least one extension. */
7499 enum aarch64_parse_opt_result ext_res
7500 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 7501
0cfff2a1
KT
7502 if (ext_res != AARCH64_PARSE_OK)
7503 return ext_res;
7504 }
7505 /* Extension parsing was successful. Confirm the result
7506 cpu and ISA flags. */
7507 *res = cpu;
7508 *isa_flags = isa_temp;
7509 return AARCH64_PARSE_OK;
43e9d192
IB
7510 }
7511 }
7512
7513 /* CPU name not found in list. */
0cfff2a1 7514 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
7515}
7516
0cfff2a1
KT
7517/* Parse the TO_PARSE string and put the cpu it selects into RES.
7518 Return an aarch64_parse_opt_result describing the parse result.
7519 If the parsing fails, RES is left unchanged. */
43e9d192 7520
0cfff2a1
KT
7521static enum aarch64_parse_opt_result
7522aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
7523{
7524 const struct processor *cpu;
0cfff2a1
KT
7525 char *str = (char *) alloca (strlen (to_parse) + 1);
7526
7527 strcpy (str, to_parse);
43e9d192
IB
7528
7529 /* Loop through the list of supported CPUs to find a match. */
7530 for (cpu = all_cores; cpu->name != NULL; cpu++)
7531 {
7532 if (strcmp (cpu->name, str) == 0)
7533 {
0cfff2a1
KT
7534 *res = cpu;
7535 return AARCH64_PARSE_OK;
43e9d192
IB
7536 }
7537 }
7538
7539 /* CPU name not found in list. */
0cfff2a1 7540 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
7541}
7542
8dec06f2
JG
7543/* Parse TOKEN, which has length LENGTH, to see if it is an option
7544 described in FLAG. If it is, return the index bit for that fusion type.
7545 If not, error (printing OPTION_NAME) and return zero. */
7546
7547static unsigned int
7548aarch64_parse_one_option_token (const char *token,
7549 size_t length,
7550 const struct aarch64_flag_desc *flag,
7551 const char *option_name)
7552{
7553 for (; flag->name != NULL; flag++)
7554 {
7555 if (length == strlen (flag->name)
7556 && !strncmp (flag->name, token, length))
7557 return flag->flag;
7558 }
7559
7560 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
7561 return 0;
7562}
7563
7564/* Parse OPTION which is a comma-separated list of flags to enable.
7565 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
7566 default state we inherit from the CPU tuning structures. OPTION_NAME
7567 gives the top-level option we are parsing in the -moverride string,
7568 for use in error messages. */
7569
7570static unsigned int
7571aarch64_parse_boolean_options (const char *option,
7572 const struct aarch64_flag_desc *flags,
7573 unsigned int initial_state,
7574 const char *option_name)
7575{
7576 const char separator = '.';
7577 const char* specs = option;
7578 const char* ntoken = option;
7579 unsigned int found_flags = initial_state;
7580
7581 while ((ntoken = strchr (specs, separator)))
7582 {
7583 size_t token_length = ntoken - specs;
7584 unsigned token_ops = aarch64_parse_one_option_token (specs,
7585 token_length,
7586 flags,
7587 option_name);
7588 /* If we find "none" (or, for simplicity's sake, an error) anywhere
7589 in the token stream, reset the supported operations. So:
7590
7591 adrp+add.cmp+branch.none.adrp+add
7592
7593 would have the result of turning on only adrp+add fusion. */
7594 if (!token_ops)
7595 found_flags = 0;
7596
7597 found_flags |= token_ops;
7598 specs = ++ntoken;
7599 }
7600
7601 /* The string ended with a trailing separator, so the final token is empty; report the string as ill-formed. */
7602 if (!(*specs))
7603 {
7604 error ("%s string ill-formed\n", option_name);
7605 return 0;
7606 }
7607
7608 /* We still have one more token to parse. */
7609 size_t token_length = strlen (specs);
7610 unsigned token_ops = aarch64_parse_one_option_token (specs,
7611 token_length,
7612 flags,
7613 option_name);
7614 if (!token_ops)
7615 found_flags = 0;
7616
7617 found_flags |= token_ops;
7618 return found_flags;
7619}
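/* Illustrative trace, not part of the original source: for the string from
   the comment above, "adrp+add.cmp+branch.none.adrp+add", the tokens are
   handled left to right (the AARCH64_FUSE_* names are assumed here):

     "adrp+add"   -> found_flags |= AARCH64_FUSE_ADRP_ADD
     "cmp+branch" -> found_flags |= AARCH64_FUSE_CMP_BRANCH
     "none"       -> token_ops is 0, so found_flags is reset to 0
     "adrp+add"   -> found_flags |= AARCH64_FUSE_ADRP_ADD

   so only the adrp+add fusion ends up enabled.  */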
7620
7621/* Support for overriding instruction fusion. */
7622
7623static void
7624aarch64_parse_fuse_string (const char *fuse_string,
7625 struct tune_params *tune)
7626{
7627 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
7628 aarch64_fusible_pairs,
7629 tune->fusible_ops,
7630 "fuse=");
7631}
7632
7633/* Support for overriding other tuning flags. */
7634
7635static void
7636aarch64_parse_tune_string (const char *tune_string,
7637 struct tune_params *tune)
7638{
7639 tune->extra_tuning_flags
7640 = aarch64_parse_boolean_options (tune_string,
7641 aarch64_tuning_flags,
7642 tune->extra_tuning_flags,
7643 "tune=");
7644}
7645
7646/* Parse TOKEN, which has length LENGTH, to see if it is a tuning option
7647 we understand. If it is, extract the option string and hand it off to
7648 the appropriate function. */
7649
7650void
7651aarch64_parse_one_override_token (const char* token,
7652 size_t length,
7653 struct tune_params *tune)
7654{
7655 const struct aarch64_tuning_override_function *fn
7656 = aarch64_tuning_override_functions;
7657
7658 const char *option_part = strchr (token, '=');
7659 if (!option_part)
7660 {
7661 error ("tuning string missing in option (%s)", token);
7662 return;
7663 }
7664
7665 /* Get the length of the option name. */
7666 length = option_part - token;
7667 /* Skip the '=' to get to the option string. */
7668 option_part++;
7669
7670 for (; fn->name != NULL; fn++)
7671 {
7672 if (!strncmp (fn->name, token, length))
7673 {
7674 fn->parse_override (option_part, tune);
7675 return;
7676 }
7677 }
7678
7679 error ("unknown tuning option (%s)",token);
7680 return;
7681}
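/* Illustrative example, not part of the original source: a token such as
   "fuse=adrp+add.cmp+branch" is split at the '=' found above into the name
   "fuse" (LENGTH becomes 4) and the option string "adrp+add.cmp+branch".
   The loop then matches "fuse" against aarch64_tuning_override_functions
   and calls its parse_override hook, presumably the one wrapping
   aarch64_parse_fuse_string below.  */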
7682
5eee3c34
JW
7683/* Choose a default for -mtls-size if none was given and clamp it to the maximum offset supported by the code model in OPTS. */
7684
7685static void
7686initialize_aarch64_tls_size (struct gcc_options *opts)
7687{
7688 if (aarch64_tls_size == 0)
7689 aarch64_tls_size = 24;
7690
7691 switch (opts->x_aarch64_cmodel_var)
7692 {
7693 case AARCH64_CMODEL_TINY:
7694 /* Both the default and maximum TLS size allowed under tiny is 1M which
7695 needs two instructions to address, so we clamp the size to 24. */
7696 if (aarch64_tls_size > 24)
7697 aarch64_tls_size = 24;
7698 break;
7699 case AARCH64_CMODEL_SMALL:
7700 /* The maximum TLS size allowed under small is 4G. */
7701 if (aarch64_tls_size > 32)
7702 aarch64_tls_size = 32;
7703 break;
7704 case AARCH64_CMODEL_LARGE:
7705 /* The maximum TLS size allowed under large is 16E.
7706 FIXME: 16E should be 64-bit; we only support a 48-bit offset for now. */
7707 if (aarch64_tls_size > 48)
7708 aarch64_tls_size = 48;
7709 break;
7710 default:
7711 gcc_unreachable ();
7712 }
7713
7714 return;
7715}
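/* Illustrative example, not part of the original source: with
   -mcmodel=tiny -mtls-size=32 the switch above clamps aarch64_tls_size
   back to 24; under -mcmodel=small the same request stays at 32, and under
   -mcmodel=large anything above 48 is clamped to 48.  */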
7716
8dec06f2
JG
7717/* Parse STRING looking for options in the format:
7718 string :: option:string
7719 option :: name=substring
7720 name :: {a-z}
7721 substring :: defined by option. */
7722
7723static void
7724aarch64_parse_override_string (const char* input_string,
7725 struct tune_params* tune)
7726{
7727 const char separator = ':';
7728 size_t string_length = strlen (input_string) + 1;
7729 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
7730 char *string = string_root;
7731 strncpy (string, input_string, string_length);
7732 string[string_length - 1] = '\0';
7733
7734 char* ntoken = string;
7735
7736 while ((ntoken = strchr (string, separator)))
7737 {
7738 size_t token_length = ntoken - string;
7739 /* Make this substring look like a string. */
7740 *ntoken = '\0';
7741 aarch64_parse_one_override_token (string, token_length, tune);
7742 string = ++ntoken;
7743 }
7744
7745 /* One last option to parse. */
7746 aarch64_parse_one_override_token (string, strlen (string), tune);
7747 free (string_root);
7748}
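/* Illustrative example, not part of the original source: an override string
   following the grammar above, say

     -moverride=fuse=adrp+add:tune=<flag-name>

   is copied, split in place at each ':' into "fuse=adrp+add" and
   "tune=<flag-name>", and each piece is handed to
   aarch64_parse_one_override_token.  <flag-name> is only a placeholder
   here; the accepted names are whatever aarch64_tuning_flags lists.  */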
43e9d192 7749
43e9d192
IB
7750
7751static void
0cfff2a1 7752aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 7753{
0cfff2a1
KT
7754 if (opts->x_flag_omit_frame_pointer)
7755 opts->x_flag_omit_leaf_frame_pointer = false;
7756 else if (opts->x_flag_omit_leaf_frame_pointer)
7757 opts->x_flag_omit_frame_pointer = true;
43e9d192 7758
0cfff2a1
KT
7759 /* If not optimizing for size, set the default
7760 alignment to what the target wants. */
7761 if (!opts->x_optimize_size)
43e9d192 7762 {
0cfff2a1
KT
7763 if (opts->x_align_loops <= 0)
7764 opts->x_align_loops = aarch64_tune_params.loop_align;
7765 if (opts->x_align_jumps <= 0)
7766 opts->x_align_jumps = aarch64_tune_params.jump_align;
7767 if (opts->x_align_functions <= 0)
7768 opts->x_align_functions = aarch64_tune_params.function_align;
43e9d192 7769 }
b4f50fd4
RR
7770
7771 /* If nopcrelative_literal_loads is set on the command line, this
7772 implies that the user asked for PC relative literal loads. */
7773 if (nopcrelative_literal_loads == 1)
7774 nopcrelative_literal_loads = 0;
7775
7776 /* If it is not set on the command line, we default to no
7777 pc relative literal loads. */
7778 if (nopcrelative_literal_loads == 2)
7779 nopcrelative_literal_loads = 1;
7780
7781 /* In the tiny memory model it makes no sense
7782 to disallow non PC relative literal pool loads
7783 as many other things will break anyway. */
7784 if (nopcrelative_literal_loads
7785 && (aarch64_cmodel == AARCH64_CMODEL_TINY
7786 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC))
7787 nopcrelative_literal_loads = 0;
0cfff2a1 7788}
43e9d192 7789
0cfff2a1
KT
7790/* 'Unpack' the internal tuning structs and update the options
7791 in OPTS. The caller must have set up selected_tune and selected_arch
7792 as all the other target-specific codegen decisions are
7793 derived from them. */
7794
e4ea20c8 7795void
0cfff2a1
KT
7796aarch64_override_options_internal (struct gcc_options *opts)
7797{
7798 aarch64_tune_flags = selected_tune->flags;
7799 aarch64_tune = selected_tune->sched_core;
7800 /* Make a copy of the tuning parameters attached to the core, which
7801 we may later overwrite. */
7802 aarch64_tune_params = *(selected_tune->tune);
7803 aarch64_architecture_version = selected_arch->architecture_version;
7804
7805 if (opts->x_aarch64_override_tune_string)
7806 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
7807 &aarch64_tune_params);
7808
7809 /* This target defaults to strict volatile bitfields. */
7810 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
7811 opts->x_flag_strict_volatile_bitfields = 1;
7812
0cfff2a1
KT
7813 /* -mgeneral-regs-only sets a mask in target_flags, make sure that
7814 aarch64_isa_flags does not contain the FP/SIMD/Crypto feature flags
7815 in case some code tries reading aarch64_isa_flags directly to check if
7816 FP is available. Reuse the aarch64_parse_extension machinery since it
7817 knows how to disable any other flags that fp implies. */
7818 if (TARGET_GENERAL_REGS_ONLY_P (opts->x_target_flags))
43e9d192 7819 {
0cfff2a1
KT
7820 /* aarch64_parse_extension takes char* rather than const char* because
7821 it is usually called from within other parsing functions. */
7822 char tmp_str[] = "+nofp";
361fb3ee 7823 aarch64_parse_extension (tmp_str, &opts->x_aarch64_isa_flags);
43e9d192
IB
7824 }
7825
0cfff2a1 7826 initialize_aarch64_code_model (opts);
5eee3c34 7827 initialize_aarch64_tls_size (opts);
63892fa2 7828
0cfff2a1
KT
7829 aarch64_override_options_after_change_1 (opts);
7830}
43e9d192 7831
0cfff2a1
KT
7832/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
7833 specified in STR and throw errors if appropriate. Put the results if
361fb3ee
KT
7834 they are valid in RES and ISA_FLAGS. Return whether the option is
7835 valid. */
43e9d192 7836
361fb3ee 7837static bool
0cfff2a1
KT
7838aarch64_validate_mcpu (const char *str, const struct processor **res,
7839 unsigned long *isa_flags)
7840{
7841 enum aarch64_parse_opt_result parse_res
7842 = aarch64_parse_cpu (str, res, isa_flags);
7843
7844 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 7845 return true;
0cfff2a1
KT
7846
7847 switch (parse_res)
7848 {
7849 case AARCH64_PARSE_MISSING_ARG:
7850 error ("missing cpu name in -mcpu=%qs", str);
7851 break;
7852 case AARCH64_PARSE_INVALID_ARG:
7853 error ("unknown value %qs for -mcpu", str);
7854 break;
7855 case AARCH64_PARSE_INVALID_FEATURE:
7856 error ("invalid feature modifier in -mcpu=%qs", str);
7857 break;
7858 default:
7859 gcc_unreachable ();
7860 }
361fb3ee
KT
7861
7862 return false;
0cfff2a1
KT
7863}
7864
7865/* Validate a command-line -march option. Parse the arch and extensions
7866 (if any) specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
7867 results, if they are valid, in RES and ISA_FLAGS. Return whether the
7868 option is valid. */
0cfff2a1 7869
361fb3ee 7870static bool
0cfff2a1
KT
7871aarch64_validate_march (const char *str, const struct processor **res,
7872 unsigned long *isa_flags)
7873{
7874 enum aarch64_parse_opt_result parse_res
7875 = aarch64_parse_arch (str, res, isa_flags);
7876
7877 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 7878 return true;
0cfff2a1
KT
7879
7880 switch (parse_res)
7881 {
7882 case AARCH64_PARSE_MISSING_ARG:
7883 error ("missing arch name in -march=%qs", str);
7884 break;
7885 case AARCH64_PARSE_INVALID_ARG:
7886 error ("unknown value %qs for -march", str);
7887 break;
7888 case AARCH64_PARSE_INVALID_FEATURE:
7889 error ("invalid feature modifier in -march=%qs", str);
7890 break;
7891 default:
7892 gcc_unreachable ();
7893 }
361fb3ee
KT
7894
7895 return false;
0cfff2a1
KT
7896}
7897
7898/* Validate a command-line -mtune option. Parse the cpu
7899 specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
7900 result, if it is valid, in RES. Return whether the option is
7901 valid. */
0cfff2a1 7902
361fb3ee 7903static bool
0cfff2a1
KT
7904aarch64_validate_mtune (const char *str, const struct processor **res)
7905{
7906 enum aarch64_parse_opt_result parse_res
7907 = aarch64_parse_tune (str, res);
7908
7909 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 7910 return true;
0cfff2a1
KT
7911
7912 switch (parse_res)
7913 {
7914 case AARCH64_PARSE_MISSING_ARG:
7915 error ("missing cpu name in -mtune=%qs", str);
7916 break;
7917 case AARCH64_PARSE_INVALID_ARG:
7918 error ("unknown value %qs for -mtune", str);
7919 break;
7920 default:
7921 gcc_unreachable ();
7922 }
361fb3ee
KT
7923 return false;
7924}
7925
7926/* Return the CPU corresponding to the enum CPU.
7927 If it doesn't specify a cpu, return the default. */
7928
7929static const struct processor *
7930aarch64_get_tune_cpu (enum aarch64_processor cpu)
7931{
7932 if (cpu != aarch64_none)
7933 return &all_cores[cpu];
7934
7935 /* The & 0x3f is to extract the bottom 6 bits that encode the
7936 default cpu as selected by the --with-cpu GCC configure option
7937 in config.gcc.
7938 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
7939 flags mechanism should be reworked to make it more sane. */
7940 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
7941}
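/* Illustrative note, not part of the original source: taken together with
   the ">> 6" in aarch64_override_options below, TARGET_CPU_DEFAULT appears
   to be packed roughly as

     (default_isa_flags << 6) | default_cpu_ident

   where default_cpu_ident (the low 6 bits extracted above) indexes
   all_cores and the upper bits carry the configure-time ISA flags.  */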
7942
7943/* Return the architecture corresponding to the enum ARCH.
7944 If it doesn't specify a valid architecture, return the default. */
7945
7946static const struct processor *
7947aarch64_get_arch (enum aarch64_arch arch)
7948{
7949 if (arch != aarch64_no_arch)
7950 return &all_architectures[arch];
7951
7952 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
7953
7954 return &all_architectures[cpu->arch];
0cfff2a1
KT
7955}
7956
7957/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
7958 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
7959 tuning structs. In particular it must set selected_tune and
7960 aarch64_isa_flags that define the available ISA features and tuning
7961 decisions. It must also set selected_arch as this will be used to
7962 output the .arch asm tags for each function. */
7963
7964static void
7965aarch64_override_options (void)
7966{
7967 unsigned long cpu_isa = 0;
7968 unsigned long arch_isa = 0;
7969 aarch64_isa_flags = 0;
7970
361fb3ee
KT
7971 bool valid_cpu = true;
7972 bool valid_tune = true;
7973 bool valid_arch = true;
7974
0cfff2a1
KT
7975 selected_cpu = NULL;
7976 selected_arch = NULL;
7977 selected_tune = NULL;
7978
7979 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
7980 If either of -march or -mtune is given, they override their
7981 respective component of -mcpu. */
7982 if (aarch64_cpu_string)
361fb3ee
KT
7983 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
7984 &cpu_isa);
0cfff2a1
KT
7985
7986 if (aarch64_arch_string)
361fb3ee
KT
7987 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
7988 &arch_isa);
0cfff2a1
KT
7989
7990 if (aarch64_tune_string)
361fb3ee 7991 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192
IB
7992
7993 /* If the user did not specify a processor, choose the default
7994 one for them. This will be the CPU set during configuration using
a3cd0246 7995 --with-cpu, otherwise it is "generic". */
43e9d192
IB
7996 if (!selected_cpu)
7997 {
0cfff2a1
KT
7998 if (selected_arch)
7999 {
8000 selected_cpu = &all_cores[selected_arch->ident];
8001 aarch64_isa_flags = arch_isa;
361fb3ee 8002 explicit_arch = selected_arch->arch;
0cfff2a1
KT
8003 }
8004 else
8005 {
361fb3ee
KT
8006 /* Get default configure-time CPU. */
8007 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
0cfff2a1
KT
8008 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
8009 }
361fb3ee
KT
8010
8011 if (selected_tune)
8012 explicit_tune_core = selected_tune->ident;
0cfff2a1
KT
8013 }
8014 /* If both -mcpu and -march are specified check that they are architecturally
8015 compatible, warn if they're not and prefer the -march ISA flags. */
8016 else if (selected_arch)
8017 {
8018 if (selected_arch->arch != selected_cpu->arch)
8019 {
8020 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
8021 all_architectures[selected_cpu->arch].name,
8022 selected_arch->name);
8023 }
8024 aarch64_isa_flags = arch_isa;
361fb3ee
KT
8025 explicit_arch = selected_arch->arch;
8026 explicit_tune_core = selected_tune ? selected_tune->ident
8027 : selected_cpu->ident;
0cfff2a1
KT
8028 }
8029 else
8030 {
8031 /* -mcpu but no -march. */
8032 aarch64_isa_flags = cpu_isa;
361fb3ee
KT
8033 explicit_tune_core = selected_tune ? selected_tune->ident
8034 : selected_cpu->ident;
8035 gcc_assert (selected_cpu);
8036 selected_arch = &all_architectures[selected_cpu->arch];
8037 explicit_arch = selected_arch->arch;
43e9d192
IB
8038 }
8039
0cfff2a1
KT
8040 /* Set the arch as well, as we will need it when outputting
8041 the .arch directive in assembly. */
8042 if (!selected_arch)
8043 {
8044 gcc_assert (selected_cpu);
8045 selected_arch = &all_architectures[selected_cpu->arch];
8046 }
43e9d192 8047
43e9d192 8048 if (!selected_tune)
3edaf26d 8049 selected_tune = selected_cpu;
43e9d192 8050
0cfff2a1
KT
8051#ifndef HAVE_AS_MABI_OPTION
8052 /* The compiler may have been configured with 2.23.* binutils, which does
8053 not have support for ILP32. */
8054 if (TARGET_ILP32)
8055 error ("Assembler does not support -mabi=ilp32");
8056#endif
43e9d192 8057
361fb3ee
KT
8058 /* Make sure we properly set up the explicit options. */
8059 if ((aarch64_cpu_string && valid_cpu)
8060 || (aarch64_tune_string && valid_tune))
8061 gcc_assert (explicit_tune_core != aarch64_none);
8062
8063 if ((aarch64_cpu_string && valid_cpu)
8064 || (aarch64_arch_string && valid_arch))
8065 gcc_assert (explicit_arch != aarch64_no_arch);
8066
0cfff2a1 8067 aarch64_build_bitmask_table ();
8dec06f2 8068
0cfff2a1
KT
8069 aarch64_override_options_internal (&global_options);
8070
8071 /* Save these options as the default ones in case we push and pop them later
8072 while processing functions with potential target attributes. */
8073 target_option_default_node = target_option_current_node
8074 = build_target_option_node (&global_options);
5e396da6 8075
e2fc7193 8076 aarch64_register_fma_steering ();
fde9b31b 8077
43e9d192
IB
8078}
8079
8080/* Implement targetm.override_options_after_change. */
8081
8082static void
8083aarch64_override_options_after_change (void)
8084{
0cfff2a1 8085 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
8086}
8087
8088static struct machine_function *
8089aarch64_init_machine_status (void)
8090{
8091 struct machine_function *machine;
766090c2 8092 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
8093 return machine;
8094}
8095
8096void
8097aarch64_init_expanders (void)
8098{
8099 init_machine_status = aarch64_init_machine_status;
8100}
8101
8102/* Resolve the code model selected in OPTS, together with -fpic/-fPIC, into aarch64_cmodel. */
8103static void
0cfff2a1 8104initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 8105{
0cfff2a1 8106 if (opts->x_flag_pic)
43e9d192 8107 {
0cfff2a1 8108 switch (opts->x_aarch64_cmodel_var)
43e9d192
IB
8109 {
8110 case AARCH64_CMODEL_TINY:
8111 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
8112 break;
8113 case AARCH64_CMODEL_SMALL:
34ecdb0f 8114#ifdef HAVE_AS_SMALL_PIC_RELOCS
1b1e81f8
JW
8115 aarch64_cmodel = (flag_pic == 2
8116 ? AARCH64_CMODEL_SMALL_PIC
8117 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f
JW
8118#else
8119 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
8120#endif
43e9d192
IB
8121 break;
8122 case AARCH64_CMODEL_LARGE:
8123 sorry ("code model %qs with -f%s", "large",
0cfff2a1 8124 opts->x_flag_pic > 1 ? "PIC" : "pic");
1c652781 8125 break;
43e9d192
IB
8126 default:
8127 gcc_unreachable ();
8128 }
8129 }
8130 else
0cfff2a1 8131 aarch64_cmodel = opts->x_aarch64_cmodel_var;
43e9d192
IB
8132}
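/* Illustrative examples, not part of the original source, of the mapping
   above (assuming the assembler supports the small PIC relocations, i.e.
   HAVE_AS_SMALL_PIC_RELOCS is defined):

     -mcmodel=tiny  -fpic   -> AARCH64_CMODEL_TINY_PIC
     -mcmodel=small -fpic   -> AARCH64_CMODEL_SMALL_SPIC
     -mcmodel=small -fPIC   -> AARCH64_CMODEL_SMALL_PIC
     -mcmodel=large -fpic   -> rejected via sorry ()
     no -fpic/-fPIC         -> opts->x_aarch64_cmodel_var is used as is.  */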
8133
361fb3ee
KT
8134/* Implement TARGET_OPTION_SAVE. */
8135
8136static void
8137aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
8138{
8139 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
8140}
8141
8142/* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
8143 using the information saved in PTR. */
8144
8145static void
8146aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
8147{
8148 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
8149 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
8150 opts->x_explicit_arch = ptr->x_explicit_arch;
8151 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
8152 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
8153
8154 aarch64_override_options_internal (opts);
8155}
8156
8157/* Implement TARGET_OPTION_PRINT. */
8158
8159static void
8160aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
8161{
8162 const struct processor *cpu
8163 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
8164 unsigned long isa_flags = ptr->x_aarch64_isa_flags;
8165 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
054b4005
JG
8166 std::string extension
8167 = aarch64_get_extension_string_for_isa_flags (isa_flags);
361fb3ee
KT
8168
8169 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
054b4005
JG
8170 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
8171 arch->name, extension.c_str ());
361fb3ee
KT
8172}
8173
d78006d9
KT
8174static GTY(()) tree aarch64_previous_fndecl;
8175
e4ea20c8
KT
8176void
8177aarch64_reset_previous_fndecl (void)
8178{
8179 aarch64_previous_fndecl = NULL;
8180}
8181
d78006d9
KT
8182/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
8183 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
8184 of the function, if such exists. This function may be called multiple
8185 times on a single function so use aarch64_previous_fndecl to avoid
8186 setting up identical state. */
8187
8188static void
8189aarch64_set_current_function (tree fndecl)
8190{
8191 tree old_tree = (aarch64_previous_fndecl
8192 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
8193 : NULL_TREE);
8194
8195 tree new_tree = (fndecl
8196 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
8197 : NULL_TREE);
8198
8199
8200 if (fndecl && fndecl != aarch64_previous_fndecl)
8201 {
8202 aarch64_previous_fndecl = fndecl;
8203 if (old_tree == new_tree)
8204 ;
8205
8206 else if (new_tree && new_tree != target_option_default_node)
8207 {
8208 cl_target_option_restore (&global_options,
8209 TREE_TARGET_OPTION (new_tree));
8210 if (TREE_TARGET_GLOBALS (new_tree))
8211 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
8212 else
8213 TREE_TARGET_GLOBALS (new_tree)
8214 = save_target_globals_default_opts ();
8215 }
8216
8217 else if (old_tree && old_tree != target_option_default_node)
8218 {
8219 new_tree = target_option_current_node;
8220 cl_target_option_restore (&global_options,
8221 TREE_TARGET_OPTION (new_tree));
8222 if (TREE_TARGET_GLOBALS (new_tree))
8223 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
8224 else if (new_tree == target_option_default_node)
8225 restore_target_globals (&default_target_globals);
8226 else
8227 TREE_TARGET_GLOBALS (new_tree)
8228 = save_target_globals_default_opts ();
8229 }
8230 }
6e17a23b
KT
8231
8232 if (!fndecl)
8233 return;
8234
8235 /* If we turned on SIMD make sure that any vector parameters are re-laid out
8236 so that they use proper vector modes. */
8237 if (TARGET_SIMD)
8238 {
8239 tree parms = DECL_ARGUMENTS (fndecl);
8240 for (; parms && parms != void_list_node; parms = TREE_CHAIN (parms))
8241 {
8242 if (TREE_CODE (parms) == PARM_DECL
8243 && VECTOR_TYPE_P (TREE_TYPE (parms))
8244 && DECL_MODE (parms) != TYPE_MODE (TREE_TYPE (parms)))
8245 relayout_decl (parms);
8246 }
8247 }
d78006d9 8248}
361fb3ee 8249
5a2c8331
KT
8250/* Enum describing the various ways we can handle attributes.
8251 In many cases we can reuse the generic option handling machinery. */
8252
8253enum aarch64_attr_opt_type
8254{
8255 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
8256 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
8257 aarch64_attr_enum, /* Attribute sets an enum variable. */
8258 aarch64_attr_custom /* Attribute requires a custom handling function. */
8259};
8260
8261/* All the information needed to handle a target attribute.
8262 NAME is the name of the attribute.
8263 ATTR_TYPE specifies the type of behaviour of the attribute as described
8264 in the definition of enum aarch64_attr_opt_type.
8265 ALLOW_NEG is true if the attribute supports a "no-" form.
8266 HANDLER is the function that takes the attribute string and whether
8267 it is a pragma or attribute and handles the option. It is needed only
8268 when the ATTR_TYPE is aarch64_attr_custom.
8269 OPT_NUM is the enum specifying the option that the attribute modifies.
8270 This is needed for attributes that mirror the behaviour of a command-line
8271 option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
8272 aarch64_attr_enum. */
8273
8274struct aarch64_attribute_info
8275{
8276 const char *name;
8277 enum aarch64_attr_opt_type attr_type;
8278 bool allow_neg;
8279 bool (*handler) (const char *, const char *);
8280 enum opt_code opt_num;
8281};
8282
8283/* Handle the ARCH_STR argument to the arch= target attribute.
8284 PRAGMA_OR_ATTR is used in potential error messages. */
8285
8286static bool
8287aarch64_handle_attr_arch (const char *str, const char *pragma_or_attr)
8288{
8289 const struct processor *tmp_arch = NULL;
8290 enum aarch64_parse_opt_result parse_res
8291 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags);
8292
8293 if (parse_res == AARCH64_PARSE_OK)
8294 {
8295 gcc_assert (tmp_arch);
8296 selected_arch = tmp_arch;
8297 explicit_arch = selected_arch->arch;
8298 return true;
8299 }
8300
8301 switch (parse_res)
8302 {
8303 case AARCH64_PARSE_MISSING_ARG:
8304 error ("missing architecture name in 'arch' target %s", pragma_or_attr);
8305 break;
8306 case AARCH64_PARSE_INVALID_ARG:
8307 error ("unknown value %qs for 'arch' target %s", str, pragma_or_attr);
8308 break;
8309 case AARCH64_PARSE_INVALID_FEATURE:
8310 error ("invalid feature modifier %qs for 'arch' target %s",
8311 str, pragma_or_attr);
8312 break;
8313 default:
8314 gcc_unreachable ();
8315 }
8316
8317 return false;
8318}
8319
8320/* Handle the argument CPU_STR to the cpu= target attribute.
8321 PRAGMA_OR_ATTR is used in potential error messages. */
8322
8323static bool
8324aarch64_handle_attr_cpu (const char *str, const char *pragma_or_attr)
8325{
8326 const struct processor *tmp_cpu = NULL;
8327 enum aarch64_parse_opt_result parse_res
8328 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags);
8329
8330 if (parse_res == AARCH64_PARSE_OK)
8331 {
8332 gcc_assert (tmp_cpu);
8333 selected_tune = tmp_cpu;
8334 explicit_tune_core = selected_tune->ident;
8335
8336 selected_arch = &all_architectures[tmp_cpu->arch];
8337 explicit_arch = selected_arch->arch;
8338 return true;
8339 }
8340
8341 switch (parse_res)
8342 {
8343 case AARCH64_PARSE_MISSING_ARG:
8344 error ("missing cpu name in 'cpu' target %s", pragma_or_attr);
8345 break;
8346 case AARCH64_PARSE_INVALID_ARG:
8347 error ("unknown value %qs for 'cpu' target %s", str, pragma_or_attr);
8348 break;
8349 case AARCH64_PARSE_INVALID_FEATURE:
8350 error ("invalid feature modifier %qs for 'cpu' target %s",
8351 str, pragma_or_attr);
8352 break;
8353 default:
8354 gcc_unreachable ();
8355 }
8356
8357 return false;
8358}
8359
8360/* Handle the argument STR to the tune= target attribute.
8361 PRAGMA_OR_ATTR is used in potential error messages. */
8362
8363static bool
8364aarch64_handle_attr_tune (const char *str, const char *pragma_or_attr)
8365{
8366 const struct processor *tmp_tune = NULL;
8367 enum aarch64_parse_opt_result parse_res
8368 = aarch64_parse_tune (str, &tmp_tune);
8369
8370 if (parse_res == AARCH64_PARSE_OK)
8371 {
8372 gcc_assert (tmp_tune);
8373 selected_tune = tmp_tune;
8374 explicit_tune_core = selected_tune->ident;
8375 return true;
8376 }
8377
8378 switch (parse_res)
8379 {
8380 case AARCH64_PARSE_INVALID_ARG:
8381 error ("unknown value %qs for 'tune' target %s", str, pragma_or_attr);
8382 break;
8383 default:
8384 gcc_unreachable ();
8385 }
8386
8387 return false;
8388}
8389
8390/* Parse an architecture extensions target attribute string specified in STR.
8391 For example "+fp+nosimd". Show any errors if needed. Return TRUE
8392 if successful. Update aarch64_isa_flags to reflect the ISA features
8393 modified.
8394 PRAGMA_OR_ATTR is used in potential error messages. */
8395
8396static bool
8397aarch64_handle_attr_isa_flags (char *str, const char *pragma_or_attr)
8398{
8399 enum aarch64_parse_opt_result parse_res;
8400 unsigned long isa_flags = aarch64_isa_flags;
8401
e4ea20c8
KT
8402 /* We allow "+nothing" in the beginning to clear out all architectural
8403 features if the user wants to handpick specific features. */
8404 if (strncmp ("+nothing", str, 8) == 0)
8405 {
8406 isa_flags = 0;
8407 str += 8;
8408 }
8409
5a2c8331
KT
8410 parse_res = aarch64_parse_extension (str, &isa_flags);
8411
8412 if (parse_res == AARCH64_PARSE_OK)
8413 {
8414 aarch64_isa_flags = isa_flags;
8415 return true;
8416 }
8417
8418 switch (parse_res)
8419 {
8420 case AARCH64_PARSE_MISSING_ARG:
8421 error ("missing feature modifier in target %s %qs",
8422 pragma_or_attr, str);
8423 break;
8424
8425 case AARCH64_PARSE_INVALID_FEATURE:
8426 error ("invalid feature modifier in target %s %qs",
8427 pragma_or_attr, str);
8428 break;
8429
8430 default:
8431 gcc_unreachable ();
8432 }
8433
8434 return false;
8435}
8436
8437/* The target attributes that we support. On top of these we also support just
8438 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
8439 handled explicitly in aarch64_process_one_target_attr. */
8440
8441static const struct aarch64_attribute_info aarch64_attributes[] =
8442{
8443 { "general-regs-only", aarch64_attr_mask, false, NULL,
8444 OPT_mgeneral_regs_only },
8445 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
8446 OPT_mfix_cortex_a53_835769 },
8447 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
8448 { "strict-align", aarch64_attr_mask, false, NULL, OPT_mstrict_align },
8449 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
8450 OPT_momit_leaf_frame_pointer },
8451 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
8452 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
8453 OPT_march_ },
8454 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
8455 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
8456 OPT_mtune_ },
8457 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
8458};
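/* Illustrative examples, not part of the original source, of attributes
   routed through this table (the cpu/arch names are assumed to be valid
   entries in all_cores/all_architectures):

     target ("cpu=cortex-a57+nofp")         custom: aarch64_handle_attr_cpu
     target ("arch=armv8-a+crc")            custom: aarch64_handle_attr_arch
     target ("no-omit-leaf-frame-pointer")  boolean with negated form
     target ("+crc")                        bare ISA string, handled directly
                                            in aarch64_process_one_target_attr.  */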
8459
8460/* Parse ARG_STR which contains the definition of one target attribute.
8461 Show appropriate errors if any or return true if the attribute is valid.
8462 PRAGMA_OR_ATTR holds the string to use in error messages about whether
8463 we're processing a target attribute or pragma. */
8464
8465static bool
8466aarch64_process_one_target_attr (char *arg_str, const char* pragma_or_attr)
8467{
8468 bool invert = false;
8469
8470 size_t len = strlen (arg_str);
8471
8472 if (len == 0)
8473 {
8474 error ("malformed target %s", pragma_or_attr);
8475 return false;
8476 }
8477
8478 char *str_to_check = (char *) alloca (len + 1);
8479 strcpy (str_to_check, arg_str);
8480
8481 /* Skip leading whitespace. */
8482 while (*str_to_check == ' ' || *str_to_check == '\t')
8483 str_to_check++;
8484
8485 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
8486 It is easier to detect and handle it explicitly here rather than going
8487 through the machinery for the rest of the target attributes in this
8488 function. */
8489 if (*str_to_check == '+')
8490 return aarch64_handle_attr_isa_flags (str_to_check, pragma_or_attr);
8491
8492 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
8493 {
8494 invert = true;
8495 str_to_check += 3;
8496 }
8497 char *arg = strchr (str_to_check, '=');
8498
8499 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
8500 and point ARG to "foo". */
8501 if (arg)
8502 {
8503 *arg = '\0';
8504 arg++;
8505 }
8506 const struct aarch64_attribute_info *p_attr;
8507 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
8508 {
8509 /* If the names don't match up, or the user has given an argument
8510 to an attribute that doesn't accept one, or didn't give an argument
8511 to an attribute that expects one, fail to match. */
8512 if (strcmp (str_to_check, p_attr->name) != 0)
8513 continue;
8514
8515 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
8516 || p_attr->attr_type == aarch64_attr_enum;
8517
8518 if (attr_need_arg_p ^ (arg != NULL))
8519 {
8520 error ("target %s %qs does not accept an argument",
8521 pragma_or_attr, str_to_check);
8522 return false;
8523 }
8524
8525 /* If the name matches but the attribute does not allow "no-" versions
8526 then we can't match. */
8527 if (invert && !p_attr->allow_neg)
8528 {
8529 error ("target %s %qs does not allow a negated form",
8530 pragma_or_attr, str_to_check);
8531 return false;
8532 }
8533
8534 switch (p_attr->attr_type)
8535 {
8536 /* Has a custom handler registered.
8537 For example, cpu=, arch=, tune=. */
8538 case aarch64_attr_custom:
8539 gcc_assert (p_attr->handler);
8540 if (!p_attr->handler (arg, pragma_or_attr))
8541 return false;
8542 break;
8543
8544 /* Either set or unset a boolean option. */
8545 case aarch64_attr_bool:
8546 {
8547 struct cl_decoded_option decoded;
8548
8549 generate_option (p_attr->opt_num, NULL, !invert,
8550 CL_TARGET, &decoded);
8551 aarch64_handle_option (&global_options, &global_options_set,
8552 &decoded, input_location);
8553 break;
8554 }
8555 /* Set or unset a bit in the target_flags. aarch64_handle_option
8556 should know what mask to apply given the option number. */
8557 case aarch64_attr_mask:
8558 {
8559 struct cl_decoded_option decoded;
8560 /* We only need to specify the option number.
8561 aarch64_handle_option will know which mask to apply. */
8562 decoded.opt_index = p_attr->opt_num;
8563 decoded.value = !invert;
8564 aarch64_handle_option (&global_options, &global_options_set,
8565 &decoded, input_location);
8566 break;
8567 }
8568 /* Use the option setting machinery to set an option to an enum. */
8569 case aarch64_attr_enum:
8570 {
8571 gcc_assert (arg);
8572 bool valid;
8573 int value;
8574 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
8575 &value, CL_TARGET);
8576 if (valid)
8577 {
8578 set_option (&global_options, NULL, p_attr->opt_num, value,
8579 NULL, DK_UNSPECIFIED, input_location,
8580 global_dc);
8581 }
8582 else
8583 {
8584 error ("target %s %s=%s is not valid",
8585 pragma_or_attr, str_to_check, arg);
8586 }
8587 break;
8588 }
8589 default:
8590 gcc_unreachable ();
8591 }
8592 }
8593
8594 return true;
8595}
8596
8597/* Count how many times the character C appears in
8598 NULL-terminated string STR. */
8599
8600static unsigned int
8601num_occurences_in_str (char c, char *str)
8602{
8603 unsigned int res = 0;
8604 while (*str != '\0')
8605 {
8606 if (*str == c)
8607 res++;
8608
8609 str++;
8610 }
8611
8612 return res;
8613}
8614
8615/* Parse the tree in ARGS that contains the target attribute information
8616 and update the global target options space. PRAGMA_OR_ATTR is a string
8617 to be used in error messages, specifying whether this is processing
8618 a target attribute or a target pragma. */
8619
8620bool
8621aarch64_process_target_attr (tree args, const char* pragma_or_attr)
8622{
8623 if (TREE_CODE (args) == TREE_LIST)
8624 {
8625 do
8626 {
8627 tree head = TREE_VALUE (args);
8628 if (head)
8629 {
8630 if (!aarch64_process_target_attr (head, pragma_or_attr))
8631 return false;
8632 }
8633 args = TREE_CHAIN (args);
8634 } while (args);
8635
8636 return true;
8637 }
8638 /* We expect to find a string to parse. */
8639 gcc_assert (TREE_CODE (args) == STRING_CST);
8640
8641 size_t len = strlen (TREE_STRING_POINTER (args));
8642 char *str_to_check = (char *) alloca (len + 1);
8643 strcpy (str_to_check, TREE_STRING_POINTER (args));
8644
8645 if (len == 0)
8646 {
8647 error ("malformed target %s value", pragma_or_attr);
8648 return false;
8649 }
8650
8651 /* Used to catch empty spaces between commas i.e.
8652 attribute ((target ("attr1,,attr2"))). */
8653 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
8654
8655 /* Handle multiple target attributes separated by ','. */
8656 char *token = strtok (str_to_check, ",");
8657
8658 unsigned int num_attrs = 0;
8659 while (token)
8660 {
8661 num_attrs++;
8662 if (!aarch64_process_one_target_attr (token, pragma_or_attr))
8663 {
8664 error ("target %s %qs is invalid", pragma_or_attr, token);
8665 return false;
8666 }
8667
8668 token = strtok (NULL, ",");
8669 }
8670
8671 if (num_attrs != num_commas + 1)
8672 {
8673 error ("malformed target %s list %qs",
8674 pragma_or_attr, TREE_STRING_POINTER (args));
8675 return false;
8676 }
8677
8678 return true;
8679}
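/* Illustrative example, not part of the original source: for
   target ("arch=armv8-a,strict-align") the string holds one comma, strtok
   produces the two tokens "arch=armv8-a" and "strict-align", and
   num_attrs (2) == num_commas + 1, so the list is accepted.  For the
   "attr1,,attr2" case mentioned above, strtok skips the empty field, so
   only two tokens are seen against two commas and the "malformed target
   %s list" error is reported.  */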
8680
8681/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
8682 process attribute ((target ("..."))). */
8683
8684static bool
8685aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
8686{
8687 struct cl_target_option cur_target;
8688 bool ret;
8689 tree old_optimize;
8690 tree new_target, new_optimize;
8691 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
91d0e8de
KT
8692
8693 /* If what we're processing is the current pragma string then the
8694 target option node is already stored in target_option_current_node
8695 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
8696 having to re-parse the string. This is especially useful to keep
8697 arm_neon.h compile times down since that header contains a lot
8698 of intrinsics enclosed in pragmas. */
8699 if (!existing_target && args == current_target_pragma)
8700 {
8701 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
8702 return true;
8703 }
5a2c8331
KT
8704 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
8705
8706 old_optimize = build_optimization_node (&global_options);
8707 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
8708
8709 /* If the function changed the optimization levels as well as setting
8710 target options, start with the optimizations specified. */
8711 if (func_optimize && func_optimize != old_optimize)
8712 cl_optimization_restore (&global_options,
8713 TREE_OPTIMIZATION (func_optimize));
8714
8715 /* Save the current target options to restore at the end. */
8716 cl_target_option_save (&cur_target, &global_options);
8717
8718 /* If fndecl already has some target attributes applied to it, unpack
8719 them so that we add this attribute on top of them, rather than
8720 overwriting them. */
8721 if (existing_target)
8722 {
8723 struct cl_target_option *existing_options
8724 = TREE_TARGET_OPTION (existing_target);
8725
8726 if (existing_options)
8727 cl_target_option_restore (&global_options, existing_options);
8728 }
8729 else
8730 cl_target_option_restore (&global_options,
8731 TREE_TARGET_OPTION (target_option_current_node));
8732
8733
8734 ret = aarch64_process_target_attr (args, "attribute");
8735
8736 /* Set up any additional state. */
8737 if (ret)
8738 {
8739 aarch64_override_options_internal (&global_options);
e95a988a
KT
8740 /* Initialize SIMD builtins if we haven't already.
8741 Set current_target_pragma to NULL for the duration so that
8742 the builtin initialization code doesn't try to tag the functions
8743 being built with the attributes specified by any current pragma, thus
8744 going into an infinite recursion. */
8745 if (TARGET_SIMD)
8746 {
8747 tree saved_current_target_pragma = current_target_pragma;
8748 current_target_pragma = NULL;
8749 aarch64_init_simd_builtins ();
8750 current_target_pragma = saved_current_target_pragma;
8751 }
5a2c8331
KT
8752 new_target = build_target_option_node (&global_options);
8753 }
8754 else
8755 new_target = NULL;
8756
8757 new_optimize = build_optimization_node (&global_options);
8758
8759 if (fndecl && ret)
8760 {
8761 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
8762
8763 if (old_optimize != new_optimize)
8764 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
8765 }
8766
8767 cl_target_option_restore (&global_options, &cur_target);
8768
8769 if (old_optimize != new_optimize)
8770 cl_optimization_restore (&global_options,
8771 TREE_OPTIMIZATION (old_optimize));
8772 return ret;
8773}
8774
1fd8d40c
KT
8775/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
8776 tri-bool options (yes, no, don't care) and the default value is
8777 DEF, determine whether to reject inlining. */
8778
8779static bool
8780aarch64_tribools_ok_for_inlining_p (int caller, int callee,
8781 int dont_care, int def)
8782{
8783 /* If the callee doesn't care, always allow inlining. */
8784 if (callee == dont_care)
8785 return true;
8786
8787 /* If the caller doesn't care, always allow inlining. */
8788 if (caller == dont_care)
8789 return true;
8790
8791 /* Otherwise, allow inlining if either the callee and caller values
8792 agree, or if the callee is using the default value. */
8793 return (callee == caller || callee == def);
8794}
8795
8796/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
8797 to inline CALLEE into CALLER based on target-specific info.
8798 Make sure that the caller and callee have compatible architectural
8799 features. Then go through the other possible target attributes
8800 and see if they can block inlining. Try not to reject always_inline
8801 callees unless they are incompatible architecturally. */
8802
8803static bool
8804aarch64_can_inline_p (tree caller, tree callee)
8805{
8806 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
8807 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
8808
8809 /* If callee has no option attributes, then it is ok to inline. */
8810 if (!callee_tree)
8811 return true;
8812
8813 struct cl_target_option *caller_opts
8814 = TREE_TARGET_OPTION (caller_tree ? caller_tree
8815 : target_option_default_node);
8816
8817 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
8818
8819
8820 /* Callee's ISA flags should be a subset of the caller's. */
8821 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
8822 != callee_opts->x_aarch64_isa_flags)
8823 return false;
8824
8825 /* Allow non-strict aligned functions inlining into strict
8826 aligned ones. */
8827 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
8828 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
8829 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
8830 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
8831 return false;
8832
8833 bool always_inline = lookup_attribute ("always_inline",
8834 DECL_ATTRIBUTES (callee));
8835
8836 /* If the architectural features match up and the callee is always_inline
8837 then the other attributes don't matter. */
8838 if (always_inline)
8839 return true;
8840
8841 if (caller_opts->x_aarch64_cmodel_var
8842 != callee_opts->x_aarch64_cmodel_var)
8843 return false;
8844
8845 if (caller_opts->x_aarch64_tls_dialect
8846 != callee_opts->x_aarch64_tls_dialect)
8847 return false;
8848
8849 /* Honour explicit requests to workaround errata. */
8850 if (!aarch64_tribools_ok_for_inlining_p (
8851 caller_opts->x_aarch64_fix_a53_err835769,
8852 callee_opts->x_aarch64_fix_a53_err835769,
8853 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
8854 return false;
8855
8856 /* If the user explicitly specified -momit-leaf-frame-pointer for the
8857 caller and callee and they don't match up, reject inlining. */
8858 if (!aarch64_tribools_ok_for_inlining_p (
8859 caller_opts->x_flag_omit_leaf_frame_pointer,
8860 callee_opts->x_flag_omit_leaf_frame_pointer,
8861 2, 1))
8862 return false;
8863
8864 /* If the callee has specific tuning overrides, respect them. */
8865 if (callee_opts->x_aarch64_override_tune_string != NULL
8866 && caller_opts->x_aarch64_override_tune_string == NULL)
8867 return false;
8868
8869 /* If the user specified tuning override strings for the
8870 caller and callee and they don't match up, reject inlining.
8871 We just do a string compare here, we don't analyze the meaning
8872 of the string, as it would be too costly for little gain. */
8873 if (callee_opts->x_aarch64_override_tune_string
8874 && caller_opts->x_aarch64_override_tune_string
8875 && (strcmp (callee_opts->x_aarch64_override_tune_string,
8876 caller_opts->x_aarch64_override_tune_string) != 0))
8877 return false;
8878
8879 return true;
8880}
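/* Illustrative example, not part of the original source: a callee marked
   __attribute__ ((target ("+crc"))) carries an ISA bit that a plain caller
   lacks, so the subset check above fails and the callee is never inlined,
   not even when it is always_inline, because the architectural test comes
   first.  The alignment rule works one way only: a callee built without
   -mstrict-align may be inlined into a -mstrict-align caller, but a
   strict-align callee may not be inlined into a non-strict caller.  */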
8881
43e9d192
IB
8882/* Return true if SYMBOL_REF X binds locally. */
8883
8884static bool
8885aarch64_symbol_binds_local_p (const_rtx x)
8886{
8887 return (SYMBOL_REF_DECL (x)
8888 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
8889 : SYMBOL_REF_LOCAL_P (x));
8890}
8891
8892/* Return true if SYMBOL_REF X is thread local. */
8893static bool
8894aarch64_tls_symbol_p (rtx x)
8895{
8896 if (! TARGET_HAVE_TLS)
8897 return false;
8898
8899 if (GET_CODE (x) != SYMBOL_REF)
8900 return false;
8901
8902 return SYMBOL_REF_TLS_MODEL (x) != 0;
8903}
8904
8905/* Classify a TLS symbol into one of the TLS kinds. */
8906enum aarch64_symbol_type
8907aarch64_classify_tls_symbol (rtx x)
8908{
8909 enum tls_model tls_kind = tls_symbolic_operand_type (x);
8910
8911 switch (tls_kind)
8912 {
8913 case TLS_MODEL_GLOBAL_DYNAMIC:
8914 case TLS_MODEL_LOCAL_DYNAMIC:
8915 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
8916
8917 case TLS_MODEL_INITIAL_EXEC:
5ae7caad
JW
8918 switch (aarch64_cmodel)
8919 {
8920 case AARCH64_CMODEL_TINY:
8921 case AARCH64_CMODEL_TINY_PIC:
8922 return SYMBOL_TINY_TLSIE;
8923 default:
79496620 8924 return SYMBOL_SMALL_TLSIE;
5ae7caad 8925 }
43e9d192
IB
8926
8927 case TLS_MODEL_LOCAL_EXEC:
cbf5629e
JW
8928 if (aarch64_tls_size == 12)
8929 return SYMBOL_TLSLE12;
8930 else if (aarch64_tls_size == 24)
8931 return SYMBOL_TLSLE24;
8932 else if (aarch64_tls_size == 32)
8933 return SYMBOL_TLSLE32;
8934 else if (aarch64_tls_size == 48)
8935 return SYMBOL_TLSLE48;
8936 else
8937 gcc_unreachable ();
43e9d192
IB
8938
8939 case TLS_MODEL_EMULATED:
8940 case TLS_MODEL_NONE:
8941 return SYMBOL_FORCE_TO_MEM;
8942
8943 default:
8944 gcc_unreachable ();
8945 }
8946}
8947
8948/* Return the method that should be used to access SYMBOL_REF or
8949 LABEL_REF X in context CONTEXT. */
17f4d4bf 8950
43e9d192 8951enum aarch64_symbol_type
f8b756b7 8952aarch64_classify_symbol (rtx x, rtx offset,
43e9d192
IB
8953 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
8954{
8955 if (GET_CODE (x) == LABEL_REF)
8956 {
8957 switch (aarch64_cmodel)
8958 {
8959 case AARCH64_CMODEL_LARGE:
8960 return SYMBOL_FORCE_TO_MEM;
8961
8962 case AARCH64_CMODEL_TINY_PIC:
8963 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
8964 return SYMBOL_TINY_ABSOLUTE;
8965
1b1e81f8 8966 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
8967 case AARCH64_CMODEL_SMALL_PIC:
8968 case AARCH64_CMODEL_SMALL:
8969 return SYMBOL_SMALL_ABSOLUTE;
8970
8971 default:
8972 gcc_unreachable ();
8973 }
8974 }
8975
17f4d4bf 8976 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 8977 {
4a985a37 8978 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
b4f50fd4
RR
8979 {
8980 /* This is alright even in PIC code as the constant
8981 pool reference is always PC relative and within
8982 the same translation unit. */
8983 if (nopcrelative_literal_loads
8984 && CONSTANT_POOL_ADDRESS_P (x))
8985 return SYMBOL_SMALL_ABSOLUTE;
8986 else
8987 return SYMBOL_FORCE_TO_MEM;
8988 }
43e9d192
IB
8989
8990 if (aarch64_tls_symbol_p (x))
8991 return aarch64_classify_tls_symbol (x);
8992
17f4d4bf
CSS
8993 switch (aarch64_cmodel)
8994 {
8995 case AARCH64_CMODEL_TINY:
f8b756b7
TB
8996 /* When we retrieve symbol + offset address, we have to make sure
8997 the offset does not cause overflow of the final address. But
8998 we have no way of knowing the address of symbol at compile time
8999 so we can't accurately say if the distance between the PC and
9000 symbol + offset is outside the addressable range of +/-1M in the
9001 TINY code model. So we rely on images not being greater than
9002 1M and cap the offset at 1M and anything beyond 1M will have to
9003 be loaded using an alternative mechanism. */
9004 if (SYMBOL_REF_WEAK (x)
9005 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
a5350ddc
CSS
9006 return SYMBOL_FORCE_TO_MEM;
9007 return SYMBOL_TINY_ABSOLUTE;
9008
17f4d4bf 9009 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
9010 /* Same reasoning as the tiny code model, but the offset cap here is
9011 4G. */
9012 if (SYMBOL_REF_WEAK (x)
3ff5d1f0
TB
9013 || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
9014 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
9015 return SYMBOL_FORCE_TO_MEM;
9016 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 9017
17f4d4bf 9018 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 9019 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 9020 return SYMBOL_TINY_GOT;
38e6c9a6
MS
9021 return SYMBOL_TINY_ABSOLUTE;
9022
1b1e81f8 9023 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
9024 case AARCH64_CMODEL_SMALL_PIC:
9025 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
9026 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
9027 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 9028 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 9029
17f4d4bf
CSS
9030 default:
9031 gcc_unreachable ();
9032 }
43e9d192 9033 }
17f4d4bf 9034
43e9d192
IB
9035 /* By default push everything into the constant pool. */
9036 return SYMBOL_FORCE_TO_MEM;
9037}
9038
43e9d192
IB
9039bool
9040aarch64_constant_address_p (rtx x)
9041{
9042 return (CONSTANT_P (x) && memory_address_p (DImode, x));
9043}
9044
9045bool
9046aarch64_legitimate_pic_operand_p (rtx x)
9047{
9048 if (GET_CODE (x) == SYMBOL_REF
9049 || (GET_CODE (x) == CONST
9050 && GET_CODE (XEXP (x, 0)) == PLUS
9051 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
9052 return false;
9053
9054 return true;
9055}
9056
3520f7cc
JG
9057/* Return true if X holds either a quarter-precision or
9058 floating-point +0.0 constant. */
9059static bool
ef4bddc2 9060aarch64_valid_floating_const (machine_mode mode, rtx x)
3520f7cc
JG
9061{
9062 if (!CONST_DOUBLE_P (x))
9063 return false;
9064
6a0f8c01
JW
9065 if (aarch64_float_const_zero_rtx_p (x))
9066 return true;
9067
9068 /* We only handle moving 0.0 to a TFmode register. */
3520f7cc
JG
9069 if (!(mode == SFmode || mode == DFmode))
9070 return false;
9071
3520f7cc
JG
9072 return aarch64_float_const_representable_p (x);
9073}
9074
43e9d192 9075static bool
ef4bddc2 9076aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192
IB
9077{
9078 /* Do not allow vector struct mode constants. We could support
9079 0 and -1 easily, but they need support in aarch64-simd.md. */
9080 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
9081 return false;
9082
9083 /* This could probably go away because
9084 we now decompose CONST_INTs according to expand_mov_immediate. */
9085 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 9086 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
9087 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
9088 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
9089
9090 if (GET_CODE (x) == HIGH
9091 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
9092 return true;
9093
9094 return aarch64_constant_address_p (x);
9095}
9096
a5bc806c 9097rtx
43e9d192
IB
9098aarch64_load_tp (rtx target)
9099{
9100 if (!target
9101 || GET_MODE (target) != Pmode
9102 || !register_operand (target, Pmode))
9103 target = gen_reg_rtx (Pmode);
9104
9105 /* Can return in any reg. */
9106 emit_insn (gen_aarch64_load_tp_hard (target));
9107 return target;
9108}
9109
43e9d192
IB
9110/* On AAPCS systems, this is the "struct __va_list". */
9111static GTY(()) tree va_list_type;
9112
9113/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
9114 Return the type to use as __builtin_va_list.
9115
9116 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
9117
9118 struct __va_list
9119 {
9120 void *__stack;
9121 void *__gr_top;
9122 void *__vr_top;
9123 int __gr_offs;
9124 int __vr_offs;
9125 }; */
9126
9127static tree
9128aarch64_build_builtin_va_list (void)
9129{
9130 tree va_list_name;
9131 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
9132
9133 /* Create the type. */
9134 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
9135 /* Give it the required name. */
9136 va_list_name = build_decl (BUILTINS_LOCATION,
9137 TYPE_DECL,
9138 get_identifier ("__va_list"),
9139 va_list_type);
9140 DECL_ARTIFICIAL (va_list_name) = 1;
9141 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 9142 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
9143
9144 /* Create the fields. */
9145 f_stack = build_decl (BUILTINS_LOCATION,
9146 FIELD_DECL, get_identifier ("__stack"),
9147 ptr_type_node);
9148 f_grtop = build_decl (BUILTINS_LOCATION,
9149 FIELD_DECL, get_identifier ("__gr_top"),
9150 ptr_type_node);
9151 f_vrtop = build_decl (BUILTINS_LOCATION,
9152 FIELD_DECL, get_identifier ("__vr_top"),
9153 ptr_type_node);
9154 f_groff = build_decl (BUILTINS_LOCATION,
9155 FIELD_DECL, get_identifier ("__gr_offs"),
9156 integer_type_node);
9157 f_vroff = build_decl (BUILTINS_LOCATION,
9158 FIELD_DECL, get_identifier ("__vr_offs"),
9159 integer_type_node);
9160
9161 DECL_ARTIFICIAL (f_stack) = 1;
9162 DECL_ARTIFICIAL (f_grtop) = 1;
9163 DECL_ARTIFICIAL (f_vrtop) = 1;
9164 DECL_ARTIFICIAL (f_groff) = 1;
9165 DECL_ARTIFICIAL (f_vroff) = 1;
9166
9167 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
9168 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
9169 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
9170 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
9171 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
9172
9173 TYPE_FIELDS (va_list_type) = f_stack;
9174 DECL_CHAIN (f_stack) = f_grtop;
9175 DECL_CHAIN (f_grtop) = f_vrtop;
9176 DECL_CHAIN (f_vrtop) = f_groff;
9177 DECL_CHAIN (f_groff) = f_vroff;
9178
9179 /* Compute its layout. */
9180 layout_type (va_list_type);
9181
9182 return va_list_type;
9183}
9184
9185/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
9186static void
9187aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
9188{
9189 const CUMULATIVE_ARGS *cum;
9190 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
9191 tree stack, grtop, vrtop, groff, vroff;
9192 tree t;
9193 int gr_save_area_size;
9194 int vr_save_area_size;
9195 int vr_offset;
9196
9197 cum = &crtl->args.info;
9198 gr_save_area_size
9199 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
9200 vr_save_area_size
9201 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
9202
d5726973 9203 if (!TARGET_FLOAT)
43e9d192 9204 {
261fb553 9205 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
9206 vr_save_area_size = 0;
9207 }
9208
9209 f_stack = TYPE_FIELDS (va_list_type_node);
9210 f_grtop = DECL_CHAIN (f_stack);
9211 f_vrtop = DECL_CHAIN (f_grtop);
9212 f_groff = DECL_CHAIN (f_vrtop);
9213 f_vroff = DECL_CHAIN (f_groff);
9214
9215 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
9216 NULL_TREE);
9217 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
9218 NULL_TREE);
9219 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
9220 NULL_TREE);
9221 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
9222 NULL_TREE);
9223 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
9224 NULL_TREE);
9225
9226 /* Emit code to initialize STACK, which points to the next varargs stack
9227 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
9228 by named arguments. STACK is 8-byte aligned. */
9229 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
9230 if (cum->aapcs_stack_size > 0)
9231 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
9232 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
9233 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9234
9235 /* Emit code to initialize GRTOP, the top of the GR save area.
9236 virtual_incoming_args_rtx should have been 16 byte aligned. */
9237 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
9238 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
9239 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9240
9241 /* Emit code to initialize VRTOP, the top of the VR save area.
9242 This address is gr_save_area_bytes below GRTOP, rounded
9243 down to the next 16-byte boundary. */
9244 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
9245 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
9246 STACK_BOUNDARY / BITS_PER_UNIT);
9247
9248 if (vr_offset)
9249 t = fold_build_pointer_plus_hwi (t, -vr_offset);
9250 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
9251 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9252
9253 /* Emit code to initialize GROFF, the offset from GRTOP of the
9254 next GPR argument. */
9255 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
9256 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
9257 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9258
 9259 /* Likewise emit code to initialize VROFF, the offset from VRTOP
9260 of the next VR argument. */
9261 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
9262 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
9263 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9264}
9265
9266/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
9267
9268static tree
9269aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
9270 gimple_seq *post_p ATTRIBUTE_UNUSED)
9271{
9272 tree addr;
9273 bool indirect_p;
9274 bool is_ha; /* is HFA or HVA. */
9275 bool dw_align; /* double-word align. */
ef4bddc2 9276 machine_mode ag_mode = VOIDmode;
43e9d192 9277 int nregs;
ef4bddc2 9278 machine_mode mode;
43e9d192
IB
9279
9280 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
9281 tree stack, f_top, f_off, off, arg, roundup, on_stack;
9282 HOST_WIDE_INT size, rsize, adjust, align;
9283 tree t, u, cond1, cond2;
9284
9285 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9286 if (indirect_p)
9287 type = build_pointer_type (type);
9288
9289 mode = TYPE_MODE (type);
9290
9291 f_stack = TYPE_FIELDS (va_list_type_node);
9292 f_grtop = DECL_CHAIN (f_stack);
9293 f_vrtop = DECL_CHAIN (f_grtop);
9294 f_groff = DECL_CHAIN (f_vrtop);
9295 f_vroff = DECL_CHAIN (f_groff);
9296
9297 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
9298 f_stack, NULL_TREE);
9299 size = int_size_in_bytes (type);
9300 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
9301
9302 dw_align = false;
9303 adjust = 0;
9304 if (aarch64_vfp_is_call_or_return_candidate (mode,
9305 type,
9306 &ag_mode,
9307 &nregs,
9308 &is_ha))
9309 {
9310 /* TYPE passed in fp/simd registers. */
d5726973 9311 if (!TARGET_FLOAT)
261fb553 9312 aarch64_err_no_fpadvsimd (mode, "varargs");
43e9d192
IB
9313
9314 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
9315 unshare_expr (valist), f_vrtop, NULL_TREE);
9316 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
9317 unshare_expr (valist), f_vroff, NULL_TREE);
9318
9319 rsize = nregs * UNITS_PER_VREG;
9320
9321 if (is_ha)
9322 {
9323 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
9324 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
9325 }
9326 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
9327 && size < UNITS_PER_VREG)
9328 {
9329 adjust = UNITS_PER_VREG - size;
9330 }
9331 }
9332 else
9333 {
9334 /* TYPE passed in general registers. */
9335 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
9336 unshare_expr (valist), f_grtop, NULL_TREE);
9337 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
9338 unshare_expr (valist), f_groff, NULL_TREE);
9339 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
9340 nregs = rsize / UNITS_PER_WORD;
9341
9342 if (align > 8)
9343 dw_align = true;
9344
9345 if (BLOCK_REG_PADDING (mode, type, 1) == downward
9346 && size < UNITS_PER_WORD)
9347 {
9348 adjust = UNITS_PER_WORD - size;
9349 }
9350 }
9351
9352 /* Get a local temporary for the field value. */
9353 off = get_initialized_tmp_var (f_off, pre_p, NULL);
9354
9355 /* Emit code to branch if off >= 0. */
9356 t = build2 (GE_EXPR, boolean_type_node, off,
9357 build_int_cst (TREE_TYPE (off), 0));
9358 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
9359
9360 if (dw_align)
9361 {
9362 /* Emit: offs = (offs + 15) & -16. */
9363 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
9364 build_int_cst (TREE_TYPE (off), 15));
9365 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
9366 build_int_cst (TREE_TYPE (off), -16));
9367 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
9368 }
9369 else
9370 roundup = NULL;
9371
9372 /* Update ap.__[g|v]r_offs */
9373 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
9374 build_int_cst (TREE_TYPE (off), rsize));
9375 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
9376
9377 /* String up. */
9378 if (roundup)
9379 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
9380
9381 /* [cond2] if (ap.__[g|v]r_offs > 0) */
9382 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
9383 build_int_cst (TREE_TYPE (f_off), 0));
9384 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
9385
9386 /* String up: make sure the assignment happens before the use. */
9387 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
9388 COND_EXPR_ELSE (cond1) = t;
9389
9390 /* Prepare the trees handling the argument that is passed on the stack;
 9391 the top-level node will be stored in ON_STACK. */
9392 arg = get_initialized_tmp_var (stack, pre_p, NULL);
9393 if (align > 8)
9394 {
9395 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
9396 t = fold_convert (intDI_type_node, arg);
9397 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
9398 build_int_cst (TREE_TYPE (t), 15));
9399 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9400 build_int_cst (TREE_TYPE (t), -16));
9401 t = fold_convert (TREE_TYPE (arg), t);
9402 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
9403 }
9404 else
9405 roundup = NULL;
9406 /* Advance ap.__stack */
9407 t = fold_convert (intDI_type_node, arg);
9408 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
9409 build_int_cst (TREE_TYPE (t), size + 7));
9410 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9411 build_int_cst (TREE_TYPE (t), -8));
9412 t = fold_convert (TREE_TYPE (arg), t);
9413 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
9414 /* String up roundup and advance. */
9415 if (roundup)
9416 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
9417 /* String up with arg */
9418 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
9419 /* Big-endianness related address adjustment. */
9420 if (BLOCK_REG_PADDING (mode, type, 1) == downward
9421 && size < UNITS_PER_WORD)
9422 {
9423 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
9424 size_int (UNITS_PER_WORD - size));
9425 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
9426 }
9427
9428 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
9429 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
9430
9431 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
9432 t = off;
9433 if (adjust)
9434 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
9435 build_int_cst (TREE_TYPE (off), adjust));
9436
9437 t = fold_convert (sizetype, t);
9438 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
9439
9440 if (is_ha)
9441 {
9442 /* type ha; // treat as "struct {ftype field[n];}"
9443 ... [computing offs]
9444 for (i = 0; i <nregs; ++i, offs += 16)
9445 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
9446 return ha; */
9447 int i;
9448 tree tmp_ha, field_t, field_ptr_t;
9449
9450 /* Declare a local variable. */
9451 tmp_ha = create_tmp_var_raw (type, "ha");
9452 gimple_add_tmp_var (tmp_ha);
9453
9454 /* Establish the base type. */
9455 switch (ag_mode)
9456 {
9457 case SFmode:
9458 field_t = float_type_node;
9459 field_ptr_t = float_ptr_type_node;
9460 break;
9461 case DFmode:
9462 field_t = double_type_node;
9463 field_ptr_t = double_ptr_type_node;
9464 break;
9465 case TFmode:
9466 field_t = long_double_type_node;
9467 field_ptr_t = long_double_ptr_type_node;
9468 break;
 9469/* Half precision and quad precision are not fully supported yet. Enable
9470 the following code after the support is complete. Need to find the correct
9471 type node for __fp16 *. */
9472#if 0
9473 case HFmode:
9474 field_t = float_type_node;
9475 field_ptr_t = float_ptr_type_node;
9476 break;
9477#endif
9478 case V2SImode:
9479 case V4SImode:
9480 {
9481 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
9482 field_t = build_vector_type_for_mode (innertype, ag_mode);
9483 field_ptr_t = build_pointer_type (field_t);
9484 }
9485 break;
9486 default:
9487 gcc_assert (0);
9488 }
9489
9490 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
9491 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
9492 addr = t;
9493 t = fold_convert (field_ptr_t, addr);
9494 t = build2 (MODIFY_EXPR, field_t,
9495 build1 (INDIRECT_REF, field_t, tmp_ha),
9496 build1 (INDIRECT_REF, field_t, t));
9497
9498 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
9499 for (i = 1; i < nregs; ++i)
9500 {
9501 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
9502 u = fold_convert (field_ptr_t, addr);
9503 u = build2 (MODIFY_EXPR, field_t,
9504 build2 (MEM_REF, field_t, tmp_ha,
9505 build_int_cst (field_ptr_t,
9506 (i *
9507 int_size_in_bytes (field_t)))),
9508 build1 (INDIRECT_REF, field_t, u));
9509 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
9510 }
9511
9512 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
9513 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
9514 }
9515
9516 COND_EXPR_ELSE (cond2) = t;
9517 addr = fold_convert (build_pointer_type (type), cond1);
9518 addr = build_va_arg_indirect_ref (addr);
9519
9520 if (indirect_p)
9521 addr = build_va_arg_indirect_ref (addr);
9522
9523 return addr;
9524}
9525
9526/* Implement TARGET_SETUP_INCOMING_VARARGS. */
9527
9528static void
ef4bddc2 9529aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
9530 tree type, int *pretend_size ATTRIBUTE_UNUSED,
9531 int no_rtl)
9532{
9533 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9534 CUMULATIVE_ARGS local_cum;
9535 int gr_saved, vr_saved;
9536
9537 /* The caller has advanced CUM up to, but not beyond, the last named
9538 argument. Advance a local copy of CUM past the last "real" named
9539 argument, to find out how many registers are left over. */
9540 local_cum = *cum;
9541 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
9542
 9543 /* Find out how many registers we need to save. */
9544 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
9545 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
9546
d5726973 9547 if (!TARGET_FLOAT)
43e9d192 9548 {
261fb553 9549 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
9550 vr_saved = 0;
9551 }
9552
9553 if (!no_rtl)
9554 {
9555 if (gr_saved > 0)
9556 {
9557 rtx ptr, mem;
9558
9559 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
9560 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
9561 - gr_saved * UNITS_PER_WORD);
9562 mem = gen_frame_mem (BLKmode, ptr);
9563 set_mem_alias_set (mem, get_varargs_alias_set ());
9564
9565 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
9566 mem, gr_saved);
9567 }
9568 if (vr_saved > 0)
9569 {
9570 /* We can't use move_block_from_reg, because it will use
9571 the wrong mode, storing D regs only. */
ef4bddc2 9572 machine_mode mode = TImode;
43e9d192
IB
9573 int off, i;
9574
9575 /* Set OFF to the offset from virtual_incoming_args_rtx of
9576 the first vector register. The VR save area lies below
9577 the GR one, and is aligned to 16 bytes. */
9578 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
9579 STACK_BOUNDARY / BITS_PER_UNIT);
9580 off -= vr_saved * UNITS_PER_VREG;
9581
9582 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
9583 {
9584 rtx ptr, mem;
9585
9586 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
9587 mem = gen_frame_mem (mode, ptr);
9588 set_mem_alias_set (mem, get_varargs_alias_set ());
9589 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
9590 off += UNITS_PER_VREG;
9591 }
9592 }
9593 }
9594
9595 /* We don't save the size into *PRETEND_SIZE because we want to avoid
9596 any complication of having crtl->args.pretend_args_size changed. */
8799637a 9597 cfun->machine->frame.saved_varargs_size
43e9d192
IB
9598 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
9599 STACK_BOUNDARY / BITS_PER_UNIT)
9600 + vr_saved * UNITS_PER_VREG);
9601}
9602
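/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  When the target has no
   floating-point or Advanced SIMD support the V registers cannot be used,
   so mark them as fixed and call-used.  */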
9603static void
9604aarch64_conditional_register_usage (void)
9605{
9606 int i;
9607 if (!TARGET_FLOAT)
9608 {
9609 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
9610 {
9611 fixed_regs[i] = 1;
9612 call_used_regs[i] = 1;
9613 }
9614 }
9615}
9616
9617/* Walk down the type tree of TYPE counting consecutive base elements.
9618 If *MODEP is VOIDmode, then set it to the first valid floating point
9619 type. If a non-floating point type is found, or if a floating point
9620 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
9621 otherwise return the count in the sub-tree. */
9622static int
ef4bddc2 9623aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 9624{
ef4bddc2 9625 machine_mode mode;
43e9d192
IB
9626 HOST_WIDE_INT size;
9627
9628 switch (TREE_CODE (type))
9629 {
9630 case REAL_TYPE:
9631 mode = TYPE_MODE (type);
9632 if (mode != DFmode && mode != SFmode && mode != TFmode)
9633 return -1;
9634
9635 if (*modep == VOIDmode)
9636 *modep = mode;
9637
9638 if (*modep == mode)
9639 return 1;
9640
9641 break;
9642
9643 case COMPLEX_TYPE:
9644 mode = TYPE_MODE (TREE_TYPE (type));
9645 if (mode != DFmode && mode != SFmode && mode != TFmode)
9646 return -1;
9647
9648 if (*modep == VOIDmode)
9649 *modep = mode;
9650
9651 if (*modep == mode)
9652 return 2;
9653
9654 break;
9655
9656 case VECTOR_TYPE:
9657 /* Use V2SImode and V4SImode as representatives of all 64-bit
9658 and 128-bit vector types. */
9659 size = int_size_in_bytes (type);
9660 switch (size)
9661 {
9662 case 8:
9663 mode = V2SImode;
9664 break;
9665 case 16:
9666 mode = V4SImode;
9667 break;
9668 default:
9669 return -1;
9670 }
9671
9672 if (*modep == VOIDmode)
9673 *modep = mode;
9674
9675 /* Vector modes are considered to be opaque: two vectors are
9676 equivalent for the purposes of being homogeneous aggregates
9677 if they are the same size. */
9678 if (*modep == mode)
9679 return 1;
9680
9681 break;
9682
9683 case ARRAY_TYPE:
9684 {
9685 int count;
9686 tree index = TYPE_DOMAIN (type);
9687
807e902e
KZ
9688 /* Can't handle incomplete types nor sizes that are not
9689 fixed. */
9690 if (!COMPLETE_TYPE_P (type)
9691 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
9692 return -1;
9693
9694 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
9695 if (count == -1
9696 || !index
9697 || !TYPE_MAX_VALUE (index)
cc269bb6 9698 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 9699 || !TYPE_MIN_VALUE (index)
cc269bb6 9700 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
9701 || count < 0)
9702 return -1;
9703
ae7e9ddd
RS
9704 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
9705 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
9706
9707 /* There must be no padding. */
807e902e 9708 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
9709 return -1;
9710
9711 return count;
9712 }
9713
9714 case RECORD_TYPE:
9715 {
9716 int count = 0;
9717 int sub_count;
9718 tree field;
9719
807e902e
KZ
9720 /* Can't handle incomplete types nor sizes that are not
9721 fixed. */
9722 if (!COMPLETE_TYPE_P (type)
9723 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
9724 return -1;
9725
9726 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9727 {
9728 if (TREE_CODE (field) != FIELD_DECL)
9729 continue;
9730
9731 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
9732 if (sub_count < 0)
9733 return -1;
9734 count += sub_count;
9735 }
9736
9737 /* There must be no padding. */
807e902e 9738 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
9739 return -1;
9740
9741 return count;
9742 }
9743
9744 case UNION_TYPE:
9745 case QUAL_UNION_TYPE:
9746 {
9747 /* These aren't very interesting except in a degenerate case. */
9748 int count = 0;
9749 int sub_count;
9750 tree field;
9751
807e902e
KZ
9752 /* Can't handle incomplete types nor sizes that are not
9753 fixed. */
9754 if (!COMPLETE_TYPE_P (type)
9755 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
9756 return -1;
9757
9758 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9759 {
9760 if (TREE_CODE (field) != FIELD_DECL)
9761 continue;
9762
9763 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
9764 if (sub_count < 0)
9765 return -1;
9766 count = count > sub_count ? count : sub_count;
9767 }
9768
9769 /* There must be no padding. */
807e902e 9770 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
9771 return -1;
9772
9773 return count;
9774 }
9775
9776 default:
9777 break;
9778 }
9779
9780 return -1;
9781}
9782
b6ec6215
KT
9783/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
9784 type as described in AAPCS64 \S 4.1.2.
9785
9786 See the comment above aarch64_composite_type_p for the notes on MODE. */
9787
9788static bool
9789aarch64_short_vector_p (const_tree type,
9790 machine_mode mode)
9791{
9792 HOST_WIDE_INT size = -1;
9793
9794 if (type && TREE_CODE (type) == VECTOR_TYPE)
9795 size = int_size_in_bytes (type);
9796 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9797 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
9798 size = GET_MODE_SIZE (mode);
9799
9800 return (size == 8 || size == 16);
9801}
9802
43e9d192
IB
9803/* Return TRUE if the type, as described by TYPE and MODE, is a composite
9804 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
9805 array types. The C99 floating-point complex types are also considered
9806 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
9807 types, which are GCC extensions and out of the scope of AAPCS64, are
9808 treated as composite types here as well.
9809
9810 Note that MODE itself is not sufficient in determining whether a type
9811 is such a composite type or not. This is because
9812 stor-layout.c:compute_record_mode may have already changed the MODE
9813 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
9814 structure with only one field may have its MODE set to the mode of the
9815 field. Also an integer mode whose size matches the size of the
9816 RECORD_TYPE type may be used to substitute the original mode
9817 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
9818 solely relied on. */
9819
9820static bool
9821aarch64_composite_type_p (const_tree type,
ef4bddc2 9822 machine_mode mode)
43e9d192 9823{
b6ec6215
KT
9824 if (aarch64_short_vector_p (type, mode))
9825 return false;
9826
43e9d192
IB
9827 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
9828 return true;
9829
9830 if (mode == BLKmode
9831 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
9832 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
9833 return true;
9834
9835 return false;
9836}
9837
43e9d192
IB
9838/* Return TRUE if an argument, whose type is described by TYPE and MODE,
9839 shall be passed or returned in simd/fp register(s) (providing these
9840 parameter passing registers are available).
9841
9842 Upon successful return, *COUNT returns the number of needed registers,
 9843 *BASE_MODE returns the mode of the individual register and when IS_HA
9844 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
9845 floating-point aggregate or a homogeneous short-vector aggregate. */
9846
9847static bool
ef4bddc2 9848aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 9849 const_tree type,
ef4bddc2 9850 machine_mode *base_mode,
43e9d192
IB
9851 int *count,
9852 bool *is_ha)
9853{
ef4bddc2 9854 machine_mode new_mode = VOIDmode;
43e9d192
IB
9855 bool composite_p = aarch64_composite_type_p (type, mode);
9856
9857 if (is_ha != NULL) *is_ha = false;
9858
9859 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
9860 || aarch64_short_vector_p (type, mode))
9861 {
9862 *count = 1;
9863 new_mode = mode;
9864 }
9865 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
9866 {
9867 if (is_ha != NULL) *is_ha = true;
9868 *count = 2;
9869 new_mode = GET_MODE_INNER (mode);
9870 }
9871 else if (type && composite_p)
9872 {
9873 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
9874
9875 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
9876 {
9877 if (is_ha != NULL) *is_ha = true;
9878 *count = ag_count;
9879 }
9880 else
9881 return false;
9882 }
9883 else
9884 return false;
9885
9886 *base_mode = new_mode;
9887 return true;
9888}
9889
9890/* Implement TARGET_STRUCT_VALUE_RTX. */
9891
9892static rtx
9893aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
9894 int incoming ATTRIBUTE_UNUSED)
9895{
9896 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
9897}
9898
9899/* Implements target hook vector_mode_supported_p. */
9900static bool
ef4bddc2 9901aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192
IB
9902{
9903 if (TARGET_SIMD
9904 && (mode == V4SImode || mode == V8HImode
9905 || mode == V16QImode || mode == V2DImode
9906 || mode == V2SImode || mode == V4HImode
9907 || mode == V8QImode || mode == V2SFmode
ad7d90cc 9908 || mode == V4SFmode || mode == V2DFmode
71a11456 9909 || mode == V4HFmode || mode == V8HFmode
ad7d90cc 9910 || mode == V1DFmode))
43e9d192
IB
9911 return true;
9912
9913 return false;
9914}
9915
b7342d25
IB
9916/* Return appropriate SIMD container
9917 for MODE within a vector of WIDTH bits. */
ef4bddc2
RS
9918static machine_mode
9919aarch64_simd_container_mode (machine_mode mode, unsigned width)
43e9d192 9920{
b7342d25 9921 gcc_assert (width == 64 || width == 128);
43e9d192 9922 if (TARGET_SIMD)
b7342d25
IB
9923 {
9924 if (width == 128)
9925 switch (mode)
9926 {
9927 case DFmode:
9928 return V2DFmode;
9929 case SFmode:
9930 return V4SFmode;
9931 case SImode:
9932 return V4SImode;
9933 case HImode:
9934 return V8HImode;
9935 case QImode:
9936 return V16QImode;
9937 case DImode:
9938 return V2DImode;
9939 default:
9940 break;
9941 }
9942 else
9943 switch (mode)
9944 {
9945 case SFmode:
9946 return V2SFmode;
9947 case SImode:
9948 return V2SImode;
9949 case HImode:
9950 return V4HImode;
9951 case QImode:
9952 return V8QImode;
9953 default:
9954 break;
9955 }
9956 }
43e9d192
IB
9957 return word_mode;
9958}
9959
b7342d25 9960/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2
RS
9961static machine_mode
9962aarch64_preferred_simd_mode (machine_mode mode)
b7342d25
IB
9963{
9964 return aarch64_simd_container_mode (mode, 128);
9965}
9966
3b357264
JG
9967/* Return the bitmask of possible vector sizes for the vectorizer
9968 to iterate over. */
9969static unsigned int
9970aarch64_autovectorize_vector_sizes (void)
9971{
9972 return (16 | 8);
9973}
9974
ac2b960f
YZ
9975/* Implement TARGET_MANGLE_TYPE. */
9976
6f549691 9977static const char *
ac2b960f
YZ
9978aarch64_mangle_type (const_tree type)
9979{
9980 /* The AArch64 ABI documents say that "__va_list" has to be
 9981 mangled as if it is in the "std" namespace. */
9982 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
9983 return "St9__va_list";
9984
c2ec330c
AL
9985 /* Half-precision float. */
9986 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
9987 return "Dh";
9988
f9d53c27
TB
9989 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
9990 builtin types. */
9991 if (TYPE_NAME (type) != NULL)
9992 return aarch64_mangle_builtin_type (type);
c6fc9e43 9993
ac2b960f
YZ
9994 /* Use the default mangling. */
9995 return NULL;
9996}
9997
8baff86e
KT
9998
9999/* Return true if the rtx_insn contains a MEM RTX somewhere
10000 in it. */
75cf1494
KT
10001
10002static bool
8baff86e 10003has_memory_op (rtx_insn *mem_insn)
75cf1494 10004{
8baff86e
KT
10005 subrtx_iterator::array_type array;
10006 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
10007 if (MEM_P (*iter))
10008 return true;
10009
10010 return false;
75cf1494
KT
10011}
10012
10013/* Find the first rtx_insn before insn that will generate an assembly
10014 instruction. */
10015
10016static rtx_insn *
10017aarch64_prev_real_insn (rtx_insn *insn)
10018{
10019 if (!insn)
10020 return NULL;
10021
10022 do
10023 {
10024 insn = prev_real_insn (insn);
10025 }
10026 while (insn && recog_memoized (insn) < 0);
10027
10028 return insn;
10029}
10030
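/* Return true if instruction type T1 is one of the multiply-accumulate
   types listed in MLATYPES below; these are the types checked by the
   Cortex-A53 erratum 835769 workaround.  */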
10031static bool
10032is_madd_op (enum attr_type t1)
10033{
10034 unsigned int i;
10035 /* A number of these may be AArch32 only. */
10036 enum attr_type mlatypes[] = {
10037 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
10038 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
10039 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
10040 };
10041
10042 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
10043 {
10044 if (t1 == mlatypes[i])
10045 return true;
10046 }
10047
10048 return false;
10049}
10050
10051/* Check if there is a register dependency between a load and the insn
10052 for which we hold recog_data. */
10053
10054static bool
10055dep_between_memop_and_curr (rtx memop)
10056{
10057 rtx load_reg;
10058 int opno;
10059
8baff86e 10060 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
10061
10062 if (!REG_P (SET_DEST (memop)))
10063 return false;
10064
10065 load_reg = SET_DEST (memop);
8baff86e 10066 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
10067 {
10068 rtx operand = recog_data.operand[opno];
10069 if (REG_P (operand)
10070 && reg_overlap_mentioned_p (load_reg, operand))
10071 return true;
10072
10073 }
10074 return false;
10075}
10076
8baff86e
KT
10077
10078/* When working around the Cortex-A53 erratum 835769,
10079 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
10080 instruction and has a preceding memory instruction such that a NOP
10081 should be inserted between them. */
10082
75cf1494
KT
10083bool
10084aarch64_madd_needs_nop (rtx_insn* insn)
10085{
10086 enum attr_type attr_type;
10087 rtx_insn *prev;
10088 rtx body;
10089
b32c1043 10090 if (!TARGET_FIX_ERR_A53_835769)
75cf1494
KT
10091 return false;
10092
10093 if (recog_memoized (insn) < 0)
10094 return false;
10095
10096 attr_type = get_attr_type (insn);
10097 if (!is_madd_op (attr_type))
10098 return false;
10099
10100 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
10101 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
10102 Restore recog state to INSN to avoid state corruption. */
10103 extract_constrain_insn_cached (insn);
10104
8baff86e 10105 if (!prev || !has_memory_op (prev))
75cf1494
KT
10106 return false;
10107
10108 body = single_set (prev);
10109
10110 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
10111 it and the DImode madd, emit a NOP between them. If body is NULL then we
10112 have a complex memory operation, probably a load/store pair.
10113 Be conservative for now and emit a NOP. */
10114 if (GET_MODE (recog_data.operand[0]) == DImode
10115 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
10116 return true;
10117
10118 return false;
10119
10120}
10121
8baff86e
KT
10122
10123/* Implement FINAL_PRESCAN_INSN. */
10124
75cf1494
KT
10125void
10126aarch64_final_prescan_insn (rtx_insn *insn)
10127{
10128 if (aarch64_madd_needs_nop (insn))
10129 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
10130}
10131
10132
43e9d192 10133/* Return the equivalent letter for size. */
81c2dfb9 10134static char
43e9d192
IB
10135sizetochar (int size)
10136{
10137 switch (size)
10138 {
10139 case 64: return 'd';
10140 case 32: return 's';
10141 case 16: return 'h';
10142 case 8 : return 'b';
10143 default: gcc_unreachable ();
10144 }
10145}
10146
3520f7cc
JG
10147/* Return true iff x is a uniform vector of floating-point
10148 constants, and the constant can be represented in
10149 quarter-precision form. Note, as aarch64_float_const_representable
10150 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
10151static bool
10152aarch64_vect_float_const_representable_p (rtx x)
10153{
92695fbb
RS
10154 rtx elt;
10155 return (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
10156 && const_vec_duplicate_p (x, &elt)
10157 && aarch64_float_const_representable_p (elt));
3520f7cc
JG
10158}
10159
d8edd899 10160/* Return true if OP is a valid AdvSIMD immediate for MODE, checking the
   bitwise-inverted value when INVERSE is set; if INFO is nonnull, fill it
   in with the encoding details.  Return false for an invalid immediate. */
3ea63f60 10161bool
ef4bddc2 10162aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
48063b9d 10163 struct simd_immediate_info *info)
43e9d192
IB
10164{
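  /* The CHECK macro below tests one candidate AdvSIMD immediate encoding:
     every STRIDE-th group of bytes in BYTES must satisfy TEST; on a match
     it records the immediate class, element size, shift amount and whether
     the value is an inverted (MVN) form, then breaks out of the loop.  */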
10165#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
10166 matches = 1; \
10167 for (i = 0; i < idx; i += (STRIDE)) \
10168 if (!(TEST)) \
10169 matches = 0; \
10170 if (matches) \
10171 { \
10172 immtype = (CLASS); \
10173 elsize = (ELSIZE); \
43e9d192
IB
10174 eshift = (SHIFT); \
10175 emvn = (NEG); \
10176 break; \
10177 }
10178
10179 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
cb5ca315 10180 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
43e9d192 10181 unsigned char bytes[16];
43e9d192
IB
10182 int immtype = -1, matches;
10183 unsigned int invmask = inverse ? 0xff : 0;
10184 int eshift, emvn;
10185
43e9d192 10186 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 10187 {
81c2dfb9
IB
10188 if (! (aarch64_simd_imm_zero_p (op, mode)
10189 || aarch64_vect_float_const_representable_p (op)))
d8edd899 10190 return false;
3520f7cc 10191
48063b9d
IB
10192 if (info)
10193 {
10194 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 10195 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
10196 info->mvn = false;
10197 info->shift = 0;
10198 }
3520f7cc 10199
d8edd899 10200 return true;
3520f7cc 10201 }
43e9d192
IB
10202
10203 /* Splat vector constant out into a byte vector. */
10204 for (i = 0; i < n_elts; i++)
10205 {
4b1e108c
AL
10206 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
10207 it must be laid out in the vector register in reverse order. */
10208 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192
IB
10209 unsigned HOST_WIDE_INT elpart;
10210 unsigned int part, parts;
10211
4aa81c2e 10212 if (CONST_INT_P (el))
43e9d192
IB
10213 {
10214 elpart = INTVAL (el);
10215 parts = 1;
10216 }
10217 else if (GET_CODE (el) == CONST_DOUBLE)
10218 {
10219 elpart = CONST_DOUBLE_LOW (el);
10220 parts = 2;
10221 }
10222 else
10223 gcc_unreachable ();
10224
10225 for (part = 0; part < parts; part++)
10226 {
10227 unsigned int byte;
10228 for (byte = 0; byte < innersize; byte++)
10229 {
10230 bytes[idx++] = (elpart & 0xff) ^ invmask;
10231 elpart >>= BITS_PER_UNIT;
10232 }
10233 if (GET_CODE (el) == CONST_DOUBLE)
10234 elpart = CONST_DOUBLE_HIGH (el);
10235 }
10236 }
10237
10238 /* Sanity check. */
10239 gcc_assert (idx == GET_MODE_SIZE (mode));
10240
10241 do
10242 {
10243 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
10244 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
10245
10246 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
10247 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
10248
10249 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
10250 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
10251
10252 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
10253 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
10254
10255 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
10256
10257 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
10258
10259 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
10260 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
10261
10262 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
10263 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
10264
10265 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
10266 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
10267
10268 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
10269 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
10270
10271 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
10272
10273 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
10274
10275 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 10276 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
10277
10278 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 10279 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
10280
10281 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 10282 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
10283
10284 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 10285 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
10286
10287 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
10288
10289 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
10290 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
10291 }
10292 while (0);
10293
e4f0f84d 10294 if (immtype == -1)
d8edd899 10295 return false;
43e9d192 10296
48063b9d 10297 if (info)
43e9d192 10298 {
48063b9d 10299 info->element_width = elsize;
48063b9d
IB
10300 info->mvn = emvn != 0;
10301 info->shift = eshift;
10302
43e9d192
IB
10303 unsigned HOST_WIDE_INT imm = 0;
10304
e4f0f84d
TB
10305 if (immtype >= 12 && immtype <= 15)
10306 info->msl = true;
10307
43e9d192
IB
10308 /* Un-invert bytes of recognized vector, if necessary. */
10309 if (invmask != 0)
10310 for (i = 0; i < idx; i++)
10311 bytes[i] ^= invmask;
10312
10313 if (immtype == 17)
10314 {
10315 /* FIXME: Broken on 32-bit H_W_I hosts. */
10316 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
10317
10318 for (i = 0; i < 8; i++)
10319 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
10320 << (i * BITS_PER_UNIT);
10321
43e9d192 10322
48063b9d
IB
10323 info->value = GEN_INT (imm);
10324 }
10325 else
10326 {
10327 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
10328 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
10329
10330 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
10331 generic constants. */
10332 if (info->mvn)
43e9d192 10333 imm = ~imm;
48063b9d
IB
10334 imm = (imm >> info->shift) & 0xff;
10335 info->value = GEN_INT (imm);
10336 }
43e9d192
IB
10337 }
10338
48063b9d 10339 return true;
43e9d192
IB
10340#undef CHECK
10341}
10342
43e9d192
IB
 10343/* Check that immediate shift constants are within range. */
10344bool
ef4bddc2 10345aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
10346{
10347 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
10348 if (left)
ddeabd3e 10349 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 10350 else
ddeabd3e 10351 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
10352}
10353
3520f7cc
JG
10354/* Return true if X is a uniform vector where all elements
10355 are either the floating-point constant 0.0 or the
10356 integer constant 0. */
43e9d192 10357bool
ef4bddc2 10358aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
43e9d192 10359{
3520f7cc 10360 return x == CONST0_RTX (mode);
43e9d192
IB
10361}
10362
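/* Return true if the 64-bit immediate X is made up solely of 0x00 and 0xff
   bytes (e.g. 0x00ff00ff00ff00ff), and so is encodable as a 64-bit scalar
   MOVI immediate.  */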
10363bool
ef4bddc2 10364aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
43e9d192
IB
10365{
10366 HOST_WIDE_INT imm = INTVAL (x);
10367 int i;
10368
10369 for (i = 0; i < 8; i++)
10370 {
10371 unsigned int byte = imm & 0xff;
10372 if (byte != 0xff && byte != 0)
10373 return false;
10374 imm >>= 8;
10375 }
10376
10377 return true;
10378}
10379
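/* Return true if X is a valid source operand for a move of mode MODE in
   symbol context CONTEXT: a HIGH of a valid symbol reference, any
   CONST_INT, a DImode constant symbolic address, or an expression that
   classifies as SYMBOL_TINY_ABSOLUTE.  */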
83f8c414
CSS
10380bool
10381aarch64_mov_operand_p (rtx x,
a5350ddc 10382 enum aarch64_symbol_context context,
ef4bddc2 10383 machine_mode mode)
83f8c414 10384{
83f8c414
CSS
10385 if (GET_CODE (x) == HIGH
10386 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
10387 return true;
10388
82614948 10389 if (CONST_INT_P (x))
83f8c414
CSS
10390 return true;
10391
10392 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
10393 return true;
10394
a5350ddc
CSS
10395 return aarch64_classify_symbolic_expression (x, context)
10396 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
10397}
10398
43e9d192
IB
10399/* Return a const_int vector of VAL. */
10400rtx
ef4bddc2 10401aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
43e9d192
IB
10402{
10403 int nunits = GET_MODE_NUNITS (mode);
10404 rtvec v = rtvec_alloc (nunits);
10405 int i;
10406
10407 for (i=0; i < nunits; i++)
10408 RTVEC_ELT (v, i) = GEN_INT (val);
10409
10410 return gen_rtx_CONST_VECTOR (mode, v);
10411}
10412
051d0e2f
SN
10413/* Check OP is a legal scalar immediate for the MOVI instruction. */
10414
10415bool
ef4bddc2 10416aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
051d0e2f 10417{
ef4bddc2 10418 machine_mode vmode;
051d0e2f
SN
10419
10420 gcc_assert (!VECTOR_MODE_P (mode));
10421 vmode = aarch64_preferred_simd_mode (mode);
10422 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 10423 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
10424}
10425
988fa693
JG
10426/* Construct and return a PARALLEL RTX vector with elements numbering the
10427 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
10428 the vector - from the perspective of the architecture. This does not
10429 line up with GCC's perspective on lane numbers, so we end up with
10430 different masks depending on our target endian-ness. The diagram
10431 below may help. We must draw the distinction when building masks
10432 which select one half of the vector. An instruction selecting
10433 architectural low-lanes for a big-endian target, must be described using
10434 a mask selecting GCC high-lanes.
10435
10436 Big-Endian Little-Endian
10437
10438GCC 0 1 2 3 3 2 1 0
10439 | x | x | x | x | | x | x | x | x |
10440Architecture 3 2 1 0 3 2 1 0
10441
10442Low Mask: { 2, 3 } { 0, 1 }
10443High Mask: { 0, 1 } { 2, 3 }
10444*/
10445
43e9d192 10446rtx
ef4bddc2 10447aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
43e9d192
IB
10448{
10449 int nunits = GET_MODE_NUNITS (mode);
10450 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
10451 int high_base = nunits / 2;
10452 int low_base = 0;
10453 int base;
43e9d192
IB
10454 rtx t1;
10455 int i;
10456
988fa693
JG
10457 if (BYTES_BIG_ENDIAN)
10458 base = high ? low_base : high_base;
10459 else
10460 base = high ? high_base : low_base;
10461
10462 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
10463 RTVEC_ELT (v, i) = GEN_INT (base + i);
10464
10465 t1 = gen_rtx_PARALLEL (mode, v);
10466 return t1;
10467}
10468
988fa693
JG
10469/* Check OP for validity as a PARALLEL RTX vector with elements
10470 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
10471 from the perspective of the architecture. See the diagram above
10472 aarch64_simd_vect_par_cnst_half for more details. */
10473
10474bool
ef4bddc2 10475aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
10476 bool high)
10477{
10478 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
10479 HOST_WIDE_INT count_op = XVECLEN (op, 0);
10480 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
10481 int i = 0;
10482
10483 if (!VECTOR_MODE_P (mode))
10484 return false;
10485
10486 if (count_op != count_ideal)
10487 return false;
10488
10489 for (i = 0; i < count_ideal; i++)
10490 {
10491 rtx elt_op = XVECEXP (op, 0, i);
10492 rtx elt_ideal = XVECEXP (ideal, 0, i);
10493
4aa81c2e 10494 if (!CONST_INT_P (elt_op)
988fa693
JG
10495 || INTVAL (elt_ideal) != INTVAL (elt_op))
10496 return false;
10497 }
10498 return true;
10499}
10500
43e9d192
IB
10501/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
10502 HIGH (exclusive). */
10503void
46ed6024
CB
10504aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
10505 const_tree exp)
43e9d192
IB
10506{
10507 HOST_WIDE_INT lane;
4aa81c2e 10508 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
10509 lane = INTVAL (operand);
10510
10511 if (lane < low || lane >= high)
46ed6024
CB
10512 {
10513 if (exp)
cf0c27ef 10514 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 10515 else
cf0c27ef 10516 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 10517 }
43e9d192
IB
10518}
10519
43e9d192
IB
10520/* Return TRUE if OP is a valid vector addressing mode. */
10521bool
10522aarch64_simd_mem_operand_p (rtx op)
10523{
10524 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 10525 || REG_P (XEXP (op, 0)));
43e9d192
IB
10526}
10527
2d8c6dc1
AH
10528/* Emit a register copy from operand to operand, taking care not to
10529 early-clobber source registers in the process.
43e9d192 10530
2d8c6dc1
AH
10531 COUNT is the number of components into which the copy needs to be
10532 decomposed. */
43e9d192 10533void
2d8c6dc1
AH
10534aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
10535 unsigned int count)
43e9d192
IB
10536{
10537 unsigned int i;
2d8c6dc1
AH
10538 int rdest = REGNO (operands[0]);
10539 int rsrc = REGNO (operands[1]);
43e9d192
IB
10540
10541 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
10542 || rdest < rsrc)
10543 for (i = 0; i < count; i++)
10544 emit_move_insn (gen_rtx_REG (mode, rdest + i),
10545 gen_rtx_REG (mode, rsrc + i));
43e9d192 10546 else
2d8c6dc1
AH
10547 for (i = 0; i < count; i++)
10548 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
10549 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
10550}
10551
10552/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
10553 one of VSTRUCT modes: OI, CI or XI. */
10554int
647d790d 10555aarch64_simd_attr_length_move (rtx_insn *insn)
43e9d192 10556{
ef4bddc2 10557 machine_mode mode;
43e9d192
IB
10558
10559 extract_insn_cached (insn);
10560
10561 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
10562 {
10563 mode = GET_MODE (recog_data.operand[0]);
10564 switch (mode)
10565 {
10566 case OImode:
10567 return 8;
10568 case CImode:
10569 return 12;
10570 case XImode:
10571 return 16;
10572 default:
10573 gcc_unreachable ();
10574 }
10575 }
10576 return 4;
10577}
10578
668046d1 10579/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
6ec0e5b9 10580 one of VSTRUCT modes: OI, CI, or XI. */
668046d1
DS
10581int
10582aarch64_simd_attr_length_rglist (enum machine_mode mode)
10583{
10584 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
10585}
10586
db0253a4
TB
10587/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
10588 alignment of a vector to 128 bits. */
10589static HOST_WIDE_INT
10590aarch64_simd_vector_alignment (const_tree type)
10591{
9439e9a1 10592 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
10593 return MIN (align, 128);
10594}
10595
10596/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
10597static bool
10598aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
10599{
10600 if (is_packed)
10601 return false;
10602
10603 /* We guarantee alignment for vectors up to 128-bits. */
10604 if (tree_int_cst_compare (TYPE_SIZE (type),
10605 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
10606 return false;
10607
10608 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
10609 return true;
10610}
10611
4369c11e
TB
10612/* If VALS is a vector constant that can be loaded into a register
10613 using DUP, generate instructions to do so and return an RTX to
10614 assign to the register. Otherwise return NULL_RTX. */
10615static rtx
10616aarch64_simd_dup_constant (rtx vals)
10617{
ef4bddc2
RS
10618 machine_mode mode = GET_MODE (vals);
10619 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 10620 rtx x;
4369c11e 10621
92695fbb 10622 if (!const_vec_duplicate_p (vals, &x))
4369c11e
TB
10623 return NULL_RTX;
10624
10625 /* We can load this constant by using DUP and a constant in a
10626 single ARM register. This will be cheaper than a vector
10627 load. */
92695fbb 10628 x = copy_to_mode_reg (inner_mode, x);
4369c11e
TB
10629 return gen_rtx_VEC_DUPLICATE (mode, x);
10630}
10631
10632
10633/* Generate code to load VALS, which is a PARALLEL containing only
10634 constants (for vec_init) or CONST_VECTOR, efficiently into a
10635 register. Returns an RTX to copy into the register, or NULL_RTX
10636 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 10637static rtx
4369c11e
TB
10638aarch64_simd_make_constant (rtx vals)
10639{
ef4bddc2 10640 machine_mode mode = GET_MODE (vals);
4369c11e
TB
10641 rtx const_dup;
10642 rtx const_vec = NULL_RTX;
10643 int n_elts = GET_MODE_NUNITS (mode);
10644 int n_const = 0;
10645 int i;
10646
10647 if (GET_CODE (vals) == CONST_VECTOR)
10648 const_vec = vals;
10649 else if (GET_CODE (vals) == PARALLEL)
10650 {
10651 /* A CONST_VECTOR must contain only CONST_INTs and
10652 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
10653 Only store valid constants in a CONST_VECTOR. */
10654 for (i = 0; i < n_elts; ++i)
10655 {
10656 rtx x = XVECEXP (vals, 0, i);
10657 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
10658 n_const++;
10659 }
10660 if (n_const == n_elts)
10661 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
10662 }
10663 else
10664 gcc_unreachable ();
10665
10666 if (const_vec != NULL_RTX
48063b9d 10667 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
10668 /* Load using MOVI/MVNI. */
10669 return const_vec;
10670 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
10671 /* Loaded using DUP. */
10672 return const_dup;
10673 else if (const_vec != NULL_RTX)
10674 /* Load from constant pool. We can not take advantage of single-cycle
10675 LD1 because we need a PC-relative addressing mode. */
10676 return const_vec;
10677 else
10678 /* A PARALLEL containing something not valid inside CONST_VECTOR.
10679 We can not construct an initializer. */
10680 return NULL_RTX;
10681}
10682
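/* Expand a vector initializer: set the vector register TARGET to the
   contents of VALS, using an immediate or constant-pool load when all
   elements are constant, a DUP when all elements are the same, individual
   element inserts when most elements are constant, or a build in memory
   followed by a full-vector load as the fallback.  */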
10683void
10684aarch64_expand_vector_init (rtx target, rtx vals)
10685{
ef4bddc2
RS
10686 machine_mode mode = GET_MODE (target);
10687 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 10688 int n_elts = GET_MODE_NUNITS (mode);
8b66a2d4
AL
10689 int n_var = 0;
10690 rtx any_const = NULL_RTX;
4369c11e 10691 bool all_same = true;
4369c11e 10692
8b66a2d4 10693 for (int i = 0; i < n_elts; ++i)
4369c11e 10694 {
8b66a2d4 10695 rtx x = XVECEXP (vals, 0, i);
4369c11e 10696 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8b66a2d4
AL
10697 ++n_var;
10698 else
10699 any_const = x;
4369c11e 10700
8b66a2d4 10701 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
4369c11e
TB
10702 all_same = false;
10703 }
10704
10705 if (n_var == 0)
10706 {
10707 rtx constant = aarch64_simd_make_constant (vals);
10708 if (constant != NULL_RTX)
10709 {
10710 emit_move_insn (target, constant);
10711 return;
10712 }
10713 }
10714
10715 /* Splat a single non-constant element if we can. */
10716 if (all_same)
10717 {
8b66a2d4 10718 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
4369c11e
TB
10719 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
10720 return;
10721 }
10722
8b66a2d4
AL
10723 /* Half the fields (or less) are non-constant. Load constant then overwrite
10724 varying fields. Hope that this is more efficient than using the stack. */
10725 if (n_var <= n_elts/2)
4369c11e
TB
10726 {
10727 rtx copy = copy_rtx (vals);
4369c11e 10728
8b66a2d4
AL
10729 /* Load constant part of vector. We really don't care what goes into the
10730 parts we will overwrite, but we're more likely to be able to load the
10731 constant efficiently if it has fewer, larger, repeating parts
10732 (see aarch64_simd_valid_immediate). */
10733 for (int i = 0; i < n_elts; i++)
10734 {
10735 rtx x = XVECEXP (vals, 0, i);
10736 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
10737 continue;
10738 rtx subst = any_const;
10739 for (int bit = n_elts / 2; bit > 0; bit /= 2)
10740 {
10741 /* Look in the copied vector, as more elements are const. */
10742 rtx test = XVECEXP (copy, 0, i ^ bit);
10743 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
10744 {
10745 subst = test;
10746 break;
10747 }
10748 }
10749 XVECEXP (copy, 0, i) = subst;
10750 }
4369c11e
TB
10751 aarch64_expand_vector_init (target, copy);
10752
8b66a2d4
AL
10753 /* Insert variables. */
10754 enum insn_code icode = optab_handler (vec_set_optab, mode);
4369c11e 10755 gcc_assert (icode != CODE_FOR_nothing);
8b66a2d4
AL
10756
10757 for (int i = 0; i < n_elts; i++)
10758 {
10759 rtx x = XVECEXP (vals, 0, i);
10760 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
10761 continue;
10762 x = copy_to_mode_reg (inner_mode, x);
10763 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
10764 }
4369c11e
TB
10765 return;
10766 }
10767
10768 /* Construct the vector in memory one field at a time
10769 and load the whole vector. */
8b66a2d4
AL
10770 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
10771 for (int i = 0; i < n_elts; i++)
4369c11e
TB
10772 emit_move_insn (adjust_address_nv (mem, inner_mode,
10773 i * GET_MODE_SIZE (inner_mode)),
10774 XVECEXP (vals, 0, i));
10775 emit_move_insn (target, mem);
10776
10777}
10778
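/* Implement TARGET_SHIFT_TRUNCATION_MASK.  Shift counts are not truncated
   for vector and vector-struct modes, so return 0 for those; otherwise
   return the usual GET_MODE_BITSIZE (mode) - 1 mask.  */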
43e9d192 10779static unsigned HOST_WIDE_INT
ef4bddc2 10780aarch64_shift_truncation_mask (machine_mode mode)
43e9d192
IB
10781{
10782 return
10783 (aarch64_vector_mode_supported_p (mode)
10784 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
10785}
10786
10787#ifndef TLS_SECTION_ASM_FLAG
10788#define TLS_SECTION_ASM_FLAG 'T'
10789#endif
10790
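/* Output the assembly directive that switches to the ELF section NAME,
   translating FLAGS into the section flag characters and, where required,
   emitting the section type, entity size and COMDAT group.  */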
10791void
10792aarch64_elf_asm_named_section (const char *name, unsigned int flags,
10793 tree decl ATTRIBUTE_UNUSED)
10794{
10795 char flagchars[10], *f = flagchars;
10796
10797 /* If we have already declared this section, we can use an
10798 abbreviated form to switch back to it -- unless this section is
10799 part of a COMDAT groups, in which case GAS requires the full
10800 declaration every time. */
10801 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
10802 && (flags & SECTION_DECLARED))
10803 {
10804 fprintf (asm_out_file, "\t.section\t%s\n", name);
10805 return;
10806 }
10807
10808 if (!(flags & SECTION_DEBUG))
10809 *f++ = 'a';
10810 if (flags & SECTION_WRITE)
10811 *f++ = 'w';
10812 if (flags & SECTION_CODE)
10813 *f++ = 'x';
10814 if (flags & SECTION_SMALL)
10815 *f++ = 's';
10816 if (flags & SECTION_MERGE)
10817 *f++ = 'M';
10818 if (flags & SECTION_STRINGS)
10819 *f++ = 'S';
10820 if (flags & SECTION_TLS)
10821 *f++ = TLS_SECTION_ASM_FLAG;
10822 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
10823 *f++ = 'G';
10824 *f = '\0';
10825
10826 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
10827
10828 if (!(flags & SECTION_NOTYPE))
10829 {
10830 const char *type;
10831 const char *format;
10832
10833 if (flags & SECTION_BSS)
10834 type = "nobits";
10835 else
10836 type = "progbits";
10837
10838#ifdef TYPE_OPERAND_FMT
10839 format = "," TYPE_OPERAND_FMT;
10840#else
10841 format = ",@%s";
10842#endif
10843
10844 fprintf (asm_out_file, format, type);
10845
10846 if (flags & SECTION_ENTSIZE)
10847 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
10848 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
10849 {
10850 if (TREE_CODE (decl) == IDENTIFIER_NODE)
10851 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
10852 else
10853 fprintf (asm_out_file, ",%s,comdat",
10854 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
10855 }
10856 }
10857
10858 putc ('\n', asm_out_file);
10859}
10860
10861/* Select a format to encode pointers in exception handling data. */
10862int
10863aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
10864{
10865 int type;
10866 switch (aarch64_cmodel)
10867 {
10868 case AARCH64_CMODEL_TINY:
10869 case AARCH64_CMODEL_TINY_PIC:
10870 case AARCH64_CMODEL_SMALL:
10871 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 10872 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
10873 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
10874 for everything. */
10875 type = DW_EH_PE_sdata4;
10876 break;
10877 default:
10878 /* No assumptions here. 8-byte relocs required. */
10879 type = DW_EH_PE_sdata8;
10880 break;
10881 }
10882 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
10883}
10884
361fb3ee
KT
10885/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
10886 by the function fndecl. */
10887
10888void
10889aarch64_declare_function_name (FILE *stream, const char* name,
10890 tree fndecl)
10891{
10892 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
10893
10894 struct cl_target_option *targ_options;
10895 if (target_parts)
10896 targ_options = TREE_TARGET_OPTION (target_parts);
10897 else
10898 targ_options = TREE_TARGET_OPTION (target_option_current_node);
10899 gcc_assert (targ_options);
10900
10901 const struct processor *this_arch
10902 = aarch64_get_arch (targ_options->x_explicit_arch);
10903
054b4005
JG
10904 unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
10905 std::string extension
10906 = aarch64_get_extension_string_for_isa_flags (isa_flags);
10907 asm_fprintf (asm_out_file, "\t.arch %s%s\n",
10908 this_arch->name, extension.c_str ());
361fb3ee
KT
10909
10910 /* Print the cpu name we're tuning for in the comments, might be
10911 useful to readers of the generated asm. */
10912
10913 const struct processor *this_tune
10914 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
10915
10916 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
10917 this_tune->name);
10918
10919 /* Don't forget the type directive for ELF. */
10920 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
10921 ASM_OUTPUT_LABEL (stream, name);
10922}
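/* For illustration only: compiling a function foo with something like
   -march=armv8-a+crc -mtune=cortex-a57 would make the hook above emit
   roughly

	.arch armv8-a+crc
	// .tune cortex-a57
	.type	foo, %function
   foo:

   with the exact extension string depending on the ISA flags in force.  */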
10923
0462169c
SN
10924/* Emit load exclusive. */
10925
10926static void
ef4bddc2 10927aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
10928 rtx mem, rtx model_rtx)
10929{
10930 rtx (*gen) (rtx, rtx, rtx);
10931
10932 switch (mode)
10933 {
10934 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
10935 case HImode: gen = gen_aarch64_load_exclusivehi; break;
10936 case SImode: gen = gen_aarch64_load_exclusivesi; break;
10937 case DImode: gen = gen_aarch64_load_exclusivedi; break;
10938 default:
10939 gcc_unreachable ();
10940 }
10941
10942 emit_insn (gen (rval, mem, model_rtx));
10943}
10944
10945/* Emit store exclusive. */
10946
10947static void
ef4bddc2 10948aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
10949 rtx rval, rtx mem, rtx model_rtx)
10950{
10951 rtx (*gen) (rtx, rtx, rtx, rtx);
10952
10953 switch (mode)
10954 {
10955 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
10956 case HImode: gen = gen_aarch64_store_exclusivehi; break;
10957 case SImode: gen = gen_aarch64_store_exclusivesi; break;
10958 case DImode: gen = gen_aarch64_store_exclusivedi; break;
10959 default:
10960 gcc_unreachable ();
10961 }
10962
10963 emit_insn (gen (bval, rval, mem, model_rtx));
10964}
10965
10966/* Emit the jump pattern INSN and mark it as very unlikely to be taken. */
10967
10968static void
10969aarch64_emit_unlikely_jump (rtx insn)
10970{
e5af9ddd 10971 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c
SN
10972
10973 insn = emit_jump_insn (insn);
e5af9ddd 10974 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
0462169c
SN
10975}
10976
10977/* Expand a compare and swap pattern. */
10978
10979void
10980aarch64_expand_compare_and_swap (rtx operands[])
10981{
10982 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
ef4bddc2 10983 machine_mode mode, cmp_mode;
b0770c0f
MW
10984 typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
10985 int idx;
10986 gen_cas_fn gen;
10987 const gen_cas_fn split_cas[] =
10988 {
10989 gen_aarch64_compare_and_swapqi,
10990 gen_aarch64_compare_and_swaphi,
10991 gen_aarch64_compare_and_swapsi,
10992 gen_aarch64_compare_and_swapdi
10993 };
10994 const gen_cas_fn atomic_cas[] =
10995 {
10996 gen_aarch64_compare_and_swapqi_lse,
10997 gen_aarch64_compare_and_swaphi_lse,
10998 gen_aarch64_compare_and_swapsi_lse,
10999 gen_aarch64_compare_and_swapdi_lse
11000 };
0462169c
SN
11001
11002 bval = operands[0];
11003 rval = operands[1];
11004 mem = operands[2];
11005 oldval = operands[3];
11006 newval = operands[4];
11007 is_weak = operands[5];
11008 mod_s = operands[6];
11009 mod_f = operands[7];
11010 mode = GET_MODE (mem);
11011 cmp_mode = mode;
11012
11013 /* Normally the succ memory model must be stronger than fail, but in the
11014 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
11015 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
11016
46b35980
AM
11017 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
11018 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
11019 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
11020
11021 switch (mode)
11022 {
11023 case QImode:
11024 case HImode:
11025 /* For short modes, we're going to perform the comparison in SImode,
11026 so do the zero-extension now. */
11027 cmp_mode = SImode;
11028 rval = gen_reg_rtx (SImode);
11029 oldval = convert_modes (SImode, mode, oldval, true);
11030 /* Fall through. */
11031
11032 case SImode:
11033 case DImode:
11034 /* Force the value into a register if needed. */
11035 if (!aarch64_plus_operand (oldval, mode))
11036 oldval = force_reg (cmp_mode, oldval);
11037 break;
11038
11039 default:
11040 gcc_unreachable ();
11041 }
11042
11043 switch (mode)
11044 {
b0770c0f
MW
11045 case QImode: idx = 0; break;
11046 case HImode: idx = 1; break;
11047 case SImode: idx = 2; break;
11048 case DImode: idx = 3; break;
0462169c
SN
11049 default:
11050 gcc_unreachable ();
11051 }
b0770c0f
MW
11052 if (TARGET_LSE)
11053 gen = atomic_cas[idx];
11054 else
11055 gen = split_cas[idx];
0462169c
SN
11056
11057 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
11058
11059 if (mode == QImode || mode == HImode)
11060 emit_move_insn (operands[1], gen_lowpart (mode, rval));
11061
11062 x = gen_rtx_REG (CCmode, CC_REGNUM);
11063 x = gen_rtx_EQ (SImode, x, const0_rtx);
f7df4a84 11064 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
11065}
11066
f70fb3b6
MW
11067/* Emit a barrier appropriate for memory model MODEL at the end of a
11068 sequence implementing an atomic operation. */
11069
11070static void
11071aarch64_emit_post_barrier (enum memmodel model)
11072{
11073 const enum memmodel base_model = memmodel_base (model);
11074
11075 if (is_mm_sync (model)
11076 && (base_model == MEMMODEL_ACQUIRE
11077 || base_model == MEMMODEL_ACQ_REL
11078 || base_model == MEMMODEL_SEQ_CST))
11079 {
11080 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
11081 }
11082}
11083
b0770c0f
MW
11084/* Emit an atomic compare-and-swap operation. RVAL is the destination register
11085 for the data in memory. EXPECTED is the value expected to be in memory.
11086 DESIRED is the value to store to memory. MEM is the memory location. MODEL
11087 is the memory ordering to use. */
11088
11089void
11090aarch64_gen_atomic_cas (rtx rval, rtx mem,
11091 rtx expected, rtx desired,
11092 rtx model)
11093{
11094 rtx (*gen) (rtx, rtx, rtx, rtx);
11095 machine_mode mode;
11096
11097 mode = GET_MODE (mem);
11098
11099 switch (mode)
11100 {
11101 case QImode: gen = gen_aarch64_atomic_casqi; break;
11102 case HImode: gen = gen_aarch64_atomic_cashi; break;
11103 case SImode: gen = gen_aarch64_atomic_cassi; break;
11104 case DImode: gen = gen_aarch64_atomic_casdi; break;
11105 default:
11106 gcc_unreachable ();
11107 }
11108
11109 /* Move the expected value into the CAS destination register. */
11110 emit_insn (gen_rtx_SET (rval, expected));
11111
11112 /* Emit the CAS. */
11113 emit_insn (gen (rval, mem, desired, model));
11114
11115 /* Compare the expected value with the value loaded by the CAS, to establish
11116 whether the swap was made. */
11117 aarch64_gen_compare_reg (EQ, rval, expected);
11118}
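/* With LSE available this expands to a single compare-and-swap instruction
   (cas, casa, casl or casal, depending on the memory model), so no
   load/store-exclusive retry loop is needed, unlike the split path below.  */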
11119
0462169c
SN
11120/* Split a compare and swap pattern. */
11121
11122void
11123aarch64_split_compare_and_swap (rtx operands[])
11124{
11125 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 11126 machine_mode mode;
0462169c 11127 bool is_weak;
5d8a22a5
DM
11128 rtx_code_label *label1, *label2;
11129 rtx x, cond;
ab876106
MW
11130 enum memmodel model;
11131 rtx model_rtx;
0462169c
SN
11132
11133 rval = operands[0];
11134 mem = operands[1];
11135 oldval = operands[2];
11136 newval = operands[3];
11137 is_weak = (operands[4] != const0_rtx);
ab876106 11138 model_rtx = operands[5];
0462169c
SN
11139 scratch = operands[7];
11140 mode = GET_MODE (mem);
ab876106 11141 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 11142
5d8a22a5 11143 label1 = NULL;
0462169c
SN
11144 if (!is_weak)
11145 {
11146 label1 = gen_label_rtx ();
11147 emit_label (label1);
11148 }
11149 label2 = gen_label_rtx ();
11150
ab876106
MW
11151 /* The initial load can be relaxed for a __sync operation since a final
11152 barrier will be emitted to stop code hoisting. */
11153 if (is_mm_sync (model))
11154 aarch64_emit_load_exclusive (mode, rval, mem,
11155 GEN_INT (MEMMODEL_RELAXED));
11156 else
11157 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c
SN
11158
11159 cond = aarch64_gen_compare_reg (NE, rval, oldval);
11160 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
11161 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
11162 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
f7df4a84 11163 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c 11164
ab876106 11165 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
11166
11167 if (!is_weak)
11168 {
11169 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
11170 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
11171 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 11172 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
11173 }
11174 else
11175 {
11176 cond = gen_rtx_REG (CCmode, CC_REGNUM);
11177 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
f7df4a84 11178 emit_insn (gen_rtx_SET (cond, x));
0462169c
SN
11179 }
11180
11181 emit_label (label2);
ab876106
MW
11182
11183 /* Emit any final barrier needed for a __sync operation. */
11184 if (is_mm_sync (model))
11185 aarch64_emit_post_barrier (model);
0462169c
SN
11186}
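/* A rough sketch of what the split above produces for a strong (!is_weak)
   DImode compare-and-swap with SEQ_CST ordering; register numbers are
   purely illustrative:

	.L1:	ldaxr	x0, [x1]	// rval = load-acquire exclusive
		cmp	x0, x2		// compare with oldval
		b.ne	.L2		// mismatch: fail
		stlxr	w3, x4, [x1]	// store-release exclusive newval
		cbnz	w3, .L1		// retry if the exclusive store failed
	.L2:
*/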
11187
11188/* Split an atomic operation. */
11189
11190void
11191aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
11192 rtx value, rtx model_rtx, rtx cond)
11193{
ef4bddc2
RS
11194 machine_mode mode = GET_MODE (mem);
11195 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
11196 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
11197 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
11198 rtx_code_label *label;
11199 rtx x;
0462169c
SN
11200
11201 label = gen_label_rtx ();
11202 emit_label (label);
11203
11204 if (new_out)
11205 new_out = gen_lowpart (wmode, new_out);
11206 if (old_out)
11207 old_out = gen_lowpart (wmode, old_out);
11208 else
11209 old_out = new_out;
11210 value = simplify_gen_subreg (wmode, value, mode, 0);
11211
f70fb3b6
MW
11212 /* The initial load can be relaxed for a __sync operation since a final
11213 barrier will be emitted to stop code hoisting. */
11214 if (is_sync)
11215 aarch64_emit_load_exclusive (mode, old_out, mem,
11216 GEN_INT (MEMMODEL_RELAXED));
11217 else
11218 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
11219
11220 switch (code)
11221 {
11222 case SET:
11223 new_out = value;
11224 break;
11225
11226 case NOT:
11227 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 11228 emit_insn (gen_rtx_SET (new_out, x));
0462169c 11229 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 11230 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
11231 break;
11232
11233 case MINUS:
11234 if (CONST_INT_P (value))
11235 {
11236 value = GEN_INT (-INTVAL (value));
11237 code = PLUS;
11238 }
11239 /* Fall through. */
11240
11241 default:
11242 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 11243 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
11244 break;
11245 }
11246
11247 aarch64_emit_store_exclusive (mode, cond, mem,
11248 gen_lowpart (mode, new_out), model_rtx);
11249
11250 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
11251 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
11252 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 11253 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
11254
11255 /* Emit any final barrier needed for a __sync operation. */
11256 if (is_sync)
11257 aarch64_emit_post_barrier (model);
0462169c
SN
11258}
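/* For example, splitting an SImode fetch-and-add with RELAXED ordering along
   the lines above gives a loop of roughly this shape (registers
   illustrative):

	.L1:	ldxr	w0, [x1]	// old_out
		add	w2, w0, w3	// new_out = old_out + value
		stxr	w4, w2, [x1]	// cond = exclusive-store status
		cbnz	w4, .L1
*/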
11259
c2ec330c
AL
11260static void
11261aarch64_init_libfuncs (void)
11262{
11263 /* Half-precision float operations. The compiler handles all operations
11264 with NULL libfuncs by converting to SFmode. */
11265
11266 /* Conversions. */
11267 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
11268 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
11269
11270 /* Arithmetic. */
11271 set_optab_libfunc (add_optab, HFmode, NULL);
11272 set_optab_libfunc (sdiv_optab, HFmode, NULL);
11273 set_optab_libfunc (smul_optab, HFmode, NULL);
11274 set_optab_libfunc (neg_optab, HFmode, NULL);
11275 set_optab_libfunc (sub_optab, HFmode, NULL);
11276
11277 /* Comparisons. */
11278 set_optab_libfunc (eq_optab, HFmode, NULL);
11279 set_optab_libfunc (ne_optab, HFmode, NULL);
11280 set_optab_libfunc (lt_optab, HFmode, NULL);
11281 set_optab_libfunc (le_optab, HFmode, NULL);
11282 set_optab_libfunc (ge_optab, HFmode, NULL);
11283 set_optab_libfunc (gt_optab, HFmode, NULL);
11284 set_optab_libfunc (unord_optab, HFmode, NULL);
11285}
11286
43e9d192 11287/* Target hook for c_mode_for_suffix. */
ef4bddc2 11288static machine_mode
43e9d192
IB
11289aarch64_c_mode_for_suffix (char suffix)
11290{
11291 if (suffix == 'q')
11292 return TFmode;
11293
11294 return VOIDmode;
11295}
11296
3520f7cc
JG
11297/* We can only represent floating point constants which will fit in
11298 "quarter-precision" values. These values are characterised by
 11299 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
11300 by:
11301
11302 (-1)^s * (n/16) * 2^r
11303
11304 Where:
11305 's' is the sign bit.
11306 'n' is an integer in the range 16 <= n <= 31.
11307 'r' is an integer in the range -3 <= r <= 4. */
11308
11309/* Return true iff X can be represented by a quarter-precision
 11310 floating point immediate operand. Note, we cannot represent 0.0. */
11311bool
11312aarch64_float_const_representable_p (rtx x)
11313{
11314 /* This represents our current view of how many bits
11315 make up the mantissa. */
11316 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 11317 int exponent;
3520f7cc 11318 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 11319 REAL_VALUE_TYPE r, m;
807e902e 11320 bool fail;
3520f7cc
JG
11321
11322 if (!CONST_DOUBLE_P (x))
11323 return false;
11324
c2ec330c
AL
11325 /* We don't support HFmode constants yet. */
11326 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
94bfa2da
TV
11327 return false;
11328
3520f7cc
JG
11329 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11330
11331 /* We cannot represent infinities, NaNs or +/-zero. We won't
11332 know if we have +zero until we analyse the mantissa, but we
11333 can reject the other invalid values. */
11334 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
11335 || REAL_VALUE_MINUS_ZERO (r))
11336 return false;
11337
ba96cdfb 11338 /* Extract exponent. */
3520f7cc
JG
11339 r = real_value_abs (&r);
11340 exponent = REAL_EXP (&r);
11341
11342 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11343 highest (sign) bit, with a fixed binary point at bit point_pos.
11344 m1 holds the low part of the mantissa, m2 the high part.
11345 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
11346 bits for the mantissa, this can fail (low bits will be lost). */
11347 real_ldexp (&m, &r, point_pos - exponent);
807e902e 11348 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
11349
11350 /* If the low part of the mantissa has bits set we cannot represent
11351 the value. */
807e902e 11352 if (w.elt (0) != 0)
3520f7cc
JG
11353 return false;
11354 /* We have rejected the lower HOST_WIDE_INT, so update our
11355 understanding of how many bits lie in the mantissa and
11356 look only at the high HOST_WIDE_INT. */
807e902e 11357 mantissa = w.elt (1);
3520f7cc
JG
11358 point_pos -= HOST_BITS_PER_WIDE_INT;
11359
11360 /* We can only represent values with a mantissa of the form 1.xxxx. */
11361 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11362 if ((mantissa & mask) != 0)
11363 return false;
11364
11365 /* Having filtered unrepresentable values, we may now remove all
11366 but the highest 5 bits. */
11367 mantissa >>= point_pos - 5;
11368
11369 /* We cannot represent the value 0.0, so reject it. This is handled
11370 elsewhere. */
11371 if (mantissa == 0)
11372 return false;
11373
11374 /* Then, as bit 4 is always set, we can mask it off, leaving
11375 the mantissa in the range [0, 15]. */
11376 mantissa &= ~(1 << 4);
11377 gcc_assert (mantissa <= 15);
11378
11379 /* GCC internally does not use IEEE754-like encoding (where normalized
11380 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
11381 Our mantissa values are shifted 4 places to the left relative to
11382 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
11383 by 5 places to correct for GCC's representation. */
11384 exponent = 5 - exponent;
11385
11386 return (exponent >= 0 && exponent <= 7);
11387}
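/* Some worked examples of the check above:
     1.0   == (16/16) * 2^0	n = 16, r = 0	representable
     0.125 == (16/16) * 2^-3	n = 16, r = -3	smallest positive value
     31.0  == (31/16) * 2^4	n = 31, r = 4	largest value
   0.1 has no exact binary representation at all, so it is rejected here,
   as are 0.0, infinities and NaNs.  */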
11388
11389char*
81c2dfb9 11390aarch64_output_simd_mov_immediate (rtx const_vector,
ef4bddc2 11391 machine_mode mode,
3520f7cc
JG
11392 unsigned width)
11393{
3ea63f60 11394 bool is_valid;
3520f7cc 11395 static char templ[40];
3520f7cc 11396 const char *mnemonic;
e4f0f84d 11397 const char *shift_op;
3520f7cc 11398 unsigned int lane_count = 0;
81c2dfb9 11399 char element_char;
3520f7cc 11400
e4f0f84d 11401 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
11402
11403 /* This will return true to show const_vector is legal for use as either
 11404 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
11405 also update INFO to show how the immediate should be generated. */
81c2dfb9 11406 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
11407 gcc_assert (is_valid);
11408
81c2dfb9 11409 element_char = sizetochar (info.element_width);
48063b9d
IB
11410 lane_count = width / info.element_width;
11411
3520f7cc
JG
11412 mode = GET_MODE_INNER (mode);
11413 if (mode == SFmode || mode == DFmode)
11414 {
48063b9d
IB
11415 gcc_assert (info.shift == 0 && ! info.mvn);
11416 if (aarch64_float_const_zero_rtx_p (info.value))
11417 info.value = GEN_INT (0);
11418 else
11419 {
11420#define buf_size 20
11421 REAL_VALUE_TYPE r;
11422 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
11423 char float_buf[buf_size] = {'\0'};
11424 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
11425#undef buf_size
11426
11427 if (lane_count == 1)
11428 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
11429 else
11430 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 11431 lane_count, element_char, float_buf);
48063b9d
IB
11432 return templ;
11433 }
3520f7cc 11434 }
3520f7cc 11435
48063b9d 11436 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 11437 shift_op = info.msl ? "msl" : "lsl";
3520f7cc
JG
11438
11439 if (lane_count == 1)
48063b9d
IB
11440 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
11441 mnemonic, UINTVAL (info.value));
11442 else if (info.shift)
11443 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
11444 ", %s %d", mnemonic, lane_count, element_char,
11445 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 11446 else
48063b9d 11447 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 11448 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
11449 return templ;
11450}
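/* Example templates produced above (operands still unsubstituted), assuming
   the immediates have already passed aarch64_simd_valid_immediate:
     V4SImode, value 0x45, shift 0:	"movi	%0.4s, 0x45"
     V4SImode, value 0x45, shift 16:	"movi	%0.4s, 0x45, lsl 16"
     V8HImode, value 0x3, mvn, msl 8:	"mvni	%0.8h, 0x3, msl 8"  */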
11451
b7342d25
IB
11452char*
11453aarch64_output_scalar_simd_mov_immediate (rtx immediate,
ef4bddc2 11454 machine_mode mode)
b7342d25 11455{
ef4bddc2 11456 machine_mode vmode;
b7342d25
IB
11457
11458 gcc_assert (!VECTOR_MODE_P (mode));
11459 vmode = aarch64_simd_container_mode (mode, 64);
11460 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
11461 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
11462}
11463
88b08073
JG
11464/* Split operands into moves from op[1] + op[2] into op[0]. */
11465
11466void
11467aarch64_split_combinev16qi (rtx operands[3])
11468{
11469 unsigned int dest = REGNO (operands[0]);
11470 unsigned int src1 = REGNO (operands[1]);
11471 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 11472 machine_mode halfmode = GET_MODE (operands[1]);
88b08073
JG
11473 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
11474 rtx destlo, desthi;
11475
11476 gcc_assert (halfmode == V16QImode);
11477
11478 if (src1 == dest && src2 == dest + halfregs)
11479 {
11480 /* No-op move. Can't split to nothing; emit something. */
11481 emit_note (NOTE_INSN_DELETED);
11482 return;
11483 }
11484
11485 /* Preserve register attributes for variable tracking. */
11486 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
11487 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
11488 GET_MODE_SIZE (halfmode));
11489
11490 /* Special case of reversed high/low parts. */
11491 if (reg_overlap_mentioned_p (operands[2], destlo)
11492 && reg_overlap_mentioned_p (operands[1], desthi))
11493 {
11494 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
11495 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
11496 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
11497 }
11498 else if (!reg_overlap_mentioned_p (operands[2], destlo))
11499 {
11500 /* Try to avoid unnecessary moves if part of the result
11501 is in the right place already. */
11502 if (src1 != dest)
11503 emit_move_insn (destlo, operands[1]);
11504 if (src2 != dest + halfregs)
11505 emit_move_insn (desthi, operands[2]);
11506 }
11507 else
11508 {
11509 if (src2 != dest + halfregs)
11510 emit_move_insn (desthi, operands[2]);
11511 if (src1 != dest)
11512 emit_move_insn (destlo, operands[1]);
11513 }
11514}
11515
11516/* vec_perm support. */
11517
11518#define MAX_VECT_LEN 16
11519
11520struct expand_vec_perm_d
11521{
11522 rtx target, op0, op1;
11523 unsigned char perm[MAX_VECT_LEN];
ef4bddc2 11524 machine_mode vmode;
88b08073
JG
11525 unsigned char nelt;
11526 bool one_vector_p;
11527 bool testing_p;
11528};
11529
11530/* Generate a variable permutation. */
11531
11532static void
11533aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
11534{
ef4bddc2 11535 machine_mode vmode = GET_MODE (target);
88b08073
JG
11536 bool one_vector_p = rtx_equal_p (op0, op1);
11537
11538 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
11539 gcc_checking_assert (GET_MODE (op0) == vmode);
11540 gcc_checking_assert (GET_MODE (op1) == vmode);
11541 gcc_checking_assert (GET_MODE (sel) == vmode);
11542 gcc_checking_assert (TARGET_SIMD);
11543
11544 if (one_vector_p)
11545 {
11546 if (vmode == V8QImode)
11547 {
11548 /* Expand the argument to a V16QI mode by duplicating it. */
11549 rtx pair = gen_reg_rtx (V16QImode);
11550 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
11551 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
11552 }
11553 else
11554 {
11555 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
11556 }
11557 }
11558 else
11559 {
11560 rtx pair;
11561
11562 if (vmode == V8QImode)
11563 {
11564 pair = gen_reg_rtx (V16QImode);
11565 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
11566 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
11567 }
11568 else
11569 {
11570 pair = gen_reg_rtx (OImode);
11571 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
11572 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
11573 }
11574 }
11575}
11576
11577void
11578aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
11579{
ef4bddc2 11580 machine_mode vmode = GET_MODE (target);
c9d1a16a 11581 unsigned int nelt = GET_MODE_NUNITS (vmode);
88b08073 11582 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 11583 rtx mask;
88b08073
JG
11584
11585 /* The TBL instruction does not use a modulo index, so we must take care
11586 of that ourselves. */
f7c4e5b8
AL
11587 mask = aarch64_simd_gen_const_vector_dup (vmode,
11588 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
11589 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
11590
f7c4e5b8
AL
11591 /* For big-endian, we also need to reverse the index within the vector
11592 (but not which vector). */
11593 if (BYTES_BIG_ENDIAN)
11594 {
11595 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
11596 if (!one_vector_p)
11597 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
11598 sel = expand_simple_binop (vmode, XOR, sel, mask,
11599 NULL, 0, OPTAB_LIB_WIDEN);
11600 }
88b08073
JG
11601 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
11602}
11603
cc4d934f
JG
11604/* Recognize patterns suitable for the TRN instructions. */
11605static bool
11606aarch64_evpc_trn (struct expand_vec_perm_d *d)
11607{
11608 unsigned int i, odd, mask, nelt = d->nelt;
11609 rtx out, in0, in1, x;
11610 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 11611 machine_mode vmode = d->vmode;
cc4d934f
JG
11612
11613 if (GET_MODE_UNIT_SIZE (vmode) > 8)
11614 return false;
11615
11616 /* Note that these are little-endian tests.
11617 We correct for big-endian later. */
11618 if (d->perm[0] == 0)
11619 odd = 0;
11620 else if (d->perm[0] == 1)
11621 odd = 1;
11622 else
11623 return false;
11624 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
11625
11626 for (i = 0; i < nelt; i += 2)
11627 {
11628 if (d->perm[i] != i + odd)
11629 return false;
11630 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
11631 return false;
11632 }
11633
11634 /* Success! */
11635 if (d->testing_p)
11636 return true;
11637
11638 in0 = d->op0;
11639 in1 = d->op1;
11640 if (BYTES_BIG_ENDIAN)
11641 {
11642 x = in0, in0 = in1, in1 = x;
11643 odd = !odd;
11644 }
11645 out = d->target;
11646
11647 if (odd)
11648 {
11649 switch (vmode)
11650 {
11651 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
11652 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
11653 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
11654 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
11655 case V4SImode: gen = gen_aarch64_trn2v4si; break;
11656 case V2SImode: gen = gen_aarch64_trn2v2si; break;
11657 case V2DImode: gen = gen_aarch64_trn2v2di; break;
11658 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
11659 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
11660 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
11661 default:
11662 return false;
11663 }
11664 }
11665 else
11666 {
11667 switch (vmode)
11668 {
11669 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
11670 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
11671 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
11672 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
11673 case V4SImode: gen = gen_aarch64_trn1v4si; break;
11674 case V2SImode: gen = gen_aarch64_trn1v2si; break;
11675 case V2DImode: gen = gen_aarch64_trn1v2di; break;
11676 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
11677 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
11678 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
11679 default:
11680 return false;
11681 }
11682 }
11683
11684 emit_insn (gen (out, in0, in1));
11685 return true;
11686}
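/* For instance, with V4SImode the index vectors accepted above are
   { 0, 4, 2, 6 } for TRN1 (odd == 0) and { 1, 5, 3, 7 } for TRN2
   (odd == 1), in the little-endian numbering used by the checks.  */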
11687
11688/* Recognize patterns suitable for the UZP instructions. */
11689static bool
11690aarch64_evpc_uzp (struct expand_vec_perm_d *d)
11691{
11692 unsigned int i, odd, mask, nelt = d->nelt;
11693 rtx out, in0, in1, x;
11694 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 11695 machine_mode vmode = d->vmode;
cc4d934f
JG
11696
11697 if (GET_MODE_UNIT_SIZE (vmode) > 8)
11698 return false;
11699
11700 /* Note that these are little-endian tests.
11701 We correct for big-endian later. */
11702 if (d->perm[0] == 0)
11703 odd = 0;
11704 else if (d->perm[0] == 1)
11705 odd = 1;
11706 else
11707 return false;
11708 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
11709
11710 for (i = 0; i < nelt; i++)
11711 {
11712 unsigned elt = (i * 2 + odd) & mask;
11713 if (d->perm[i] != elt)
11714 return false;
11715 }
11716
11717 /* Success! */
11718 if (d->testing_p)
11719 return true;
11720
11721 in0 = d->op0;
11722 in1 = d->op1;
11723 if (BYTES_BIG_ENDIAN)
11724 {
11725 x = in0, in0 = in1, in1 = x;
11726 odd = !odd;
11727 }
11728 out = d->target;
11729
11730 if (odd)
11731 {
11732 switch (vmode)
11733 {
11734 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
11735 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
11736 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
11737 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
11738 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
11739 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
11740 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
11741 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
11742 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
11743 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
11744 default:
11745 return false;
11746 }
11747 }
11748 else
11749 {
11750 switch (vmode)
11751 {
11752 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
11753 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
11754 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
11755 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
11756 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
11757 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
11758 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
11759 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
11760 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
11761 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
11762 default:
11763 return false;
11764 }
11765 }
11766
11767 emit_insn (gen (out, in0, in1));
11768 return true;
11769}
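/* Likewise, for V4SImode the accepted index vectors are { 0, 2, 4, 6 }
   for UZP1 (odd == 0) and { 1, 3, 5, 7 } for UZP2 (odd == 1).  */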
11770
11771/* Recognize patterns suitable for the ZIP instructions. */
11772static bool
11773aarch64_evpc_zip (struct expand_vec_perm_d *d)
11774{
11775 unsigned int i, high, mask, nelt = d->nelt;
11776 rtx out, in0, in1, x;
11777 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 11778 machine_mode vmode = d->vmode;
cc4d934f
JG
11779
11780 if (GET_MODE_UNIT_SIZE (vmode) > 8)
11781 return false;
11782
11783 /* Note that these are little-endian tests.
11784 We correct for big-endian later. */
11785 high = nelt / 2;
11786 if (d->perm[0] == high)
11787 /* Do Nothing. */
11788 ;
11789 else if (d->perm[0] == 0)
11790 high = 0;
11791 else
11792 return false;
11793 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
11794
11795 for (i = 0; i < nelt / 2; i++)
11796 {
11797 unsigned elt = (i + high) & mask;
11798 if (d->perm[i * 2] != elt)
11799 return false;
11800 elt = (elt + nelt) & mask;
11801 if (d->perm[i * 2 + 1] != elt)
11802 return false;
11803 }
11804
11805 /* Success! */
11806 if (d->testing_p)
11807 return true;
11808
11809 in0 = d->op0;
11810 in1 = d->op1;
11811 if (BYTES_BIG_ENDIAN)
11812 {
11813 x = in0, in0 = in1, in1 = x;
11814 high = !high;
11815 }
11816 out = d->target;
11817
11818 if (high)
11819 {
11820 switch (vmode)
11821 {
11822 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
11823 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
11824 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
11825 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
11826 case V4SImode: gen = gen_aarch64_zip2v4si; break;
11827 case V2SImode: gen = gen_aarch64_zip2v2si; break;
11828 case V2DImode: gen = gen_aarch64_zip2v2di; break;
11829 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
11830 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
11831 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
11832 default:
11833 return false;
11834 }
11835 }
11836 else
11837 {
11838 switch (vmode)
11839 {
11840 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
11841 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
11842 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
11843 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
11844 case V4SImode: gen = gen_aarch64_zip1v4si; break;
11845 case V2SImode: gen = gen_aarch64_zip1v2si; break;
11846 case V2DImode: gen = gen_aarch64_zip1v2di; break;
11847 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
11848 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
11849 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
11850 default:
11851 return false;
11852 }
11853 }
11854
11855 emit_insn (gen (out, in0, in1));
11856 return true;
11857}
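/* And for V4SImode the accepted index vectors are { 0, 4, 1, 5 } for ZIP1
   (high == 0) and { 2, 6, 3, 7 } for ZIP2 (high == nelt / 2).  */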
11858
ae0533da
AL
11859/* Recognize patterns for the EXT insn. */
11860
11861static bool
11862aarch64_evpc_ext (struct expand_vec_perm_d *d)
11863{
11864 unsigned int i, nelt = d->nelt;
11865 rtx (*gen) (rtx, rtx, rtx, rtx);
11866 rtx offset;
11867
11868 unsigned int location = d->perm[0]; /* Always < nelt. */
11869
11870 /* Check if the extracted indices are increasing by one. */
11871 for (i = 1; i < nelt; i++)
11872 {
11873 unsigned int required = location + i;
11874 if (d->one_vector_p)
11875 {
11876 /* We'll pass the same vector in twice, so allow indices to wrap. */
11877 required &= (nelt - 1);
11878 }
11879 if (d->perm[i] != required)
11880 return false;
11881 }
11882
ae0533da
AL
11883 switch (d->vmode)
11884 {
11885 case V16QImode: gen = gen_aarch64_extv16qi; break;
11886 case V8QImode: gen = gen_aarch64_extv8qi; break;
11887 case V4HImode: gen = gen_aarch64_extv4hi; break;
11888 case V8HImode: gen = gen_aarch64_extv8hi; break;
11889 case V2SImode: gen = gen_aarch64_extv2si; break;
11890 case V4SImode: gen = gen_aarch64_extv4si; break;
11891 case V2SFmode: gen = gen_aarch64_extv2sf; break;
11892 case V4SFmode: gen = gen_aarch64_extv4sf; break;
11893 case V2DImode: gen = gen_aarch64_extv2di; break;
11894 case V2DFmode: gen = gen_aarch64_extv2df; break;
11895 default:
11896 return false;
11897 }
11898
11899 /* Success! */
11900 if (d->testing_p)
11901 return true;
11902
b31e65bb
AL
11903 /* The case where (location == 0) is a no-op for both big- and little-endian,
11904 and is removed by the mid-end at optimization levels -O1 and higher. */
11905
11906 if (BYTES_BIG_ENDIAN && (location != 0))
ae0533da
AL
11907 {
11908 /* After setup, we want the high elements of the first vector (stored
11909 at the LSB end of the register), and the low elements of the second
11910 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 11911 std::swap (d->op0, d->op1);
ae0533da
AL
11912 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
11913 location = nelt - location;
11914 }
11915
11916 offset = GEN_INT (location);
11917 emit_insn (gen (d->target, d->op0, d->op1, offset));
11918 return true;
11919}
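/* For instance, the two-vector V4SImode permutation { 1, 2, 3, 4 } is
   matched here with location == 1, i.e. an EXT starting at a 4-byte offset
   on a little-endian target.  */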
11920
923fcec3
AL
11921/* Recognize patterns for the REV insns. */
11922
11923static bool
11924aarch64_evpc_rev (struct expand_vec_perm_d *d)
11925{
11926 unsigned int i, j, diff, nelt = d->nelt;
11927 rtx (*gen) (rtx, rtx);
11928
11929 if (!d->one_vector_p)
11930 return false;
11931
11932 diff = d->perm[0];
11933 switch (diff)
11934 {
11935 case 7:
11936 switch (d->vmode)
11937 {
11938 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
11939 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
11940 default:
11941 return false;
11942 }
11943 break;
11944 case 3:
11945 switch (d->vmode)
11946 {
11947 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
11948 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
11949 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
11950 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
11951 default:
11952 return false;
11953 }
11954 break;
11955 case 1:
11956 switch (d->vmode)
11957 {
11958 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
11959 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
11960 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
11961 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
11962 case V4SImode: gen = gen_aarch64_rev64v4si; break;
11963 case V2SImode: gen = gen_aarch64_rev64v2si; break;
11964 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
11965 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
11966 default:
11967 return false;
11968 }
11969 break;
11970 default:
11971 return false;
11972 }
11973
11974 for (i = 0; i < nelt ; i += diff + 1)
11975 for (j = 0; j <= diff; j += 1)
11976 {
11977 /* This is guaranteed to be true as the value of diff
 11978 is 7, 3 or 1 and we should have enough elements in the
 11979 queue to generate this. Getting a vector mask with a
 11980 value of diff other than these implies that
11981 something is wrong by the time we get here. */
11982 gcc_assert (i + j < nelt);
11983 if (d->perm[i + j] != i + diff - j)
11984 return false;
11985 }
11986
11987 /* Success! */
11988 if (d->testing_p)
11989 return true;
11990
11991 emit_insn (gen (d->target, d->op0));
11992 return true;
11993}
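/* For instance, the V8QImode permutation { 3, 2, 1, 0, 7, 6, 5, 4 } has
   diff == 3 and is matched as REV32, while { 1, 0, 3, 2, 5, 4, 7, 6 } has
   diff == 1 and becomes REV16.  */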
11994
91bd4114
JG
11995static bool
11996aarch64_evpc_dup (struct expand_vec_perm_d *d)
11997{
11998 rtx (*gen) (rtx, rtx, rtx);
11999 rtx out = d->target;
12000 rtx in0;
ef4bddc2 12001 machine_mode vmode = d->vmode;
91bd4114
JG
12002 unsigned int i, elt, nelt = d->nelt;
12003 rtx lane;
12004
91bd4114
JG
12005 elt = d->perm[0];
12006 for (i = 1; i < nelt; i++)
12007 {
12008 if (elt != d->perm[i])
12009 return false;
12010 }
12011
12012 /* The generic preparation in aarch64_expand_vec_perm_const_1
12013 swaps the operand order and the permute indices if it finds
12014 d->perm[0] to be in the second operand. Thus, we can always
12015 use d->op0 and need not do any extra arithmetic to get the
12016 correct lane number. */
12017 in0 = d->op0;
f901401e 12018 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114
JG
12019
12020 switch (vmode)
12021 {
12022 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
12023 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
12024 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
12025 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
12026 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
12027 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
12028 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
862abc04
AL
12029 case V8HFmode: gen = gen_aarch64_dup_lanev8hf; break;
12030 case V4HFmode: gen = gen_aarch64_dup_lanev4hf; break;
91bd4114
JG
12031 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
12032 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
12033 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
12034 default:
12035 return false;
12036 }
12037
12038 emit_insn (gen (out, in0, lane));
12039 return true;
12040}
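/* For instance, the V4SImode permutation { 2, 2, 2, 2 } is matched here and
   emitted as a DUP of lane 2 of d->op0 (any use of the second operand has
   already been folded into op0 by the generic preparation).  */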
12041
88b08073
JG
12042static bool
12043aarch64_evpc_tbl (struct expand_vec_perm_d *d)
12044{
12045 rtx rperm[MAX_VECT_LEN], sel;
ef4bddc2 12046 machine_mode vmode = d->vmode;
88b08073
JG
12047 unsigned int i, nelt = d->nelt;
12048
88b08073
JG
12049 if (d->testing_p)
12050 return true;
12051
12052 /* Generic code will try constant permutation twice. Once with the
12053 original mode and again with the elements lowered to QImode.
12054 So wait and don't do the selector expansion ourselves. */
12055 if (vmode != V8QImode && vmode != V16QImode)
12056 return false;
12057
12058 for (i = 0; i < nelt; ++i)
bbcc9c00
TB
12059 {
12060 int nunits = GET_MODE_NUNITS (vmode);
12061
12062 /* If big-endian and two vectors we end up with a weird mixed-endian
12063 mode on NEON. Reverse the index within each word but not the word
12064 itself. */
12065 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
12066 : d->perm[i]);
12067 }
88b08073
JG
12068 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
12069 sel = force_reg (vmode, sel);
12070
12071 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
12072 return true;
12073}
12074
12075static bool
12076aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
12077{
12078 /* The pattern matching functions above are written to look for a small
12079 number to begin the sequence (0, 1, N/2). If we begin with an index
12080 from the second operand, we can swap the operands. */
12081 if (d->perm[0] >= d->nelt)
12082 {
12083 unsigned i, nelt = d->nelt;
88b08073 12084
0696116a 12085 gcc_assert (nelt == (nelt & -nelt));
88b08073 12086 for (i = 0; i < nelt; ++i)
0696116a 12087 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
88b08073 12088
cb5c6c29 12089 std::swap (d->op0, d->op1);
88b08073
JG
12090 }
12091
12092 if (TARGET_SIMD)
cc4d934f 12093 {
923fcec3
AL
12094 if (aarch64_evpc_rev (d))
12095 return true;
12096 else if (aarch64_evpc_ext (d))
ae0533da 12097 return true;
f901401e
AL
12098 else if (aarch64_evpc_dup (d))
12099 return true;
ae0533da 12100 else if (aarch64_evpc_zip (d))
cc4d934f
JG
12101 return true;
12102 else if (aarch64_evpc_uzp (d))
12103 return true;
12104 else if (aarch64_evpc_trn (d))
12105 return true;
12106 return aarch64_evpc_tbl (d);
12107 }
88b08073
JG
12108 return false;
12109}
12110
12111/* Expand a vec_perm_const pattern. */
12112
12113bool
12114aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
12115{
12116 struct expand_vec_perm_d d;
12117 int i, nelt, which;
12118
12119 d.target = target;
12120 d.op0 = op0;
12121 d.op1 = op1;
12122
12123 d.vmode = GET_MODE (target);
12124 gcc_assert (VECTOR_MODE_P (d.vmode));
12125 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
12126 d.testing_p = false;
12127
12128 for (i = which = 0; i < nelt; ++i)
12129 {
12130 rtx e = XVECEXP (sel, 0, i);
12131 int ei = INTVAL (e) & (2 * nelt - 1);
12132 which |= (ei < nelt ? 1 : 2);
12133 d.perm[i] = ei;
12134 }
12135
12136 switch (which)
12137 {
12138 default:
12139 gcc_unreachable ();
12140
12141 case 3:
12142 d.one_vector_p = false;
12143 if (!rtx_equal_p (op0, op1))
12144 break;
12145
12146 /* The elements of PERM do not suggest that only the first operand
12147 is used, but both operands are identical. Allow easier matching
12148 of the permutation by folding the permutation into the single
12149 input vector. */
12150 /* Fall Through. */
12151 case 2:
12152 for (i = 0; i < nelt; ++i)
12153 d.perm[i] &= nelt - 1;
12154 d.op0 = op1;
12155 d.one_vector_p = true;
12156 break;
12157
12158 case 1:
12159 d.op1 = op0;
12160 d.one_vector_p = true;
12161 break;
12162 }
12163
12164 return aarch64_expand_vec_perm_const_1 (&d);
12165}
12166
12167static bool
ef4bddc2 12168aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
88b08073
JG
12169 const unsigned char *sel)
12170{
12171 struct expand_vec_perm_d d;
12172 unsigned int i, nelt, which;
12173 bool ret;
12174
12175 d.vmode = vmode;
12176 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
12177 d.testing_p = true;
12178 memcpy (d.perm, sel, nelt);
12179
12180 /* Calculate whether all elements are in one vector. */
12181 for (i = which = 0; i < nelt; ++i)
12182 {
12183 unsigned char e = d.perm[i];
12184 gcc_assert (e < 2 * nelt);
12185 which |= (e < nelt ? 1 : 2);
12186 }
12187
12188 /* If all elements are from the second vector, reindex as if from the
12189 first vector. */
12190 if (which == 2)
12191 for (i = 0; i < nelt; ++i)
12192 d.perm[i] -= nelt;
12193
12194 /* Check whether the mask can be applied to a single vector. */
12195 d.one_vector_p = (which != 3);
12196
12197 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
12198 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
12199 if (!d.one_vector_p)
12200 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
12201
12202 start_sequence ();
12203 ret = aarch64_expand_vec_perm_const_1 (&d);
12204 end_sequence ();
12205
12206 return ret;
12207}
12208
668046d1
DS
12209rtx
12210aarch64_reverse_mask (enum machine_mode mode)
12211{
 12212 /* We have to reverse each vector because we don't have
12213 a permuted load that can reverse-load according to ABI rules. */
12214 rtx mask;
12215 rtvec v = rtvec_alloc (16);
12216 int i, j;
12217 int nunits = GET_MODE_NUNITS (mode);
12218 int usize = GET_MODE_UNIT_SIZE (mode);
12219
12220 gcc_assert (BYTES_BIG_ENDIAN);
12221 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
12222
12223 for (i = 0; i < nunits; i++)
12224 for (j = 0; j < usize; j++)
12225 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
12226 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
12227 return force_reg (V16QImode, mask);
12228}
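/* For example, for V4SImode the byte selection mask built above is
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, i.e. each
   4-byte element is byte-reversed in place.  */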
12229
97e1ad78
JG
12230/* Implement MODES_TIEABLE_P. */
12231
12232bool
ef4bddc2 12233aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
12234{
12235 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
12236 return true;
12237
12238 /* We specifically want to allow elements of "structure" modes to
12239 be tieable to the structure. This more general condition allows
12240 other rarer situations too. */
12241 if (TARGET_SIMD
12242 && aarch64_vector_mode_p (mode1)
12243 && aarch64_vector_mode_p (mode2))
12244 return true;
12245
12246 return false;
12247}
12248
e2c75eea
JG
12249/* Return a new RTX holding the result of moving POINTER forward by
12250 AMOUNT bytes. */
12251
12252static rtx
12253aarch64_move_pointer (rtx pointer, int amount)
12254{
12255 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
12256
12257 return adjust_automodify_address (pointer, GET_MODE (pointer),
12258 next, amount);
12259}
12260
12261/* Return a new RTX holding the result of moving POINTER forward by the
12262 size of the mode it points to. */
12263
12264static rtx
12265aarch64_progress_pointer (rtx pointer)
12266{
12267 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
12268
12269 return aarch64_move_pointer (pointer, amount);
12270}
12271
12272/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
12273 MODE bytes. */
12274
12275static void
12276aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 12277 machine_mode mode)
e2c75eea
JG
12278{
12279 rtx reg = gen_reg_rtx (mode);
12280
12281 /* "Cast" the pointers to the correct mode. */
12282 *src = adjust_address (*src, mode, 0);
12283 *dst = adjust_address (*dst, mode, 0);
12284 /* Emit the memcpy. */
12285 emit_move_insn (reg, *src);
12286 emit_move_insn (*dst, reg);
12287 /* Move the pointers forward. */
12288 *src = aarch64_progress_pointer (*src);
12289 *dst = aarch64_progress_pointer (*dst);
12290}
12291
12292/* Expand movmem, as if from a __builtin_memcpy. Return true if
12293 we succeed, otherwise return false. */
12294
12295bool
12296aarch64_expand_movmem (rtx *operands)
12297{
12298 unsigned int n;
12299 rtx dst = operands[0];
12300 rtx src = operands[1];
12301 rtx base;
12302 bool speed_p = !optimize_function_for_size_p (cfun);
12303
12304 /* When optimizing for size, give a better estimate of the length of a
12305 memcpy call, but use the default otherwise. */
12306 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
12307
12308 /* We can't do anything smart if the amount to copy is not constant. */
12309 if (!CONST_INT_P (operands[2]))
12310 return false;
12311
12312 n = UINTVAL (operands[2]);
12313
12314 /* Try to keep the number of instructions low. For cases below 16 bytes we
12315 need to make at most two moves. For cases above 16 bytes it will be one
12316 move for each 16 byte chunk, then at most two additional moves. */
12317 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
12318 return false;
12319
12320 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12321 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
12322
12323 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
12324 src = adjust_automodify_address (src, VOIDmode, base, 0);
12325
12326 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
12327 1-byte chunk. */
12328 if (n < 4)
12329 {
12330 if (n >= 2)
12331 {
12332 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
12333 n -= 2;
12334 }
12335
12336 if (n == 1)
12337 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
12338
12339 return true;
12340 }
12341
12342 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
12343 4-byte chunk, partially overlapping with the previously copied chunk. */
12344 if (n < 8)
12345 {
12346 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
12347 n -= 4;
12348 if (n > 0)
12349 {
12350 int move = n - 4;
12351
12352 src = aarch64_move_pointer (src, move);
12353 dst = aarch64_move_pointer (dst, move);
12354 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
12355 }
12356 return true;
12357 }
12358
12359 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
12360 them, then (if applicable) an 8-byte chunk. */
12361 while (n >= 8)
12362 {
12363 if (n / 16)
12364 {
12365 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
12366 n -= 16;
12367 }
12368 else
12369 {
12370 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
12371 n -= 8;
12372 }
12373 }
12374
12375 /* Finish the final bytes of the copy. We can always do this in one
12376 instruction. We either copy the exact amount we need, or partially
 12377 overlap with the previous chunk we copied and copy 8 bytes. */
12378 if (n == 0)
12379 return true;
12380 else if (n == 1)
12381 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
12382 else if (n == 2)
12383 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
12384 else if (n == 4)
12385 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
12386 else
12387 {
12388 if (n == 3)
12389 {
12390 src = aarch64_move_pointer (src, -1);
12391 dst = aarch64_move_pointer (dst, -1);
12392 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
12393 }
12394 else
12395 {
12396 int move = n - 8;
12397
12398 src = aarch64_move_pointer (src, move);
12399 dst = aarch64_move_pointer (dst, move);
12400 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
12401 }
12402 }
12403
12404 return true;
12405}
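/* As an illustration of the chunking above: a constant 15-byte copy becomes
   one 8-byte (DImode) move followed by a second 8-byte move whose source and
   destination pointers have been rewound by one byte, so the two moves
   overlap rather than issuing separate 4/2/1-byte tail copies.  */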
12406
a3125fc2
CL
12407/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
12408
12409static unsigned HOST_WIDE_INT
12410aarch64_asan_shadow_offset (void)
12411{
12412 return (HOST_WIDE_INT_1 << 36);
12413}
12414
d3006da6 12415static bool
445d7826 12416aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
d3006da6
JG
12417 unsigned int align,
12418 enum by_pieces_operation op,
12419 bool speed_p)
12420{
12421 /* STORE_BY_PIECES can be used when copying a constant string, but
12422 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
12423 For now we always fail this and let the move_by_pieces code copy
12424 the string from read-only memory. */
12425 if (op == STORE_BY_PIECES)
12426 return false;
12427
12428 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
12429}
12430
5f3bc026
ZC
12431static enum machine_mode
12432aarch64_code_to_ccmode (enum rtx_code code)
12433{
12434 switch (code)
12435 {
12436 case NE:
12437 return CC_DNEmode;
12438
12439 case EQ:
12440 return CC_DEQmode;
12441
12442 case LE:
12443 return CC_DLEmode;
12444
12445 case LT:
12446 return CC_DLTmode;
12447
12448 case GE:
12449 return CC_DGEmode;
12450
12451 case GT:
12452 return CC_DGTmode;
12453
12454 case LEU:
12455 return CC_DLEUmode;
12456
12457 case LTU:
12458 return CC_DLTUmode;
12459
12460 case GEU:
12461 return CC_DGEUmode;
12462
12463 case GTU:
12464 return CC_DGTUmode;
12465
12466 default:
12467 return CCmode;
12468 }
12469}
12470
12471static rtx
12472aarch64_gen_ccmp_first (rtx *prep_seq, rtx *gen_seq,
12473 int code, tree treeop0, tree treeop1)
12474{
12475 enum machine_mode op_mode, cmp_mode, cc_mode;
12476 rtx op0, op1, cmp, target;
12477 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
12478 enum insn_code icode;
12479 struct expand_operand ops[4];
12480
12481 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) code);
12482 if (cc_mode == CCmode)
12483 return NULL_RTX;
12484
12485 start_sequence ();
12486 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
12487
12488 op_mode = GET_MODE (op0);
12489 if (op_mode == VOIDmode)
12490 op_mode = GET_MODE (op1);
12491
12492 switch (op_mode)
12493 {
12494 case QImode:
12495 case HImode:
12496 case SImode:
12497 cmp_mode = SImode;
12498 icode = CODE_FOR_cmpsi;
12499 break;
12500
12501 case DImode:
12502 cmp_mode = DImode;
12503 icode = CODE_FOR_cmpdi;
12504 break;
12505
12506 default:
12507 end_sequence ();
12508 return NULL_RTX;
12509 }
12510
12511 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
12512 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
12513 if (!op0 || !op1)
12514 {
12515 end_sequence ();
12516 return NULL_RTX;
12517 }
12518 *prep_seq = get_insns ();
12519 end_sequence ();
12520
12521 cmp = gen_rtx_fmt_ee ((enum rtx_code) code, cmp_mode, op0, op1);
12522 target = gen_rtx_REG (CCmode, CC_REGNUM);
12523
12524 create_output_operand (&ops[0], target, CCmode);
12525 create_fixed_operand (&ops[1], cmp);
12526 create_fixed_operand (&ops[2], op0);
12527 create_fixed_operand (&ops[3], op1);
12528
12529 start_sequence ();
12530 if (!maybe_expand_insn (icode, 4, ops))
12531 {
12532 end_sequence ();
12533 return NULL_RTX;
12534 }
12535 *gen_seq = get_insns ();
12536 end_sequence ();
12537
12538 return gen_rtx_REG (cc_mode, CC_REGNUM);
12539}
12540
12541static rtx
12542aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code,
12543 tree treeop0, tree treeop1, int bit_code)
12544{
12545 rtx op0, op1, cmp0, cmp1, target;
12546 enum machine_mode op_mode, cmp_mode, cc_mode;
12547 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
12548 enum insn_code icode = CODE_FOR_ccmp_andsi;
12549 struct expand_operand ops[6];
12550
12551 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) cmp_code);
12552 if (cc_mode == CCmode)
12553 return NULL_RTX;
12554
12555 push_to_sequence ((rtx_insn*) *prep_seq);
12556 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
12557
12558 op_mode = GET_MODE (op0);
12559 if (op_mode == VOIDmode)
12560 op_mode = GET_MODE (op1);
12561
12562 switch (op_mode)
12563 {
12564 case QImode:
12565 case HImode:
12566 case SImode:
12567 cmp_mode = SImode;
12568 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_andsi
12569 : CODE_FOR_ccmp_iorsi;
12570 break;
12571
12572 case DImode:
12573 cmp_mode = DImode;
12574 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_anddi
12575 : CODE_FOR_ccmp_iordi;
12576 break;
12577
12578 default:
12579 end_sequence ();
12580 return NULL_RTX;
12581 }
12582
12583 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
12584 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
12585 if (!op0 || !op1)
12586 {
12587 end_sequence ();
12588 return NULL_RTX;
12589 }
12590 *prep_seq = get_insns ();
12591 end_sequence ();
12592
12593 target = gen_rtx_REG (cc_mode, CC_REGNUM);
12594 cmp1 = gen_rtx_fmt_ee ((enum rtx_code) cmp_code, cmp_mode, op0, op1);
12595 cmp0 = gen_rtx_fmt_ee (NE, cmp_mode, prev, const0_rtx);
12596
12597 create_fixed_operand (&ops[0], prev);
12598 create_fixed_operand (&ops[1], target);
12599 create_fixed_operand (&ops[2], op0);
12600 create_fixed_operand (&ops[3], op1);
12601 create_fixed_operand (&ops[4], cmp0);
12602 create_fixed_operand (&ops[5], cmp1);
12603
12604 push_to_sequence ((rtx_insn*) *gen_seq);
12605 if (!maybe_expand_insn (icode, 6, ops))
12606 {
12607 end_sequence ();
12608 return NULL_RTX;
12609 }
12610
12611 *gen_seq = get_insns ();
12612 end_sequence ();
12613
12614 return target;
12615}
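/* Taken together, the two hooks above let the middle end chain comparisons:
   the first becomes a CMP, each subsequent one a CCMP whose result is only
   meaningful when the earlier condition held, so e.g. "a == 0 && b > 7" can
   be decided by one CMP, one CCMP and a single conditional branch.  */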
12616
12617#undef TARGET_GEN_CCMP_FIRST
12618#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
12619
12620#undef TARGET_GEN_CCMP_NEXT
12621#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
12622
6a569cdd
KT
12623/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
12624 instruction fusion of some sort. */
12625
12626static bool
12627aarch64_macro_fusion_p (void)
12628{
b175b679 12629 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
12630}
12631
12632
12633/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
12634 should be kept together during scheduling. */
12635
12636static bool
12637aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
12638{
12639 rtx set_dest;
12640 rtx prev_set = single_set (prev);
12641 rtx curr_set = single_set (curr);
12642 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
12643 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
12644
12645 if (!aarch64_macro_fusion_p ())
12646 return false;
12647
12648 if (simple_sets_p
b175b679 12649 && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
12650 {
12651 /* We are trying to match:
12652 prev (mov) == (set (reg r0) (const_int imm16))
12653 curr (movk) == (set (zero_extract (reg r0)
12654 (const_int 16)
12655 (const_int 16))
12656 (const_int imm16_1)) */
12657
12658 set_dest = SET_DEST (curr_set);
12659
12660 if (GET_CODE (set_dest) == ZERO_EXTRACT
12661 && CONST_INT_P (SET_SRC (curr_set))
12662 && CONST_INT_P (SET_SRC (prev_set))
12663 && CONST_INT_P (XEXP (set_dest, 2))
12664 && INTVAL (XEXP (set_dest, 2)) == 16
12665 && REG_P (XEXP (set_dest, 0))
12666 && REG_P (SET_DEST (prev_set))
12667 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
12668 {
12669 return true;
12670 }
12671 }
12672
9bbe08fe 12673 if (simple_sets_p
b175b679 12674 && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
12675 {
12676
12677 /* We're trying to match:
12678 prev (adrp) == (set (reg r1)
12679 (high (symbol_ref ("SYM"))))
12680 curr (add) == (set (reg r0)
12681 (lo_sum (reg r1)
12682 (symbol_ref ("SYM"))))
12683 Note that r0 need not necessarily be the same as r1, especially
12684 during pre-regalloc scheduling. */
12685
12686 if (satisfies_constraint_Ush (SET_SRC (prev_set))
12687 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
12688 {
12689 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
12690 && REG_P (XEXP (SET_SRC (curr_set), 0))
12691 && REGNO (XEXP (SET_SRC (curr_set), 0))
12692 == REGNO (SET_DEST (prev_set))
12693 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
12694 XEXP (SET_SRC (curr_set), 1)))
12695 return true;
12696 }
12697 }
12698
cd0cb232 12699 if (simple_sets_p
b175b679 12700 && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
12701 {
12702
12703 /* We're trying to match:
12704 prev (movk) == (set (zero_extract (reg r0)
12705 (const_int 16)
12706 (const_int 32))
12707 (const_int imm16_1))
12708 curr (movk) == (set (zero_extract (reg r0)
12709 (const_int 16)
12710 (const_int 48))
12711 (const_int imm16_2)) */
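      /* For illustration (immediate values are made up), this corresponds to
         back-to-back writes of the two upper 16-bit chunks of an X register:
           movk x0, 0xdead, lsl 32
           movk x0, 0xbeef, lsl 48  */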
12712
12713 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
12714 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
12715 && REG_P (XEXP (SET_DEST (prev_set), 0))
12716 && REG_P (XEXP (SET_DEST (curr_set), 0))
12717 && REGNO (XEXP (SET_DEST (prev_set), 0))
12718 == REGNO (XEXP (SET_DEST (curr_set), 0))
12719 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
12720 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
12721 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
12722 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
12723 && CONST_INT_P (SET_SRC (prev_set))
12724 && CONST_INT_P (SET_SRC (curr_set)))
12725 return true;
12726
12727 }
d8354ad7 12728 if (simple_sets_p
b175b679 12729 && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
12730 {
12731 /* We're trying to match:
12732 prev (adrp) == (set (reg r0)
12733 (high (symbol_ref ("SYM"))))
12734 curr (ldr) == (set (reg r1)
12735 (mem (lo_sum (reg r0)
12736 (symbol_ref ("SYM")))))
12737 or
12738 curr (ldr) == (set (reg r1)
12739 (zero_extend (mem
12740 (lo_sum (reg r0)
12741 (symbol_ref ("SYM")))))) */
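      /* For illustration (register names are made up):
           adrp x0, SYM
           ldr  w1, [x0, :lo12:SYM]  */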
12742 if (satisfies_constraint_Ush (SET_SRC (prev_set))
12743 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
12744 {
12745 rtx curr_src = SET_SRC (curr_set);
12746
12747 if (GET_CODE (curr_src) == ZERO_EXTEND)
12748 curr_src = XEXP (curr_src, 0);
12749
12750 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
12751 && REG_P (XEXP (XEXP (curr_src, 0), 0))
12752 && REGNO (XEXP (XEXP (curr_src, 0), 0))
12753 == REGNO (SET_DEST (prev_set))
12754 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
12755 XEXP (SET_SRC (prev_set), 0)))
12756 return true;
12757 }
12758 }
cd0cb232 12759
b175b679 12760 if ((aarch64_tune_params.fusible_ops & AARCH64_FUSE_CMP_BRANCH)
3759108f
AP
12761 && any_condjump_p (curr))
12762 {
12763 enum attr_type prev_type = get_attr_type (prev);
12764
12765 /* FIXME: this misses some instructions which ThunderX considers to be
12766 simple arithmetic instructions.  Simple shifts are missed here.  */
12767 if (prev_type == TYPE_ALUS_SREG
12768 || prev_type == TYPE_ALUS_IMM
12769 || prev_type == TYPE_LOGICS_REG
12770 || prev_type == TYPE_LOGICS_IMM)
12771 return true;
12772 }
12773
6a569cdd
KT
12774 return false;
12775}
12776
350013bc
BC
12777 /* If MEM is in the form of [base+offset], extract the two parts
12778 of the address, set BASE and OFFSET to them and return true;
12779 otherwise return false after clearing BASE and OFFSET.  */
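
/* For illustration, (mem (plus (reg rb) (const_int 16))) yields
   *base == (reg rb) and *offset == (const_int 16), while a plain
   (mem (reg rb)) yields *offset == const0_rtx.  */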
12780
12781bool
12782extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
12783{
12784 rtx addr;
12785
12786 gcc_assert (MEM_P (mem));
12787
12788 addr = XEXP (mem, 0);
12789
12790 if (REG_P (addr))
12791 {
12792 *base = addr;
12793 *offset = const0_rtx;
12794 return true;
12795 }
12796
12797 if (GET_CODE (addr) == PLUS
12798 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
12799 {
12800 *base = XEXP (addr, 0);
12801 *offset = XEXP (addr, 1);
12802 return true;
12803 }
12804
12805 *base = NULL_RTX;
12806 *offset = NULL_RTX;
12807
12808 return false;
12809}
12810
12811/* Types for scheduling fusion. */
12812enum sched_fusion_type
12813{
12814 SCHED_FUSION_NONE = 0,
12815 SCHED_FUSION_LD_SIGN_EXTEND,
12816 SCHED_FUSION_LD_ZERO_EXTEND,
12817 SCHED_FUSION_LD,
12818 SCHED_FUSION_ST,
12819 SCHED_FUSION_NUM
12820};
12821
12822 /* If INSN is a load or store whose address is in the form [base+offset],
12823 extract the two parts and set BASE and OFFSET to them.  Return the
12824 scheduling fusion type of INSN.  */
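
/* For illustration, (set (reg:SI r1) (mem:SI (plus (reg:DI rb) (const_int 4))))
   would be classified as SCHED_FUSION_LD with *base == rb and *offset == 4,
   while storing a register or const0_rtx to such an address would be
   SCHED_FUSION_ST.  */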
12825
12826static enum sched_fusion_type
12827fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
12828{
12829 rtx x, dest, src;
12830 enum sched_fusion_type fusion = SCHED_FUSION_LD;
12831
12832 gcc_assert (INSN_P (insn));
12833 x = PATTERN (insn);
12834 if (GET_CODE (x) != SET)
12835 return SCHED_FUSION_NONE;
12836
12837 src = SET_SRC (x);
12838 dest = SET_DEST (x);
12839
1f46bd52
AP
12840 if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
12841 && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
350013bc
BC
12842 return SCHED_FUSION_NONE;
12843
12844 if (GET_CODE (src) == SIGN_EXTEND)
12845 {
12846 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
12847 src = XEXP (src, 0);
12848 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
12849 return SCHED_FUSION_NONE;
12850 }
12851 else if (GET_CODE (src) == ZERO_EXTEND)
12852 {
12853 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
12854 src = XEXP (src, 0);
12855 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
12856 return SCHED_FUSION_NONE;
12857 }
12858
12859 if (GET_CODE (src) == MEM && REG_P (dest))
12860 extract_base_offset_in_addr (src, base, offset);
12861 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
12862 {
12863 fusion = SCHED_FUSION_ST;
12864 extract_base_offset_in_addr (dest, base, offset);
12865 }
12866 else
12867 return SCHED_FUSION_NONE;
12868
12869 if (*base == NULL_RTX || *offset == NULL_RTX)
12870 fusion = SCHED_FUSION_NONE;
12871
12872 return fusion;
12873}
12874
12875/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
12876
12877 Currently we only support fusing ldr and str instructions, so FUSION_PRI
12878 and PRI are only calculated for these instructions.  For other instructions,
12879 FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the future, fusion of
12880 other instruction types can be added by returning different priorities.
12881
12882 It's important that irrelevant instructions get the largest FUSION_PRI. */
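
/* For illustration, two SImode loads from [base, 8] and [base, 12] receive
   the same FUSION_PRI (same fusion type and base register), while the load
   at offset 8 receives the larger PRI and is therefore scheduled first.  */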
12883
12884static void
12885aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
12886 int *fusion_pri, int *pri)
12887{
12888 int tmp, off_val;
12889 rtx base, offset;
12890 enum sched_fusion_type fusion;
12891
12892 gcc_assert (INSN_P (insn));
12893
12894 tmp = max_pri - 1;
12895 fusion = fusion_load_store (insn, &base, &offset);
12896 if (fusion == SCHED_FUSION_NONE)
12897 {
12898 *pri = tmp;
12899 *fusion_pri = tmp;
12900 return;
12901 }
12902
12903 /* Set FUSION_PRI according to fusion type and base register. */
12904 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
12905
12906 /* Calculate PRI. */
12907 tmp /= 2;
12908
12909 /* An INSN with a smaller offset goes first.  */
12910 off_val = (int)(INTVAL (offset));
12911 if (off_val >= 0)
12912 tmp -= (off_val & 0xfffff);
12913 else
12914 tmp += ((- off_val) & 0xfffff);
12915
12916 *pri = tmp;
12917 return;
12918}
12919
12920/* Given OPERANDS of consecutive load/store, check if we can merge
12921 them into ldp/stp. LOAD is true if they are load instructions.
12922 MODE is the mode of memory operands. */
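
/* For illustration (register names are made up), this is the check that
   allows a pair such as
     ldr x0, [x2]
     ldr x1, [x2, 8]
   to be merged into a single "ldp x0, x1, [x2]".  */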
12923
12924bool
12925aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
12926 enum machine_mode mode)
12927{
12928 HOST_WIDE_INT offval_1, offval_2, msize;
12929 enum reg_class rclass_1, rclass_2;
12930 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
12931
12932 if (load)
12933 {
12934 mem_1 = operands[1];
12935 mem_2 = operands[3];
12936 reg_1 = operands[0];
12937 reg_2 = operands[2];
12938 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
12939 if (REGNO (reg_1) == REGNO (reg_2))
12940 return false;
12941 }
12942 else
12943 {
12944 mem_1 = operands[0];
12945 mem_2 = operands[2];
12946 reg_1 = operands[1];
12947 reg_2 = operands[3];
12948 }
12949
bf84ac44
AP
12950 /* The mems cannot be volatile. */
12951 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
12952 return false;
12953
350013bc
BC
12954 /* Check if the addresses are in the form of [base+offset]. */
12955 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
12956 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
12957 return false;
12958 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
12959 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
12960 return false;
12961
12962 /* Check if the bases are the same.  */
12963 if (!rtx_equal_p (base_1, base_2))
12964 return false;
12965
12966 offval_1 = INTVAL (offset_1);
12967 offval_2 = INTVAL (offset_2);
12968 msize = GET_MODE_SIZE (mode);
12969 /* Check if the offsets are consecutive. */
12970 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
12971 return false;
12972
12973 /* Check if the addresses are clobbered by the loads.  */
12974 if (load)
12975 {
12976 if (reg_mentioned_p (reg_1, mem_1))
12977 return false;
12978
12979 /* In increasing offset order, only the last load may clobber its address.  */
12980 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
12981 return false;
12982 }
12983
12984 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
12985 rclass_1 = FP_REGS;
12986 else
12987 rclass_1 = GENERAL_REGS;
12988
12989 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
12990 rclass_2 = FP_REGS;
12991 else
12992 rclass_2 = GENERAL_REGS;
12993
12994 /* Check if the registers are of the same class.  */
12995 if (rclass_1 != rclass_2)
12996 return false;
12997
12998 return true;
12999}
13000
13001/* Given OPERANDS of consecutive load/store, check if we can merge
13002 them into ldp/stp by adjusting the offset. LOAD is true if they
13003 are load instructions. MODE is the mode of memory operands.
13004
13005 Given the consecutive stores below:
13006
13007 str w1, [xb, 0x100]
13008 str w1, [xb, 0x104]
13009 str w1, [xb, 0x108]
13010 str w1, [xb, 0x10c]
13011
13012 Though the offsets are out of the range supported by stp, we can
13013 still pair them after adjusting the offset, like:
13014
13015 add scratch, xb, 0x100
13016 stp w1, w1, [scratch]
13017 stp w1, w1, [scratch, 0x8]
13018
13019 The peephole patterns detecting this opportunity should guarantee
13020 the scratch register is available.  */
13021
13022bool
13023aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
13024 enum machine_mode mode)
13025{
13026 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
13027 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
13028 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
13029 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
13030
13031 if (load)
13032 {
13033 reg_1 = operands[0];
13034 mem_1 = operands[1];
13035 reg_2 = operands[2];
13036 mem_2 = operands[3];
13037 reg_3 = operands[4];
13038 mem_3 = operands[5];
13039 reg_4 = operands[6];
13040 mem_4 = operands[7];
13041 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
13042 && REG_P (reg_3) && REG_P (reg_4));
13043 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
13044 return false;
13045 }
13046 else
13047 {
13048 mem_1 = operands[0];
13049 reg_1 = operands[1];
13050 mem_2 = operands[2];
13051 reg_2 = operands[3];
13052 mem_3 = operands[4];
13053 reg_3 = operands[5];
13054 mem_4 = operands[6];
13055 reg_4 = operands[7];
13056 }
13057 /* Skip if the memory operand is by itself valid for ldp/stp.  */
13058 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
13059 return false;
13060
bf84ac44
AP
13061 /* The mems cannot be volatile. */
13062 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
13063 || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
13064 return false;
13065
350013bc
BC
13066 /* Check if the addresses are in the form of [base+offset]. */
13067 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
13068 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
13069 return false;
13070 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
13071 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
13072 return false;
13073 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
13074 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
13075 return false;
13076 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
13077 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
13078 return false;
13079
13081 /* Check if the bases are the same.  */
13081 if (!rtx_equal_p (base_1, base_2)
13082 || !rtx_equal_p (base_2, base_3)
13083 || !rtx_equal_p (base_3, base_4))
13084 return false;
13085
13086 offval_1 = INTVAL (offset_1);
13087 offval_2 = INTVAL (offset_2);
13088 offval_3 = INTVAL (offset_3);
13089 offval_4 = INTVAL (offset_4);
13090 msize = GET_MODE_SIZE (mode);
13091 /* Check if the offsets are consecutive. */
13092 if ((offval_1 != (offval_2 + msize)
13093 || offval_1 != (offval_3 + msize * 2)
13094 || offval_1 != (offval_4 + msize * 3))
13095 && (offval_4 != (offval_3 + msize)
13096 || offval_4 != (offval_2 + msize * 2)
13097 || offval_4 != (offval_1 + msize * 3)))
13098 return false;
13099
13100 /* Check if the addresses are clobbered by the loads.  */
13101 if (load)
13102 {
13103 if (reg_mentioned_p (reg_1, mem_1)
13104 || reg_mentioned_p (reg_2, mem_2)
13105 || reg_mentioned_p (reg_3, mem_3))
13106 return false;
13107
13108 /* In increasing offset order, only the last load may clobber its address.  */
13109 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
13110 return false;
13111 }
13112
13113 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
13114 rclass_1 = FP_REGS;
13115 else
13116 rclass_1 = GENERAL_REGS;
13117
13118 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
13119 rclass_2 = FP_REGS;
13120 else
13121 rclass_2 = GENERAL_REGS;
13122
13123 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
13124 rclass_3 = FP_REGS;
13125 else
13126 rclass_3 = GENERAL_REGS;
13127
13128 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
13129 rclass_4 = FP_REGS;
13130 else
13131 rclass_4 = GENERAL_REGS;
13132
13133 /* Check if the registers are of the same class.  */
13134 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
13135 return false;
13136
13137 return true;
13138}
13139
13140/* Given OPERANDS of consecutive load/store, this function pairs them
13141 into ldp/stp after adjusting the offset. It depends on the fact
13142 that addresses of load/store instructions are in increasing order.
13143 MODE is the mode of the memory operands.  CODE is the rtl operator
13144 which should be applied to all memory operands; it is SIGN_EXTEND,
13145 ZERO_EXTEND or UNKNOWN.  */
13146
13147bool
13148aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
13149 enum machine_mode mode, RTX_CODE code)
13150{
13151 rtx base, offset, t1, t2;
13152 rtx mem_1, mem_2, mem_3, mem_4;
13153 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
13154
13155 if (load)
13156 {
13157 mem_1 = operands[1];
13158 mem_2 = operands[3];
13159 mem_3 = operands[5];
13160 mem_4 = operands[7];
13161 }
13162 else
13163 {
13164 mem_1 = operands[0];
13165 mem_2 = operands[2];
13166 mem_3 = operands[4];
13167 mem_4 = operands[6];
13168 gcc_assert (code == UNKNOWN);
13169 }
13170
13171 extract_base_offset_in_addr (mem_1, &base, &offset);
13172 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
13173
13174 /* Adjust the offset so that it can fit in an ldp/stp instruction.  */
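  /* For illustration, with the SImode stores at [xb, 0x100]..[xb, 0x10c]
     shown earlier: msize == 4, so stp_off_limit == 0x100; off_val == 0x100
     gives new_off == 0 and adj_off == 0x100, i.e. the base is advanced by
     0x100 and the resulting pairs use offsets 0 and 8.  */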
13175 msize = GET_MODE_SIZE (mode);
13176 stp_off_limit = msize * 0x40;
13177 off_val = INTVAL (offset);
13178 abs_off = (off_val < 0) ? -off_val : off_val;
13179 new_off = abs_off % stp_off_limit;
13180 adj_off = abs_off - new_off;
13181
13182 /* Further adjust to make sure all offsets are OK. */
13183 if ((new_off + msize * 2) >= stp_off_limit)
13184 {
13185 adj_off += stp_off_limit;
13186 new_off -= stp_off_limit;
13187 }
13188
13189 /* Make sure the adjustment can be done with ADD/SUB instructions. */
13190 if (adj_off >= 0x1000)
13191 return false;
13192
13193 if (off_val < 0)
13194 {
13195 adj_off = -adj_off;
13196 new_off = -new_off;
13197 }
13198
13199 /* Create new memory references. */
13200 mem_1 = change_address (mem_1, VOIDmode,
13201 plus_constant (DImode, operands[8], new_off));
13202
13203 /* Check if the adjusted address is OK for ldp/stp. */
13204 if (!aarch64_mem_pair_operand (mem_1, mode))
13205 return false;
13206
13207 msize = GET_MODE_SIZE (mode);
13208 mem_2 = change_address (mem_2, VOIDmode,
13209 plus_constant (DImode,
13210 operands[8],
13211 new_off + msize));
13212 mem_3 = change_address (mem_3, VOIDmode,
13213 plus_constant (DImode,
13214 operands[8],
13215 new_off + msize * 2));
13216 mem_4 = change_address (mem_4, VOIDmode,
13217 plus_constant (DImode,
13218 operands[8],
13219 new_off + msize * 3));
13220
13221 if (code == ZERO_EXTEND)
13222 {
13223 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
13224 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
13225 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
13226 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
13227 }
13228 else if (code == SIGN_EXTEND)
13229 {
13230 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
13231 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
13232 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
13233 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
13234 }
13235
13236 if (load)
13237 {
13238 operands[1] = mem_1;
13239 operands[3] = mem_2;
13240 operands[5] = mem_3;
13241 operands[7] = mem_4;
13242 }
13243 else
13244 {
13245 operands[0] = mem_1;
13246 operands[2] = mem_2;
13247 operands[4] = mem_3;
13248 operands[6] = mem_4;
13249 }
13250
13251 /* Emit the adjusting instruction.  */
f7df4a84 13252 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, adj_off)));
350013bc 13253 /* Emit ldp/stp instructions. */
f7df4a84
RS
13254 t1 = gen_rtx_SET (operands[0], operands[1]);
13255 t2 = gen_rtx_SET (operands[2], operands[3]);
350013bc 13256 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
f7df4a84
RS
13257 t1 = gen_rtx_SET (operands[4], operands[5]);
13258 t2 = gen_rtx_SET (operands[6], operands[7]);
350013bc
BC
13259 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
13260 return true;
13261}
13262
1b1e81f8
JW
13263/* Return true if a pseudo register should be created and used to hold
13264 the GOT address for PIC code.  */
13265
13266bool
13267aarch64_use_pseudo_pic_reg (void)
13268{
13269 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
13270}
13271
7b841a12
JW
13272/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
13273
13274static int
13275aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
13276{
13277 switch (XINT (x, 1))
13278 {
13279 case UNSPEC_GOTSMALLPIC:
13280 case UNSPEC_GOTSMALLPIC28K:
13281 case UNSPEC_GOTTINYPIC:
13282 return 0;
13283 default:
13284 break;
13285 }
13286
13287 return default_unspec_may_trap_p (x, flags);
13288}
13289
c2ec330c
AL
13290/* Implement TARGET_PROMOTED_TYPE to promote __fp16 to float. */
13291static tree
13292aarch64_promoted_type (const_tree t)
13293{
13294 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
13295 return float_type_node;
13296 return NULL_TREE;
13297}
43e9d192
IB
13298#undef TARGET_ADDRESS_COST
13299#define TARGET_ADDRESS_COST aarch64_address_cost
13300
13301/* This hook determines whether unnamed bitfields affect the alignment
13302 of the containing structure. The hook returns true if the structure
13303 should inherit the alignment requirements of an unnamed bitfield's
13304 type. */
13305#undef TARGET_ALIGN_ANON_BITFIELD
13306#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
13307
13308#undef TARGET_ASM_ALIGNED_DI_OP
13309#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
13310
13311#undef TARGET_ASM_ALIGNED_HI_OP
13312#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
13313
13314#undef TARGET_ASM_ALIGNED_SI_OP
13315#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
13316
13317#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
13318#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
13319 hook_bool_const_tree_hwi_hwi_const_tree_true
13320
43e9d192
IB
13321#undef TARGET_ASM_OUTPUT_MI_THUNK
13322#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
13323
13324#undef TARGET_ASM_SELECT_RTX_SECTION
13325#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
13326
13327#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
13328#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
13329
13330#undef TARGET_BUILD_BUILTIN_VA_LIST
13331#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
13332
13333#undef TARGET_CALLEE_COPIES
13334#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
13335
13336#undef TARGET_CAN_ELIMINATE
13337#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
13338
1fd8d40c
KT
13339#undef TARGET_CAN_INLINE_P
13340#define TARGET_CAN_INLINE_P aarch64_can_inline_p
13341
43e9d192
IB
13342#undef TARGET_CANNOT_FORCE_CONST_MEM
13343#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
13344
13345#undef TARGET_CONDITIONAL_REGISTER_USAGE
13346#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
13347
13348/* Only the least significant bit is used for initialization guard
13349 variables. */
13350#undef TARGET_CXX_GUARD_MASK_BIT
13351#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
13352
13353#undef TARGET_C_MODE_FOR_SUFFIX
13354#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
13355
13356#ifdef TARGET_BIG_ENDIAN_DEFAULT
13357#undef TARGET_DEFAULT_TARGET_FLAGS
13358#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
13359#endif
13360
13361#undef TARGET_CLASS_MAX_NREGS
13362#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
13363
119103ca
JG
13364#undef TARGET_BUILTIN_DECL
13365#define TARGET_BUILTIN_DECL aarch64_builtin_decl
13366
43e9d192
IB
13367#undef TARGET_EXPAND_BUILTIN
13368#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
13369
13370#undef TARGET_EXPAND_BUILTIN_VA_START
13371#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
13372
9697e620
JG
13373#undef TARGET_FOLD_BUILTIN
13374#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
13375
43e9d192
IB
13376#undef TARGET_FUNCTION_ARG
13377#define TARGET_FUNCTION_ARG aarch64_function_arg
13378
13379#undef TARGET_FUNCTION_ARG_ADVANCE
13380#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
13381
13382#undef TARGET_FUNCTION_ARG_BOUNDARY
13383#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
13384
13385#undef TARGET_FUNCTION_OK_FOR_SIBCALL
13386#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
13387
13388#undef TARGET_FUNCTION_VALUE
13389#define TARGET_FUNCTION_VALUE aarch64_function_value
13390
13391#undef TARGET_FUNCTION_VALUE_REGNO_P
13392#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
13393
13394#undef TARGET_FRAME_POINTER_REQUIRED
13395#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
13396
fc72cba7
AL
13397#undef TARGET_GIMPLE_FOLD_BUILTIN
13398#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 13399
43e9d192
IB
13400#undef TARGET_GIMPLIFY_VA_ARG_EXPR
13401#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
13402
13403#undef TARGET_INIT_BUILTINS
13404#define TARGET_INIT_BUILTINS aarch64_init_builtins
13405
13406#undef TARGET_LEGITIMATE_ADDRESS_P
13407#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
13408
13409#undef TARGET_LEGITIMATE_CONSTANT_P
13410#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
13411
13412#undef TARGET_LIBGCC_CMP_RETURN_MODE
13413#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
13414
38e8f663 13415#undef TARGET_LRA_P
98d404be 13416#define TARGET_LRA_P hook_bool_void_true
38e8f663 13417
ac2b960f
YZ
13418#undef TARGET_MANGLE_TYPE
13419#define TARGET_MANGLE_TYPE aarch64_mangle_type
13420
43e9d192
IB
13421#undef TARGET_MEMORY_MOVE_COST
13422#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
13423
26e0ff94
WD
13424#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
13425#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
13426
43e9d192
IB
13427#undef TARGET_MUST_PASS_IN_STACK
13428#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
13429
13430/* This target hook should return true if accesses to volatile bitfields
13431 should use the narrowest mode possible. It should return false if these
13432 accesses should use the bitfield container type. */
13433#undef TARGET_NARROW_VOLATILE_BITFIELD
13434#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
13435
13436#undef TARGET_OPTION_OVERRIDE
13437#define TARGET_OPTION_OVERRIDE aarch64_override_options
13438
13439#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
13440#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
13441 aarch64_override_options_after_change
13442
361fb3ee
KT
13443#undef TARGET_OPTION_SAVE
13444#define TARGET_OPTION_SAVE aarch64_option_save
13445
13446#undef TARGET_OPTION_RESTORE
13447#define TARGET_OPTION_RESTORE aarch64_option_restore
13448
13449#undef TARGET_OPTION_PRINT
13450#define TARGET_OPTION_PRINT aarch64_option_print
13451
5a2c8331
KT
13452#undef TARGET_OPTION_VALID_ATTRIBUTE_P
13453#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
13454
d78006d9
KT
13455#undef TARGET_SET_CURRENT_FUNCTION
13456#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function
13457
43e9d192
IB
13458#undef TARGET_PASS_BY_REFERENCE
13459#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
13460
13461#undef TARGET_PREFERRED_RELOAD_CLASS
13462#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
13463
cee66c68
WD
13464#undef TARGET_SCHED_REASSOCIATION_WIDTH
13465#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
13466
c2ec330c
AL
13467#undef TARGET_PROMOTED_TYPE
13468#define TARGET_PROMOTED_TYPE aarch64_promoted_type
13469
43e9d192
IB
13470#undef TARGET_SECONDARY_RELOAD
13471#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
13472
13473#undef TARGET_SHIFT_TRUNCATION_MASK
13474#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
13475
13476#undef TARGET_SETUP_INCOMING_VARARGS
13477#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
13478
13479#undef TARGET_STRUCT_VALUE_RTX
13480#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
13481
13482#undef TARGET_REGISTER_MOVE_COST
13483#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
13484
13485#undef TARGET_RETURN_IN_MEMORY
13486#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
13487
13488#undef TARGET_RETURN_IN_MSB
13489#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
13490
13491#undef TARGET_RTX_COSTS
7cc2145f 13492#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 13493
d126a4ae
AP
13494#undef TARGET_SCHED_ISSUE_RATE
13495#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
13496
d03f7e44
MK
13497#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
13498#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
13499 aarch64_sched_first_cycle_multipass_dfa_lookahead
13500
43e9d192
IB
13501#undef TARGET_TRAMPOLINE_INIT
13502#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
13503
13504#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
13505#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
13506
13507#undef TARGET_VECTOR_MODE_SUPPORTED_P
13508#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
13509
13510#undef TARGET_ARRAY_MODE_SUPPORTED_P
13511#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
13512
8990e73a
TB
13513#undef TARGET_VECTORIZE_ADD_STMT_COST
13514#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
13515
13516#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
13517#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
13518 aarch64_builtin_vectorization_cost
13519
43e9d192
IB
13520#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
13521#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
13522
42fc9a7f
JG
13523#undef TARGET_VECTORIZE_BUILTINS
13524#define TARGET_VECTORIZE_BUILTINS
13525
13526#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
13527#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
13528 aarch64_builtin_vectorized_function
13529
3b357264
JG
13530#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
13531#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
13532 aarch64_autovectorize_vector_sizes
13533
aa87aced
KV
13534#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
13535#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
13536 aarch64_atomic_assign_expand_fenv
13537
43e9d192
IB
13538/* Section anchor support. */
13539
13540#undef TARGET_MIN_ANCHOR_OFFSET
13541#define TARGET_MIN_ANCHOR_OFFSET -256
13542
13543/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
13544 byte offset; we can do much more for larger data types, but have no way
13545 to determine the size of the access. We assume accesses are aligned. */
13546#undef TARGET_MAX_ANCHOR_OFFSET
13547#define TARGET_MAX_ANCHOR_OFFSET 4095
13548
db0253a4
TB
13549#undef TARGET_VECTOR_ALIGNMENT
13550#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
13551
13552#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
13553#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
13554 aarch64_simd_vector_alignment_reachable
13555
88b08073
JG
13556/* vec_perm support. */
13557
13558#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
13559#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
13560 aarch64_vectorize_vec_perm_const_ok
13561
c2ec330c
AL
13562#undef TARGET_INIT_LIBFUNCS
13563#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
70f09188 13564
706b2314 13565#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
13566#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
13567
5cb74e90
RR
13568#undef TARGET_FLAGS_REGNUM
13569#define TARGET_FLAGS_REGNUM CC_REGNUM
13570
78607708
TV
13571#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
13572#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
13573
a3125fc2
CL
13574#undef TARGET_ASAN_SHADOW_OFFSET
13575#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
13576
0c4ec427
RE
13577#undef TARGET_LEGITIMIZE_ADDRESS
13578#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
13579
d3006da6
JG
13580#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
13581#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
13582 aarch64_use_by_pieces_infrastructure_p
13583
594bdd53
FY
13584#undef TARGET_CAN_USE_DOLOOP_P
13585#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
13586
6a569cdd
KT
13587#undef TARGET_SCHED_MACRO_FUSION_P
13588#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
13589
13590#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
13591#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
13592
350013bc
BC
13593#undef TARGET_SCHED_FUSION_PRIORITY
13594#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
13595
7b841a12
JW
13596#undef TARGET_UNSPEC_MAY_TRAP_P
13597#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p
13598
1b1e81f8
JW
13599#undef TARGET_USE_PSEUDO_PIC_REG
13600#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
13601
43e9d192
IB
13602struct gcc_target targetm = TARGET_INITIALIZER;
13603
13604#include "gt-aarch64.h"