/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2017 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "flags.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "langhooks.h"
#include "opts.h"
#include "params.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "target-globals.h"
#include "common/common-target.h"
#include "selftest.h"
#include "selftest-rtl.h"

/* This file should be included last.  */
#include "target-def.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
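/* For instance, under the LP64 ABI (POINTER_SIZE == 64) this evaluates
   to 8, while under ILP32 it evaluates to 4.  */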

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC:
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

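/* Illustrative examples of the address forms above (assembly syntax is
   sketched for clarity only; it is not taken from this file):
     ADDRESS_REG_IMM    ldr x0, [x1, #16]
     ADDRESS_REG_WB     ldr x0, [x1, #16]!   or   ldr x0, [x1], #16
     ADDRESS_REG_REG    ldr x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   ldr x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW   ldr x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM     add/ldr using a #:lo12:sym relocation
     ADDRESS_SYMBOLIC   pc-relative load from a literal-pool label.  */
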
struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
						     const_tree,
						     machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
						 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
							 const_tree type,
							 int misalignment,
							 bool is_packed);
static machine_mode
aarch64_simd_container_mode (machine_mode mode, unsigned width);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Global flag for PC relative loads.  */
bool aarch64_pcrelative_literal_loads;

/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
    {
      1, /* hi */
      0, /* si */
      0, /* di */
      1, /* ti */
    },
  0, /* pre_modify */
  0, /* post_modify */
  0, /* register_offset */
  0, /* register_sextend */
  0, /* register_zextend */
  0 /* imm_offset */
};

static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
    {
      0, /* hi */
      0, /* si */
      0, /* di */
      2, /* ti */
    },
  0, /* pre_modify */
  0, /* post_modify */
  1, /* register_offset */
  1, /* register_sextend */
  2, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
    {
      1, /* hi */
      0, /* si */
      0, /* di */
      1, /* ti */
    },
  1, /* pre_modify */
  0, /* post_modify */
  0, /* register_offset */
  1, /* register_sextend */
  1, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
{
    {
      1, /* hi */
      1, /* si */
      1, /* di */
      2, /* ti */
    },
  0, /* pre_modify */
  0, /* post_modify */
  2, /* register_offset */
  3, /* register_sextend */
  3, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost exynosm1_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost (actual, 4 and 9).  */
  9, /* GP2FP */
  9, /* FP2GP */
  1 /* FP2FP */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP */
  2, /* GP2FP */
  6, /* FP2GP */
  4 /* FP2FP */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP */
  8, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost qdf24xx_regmove_cost =
{
  2, /* GP2GP */
  /* Avoid the use of int<->fp moves for spilling.  */
  6, /* GP2FP */
  6, /* FP2GP */
  4 /* FP2FP */
};

static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of int<->fp moves for spilling.  */
  8, /* GP2FP */
  8, /* FP2GP */
  4 /* FP2FP */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  1, /* scalar_load_cost */
  1, /* scalar_store_cost */
  1, /* vec_int_stmt_cost */
  1, /* vec_fp_stmt_cost */
  2, /* vec_permute_cost */
  1, /* vec_to_scalar_cost */
  1, /* scalar_to_vec_cost */
  1, /* vec_align_load_cost */
  1, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  3, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* ThunderX costs for vector insn classes.  */
static const struct cpu_vector_cost thunderx_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  3, /* scalar_load_cost */
  1, /* scalar_store_cost */
  4, /* vec_int_stmt_cost */
  1, /* vec_fp_stmt_cost */
  4, /* vec_permute_cost */
  2, /* vec_to_scalar_cost */
  2, /* scalar_to_vec_cost */
  3, /* vec_align_load_cost */
  5, /* vec_unalign_load_cost */
  5, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  3, /* cond_taken_branch_cost */
  3 /* cond_not_taken_branch_cost */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  4, /* scalar_load_cost */
  1, /* scalar_store_cost */
  2, /* vec_int_stmt_cost */
  2, /* vec_fp_stmt_cost */
  3, /* vec_permute_cost */
  8, /* vec_to_scalar_cost */
  8, /* scalar_to_vec_cost */
  4, /* vec_align_load_cost */
  4, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  1, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

static const struct cpu_vector_cost exynosm1_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  5, /* scalar_load_cost */
  1, /* scalar_store_cost */
  3, /* vec_int_stmt_cost */
  3, /* vec_fp_stmt_cost */
  3, /* vec_permute_cost */
  3, /* vec_to_scalar_cost */
  3, /* scalar_to_vec_cost */
  5, /* vec_align_load_cost */
  5, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  1, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  5, /* scalar_load_cost */
  1, /* scalar_store_cost */
  2, /* vec_int_stmt_cost */
  2, /* vec_fp_stmt_cost */
  2, /* vec_permute_cost */
  4, /* vec_to_scalar_cost */
  4, /* scalar_to_vec_cost */
  10, /* vec_align_load_cost */
  10, /* vec_unalign_load_cost */
  2, /* vec_unalign_store_cost */
  2, /* vec_store_cost */
  2, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* Costs for vector insn classes for Vulcan.  */
static const struct cpu_vector_cost thunderx2t99_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  6, /* scalar_fp_stmt_cost */
  4, /* scalar_load_cost */
  1, /* scalar_store_cost */
  5, /* vec_int_stmt_cost */
  6, /* vec_fp_stmt_cost */
  3, /* vec_permute_cost */
  6, /* vec_to_scalar_cost */
  5, /* scalar_to_vec_cost */
  8, /* vec_align_load_cost */
  8, /* vec_unalign_load_cost */
  4, /* vec_unalign_store_cost */
  4, /* vec_store_cost */
  2, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  1,  /* Predictable.  */
  3   /* Unpredictable.  */
};

/* Generic approximation modes.  */
static const cpu_approx_modes generic_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_NONE, /* sqrt */
  AARCH64_APPROX_NONE  /* recip_sqrt */
};

/* Approximation modes for Exynos M1.  */
static const cpu_approx_modes exynosm1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_ALL,  /* sqrt */
  AARCH64_APPROX_ALL   /* recip_sqrt */
};

/* Approximation modes for X-Gene 1.  */
static const cpu_approx_modes xgene1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_NONE, /* sqrt */
  AARCH64_APPROX_ALL   /* recip_sqrt */
};

/* Generic prefetch settings (which disable prefetch).  */
static const cpu_prefetch_tune generic_prefetch_tune =
{
  0,   /* num_slots */
  -1,  /* l1_cache_size */
  -1,  /* l1_cache_line_size */
  -1,  /* l2_cache_size */
  -1   /* default_opt_level */
};

static const cpu_prefetch_tune exynosm1_prefetch_tune =
{
  0,   /* num_slots */
  -1,  /* l1_cache_size */
  64,  /* l1_cache_line_size */
  -1,  /* l2_cache_size */
  -1   /* default_opt_level */
};

static const cpu_prefetch_tune qdf24xx_prefetch_tune =
{
  4,    /* num_slots */
  32,   /* l1_cache_size */
  64,   /* l1_cache_line_size */
  1024, /* l2_cache_size */
  3     /* default_opt_level */
};

static const cpu_prefetch_tune thunderxt88_prefetch_tune =
{
  8,       /* num_slots */
  32,      /* l1_cache_size */
  128,     /* l1_cache_line_size */
  16*1024, /* l2_cache_size */
  3        /* default_opt_level */
};

static const cpu_prefetch_tune thunderx_prefetch_tune =
{
  8,   /* num_slots */
  32,  /* l1_cache_size */
  128, /* l1_cache_line_size */
  -1,  /* l2_cache_size */
  -1   /* default_opt_level */
};

static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
{
  8,   /* num_slots */
  32,  /* l1_cache_size */
  64,  /* l1_cache_line_size */
  256, /* l2_cache_size */
  -1   /* default_opt_level */
};

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  2, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
  8,  /* function_align.  */
  4,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa35_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  1, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  16, /* function_align.  */
  4,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  2, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  16, /* function_align.  */
  4,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  16, /* function_align.  */
  4,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  16, /* function_align.  */
  4,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa73_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  2, /* issue_rate.  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  16, /* function_align.  */
  4,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params exynosm1_tunings =
{
  &exynosm1_extra_costs,
  &exynosm1_addrcost_table,
  &exynosm1_regmove_cost,
  &exynosm1_vector_cost,
  &generic_branch_cost,
  &exynosm1_approx_modes,
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
  4,  /* function_align.  */
  4,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  48, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &exynosm1_prefetch_tune
};

static const struct tune_params thunderxt88_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  6, /* memmov_cost */
  2, /* issue_rate */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags.  */
  &thunderxt88_prefetch_tune
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  6, /* memmov_cost */
  2, /* issue_rate */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags.  */
  &thunderx_prefetch_tune
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  6, /* memmov_cost */
  4, /* issue_rate */
  AARCH64_FUSE_NOTHING, /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params qdf24xx_tunings =
{
  &qdf24xx_extra_costs,
  &generic_addrcost_table,
  &qdf24xx_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  4, /* issue_rate */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &qdf24xx_prefetch_tune
};

static const struct tune_params thunderx2t99_tunings =
{
  &thunderx2t99_extra_costs,
  &thunderx2t99_addrcost_table,
  &thunderx2t99_regmove_cost,
  &thunderx2t99_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  4, /* issue_rate.  */
  (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  3,  /* int_reassoc_width.  */
  2,  /* fp_reassoc_width.  */
  2,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &thunderx2t99_prefetch_tune
};

/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { NULL, NULL }
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor ident;
  enum aarch64_processor sched_core;
  enum aarch64_arch arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
  {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
#include "aarch64-arches.def"
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
  {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
   all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
   FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
  {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
   AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};


/* Target specification.  These are populated by the -march, -mtune, -mcpu
   handling code or by target attributes.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

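/* For example, AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ) yields
   AARCH64_NE: the codes are laid out in inverse pairs, so toggling the
   low bit (X ^ 1) inverts the condition, and aarch64_condition_codes[]
   maps each code to its mnemonic ("eq", "ne", ...).  */
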
/* Generate code to enable conditional branches in functions over 1 MiB.  */
const char *
aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
			const char * branch_format)
{
  rtx_code_label * tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];
  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
			       CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = targetm.strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);
  return "";
}

void
aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
{
  const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
  if (TARGET_GENERAL_REGS_ONLY)
    error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
  else
    error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg);
}

/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
   The register allocator chooses ALL_REGS if FP_REGS and GENERAL_REGS have
   the same cost even if ALL_REGS has a much larger cost.  ALL_REGS is also
   used if the cost of both FP_REGS and GENERAL_REGS is lower than the memory
   cost (in this case the best class is the lowest cost one).  Using ALL_REGS
   irrespective of its cost results in bad allocations with many redundant
   int<->FP moves which are expensive on various cores.
   To avoid this we don't allow ALL_REGS as the allocno class, but force a
   decision between FP_REGS and GENERAL_REGS.  We use the allocno class if it
   isn't ALL_REGS.  Similarly, use the best class if it isn't ALL_REGS.
   Otherwise set the allocno class depending on the mode.
   The result of this is that it is no longer inefficient to have a higher
   memory move cost than the register move cost.  */

static reg_class_t
aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
					 reg_class_t best_class)
{
  machine_mode mode;

  if (allocno_class != ALL_REGS)
    return allocno_class;

  if (best_class != ALL_REGS)
    return best_class;

  mode = PSEUDO_REGNO_MODE (regno);
  return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
}

static unsigned int
aarch64_min_divisions_for_recip_mul (machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}

static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
			     machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params.vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params.int_reassoc_width;
  if (FLOAT_MODE_P (mode))
    return aarch64_tune_params.fp_reassoc_width;
  return 1;
}

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && (AARCH64_VALID_SIMD_QREG_MODE (mode)
	  || AARCH64_VALID_SIMD_DREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}

/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
				     machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
	return mode;
      else
	return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}

/* Return true if calls to DECL should be treated as
   long-calls (ie called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (ie called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if calls to symbol-ref SYM should not go through
   plt stubs.  */

bool
aarch64_is_noplt_call_p (rtx sym)
{
  const_tree decl = SYMBOL_REF_DECL (sym);

  if (flag_pic
      && decl
      && (!flag_plt
	  || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
      && !targetm.binds_local_p (decl))
    return true;

  return false;
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}

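/* Worked example (illustrative only): for DImode, EXTRACT_IMM == 34 and
   MULT_IMM == 4 satisfy the checks above (34 & ~7 == 32 is a power of two,
   34 & 7 == 2 <= 4, and 4 == 1 << 2), i.e. the extract describes a 32-bit
   value that has been extended and shifted left by 2.  */
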
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* We'll allow lo_sum's in our legitimate addresses so that combine can
   take care of combining addresses where necessary, but for generation
   purposes, we'll generate the address as:

   RTL                                Absolute
   tmp = hi (symbol_ref);             adrp x1, foo
   dest = lo_sum (tmp, symbol_ref);   add  dest, x1, :lo_12:foo
                                      nop

   PIC                                TLS
   adrp x1, :got:foo                  adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]           add  dest, tmp, :tlsgd_lo12:foo
                                      bl   __tls_get_addr
                                      nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp
   mrs  tp, tpidr_el0
   add  dest, dest, tp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm, lsl #12
   add  t0, t0, #:tprel_lo12_nc:imm
*/

static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode.  */
	rtx tmp_reg = dest;
	machine_mode mode = GET_MODE (dest);

	gcc_assert (mode == Pmode || mode == ptr_mode);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (dest, imm));
      return;

    case SYMBOL_SMALL_GOT_28K:
      {
	machine_mode mode = GET_MODE (dest);
	rtx gp_rtx = pic_offset_table_rtx;
	rtx insn;
	rtx mem;

	/* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
	   here before rtl expand.  Tree IVOPT will generate rtl pattern to
	   decide rtx costs, in which case pic_offset_table_rtx is not
	   initialized.  For that case no need to generate the first adrp
	   instruction as the final cost for global variable access is
	   one instruction.  */
	if (gp_rtx != NULL)
	  {
	    /* -fpic for -mcmodel=small allows a 32K GOT table size (but we
	       are using the page base as GOT base, the first page may be
	       wasted; in the worst scenario, there is only 28K space for
	       the GOT).

	       The generated instruction sequence for accessing a global
	       variable is:

		 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]

	       Only one instruction is needed.  But we must initialize
	       pic_offset_table_rtx properly.  We generate an initializing
	       insn for every global access, and allow CSE to remove the
	       redundant ones.

	       The final instruction sequence will look like the following
	       for multiple global variable accesses:

		 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_

		 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
		 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
		 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
		 ...  */

	    rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
	    crtl->uses_pic_offset_table = 1;
	    emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));

	    if (mode != GET_MODE (gp_rtx))
	      gp_rtx = gen_lowpart (mode, gp_rtx);
	  }

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
	    else
	      insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);

	    mem = XVECEXP (SET_SRC (insn), 0, 0);
	  }
	else
	  {
	    gcc_assert (mode == Pmode);

	    insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
	    mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
	  }

	/* The operand is expected to be a MEM.  Whenever the related insn
	   pattern changes, the code above which calculates MEM should be
	   updated.  */
	gcc_assert (GET_CODE (mem) == MEM);
	MEM_READONLY_P (mem) = 1;
	MEM_NOTRAP_P (mem) = 1;
	emit_insn (insn);
	return;
      }

    case SYMBOL_SMALL_GOT_4G:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */

	rtx insn;
	rtx mem;
	rtx tmp_reg = dest;
	machine_mode mode = GET_MODE (dest);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
	    else
	      insn = gen_ldr_got_small_si (dest, tmp_reg, imm);

	    mem = XVECEXP (SET_SRC (insn), 0, 0);
	  }
	else
	  {
	    gcc_assert (mode == Pmode);

	    insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
	    mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
	  }

	gcc_assert (GET_CODE (mem) == MEM);
	MEM_READONLY_P (mem) = 1;
	MEM_NOTRAP_P (mem) = 1;
	emit_insn (insn);
	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx_insn *insns;
	machine_mode mode = GET_MODE (dest);
	rtx result = gen_rtx_REG (mode, R0_REGNUM);

	start_sequence ();
	if (TARGET_ILP32)
	  aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
	else
	  aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	machine_mode mode = GET_MODE (dest);
	rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
	rtx tp;

	gcc_assert (mode == Pmode || mode == ptr_mode);

	/* In ILP32, the got entry is always of SImode size.  Unlike
	   small GOT, the dest is fixed at reg 0.  */
	if (TARGET_ILP32)
	  emit_insn (gen_tlsdesc_small_si (imm));
	else
	  emit_insn (gen_tlsdesc_small_di (imm));
	tp = aarch64_load_tp (NULL);

	if (mode != Pmode)
	  tp = gen_lowpart (mode, tp);

	emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TLSIE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different tlsie_small
	   patterns here (two patterns for ILP32).  */
	machine_mode mode = GET_MODE (dest);
	rtx tmp_reg = gen_reg_rtx (mode);
	rtx tp = aarch64_load_tp (NULL);

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_tlsie_small_di (tmp_reg, imm));
	    else
	      {
		emit_insn (gen_tlsie_small_si (tmp_reg, imm));
		tp = gen_lowpart (mode, tp);
	      }
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
	  }

	emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TLSLE12:
    case SYMBOL_TLSLE24:
    case SYMBOL_TLSLE32:
    case SYMBOL_TLSLE48:
      {
	machine_mode mode = GET_MODE (dest);
	rtx tp = aarch64_load_tp (NULL);

	if (mode != Pmode)
	  tp = gen_lowpart (mode, tp);

	switch (type)
	  {
	  case SYMBOL_TLSLE12:
	    emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
		       (dest, tp, imm));
	    break;
	  case SYMBOL_TLSLE24:
	    emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
		       (dest, tp, imm));
	    break;
	  case SYMBOL_TLSLE32:
	    emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
		       (dest, imm));
	    emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
		       (dest, dest, tp));
	    break;
	  case SYMBOL_TLSLE48:
	    emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
		       (dest, imm));
	    emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
		       (dest, dest, tp));
	    break;
	  default:
	    gcc_unreachable ();
	  }

	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    case SYMBOL_TINY_TLSIE:
      {
	machine_mode mode = GET_MODE (dest);
	rtx tp = aarch64_load_tp (NULL);

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
	    else
	      {
		tp = gen_lowpart (mode, tp);
		emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
	      }
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
	  }

	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    default:
      gcc_unreachable ();
    }
}

/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}

/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  src_lo = gen_lowpart (word_mode, src);
	  src_hi = gen_highpart (word_mode, src);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
	    }
	  return;
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  dst_lo = gen_lowpart (word_mode, dst);
	  dst_hi = gen_highpart (word_mode, dst);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
	    }
	  return;
	}
    }

  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
}

bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}

/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  machine_mode src_mode = GET_MODE (src1);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));
  gcc_assert (register_operand (dst, dst_mode)
	      && register_operand (src1, src_mode)
	      && register_operand (src2, src_mode));

  rtx (*gen) (rtx, rtx, rtx);

  switch (src_mode)
    {
    case E_V8QImode:
      gen = gen_aarch64_simd_combinev8qi;
      break;
    case E_V4HImode:
      gen = gen_aarch64_simd_combinev4hi;
      break;
    case E_V2SImode:
      gen = gen_aarch64_simd_combinev2si;
      break;
    case E_V4HFmode:
      gen = gen_aarch64_simd_combinev4hf;
      break;
    case E_V2SFmode:
      gen = gen_aarch64_simd_combinev2sf;
      break;
    case E_DImode:
      gen = gen_aarch64_simd_combinedi;
      break;
    case E_DFmode:
      gen = gen_aarch64_simd_combinedf;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (dst, src1, src2));
  return;
}

/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  machine_mode src_mode = GET_MODE (src);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case E_V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case E_V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case E_V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case E_V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case E_V8HFmode:
	  gen = gen_aarch64_split_simd_movv8hf;
	  break;
	case E_V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case E_V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
      return;
    }
}

bool
aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
			      machine_mode ymode, rtx y)
{
  rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
  gcc_assert (r != NULL);
  return rtx_equal_p (x, r);
}


static rtx
aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}


static rtx
aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
	 might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}

static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
				machine_mode mode)
{
  int i;
  unsigned HOST_WIDE_INT val, val2, mask;
  int one_match, zero_match;
  int num_insns;

  val = INTVAL (imm);

  if (aarch64_move_imm (val, mode))
    {
      if (generate)
	emit_insn (gen_rtx_SET (dest, imm));
      return 1;
    }

  /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
     (with XXXX non-zero).  In that case check to see if the move can be done
     in a smaller mode.  */
  val2 = val & 0xffffffff;
  if (mode == DImode
      && aarch64_move_imm (val2, SImode)
      && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
    {
      if (generate)
	emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));

      /* Check if we have to emit a second instruction by checking to see
	 if any of the upper 32 bits of the original DI mode value is set.  */
1848 if any of the upper 32 bits of the original DI mode value is set. */
1849 if (val == val2)
1850 return 1;
1851
1852 i = (val >> 48) ? 48 : 32;
1853
1854 if (generate)
1855 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1856 GEN_INT ((val >> i) & 0xffff)));
1857
1858 return 2;
1859 }
1860
9a4865db 1861 if ((val >> 32) == 0 || mode == SImode)
43e9d192 1862 {
82614948
RR
1863 if (generate)
1864 {
9a4865db
WD
1865 emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
1866 if (mode == SImode)
1867 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1868 GEN_INT ((val >> 16) & 0xffff)));
1869 else
1870 emit_insn (gen_insv_immdi (dest, GEN_INT (16),
1871 GEN_INT ((val >> 16) & 0xffff)));
82614948 1872 }
9a4865db 1873 return 2;
43e9d192
IB
1874 }
1875
1876 /* Remaining cases are all for DImode. */
1877
43e9d192 1878 mask = 0xffff;
9a4865db
WD
1879 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
1880 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
1881 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
1882 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
43e9d192 1883
62c8d76c 1884 if (zero_match != 2 && one_match != 2)
43e9d192 1885 {
62c8d76c
WD
1886 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
1887 For a 64-bit bitmask try whether changing 16 bits to all ones or
1888 zeroes creates a valid bitmask. To check any repeated bitmask,
1889 try using 16 bits from the other 32-bit half of val. */
43e9d192 1890
62c8d76c 1891 for (i = 0; i < 64; i += 16, mask <<= 16)
43e9d192 1892 {
62c8d76c
WD
1893 val2 = val & ~mask;
1894 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1895 break;
1896 val2 = val | mask;
1897 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1898 break;
1899 val2 = val2 & ~mask;
1900 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
1901 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1902 break;
43e9d192 1903 }
62c8d76c 1904 if (i != 64)
43e9d192 1905 {
62c8d76c 1906 if (generate)
43e9d192 1907 {
62c8d76c
WD
1908 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
1909 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
9a4865db 1910 GEN_INT ((val >> i) & 0xffff)));
43e9d192 1911 }
1312b1ba 1912 return 2;
43e9d192
IB
1913 }
1914 }
1915
9a4865db
WD
1916 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
1917 are emitted by the initial mov. If one_match > zero_match, skip set bits,
1918 otherwise skip zero bits. */
2c274197 1919
9a4865db 1920 num_insns = 1;
43e9d192 1921 mask = 0xffff;
9a4865db
WD
1922 val2 = one_match > zero_match ? ~val : val;
1923 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
1924
1925 if (generate)
1926 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
1927 ? (val | ~(mask << i))
1928 : (val & (mask << i)))));
1929 for (i += 16; i < 64; i += 16)
43e9d192 1930 {
9a4865db
WD
1931 if ((val2 & (mask << i)) == 0)
1932 continue;
1933 if (generate)
1934 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1935 GEN_INT ((val >> i) & 0xffff)));
1936 num_insns ++;
82614948
RR
1937 }
1938
1939 return num_insns;
1940}
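/* Worked example of the decomposition above (output shown only as a
   rough sketch of what the mov/insv patterns assemble to): for the
   DImode constant 0x0000cafe00001234, the low 32 bits are a valid
   SImode move and only bits [47:32] of the upper half are set, so the
   smaller-mode case applies and two instructions suffice:

       mov  x0, 0x1234
       movk x0, 0xcafe, lsl 32

   A constant such as 0xffffffffffff1234 is instead caught by
   aarch64_move_imm as a single MOVN, and repeating bit patterns are
   caught as bitmask (ORR-style) immediates.  */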
1941
1942
1943void
1944aarch64_expand_mov_immediate (rtx dest, rtx imm)
1945{
1946 machine_mode mode = GET_MODE (dest);
1947
1948 gcc_assert (mode == SImode || mode == DImode);
1949
1950 /* Check on what type of symbol it is. */
1951 if (GET_CODE (imm) == SYMBOL_REF
1952 || GET_CODE (imm) == LABEL_REF
1953 || GET_CODE (imm) == CONST)
1954 {
1955 rtx mem, base, offset;
1956 enum aarch64_symbol_type sty;
1957
1958 /* If we have (const (plus symbol offset)), separate out the offset
1959 before we start classifying the symbol. */
1960 split_const (imm, &base, &offset);
1961
a6e0bfa7 1962 sty = aarch64_classify_symbol (base, offset);
82614948
RR
1963 switch (sty)
1964 {
1965 case SYMBOL_FORCE_TO_MEM:
1966 if (offset != const0_rtx
1967 && targetm.cannot_force_const_mem (mode, imm))
1968 {
1969 gcc_assert (can_create_pseudo_p ());
1970 base = aarch64_force_temporary (mode, dest, base);
1971 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1972 aarch64_emit_move (dest, base);
1973 return;
1974 }
b4f50fd4 1975
82614948
RR
1976 mem = force_const_mem (ptr_mode, imm);
1977 gcc_assert (mem);
b4f50fd4
RR
1978
1979 /* If we aren't generating PC relative literals, then
1980 we need to expand the literal pool access carefully.
1981 This is something that needs to be done in a number
1982 of places, so could well live as a separate function. */
9ee6540a 1983 if (!aarch64_pcrelative_literal_loads)
b4f50fd4
RR
1984 {
1985 gcc_assert (can_create_pseudo_p ());
1986 base = gen_reg_rtx (ptr_mode);
1987 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
00eee3fa
WD
1988 if (ptr_mode != Pmode)
1989 base = convert_memory_address (Pmode, base);
b4f50fd4
RR
1990 mem = gen_rtx_MEM (ptr_mode, base);
1991 }
1992
82614948
RR
1993 if (mode != ptr_mode)
1994 mem = gen_rtx_ZERO_EXTEND (mode, mem);
b4f50fd4 1995
f7df4a84 1996 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 1997
82614948
RR
1998 return;
1999
2000 case SYMBOL_SMALL_TLSGD:
2001 case SYMBOL_SMALL_TLSDESC:
79496620 2002 case SYMBOL_SMALL_TLSIE:
1b1e81f8 2003 case SYMBOL_SMALL_GOT_28K:
6642bdb4 2004 case SYMBOL_SMALL_GOT_4G:
82614948 2005 case SYMBOL_TINY_GOT:
5ae7caad 2006 case SYMBOL_TINY_TLSIE:
82614948
RR
2007 if (offset != const0_rtx)
2008 {
2009 gcc_assert(can_create_pseudo_p ());
2010 base = aarch64_force_temporary (mode, dest, base);
2011 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
2012 aarch64_emit_move (dest, base);
2013 return;
2014 }
2015 /* FALLTHRU */
2016
82614948
RR
2017 case SYMBOL_SMALL_ABSOLUTE:
2018 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 2019 case SYMBOL_TLSLE12:
d18ba284 2020 case SYMBOL_TLSLE24:
cbf5629e
JW
2021 case SYMBOL_TLSLE32:
2022 case SYMBOL_TLSLE48:
82614948
RR
2023 aarch64_load_symref_appropriately (dest, imm, sty);
2024 return;
2025
2026 default:
2027 gcc_unreachable ();
2028 }
2029 }
2030
2031 if (!CONST_INT_P (imm))
2032 {
2033 if (GET_CODE (imm) == HIGH)
f7df4a84 2034 emit_insn (gen_rtx_SET (dest, imm));
82614948
RR
2035 else
2036 {
2037 rtx mem = force_const_mem (mode, imm);
2038 gcc_assert (mem);
f7df4a84 2039 emit_insn (gen_rtx_SET (dest, mem));
43e9d192 2040 }
82614948
RR
2041
2042 return;
43e9d192 2043 }
82614948
RR
2044
2045 aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
43e9d192
IB
2046}
2047
5be6b295
WD
2048/* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to hold a
2049 temporary value if necessary. FRAME_RELATED_P should be true if
2050 the RTX_FRAME_RELATED flag should be set and CFA adjustments added
2051 to the generated instructions. If SCRATCHREG is known to hold
2052 abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
2053 immediate again.
2054
2055 Since this function may be used to adjust the stack pointer, we must
2056 ensure that it cannot cause transient stack deallocation (for example
2057 by first incrementing SP and then decrementing when adjusting by a
2058 large immediate). */
c4ddc43a
JW
2059
2060static void
5be6b295
WD
2061aarch64_add_constant_internal (machine_mode mode, int regnum, int scratchreg,
2062 HOST_WIDE_INT delta, bool frame_related_p,
2063 bool emit_move_imm)
c4ddc43a
JW
2064{
2065 HOST_WIDE_INT mdelta = abs_hwi (delta);
2066 rtx this_rtx = gen_rtx_REG (mode, regnum);
37d6a4b7 2067 rtx_insn *insn;
c4ddc43a 2068
c4ddc43a
JW
2069 if (!mdelta)
2070 return;
2071
5be6b295 2072 /* Single instruction adjustment. */
c4ddc43a
JW
2073 if (aarch64_uimm12_shift (mdelta))
2074 {
37d6a4b7
JW
2075 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta)));
2076 RTX_FRAME_RELATED_P (insn) = frame_related_p;
c4ddc43a
JW
2077 return;
2078 }
2079
5be6b295
WD
2080 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits.
2081 Only do this if mdelta is not a 16-bit move as adjusting using a move
2082 is better. */
2083 if (mdelta < 0x1000000 && !aarch64_move_imm (mdelta, mode))
c4ddc43a
JW
2084 {
2085 HOST_WIDE_INT low_off = mdelta & 0xfff;
2086
2087 low_off = delta < 0 ? -low_off : low_off;
37d6a4b7
JW
2088 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (low_off)));
2089 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2090 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta - low_off)));
2091 RTX_FRAME_RELATED_P (insn) = frame_related_p;
c4ddc43a
JW
2092 return;
2093 }
2094
5be6b295 2095 /* Emit a move immediate if required and an addition/subtraction. */
c4ddc43a 2096 rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
5be6b295
WD
2097 if (emit_move_imm)
2098 aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, mode);
2099 insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx)
2100 : gen_add2_insn (this_rtx, scratch_rtx));
37d6a4b7
JW
2101 if (frame_related_p)
2102 {
2103 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2104 rtx adj = plus_constant (mode, this_rtx, delta);
2105 add_reg_note (insn , REG_CFA_ADJUST_CFA, gen_rtx_SET (this_rtx, adj));
2106 }
c4ddc43a
JW
2107}
2108
5be6b295
WD
2109static inline void
2110aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
2111 HOST_WIDE_INT delta)
2112{
2113 aarch64_add_constant_internal (mode, regnum, scratchreg, delta, false, true);
2114}
2115
2116static inline void
2117aarch64_add_sp (int scratchreg, HOST_WIDE_INT delta, bool emit_move_imm)
2118{
2119 aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, delta,
2120 true, emit_move_imm);
2121}
2122
2123static inline void
2124aarch64_sub_sp (int scratchreg, HOST_WIDE_INT delta, bool frame_related_p)
2125{
2126 aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, -delta,
2127 frame_related_p, true);
2128}
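/* Two worked examples of the adjustment logic above (asm shown only as
   an approximation of what the add/sub and mov patterns produce).
   aarch64_sub_sp with delta == 69648 (0x11010) cannot use a single
   shifted 12-bit immediate, is below 1<<24 and is not a 16-bit move
   immediate, so it is split into two subtractions:

       sub sp, sp, #16
       sub sp, sp, #69632

   whereas delta == 16400 (0x4010) is a valid 16-bit move immediate, so
   it is materialised in the scratch register instead (here assuming the
   caller passed IP0 / x16 as SCRATCHREG):

       mov  x16, 16400
       sub  sp, sp, x16  */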
2129
43e9d192 2130static bool
fee9ba42
JW
2131aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
2132 tree exp ATTRIBUTE_UNUSED)
43e9d192 2133{
fee9ba42 2134 /* Currently, always true. */
43e9d192
IB
2135 return true;
2136}
2137
2138/* Implement TARGET_PASS_BY_REFERENCE. */
2139
2140static bool
2141aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 2142 machine_mode mode,
43e9d192
IB
2143 const_tree type,
2144 bool named ATTRIBUTE_UNUSED)
2145{
2146 HOST_WIDE_INT size;
ef4bddc2 2147 machine_mode dummymode;
43e9d192
IB
2148 int nregs;
2149
2150 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
2151 size = (mode == BLKmode && type)
2152 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2153
aadc1c43
MHD
2154 /* Aggregates are passed by reference based on their size. */
2155 if (type && AGGREGATE_TYPE_P (type))
43e9d192 2156 {
aadc1c43 2157 size = int_size_in_bytes (type);
43e9d192
IB
2158 }
2159
 2160 /* Variable sized arguments are always passed by reference. */
2161 if (size < 0)
2162 return true;
2163
2164 /* Can this be a candidate to be passed in fp/simd register(s)? */
2165 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
2166 &dummymode, &nregs,
2167 NULL))
2168 return false;
2169
2170 /* Arguments which are variable sized or larger than 2 registers are
 2171 passed by reference unless they are a homogeneous floating-point
 2172 aggregate. */
2173 return size > 2 * UNITS_PER_WORD;
2174}
2175
2176/* Return TRUE if VALTYPE is padded to its least significant bits. */
2177static bool
2178aarch64_return_in_msb (const_tree valtype)
2179{
ef4bddc2 2180 machine_mode dummy_mode;
43e9d192
IB
2181 int dummy_int;
2182
2183 /* Never happens in little-endian mode. */
2184 if (!BYTES_BIG_ENDIAN)
2185 return false;
2186
2187 /* Only composite types smaller than or equal to 16 bytes can
2188 be potentially returned in registers. */
2189 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
2190 || int_size_in_bytes (valtype) <= 0
2191 || int_size_in_bytes (valtype) > 16)
2192 return false;
2193
2194 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
2195 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
2196 is always passed/returned in the least significant bits of fp/simd
2197 register(s). */
2198 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
2199 &dummy_mode, &dummy_int, NULL))
2200 return false;
2201
2202 return true;
2203}
2204
2205/* Implement TARGET_FUNCTION_VALUE.
2206 Define how to find the value returned by a function. */
2207
2208static rtx
2209aarch64_function_value (const_tree type, const_tree func,
2210 bool outgoing ATTRIBUTE_UNUSED)
2211{
ef4bddc2 2212 machine_mode mode;
43e9d192
IB
2213 int unsignedp;
2214 int count;
ef4bddc2 2215 machine_mode ag_mode;
43e9d192
IB
2216
2217 mode = TYPE_MODE (type);
2218 if (INTEGRAL_TYPE_P (type))
2219 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
2220
2221 if (aarch64_return_in_msb (type))
2222 {
2223 HOST_WIDE_INT size = int_size_in_bytes (type);
2224
2225 if (size % UNITS_PER_WORD != 0)
2226 {
2227 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2228 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2229 }
2230 }
2231
2232 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
2233 &ag_mode, &count, NULL))
2234 {
2235 if (!aarch64_composite_type_p (type, mode))
2236 {
2237 gcc_assert (count == 1 && mode == ag_mode);
2238 return gen_rtx_REG (mode, V0_REGNUM);
2239 }
2240 else
2241 {
2242 int i;
2243 rtx par;
2244
2245 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
2246 for (i = 0; i < count; i++)
2247 {
2248 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
2249 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2250 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
2251 XVECEXP (par, 0, i) = tmp;
2252 }
2253 return par;
2254 }
2255 }
2256 else
2257 return gen_rtx_REG (mode, R0_REGNUM);
2258}
2259
2260/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
2261 Return true if REGNO is the number of a hard register in which the values
2262 of called function may come back. */
2263
2264static bool
2265aarch64_function_value_regno_p (const unsigned int regno)
2266{
2267 /* Maximum of 16 bytes can be returned in the general registers. Examples
2268 of 16-byte return values are: 128-bit integers and 16-byte small
2269 structures (excluding homogeneous floating-point aggregates). */
2270 if (regno == R0_REGNUM || regno == R1_REGNUM)
2271 return true;
2272
2273 /* Up to four fp/simd registers can return a function value, e.g. a
2274 homogeneous floating-point aggregate having four members. */
2275 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 2276 return TARGET_FLOAT;
43e9d192
IB
2277
2278 return false;
2279}
2280
2281/* Implement TARGET_RETURN_IN_MEMORY.
2282
2283 If the type T of the result of a function is such that
2284 void func (T arg)
2285 would require that arg be passed as a value in a register (or set of
2286 registers) according to the parameter passing rules, then the result
2287 is returned in the same registers as would be used for such an
2288 argument. */
2289
2290static bool
2291aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
2292{
2293 HOST_WIDE_INT size;
ef4bddc2 2294 machine_mode ag_mode;
43e9d192
IB
2295 int count;
2296
2297 if (!AGGREGATE_TYPE_P (type)
2298 && TREE_CODE (type) != COMPLEX_TYPE
2299 && TREE_CODE (type) != VECTOR_TYPE)
2300 /* Simple scalar types always returned in registers. */
2301 return false;
2302
2303 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
2304 type,
2305 &ag_mode,
2306 &count,
2307 NULL))
2308 return false;
2309
2310 /* Types larger than 2 registers returned in memory. */
2311 size = int_size_in_bytes (type);
2312 return (size < 0 || size > 2 * UNITS_PER_WORD);
2313}
2314
2315static bool
ef4bddc2 2316aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2317 const_tree type, int *nregs)
2318{
2319 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2320 return aarch64_vfp_is_call_or_return_candidate (mode,
2321 type,
2322 &pcum->aapcs_vfp_rmode,
2323 nregs,
2324 NULL);
2325}
2326
985b8393 2327/* Given MODE and TYPE of a function argument, return the alignment in
43e9d192
IB
2328 bits. The idea is to suppress any stronger alignment requested by
2329 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
2330 This is a helper function for local use only. */
2331
985b8393 2332static unsigned int
ef4bddc2 2333aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192 2334{
75d6cc81 2335 if (!type)
985b8393 2336 return GET_MODE_ALIGNMENT (mode);
2ec07fa6 2337
75d6cc81 2338 if (integer_zerop (TYPE_SIZE (type)))
985b8393 2339 return 0;
43e9d192 2340
75d6cc81
AL
2341 gcc_assert (TYPE_MODE (type) == mode);
2342
2343 if (!AGGREGATE_TYPE_P (type))
985b8393 2344 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
75d6cc81
AL
2345
2346 if (TREE_CODE (type) == ARRAY_TYPE)
985b8393 2347 return TYPE_ALIGN (TREE_TYPE (type));
75d6cc81 2348
985b8393 2349 unsigned int alignment = 0;
75d6cc81 2350 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
985b8393
JJ
2351 if (TREE_CODE (field) == FIELD_DECL)
2352 alignment = std::max (alignment, DECL_ALIGN (field));
43e9d192 2353
985b8393 2354 return alignment;
43e9d192
IB
2355}
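/* Two illustrative cases for the rule above (types invented purely for
   the example): for 'struct { __int128 x; }' the only field has a
   128-bit DECL_ALIGN, so the function returns 128 and the C.8 / stack
   rounding rules in aarch64_layout_arg kick in.  For
   'struct __attribute__((aligned(16))) { char c; }' only the field
   alignments are inspected, so the result is 8 bits: the extra
   alignment requested by the user is deliberately ignored, as the
   comment above describes.  */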
2356
2357/* Layout a function argument according to the AAPCS64 rules. The rule
2358 numbers refer to the rule numbers in the AAPCS64. */
2359
2360static void
ef4bddc2 2361aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2362 const_tree type,
2363 bool named ATTRIBUTE_UNUSED)
2364{
2365 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2366 int ncrn, nvrn, nregs;
2367 bool allocate_ncrn, allocate_nvrn;
3abf17cf 2368 HOST_WIDE_INT size;
43e9d192
IB
2369
2370 /* We need to do this once per argument. */
2371 if (pcum->aapcs_arg_processed)
2372 return;
2373
2374 pcum->aapcs_arg_processed = true;
2375
3abf17cf
YZ
2376 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
2377 size
4f59f9f2
UB
2378 = ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
2379 UNITS_PER_WORD);
3abf17cf 2380
43e9d192
IB
2381 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
2382 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
2383 mode,
2384 type,
2385 &nregs);
2386
 2387 /* allocate_ncrn may give a false positive, but allocate_nvrn is quite reliable.
2388 The following code thus handles passing by SIMD/FP registers first. */
2389
2390 nvrn = pcum->aapcs_nvrn;
2391
 2392 /* C.1 - C.5 for floating point, homogeneous floating-point aggregates (HFA)
 2393 and homogeneous short-vector aggregates (HVA). */
2394 if (allocate_nvrn)
2395 {
261fb553
AL
2396 if (!TARGET_FLOAT)
2397 aarch64_err_no_fpadvsimd (mode, "argument");
2398
43e9d192
IB
2399 if (nvrn + nregs <= NUM_FP_ARG_REGS)
2400 {
2401 pcum->aapcs_nextnvrn = nvrn + nregs;
2402 if (!aarch64_composite_type_p (type, mode))
2403 {
2404 gcc_assert (nregs == 1);
2405 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
2406 }
2407 else
2408 {
2409 rtx par;
2410 int i;
2411 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2412 for (i = 0; i < nregs; i++)
2413 {
2414 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
2415 V0_REGNUM + nvrn + i);
2416 tmp = gen_rtx_EXPR_LIST
2417 (VOIDmode, tmp,
2418 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
2419 XVECEXP (par, 0, i) = tmp;
2420 }
2421 pcum->aapcs_reg = par;
2422 }
2423 return;
2424 }
2425 else
2426 {
2427 /* C.3 NSRN is set to 8. */
2428 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
2429 goto on_stack;
2430 }
2431 }
2432
2433 ncrn = pcum->aapcs_ncrn;
3abf17cf 2434 nregs = size / UNITS_PER_WORD;
43e9d192
IB
2435
 2436 /* C.6 - C.9, though the sign and zero extension semantics are
 2437 handled elsewhere. This is the case where the argument fits
 2438 entirely in general registers. */
2439 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
2440 {
43e9d192
IB
2441
2442 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
2443
2444 /* C.8 if the argument has an alignment of 16 then the NGRN is
2445 rounded up to the next even number. */
985b8393
JJ
2446 if (nregs == 2
2447 && ncrn % 2
2ec07fa6 2448 /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT
985b8393 2449 comparison is there because for > 16 * BITS_PER_UNIT
2ec07fa6
RR
2450 alignment nregs should be > 2 and therefore it should be
2451 passed by reference rather than value. */
985b8393
JJ
2452 && aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
2453 {
2454 ++ncrn;
2455 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
43e9d192 2456 }
2ec07fa6 2457
43e9d192
IB
2458 /* NREGS can be 0 when e.g. an empty structure is to be passed.
2459 A reg is still generated for it, but the caller should be smart
2460 enough not to use it. */
2461 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2ec07fa6 2462 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
43e9d192
IB
2463 else
2464 {
2465 rtx par;
2466 int i;
2467
2468 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2469 for (i = 0; i < nregs; i++)
2470 {
2471 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
2472 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2473 GEN_INT (i * UNITS_PER_WORD));
2474 XVECEXP (par, 0, i) = tmp;
2475 }
2476 pcum->aapcs_reg = par;
2477 }
2478
2479 pcum->aapcs_nextncrn = ncrn + nregs;
2480 return;
2481 }
2482
2483 /* C.11 */
2484 pcum->aapcs_nextncrn = NUM_ARG_REGS;
2485
2486 /* The argument is passed on stack; record the needed number of words for
3abf17cf 2487 this argument and align the total size if necessary. */
43e9d192 2488on_stack:
3abf17cf 2489 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
2ec07fa6 2490
985b8393 2491 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
4f59f9f2
UB
2492 pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size,
2493 16 / UNITS_PER_WORD);
43e9d192
IB
2494 return;
2495}
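/* Illustrative examples of the allocation above (register choices shown
   for a call with no earlier FP or GP arguments): an HFA such as
   'struct { float a, b, c, d; }' is a candidate with nregs == 4, so it
   is described as a PARALLEL of four SFmode registers v0..v3 at byte
   offsets 0, 4, 8 and 12.  An __int128 argument arriving when NGRN is
   odd has 16-byte alignment, so rule C.8 bumps NGRN to the next even
   number and the value is passed in an aligned pair such as x2/x3.  */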
2496
2497/* Implement TARGET_FUNCTION_ARG. */
2498
2499static rtx
ef4bddc2 2500aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2501 const_tree type, bool named)
2502{
2503 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2504 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
2505
2506 if (mode == VOIDmode)
2507 return NULL_RTX;
2508
2509 aarch64_layout_arg (pcum_v, mode, type, named);
2510 return pcum->aapcs_reg;
2511}
2512
2513void
2514aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
2515 const_tree fntype ATTRIBUTE_UNUSED,
2516 rtx libname ATTRIBUTE_UNUSED,
2517 const_tree fndecl ATTRIBUTE_UNUSED,
2518 unsigned n_named ATTRIBUTE_UNUSED)
2519{
2520 pcum->aapcs_ncrn = 0;
2521 pcum->aapcs_nvrn = 0;
2522 pcum->aapcs_nextncrn = 0;
2523 pcum->aapcs_nextnvrn = 0;
2524 pcum->pcs_variant = ARM_PCS_AAPCS64;
2525 pcum->aapcs_reg = NULL_RTX;
2526 pcum->aapcs_arg_processed = false;
2527 pcum->aapcs_stack_words = 0;
2528 pcum->aapcs_stack_size = 0;
2529
261fb553
AL
2530 if (!TARGET_FLOAT
2531 && fndecl && TREE_PUBLIC (fndecl)
2532 && fntype && fntype != error_mark_node)
2533 {
2534 const_tree type = TREE_TYPE (fntype);
2535 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
2536 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
2537 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
2538 &mode, &nregs, NULL))
2539 aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type");
2540 }
43e9d192
IB
2541 return;
2542}
2543
2544static void
2545aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 2546 machine_mode mode,
43e9d192
IB
2547 const_tree type,
2548 bool named)
2549{
2550 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2551 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
2552 {
2553 aarch64_layout_arg (pcum_v, mode, type, named);
2554 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
2555 != (pcum->aapcs_stack_words != 0));
2556 pcum->aapcs_arg_processed = false;
2557 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
2558 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
2559 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
2560 pcum->aapcs_stack_words = 0;
2561 pcum->aapcs_reg = NULL_RTX;
2562 }
2563}
2564
2565bool
2566aarch64_function_arg_regno_p (unsigned regno)
2567{
2568 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
2569 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
2570}
2571
2572/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
2573 PARM_BOUNDARY bits of alignment, but will be given anything up
2574 to STACK_BOUNDARY bits if the type requires it. This makes sure
2575 that both before and after the layout of each argument, the Next
2576 Stacked Argument Address (NSAA) will have a minimum alignment of
2577 8 bytes. */
2578
2579static unsigned int
ef4bddc2 2580aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192 2581{
985b8393
JJ
2582 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2583 return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
43e9d192
IB
2584}
2585
2586/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
2587
2588 Return true if an argument passed on the stack should be padded upwards,
2589 i.e. if the least-significant byte of the stack slot has useful data.
2590
2591 Small aggregate types are placed in the lowest memory address.
2592
2593 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
2594
2595bool
ef4bddc2 2596aarch64_pad_arg_upward (machine_mode mode, const_tree type)
43e9d192
IB
2597{
2598 /* On little-endian targets, the least significant byte of every stack
2599 argument is passed at the lowest byte address of the stack slot. */
2600 if (!BYTES_BIG_ENDIAN)
2601 return true;
2602
00edcfbe 2603 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
2604 the least significant byte of a stack argument is passed at the highest
2605 byte address of the stack slot. */
2606 if (type
00edcfbe
YZ
2607 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
2608 || POINTER_TYPE_P (type))
43e9d192
IB
2609 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
2610 return false;
2611
2612 /* Everything else padded upward, i.e. data in first byte of stack slot. */
2613 return true;
2614}
2615
2616/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
2617
 2618 It specifies padding for the last (and possibly the only)
 2619 element of a block move between registers and memory.  Assuming
 2620 the block is in memory, padding upward means that the last
 2621 element is padded after its most significant byte, while with
 2622 downward padding the last element is padded on its least
 2623 significant byte side.
2624
2625 Small aggregates and small complex types are always padded
2626 upwards.
2627
2628 We don't need to worry about homogeneous floating-point or
2629 short-vector aggregates; their move is not affected by the
2630 padding direction determined here. Regardless of endianness,
2631 each element of such an aggregate is put in the least
2632 significant bits of a fp/simd register.
2633
2634 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2635 register has useful data, and return the opposite if the most
2636 significant byte does. */
2637
2638bool
ef4bddc2 2639aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
2640 bool first ATTRIBUTE_UNUSED)
2641{
2642
2643 /* Small composite types are always padded upward. */
2644 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2645 {
2646 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2647 : GET_MODE_SIZE (mode));
2648 if (size < 2 * UNITS_PER_WORD)
2649 return true;
2650 }
2651
2652 /* Otherwise, use the default padding. */
2653 return !BYTES_BIG_ENDIAN;
2654}
2655
095a2d76 2656static scalar_int_mode
43e9d192
IB
2657aarch64_libgcc_cmp_return_mode (void)
2658{
2659 return SImode;
2660}
2661
a3eb8a52
EB
2662#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
2663
2664/* We use the 12-bit shifted immediate arithmetic instructions so values
2665 must be multiple of (1 << 12), i.e. 4096. */
2666#define ARITH_FACTOR 4096
2667
2668#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
2669#error Cannot use simple address calculation for stack probing
2670#endif
2671
2672/* The pair of scratch registers used for stack probing. */
2673#define PROBE_STACK_FIRST_REG 9
2674#define PROBE_STACK_SECOND_REG 10
2675
2676/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
2677 inclusive. These are offsets from the current stack pointer. */
2678
2679static void
2680aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
2681{
5f5c5e0f 2682 rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
a3eb8a52
EB
2683
2684 /* See the same assertion on PROBE_INTERVAL above. */
2685 gcc_assert ((first % ARITH_FACTOR) == 0);
2686
2687 /* See if we have a constant small number of probes to generate. If so,
2688 that's the easy case. */
2689 if (size <= PROBE_INTERVAL)
2690 {
2691 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
2692
2693 emit_set_insn (reg1,
5f5c5e0f 2694 plus_constant (Pmode,
a3eb8a52 2695 stack_pointer_rtx, -(first + base)));
5f5c5e0f 2696 emit_stack_probe (plus_constant (Pmode, reg1, base - size));
a3eb8a52
EB
2697 }
2698
2699 /* The run-time loop is made up of 8 insns in the generic case while the
2700 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
2701 else if (size <= 4 * PROBE_INTERVAL)
2702 {
2703 HOST_WIDE_INT i, rem;
2704
2705 emit_set_insn (reg1,
5f5c5e0f 2706 plus_constant (Pmode,
a3eb8a52
EB
2707 stack_pointer_rtx,
2708 -(first + PROBE_INTERVAL)));
2709 emit_stack_probe (reg1);
2710
2711 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
2712 it exceeds SIZE. If only two probes are needed, this will not
2713 generate any code. Then probe at FIRST + SIZE. */
2714 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
2715 {
2716 emit_set_insn (reg1,
5f5c5e0f 2717 plus_constant (Pmode, reg1, -PROBE_INTERVAL));
a3eb8a52
EB
2718 emit_stack_probe (reg1);
2719 }
2720
2721 rem = size - (i - PROBE_INTERVAL);
2722 if (rem > 256)
2723 {
2724 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2725
5f5c5e0f
EB
2726 emit_set_insn (reg1, plus_constant (Pmode, reg1, -base));
2727 emit_stack_probe (plus_constant (Pmode, reg1, base - rem));
a3eb8a52
EB
2728 }
2729 else
5f5c5e0f 2730 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
a3eb8a52
EB
2731 }
2732
2733 /* Otherwise, do the same as above, but in a loop. Note that we must be
2734 extra careful with variables wrapping around because we might be at
2735 the very top (or the very bottom) of the address space and we have
2736 to be able to handle this case properly; in particular, we use an
2737 equality test for the loop condition. */
2738 else
2739 {
5f5c5e0f 2740 rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REG);
a3eb8a52
EB
2741
2742 /* Step 1: round SIZE to the previous multiple of the interval. */
2743
2744 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
2745
2746
2747 /* Step 2: compute initial and final value of the loop counter. */
2748
2749 /* TEST_ADDR = SP + FIRST. */
2750 emit_set_insn (reg1,
5f5c5e0f 2751 plus_constant (Pmode, stack_pointer_rtx, -first));
a3eb8a52
EB
2752
2753 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
13f752b2
JL
2754 HOST_WIDE_INT adjustment = - (first + rounded_size);
2755 if (! aarch64_uimm12_shift (adjustment))
2756 {
2757 aarch64_internal_mov_immediate (reg2, GEN_INT (adjustment),
2758 true, Pmode);
2759 emit_set_insn (reg2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg2));
2760 }
2761 else
2762 {
2763 emit_set_insn (reg2,
2764 plus_constant (Pmode, stack_pointer_rtx, adjustment));
2765 }
2766
a3eb8a52
EB
2767 /* Step 3: the loop
2768
2769 do
2770 {
2771 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
2772 probe at TEST_ADDR
2773 }
2774 while (TEST_ADDR != LAST_ADDR)
2775
2776 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
2777 until it is equal to ROUNDED_SIZE. */
2778
5f5c5e0f 2779 emit_insn (gen_probe_stack_range (reg1, reg1, reg2));
a3eb8a52
EB
2780
2781
2782 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
2783 that SIZE is equal to ROUNDED_SIZE. */
2784
2785 if (size != rounded_size)
2786 {
2787 HOST_WIDE_INT rem = size - rounded_size;
2788
2789 if (rem > 256)
2790 {
2791 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2792
5f5c5e0f
EB
2793 emit_set_insn (reg2, plus_constant (Pmode, reg2, -base));
2794 emit_stack_probe (plus_constant (Pmode, reg2, base - rem));
a3eb8a52
EB
2795 }
2796 else
5f5c5e0f 2797 emit_stack_probe (plus_constant (Pmode, reg2, -rem));
a3eb8a52
EB
2798 }
2799 }
2800
2801 /* Make sure nothing is scheduled before we are done. */
2802 emit_insn (gen_blockage ());
2803}
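/* A small worked example of the first case above (asm only sketched;
   PROBE_INTERVAL is 4096 with the default STACK_CHECK_PROBE_INTERVAL_EXP
   and x9 is PROBE_STACK_FIRST_REG): for FIRST == 4096 and SIZE == 3000,
   BASE rounds up to 4096, so the probe lands exactly SIZE bytes past
   FIRST below the stack pointer:

       sub x9, sp, #8192
       str xzr, [x9, 1096]    // probes sp - 7096 == sp - (first + size)  */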
2804
2805/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
2806 absolute addresses. */
2807
2808const char *
2809aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
2810{
2811 static int labelno = 0;
2812 char loop_lab[32];
2813 rtx xops[2];
2814
2815 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
2816
2817 /* Loop. */
2818 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
2819
2820 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
2821 xops[0] = reg1;
2822 xops[1] = GEN_INT (PROBE_INTERVAL);
2823 output_asm_insn ("sub\t%0, %0, %1", xops);
2824
2825 /* Probe at TEST_ADDR. */
2826 output_asm_insn ("str\txzr, [%0]", xops);
2827
2828 /* Test if TEST_ADDR == LAST_ADDR. */
2829 xops[1] = reg2;
2830 output_asm_insn ("cmp\t%0, %1", xops);
2831
2832 /* Branch. */
2833 fputs ("\tb.ne\t", asm_out_file);
2834 assemble_name_raw (asm_out_file, loop_lab);
2835 fputc ('\n', asm_out_file);
2836
2837 return "";
2838}
2839
43e9d192
IB
2840static bool
2841aarch64_frame_pointer_required (void)
2842{
0b7f8166
MS
2843 /* In aarch64_override_options_after_change
2844 flag_omit_leaf_frame_pointer turns off the frame pointer by
2845 default. Turn it back on now if we've not got a leaf
2846 function. */
2847 if (flag_omit_leaf_frame_pointer
2848 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2849 return true;
43e9d192 2850
8144a493
WD
2851 /* Force a frame pointer for EH returns so the return address is at FP+8. */
2852 if (crtl->calls_eh_return)
2853 return true;
2854
0b7f8166 2855 return false;
43e9d192
IB
2856}
2857
2858/* Mark the registers that need to be saved by the callee and calculate
2859 the size of the callee-saved registers area and frame record (both FP
2860 and LR may be omitted). */
2861static void
2862aarch64_layout_frame (void)
2863{
2864 HOST_WIDE_INT offset = 0;
4b0685d9 2865 int regno, last_fp_reg = INVALID_REGNUM;
43e9d192
IB
2866
2867 if (reload_completed && cfun->machine->frame.laid_out)
2868 return;
2869
97826595
MS
2870#define SLOT_NOT_REQUIRED (-2)
2871#define SLOT_REQUIRED (-1)
2872
71bfb77a
WD
2873 cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
2874 cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
363ffa50 2875
43e9d192
IB
2876 /* First mark all the registers that really need to be saved... */
2877 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2878 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2879
2880 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2881 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2882
2883 /* ... that includes the eh data registers (if needed)... */
2884 if (crtl->calls_eh_return)
2885 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
2886 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2887 = SLOT_REQUIRED;
43e9d192
IB
2888
2889 /* ... and any callee saved register that dataflow says is live. */
2890 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2891 if (df_regs_ever_live_p (regno)
1c923b60
JW
2892 && (regno == R30_REGNUM
2893 || !call_used_regs[regno]))
97826595 2894 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2895
2896 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2897 if (df_regs_ever_live_p (regno)
2898 && !call_used_regs[regno])
4b0685d9
WD
2899 {
2900 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2901 last_fp_reg = regno;
2902 }
43e9d192
IB
2903
2904 if (frame_pointer_needed)
2905 {
2e1cdae5 2906 /* FP and LR are placed in the frame record. */
43e9d192 2907 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 2908 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 2909 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 2910 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
2e1cdae5 2911 offset += 2 * UNITS_PER_WORD;
43e9d192
IB
2912 }
2913
2914 /* Now assign stack slots for them. */
2e1cdae5 2915 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2916 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2917 {
2918 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 2919 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 2920 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 2921 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
363ffa50 2922 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2923 offset += UNITS_PER_WORD;
2924 }
2925
4b0685d9
WD
2926 HOST_WIDE_INT max_int_offset = offset;
2927 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2928 bool has_align_gap = offset != max_int_offset;
2929
43e9d192 2930 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2931 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192 2932 {
4b0685d9
WD
2933 /* If there is an alignment gap between integer and fp callee-saves,
2934 allocate the last fp register to it if possible. */
2935 if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0)
2936 {
2937 cfun->machine->frame.reg_offset[regno] = max_int_offset;
2938 break;
2939 }
2940
43e9d192 2941 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 2942 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 2943 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 2944 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
363ffa50
JW
2945 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2946 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2947 offset += UNITS_PER_WORD;
2948 }
2949
4f59f9f2 2950 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
2951
2952 cfun->machine->frame.saved_regs_size = offset;
1c960e02 2953
71bfb77a
WD
2954 HOST_WIDE_INT varargs_and_saved_regs_size
2955 = offset + cfun->machine->frame.saved_varargs_size;
2956
1c960e02 2957 cfun->machine->frame.hard_fp_offset
71bfb77a 2958 = ROUND_UP (varargs_and_saved_regs_size + get_frame_size (),
4f59f9f2 2959 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02
MS
2960
2961 cfun->machine->frame.frame_size
4f59f9f2
UB
2962 = ROUND_UP (cfun->machine->frame.hard_fp_offset
2963 + crtl->outgoing_args_size,
2964 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02 2965
71bfb77a
WD
2966 cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
2967
2968 cfun->machine->frame.initial_adjust = 0;
2969 cfun->machine->frame.final_adjust = 0;
2970 cfun->machine->frame.callee_adjust = 0;
2971 cfun->machine->frame.callee_offset = 0;
2972
2973 HOST_WIDE_INT max_push_offset = 0;
2974 if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM)
2975 max_push_offset = 512;
2976 else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
2977 max_push_offset = 256;
2978
2979 if (cfun->machine->frame.frame_size < max_push_offset
2980 && crtl->outgoing_args_size == 0)
2981 {
2982 /* Simple, small frame with no outgoing arguments:
2983 stp reg1, reg2, [sp, -frame_size]!
2984 stp reg3, reg4, [sp, 16] */
2985 cfun->machine->frame.callee_adjust = cfun->machine->frame.frame_size;
2986 }
2987 else if ((crtl->outgoing_args_size
2988 + cfun->machine->frame.saved_regs_size < 512)
2989 && !(cfun->calls_alloca
2990 && cfun->machine->frame.hard_fp_offset < max_push_offset))
2991 {
2992 /* Frame with small outgoing arguments:
2993 sub sp, sp, frame_size
2994 stp reg1, reg2, [sp, outgoing_args_size]
2995 stp reg3, reg4, [sp, outgoing_args_size + 16] */
2996 cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
2997 cfun->machine->frame.callee_offset
2998 = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
2999 }
3000 else if (cfun->machine->frame.hard_fp_offset < max_push_offset)
3001 {
3002 /* Frame with large outgoing arguments but a small local area:
3003 stp reg1, reg2, [sp, -hard_fp_offset]!
3004 stp reg3, reg4, [sp, 16]
3005 sub sp, sp, outgoing_args_size */
3006 cfun->machine->frame.callee_adjust = cfun->machine->frame.hard_fp_offset;
3007 cfun->machine->frame.final_adjust
3008 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
3009 }
3010 else if (!frame_pointer_needed
3011 && varargs_and_saved_regs_size < max_push_offset)
3012 {
3013 /* Frame with large local area and outgoing arguments (this pushes the
3014 callee-saves first, followed by the locals and outgoing area):
3015 stp reg1, reg2, [sp, -varargs_and_saved_regs_size]!
3016 stp reg3, reg4, [sp, 16]
3017 sub sp, sp, frame_size - varargs_and_saved_regs_size */
3018 cfun->machine->frame.callee_adjust = varargs_and_saved_regs_size;
3019 cfun->machine->frame.final_adjust
3020 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
3021 cfun->machine->frame.hard_fp_offset = cfun->machine->frame.callee_adjust;
3022 cfun->machine->frame.locals_offset = cfun->machine->frame.hard_fp_offset;
3023 }
3024 else
3025 {
3026 /* Frame with large local area and outgoing arguments using frame pointer:
3027 sub sp, sp, hard_fp_offset
3028 stp x29, x30, [sp, 0]
3029 add x29, sp, 0
3030 stp reg3, reg4, [sp, 16]
3031 sub sp, sp, outgoing_args_size */
3032 cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
3033 cfun->machine->frame.final_adjust
3034 = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
3035 }
3036
43e9d192
IB
3037 cfun->machine->frame.laid_out = true;
3038}
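/* A worked example of the layout choices above (sizes picked purely for
   illustration): a function that needs the frame pointer, saves x29/x30
   plus x19/x20 and has 16 bytes of locals ends up with
   hard_fp_offset == 48 and frame_size == 48.  With no outgoing
   arguments that is below max_push_offset (512, since both write-back
   candidates exist), so the first strategy is chosen,
   callee_adjust == 48, and the prologue is essentially

       stp x29, x30, [sp, -48]!
       stp x19, x20, [sp, 16]

   If the same function also needed a large outgoing argument area, one
   of the later strategies would be used instead, separating the
   callee-save push from a final 'sub sp, sp, outgoing_args_size'.  */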
3039
04ddfe06
KT
3040/* Return true if the register REGNO is saved on entry to
3041 the current function. */
3042
43e9d192
IB
3043static bool
3044aarch64_register_saved_on_entry (int regno)
3045{
97826595 3046 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
3047}
3048
04ddfe06
KT
3049/* Return the next register at or after REGNO, up to LIMIT, that the
3050 callee needs to save. */
3051
64dedd72
JW
3052static unsigned
3053aarch64_next_callee_save (unsigned regno, unsigned limit)
3054{
3055 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
3056 regno ++;
3057 return regno;
3058}
43e9d192 3059
04ddfe06
KT
3060/* Push the register number REGNO of mode MODE to the stack with write-back
3061 adjusting the stack by ADJUSTMENT. */
3062
c5e1f66e 3063static void
ef4bddc2 3064aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
3065 HOST_WIDE_INT adjustment)
3066 {
3067 rtx base_rtx = stack_pointer_rtx;
3068 rtx insn, reg, mem;
3069
3070 reg = gen_rtx_REG (mode, regno);
3071 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
3072 plus_constant (Pmode, base_rtx, -adjustment));
30079dde 3073 mem = gen_frame_mem (mode, mem);
c5e1f66e
JW
3074
3075 insn = emit_move_insn (mem, reg);
3076 RTX_FRAME_RELATED_P (insn) = 1;
3077}
3078
04ddfe06
KT
3079/* Generate and return an instruction to store the pair of registers
3080 REG and REG2 of mode MODE to location BASE with write-back adjusting
3081 the stack location BASE by ADJUSTMENT. */
3082
80c11907 3083static rtx
ef4bddc2 3084aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
3085 HOST_WIDE_INT adjustment)
3086{
3087 switch (mode)
3088 {
4e10a5a7 3089 case E_DImode:
80c11907
JW
3090 return gen_storewb_pairdi_di (base, base, reg, reg2,
3091 GEN_INT (-adjustment),
3092 GEN_INT (UNITS_PER_WORD - adjustment));
4e10a5a7 3093 case E_DFmode:
80c11907
JW
3094 return gen_storewb_pairdf_di (base, base, reg, reg2,
3095 GEN_INT (-adjustment),
3096 GEN_INT (UNITS_PER_WORD - adjustment));
3097 default:
3098 gcc_unreachable ();
3099 }
3100}
3101
04ddfe06
KT
3102/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
3103 stack pointer by ADJUSTMENT. */
3104
80c11907 3105static void
89ac681e 3106aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
80c11907 3107{
5d8a22a5 3108 rtx_insn *insn;
0d4a1197 3109 machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
89ac681e 3110
71bfb77a 3111 if (regno2 == INVALID_REGNUM)
89ac681e
WD
3112 return aarch64_pushwb_single_reg (mode, regno1, adjustment);
3113
80c11907
JW
3114 rtx reg1 = gen_rtx_REG (mode, regno1);
3115 rtx reg2 = gen_rtx_REG (mode, regno2);
3116
3117 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
3118 reg2, adjustment));
3119 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
3120 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3121 RTX_FRAME_RELATED_P (insn) = 1;
3122}
3123
04ddfe06
KT
3124/* Load the pair of registers REG and REG2 of mode MODE from stack location BASE,
3125 adjusting it by ADJUSTMENT afterwards. */
3126
159313d9 3127static rtx
ef4bddc2 3128aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
3129 HOST_WIDE_INT adjustment)
3130{
3131 switch (mode)
3132 {
4e10a5a7 3133 case E_DImode:
159313d9 3134 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 3135 GEN_INT (UNITS_PER_WORD));
4e10a5a7 3136 case E_DFmode:
159313d9 3137 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 3138 GEN_INT (UNITS_PER_WORD));
159313d9
JW
3139 default:
3140 gcc_unreachable ();
3141 }
3142}
3143
04ddfe06
KT
3144/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
3145 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
3146 into CFI_OPS. */
3147
89ac681e
WD
3148static void
3149aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
3150 rtx *cfi_ops)
3151{
0d4a1197 3152 machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
89ac681e
WD
3153 rtx reg1 = gen_rtx_REG (mode, regno1);
3154
3155 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
3156
71bfb77a 3157 if (regno2 == INVALID_REGNUM)
89ac681e
WD
3158 {
3159 rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
3160 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
30079dde 3161 emit_move_insn (reg1, gen_frame_mem (mode, mem));
89ac681e
WD
3162 }
3163 else
3164 {
3165 rtx reg2 = gen_rtx_REG (mode, regno2);
3166 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
3167 emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
3168 reg2, adjustment));
3169 }
3170}
3171
04ddfe06
KT
3172/* Generate and return a store pair instruction of mode MODE to store
3173 register REG1 to MEM1 and register REG2 to MEM2. */
3174
72df5c1f 3175static rtx
ef4bddc2 3176aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
3177 rtx reg2)
3178{
3179 switch (mode)
3180 {
4e10a5a7 3181 case E_DImode:
72df5c1f
JW
3182 return gen_store_pairdi (mem1, reg1, mem2, reg2);
3183
4e10a5a7 3184 case E_DFmode:
72df5c1f
JW
3185 return gen_store_pairdf (mem1, reg1, mem2, reg2);
3186
3187 default:
3188 gcc_unreachable ();
3189 }
3190}
3191
04ddfe06
KT
3192/* Generate and return a load pair instruction of mode MODE to load register
3193 REG1 from MEM1 and register REG2 from MEM2. */
3194
72df5c1f 3195static rtx
ef4bddc2 3196aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
3197 rtx mem2)
3198{
3199 switch (mode)
3200 {
4e10a5a7 3201 case E_DImode:
72df5c1f
JW
3202 return gen_load_pairdi (reg1, mem1, reg2, mem2);
3203
4e10a5a7 3204 case E_DFmode:
72df5c1f
JW
3205 return gen_load_pairdf (reg1, mem1, reg2, mem2);
3206
3207 default:
3208 gcc_unreachable ();
3209 }
3210}
3211
db58fd89
JW
3212/* Return TRUE if return address signing should be enabled for the current
3213 function, otherwise return FALSE. */
3214
3215bool
3216aarch64_return_address_signing_enabled (void)
3217{
 3218 /* This function should only be called after the frame is laid out. */
3219 gcc_assert (cfun->machine->frame.laid_out);
3220
 3221 /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function
 3222 if its LR is pushed onto the stack. */
3223 return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL
3224 || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF
3225 && cfun->machine->frame.reg_offset[LR_REGNUM] >= 0));
3226}
3227
04ddfe06
KT
3228/* Emit code to save the callee-saved registers from register number START
3229 to LIMIT to the stack at the location starting at offset START_OFFSET,
3230 skipping any write-back candidates if SKIP_WB is true. */
43e9d192 3231
43e9d192 3232static void
ef4bddc2 3233aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
ae13fce3 3234 unsigned start, unsigned limit, bool skip_wb)
43e9d192 3235{
5d8a22a5 3236 rtx_insn *insn;
43e9d192
IB
3237 unsigned regno;
3238 unsigned regno2;
3239
0ec74a1e 3240 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
3241 regno <= limit;
3242 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 3243 {
ae13fce3
JW
3244 rtx reg, mem;
3245 HOST_WIDE_INT offset;
64dedd72 3246
ae13fce3
JW
3247 if (skip_wb
3248 && (regno == cfun->machine->frame.wb_candidate1
3249 || regno == cfun->machine->frame.wb_candidate2))
3250 continue;
3251
827ab47a
KT
3252 if (cfun->machine->reg_is_wrapped_separately[regno])
3253 continue;
3254
ae13fce3
JW
3255 reg = gen_rtx_REG (mode, regno);
3256 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde
WD
3257 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
3258 offset));
64dedd72
JW
3259
3260 regno2 = aarch64_next_callee_save (regno + 1, limit);
3261
3262 if (regno2 <= limit
827ab47a 3263 && !cfun->machine->reg_is_wrapped_separately[regno2]
64dedd72
JW
3264 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
3265 == cfun->machine->frame.reg_offset[regno2]))
3266
43e9d192 3267 {
0ec74a1e 3268 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
3269 rtx mem2;
3270
3271 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde
WD
3272 mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
3273 offset));
8ed2fc62
JW
3274 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
3275 reg2));
0b4a9743 3276
64dedd72
JW
3277 /* The first part of a frame-related parallel insn is
3278 always assumed to be relevant to the frame
 3279 calculations; subsequent parts are only
3280 frame-related if explicitly marked. */
3281 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3282 regno = regno2;
3283 }
3284 else
8ed2fc62
JW
3285 insn = emit_move_insn (mem, reg);
3286
3287 RTX_FRAME_RELATED_P (insn) = 1;
3288 }
3289}
3290
04ddfe06
KT
3291/* Emit code to restore the callee registers of mode MODE from register
3292 number START up to and including LIMIT. Restore from the stack offset
3293 START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
3294 Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */
3295
8ed2fc62 3296static void
ef4bddc2 3297aarch64_restore_callee_saves (machine_mode mode,
8ed2fc62 3298 HOST_WIDE_INT start_offset, unsigned start,
dd991abb 3299 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 3300{
8ed2fc62 3301 rtx base_rtx = stack_pointer_rtx;
8ed2fc62
JW
3302 unsigned regno;
3303 unsigned regno2;
3304 HOST_WIDE_INT offset;
3305
3306 for (regno = aarch64_next_callee_save (start, limit);
3307 regno <= limit;
3308 regno = aarch64_next_callee_save (regno + 1, limit))
3309 {
827ab47a
KT
3310 if (cfun->machine->reg_is_wrapped_separately[regno])
3311 continue;
3312
ae13fce3 3313 rtx reg, mem;
8ed2fc62 3314
ae13fce3
JW
3315 if (skip_wb
3316 && (regno == cfun->machine->frame.wb_candidate1
3317 || regno == cfun->machine->frame.wb_candidate2))
3318 continue;
3319
3320 reg = gen_rtx_REG (mode, regno);
8ed2fc62 3321 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde 3322 mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
8ed2fc62
JW
3323
3324 regno2 = aarch64_next_callee_save (regno + 1, limit);
3325
3326 if (regno2 <= limit
827ab47a 3327 && !cfun->machine->reg_is_wrapped_separately[regno2]
8ed2fc62
JW
3328 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
3329 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 3330 {
8ed2fc62
JW
3331 rtx reg2 = gen_rtx_REG (mode, regno2);
3332 rtx mem2;
3333
3334 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde 3335 mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 3336 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 3337
dd991abb 3338 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 3339 regno = regno2;
43e9d192 3340 }
8ed2fc62 3341 else
dd991abb
RH
3342 emit_move_insn (reg, mem);
3343 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 3344 }
43e9d192
IB
3345}
3346
827ab47a
KT
3347static inline bool
3348offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
3349 HOST_WIDE_INT offset)
3350{
3351 return offset >= -256 && offset < 256;
3352}
3353
3354static inline bool
3355offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3356{
3357 return (offset >= 0
3358 && offset < 4096 * GET_MODE_SIZE (mode)
3359 && offset % GET_MODE_SIZE (mode) == 0);
3360}
3361
3362bool
3363aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3364{
3365 return (offset >= -64 * GET_MODE_SIZE (mode)
3366 && offset < 64 * GET_MODE_SIZE (mode)
3367 && offset % GET_MODE_SIZE (mode) == 0);
3368}
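/* Illustrative sketch (not part of the original file): the three offset
   predicates above, restated as one standalone function with the access
   size passed explicitly.  The name example_offset_in_range_p is
   hypothetical.  For an 8-byte (DImode) access the 7-bit signed scaled
   check accepts multiples of 8 in [-512, 504], the 9-bit unscaled check
   accepts any offset in [-256, 255], and the 12-bit unsigned scaled
   check accepts multiples of 8 in [0, 32760].  */
#if 0	/* Example only, not compiled as part of aarch64.c.  */
#include <stdbool.h>
static bool
example_offset_in_range_p (long long offset, int mode_size)
{
  bool scaled7 = (offset >= -64 * mode_size
		  && offset < 64 * mode_size
		  && offset % mode_size == 0);
  bool unscaled9 = offset >= -256 && offset < 256;
  bool scaled12 = (offset >= 0
		   && offset < 4096 * mode_size
		   && offset % mode_size == 0);
  return scaled7 || unscaled9 || scaled12;
}
#endif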
3369
3370/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
3371
3372static sbitmap
3373aarch64_get_separate_components (void)
3374{
3375 aarch64_layout_frame ();
3376
3377 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
3378 bitmap_clear (components);
3379
3380 /* The registers we need saved to the frame. */
3381 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3382 if (aarch64_register_saved_on_entry (regno))
3383 {
3384 HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
3385 if (!frame_pointer_needed)
3386 offset += cfun->machine->frame.frame_size
3387 - cfun->machine->frame.hard_fp_offset;
3388 /* Check that we can access the stack slot of the register with a
3389 single direct load, with no adjustments needed. */
3390 if (offset_12bit_unsigned_scaled_p (DImode, offset))
3391 bitmap_set_bit (components, regno);
3392 }
3393
3394 /* Don't mess with the hard frame pointer. */
3395 if (frame_pointer_needed)
3396 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
3397
3398 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3399 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3400 /* If aarch64_layout_frame has chosen registers to store/restore with
3401 writeback, don't interfere with them, to avoid having to output explicit
3402 stack adjustment instructions. */
3403 if (reg2 != INVALID_REGNUM)
3404 bitmap_clear_bit (components, reg2);
3405 if (reg1 != INVALID_REGNUM)
3406 bitmap_clear_bit (components, reg1);
3407
3408 bitmap_clear_bit (components, LR_REGNUM);
3409 bitmap_clear_bit (components, SP_REGNUM);
3410
3411 return components;
3412}
3413
3414/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
3415
3416static sbitmap
3417aarch64_components_for_bb (basic_block bb)
3418{
3419 bitmap in = DF_LIVE_IN (bb);
3420 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
3421 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
3422
3423 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
3424 bitmap_clear (components);
3425
3426 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
3427 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3428 if ((!call_used_regs[regno])
3429 && (bitmap_bit_p (in, regno)
3430 || bitmap_bit_p (gen, regno)
3431 || bitmap_bit_p (kill, regno)))
3432 bitmap_set_bit (components, regno);
3433
3434 return components;
3435}
3436
3437/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
3438 Nothing to do for aarch64. */
3439
3440static void
3441aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
3442{
3443}
3444
3445/* Return the next set bit in BMP from START onwards. Return the total number
3446 of bits in BMP if no set bit is found at or after START. */
3447
3448static unsigned int
3449aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
3450{
3451 unsigned int nbits = SBITMAP_SIZE (bmp);
3452 if (start == nbits)
3453 return start;
3454
3455 gcc_assert (start < nbits);
3456 for (unsigned int i = start; i < nbits; i++)
3457 if (bitmap_bit_p (bmp, i))
3458 return i;
3459
3460 return nbits;
3461}
3462
3463/* Do the work for aarch64_emit_prologue_components and
3464 aarch64_emit_epilogue_components. COMPONENTS is the bitmap of registers
3465 to save/restore; PROLOGUE_P indicates whether to emit the prologue sequence
3466 for these components or the epilogue sequence. That is, it determines
3467 whether we should emit stores or loads and what kind of CFA notes to attach
3468 to the insns. Otherwise the logic for the two sequences is very
3469 similar. */
3470
3471static void
3472aarch64_process_components (sbitmap components, bool prologue_p)
3473{
3474 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
3475 ? HARD_FRAME_POINTER_REGNUM
3476 : STACK_POINTER_REGNUM);
3477
3478 unsigned last_regno = SBITMAP_SIZE (components);
3479 unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
3480 rtx_insn *insn = NULL;
3481
3482 while (regno != last_regno)
3483 {
3484 /* AAPCS64 section 5.1.2 requires only the bottom 64 bits to be saved,
3485 so DFmode for the vector registers is enough. */
0d4a1197 3486 machine_mode mode = GP_REGNUM_P (regno) ? E_DImode : E_DFmode;
827ab47a
KT
3487 rtx reg = gen_rtx_REG (mode, regno);
3488 HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
3489 if (!frame_pointer_needed)
3490 offset += cfun->machine->frame.frame_size
3491 - cfun->machine->frame.hard_fp_offset;
3492 rtx addr = plus_constant (Pmode, ptr_reg, offset);
3493 rtx mem = gen_frame_mem (mode, addr);
3494
3495 rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
3496 unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
3497 /* No more registers to handle after REGNO.
3498 Emit a single save/restore and exit. */
3499 if (regno2 == last_regno)
3500 {
3501 insn = emit_insn (set);
3502 RTX_FRAME_RELATED_P (insn) = 1;
3503 if (prologue_p)
3504 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
3505 else
3506 add_reg_note (insn, REG_CFA_RESTORE, reg);
3507 break;
3508 }
3509
3510 HOST_WIDE_INT offset2 = cfun->machine->frame.reg_offset[regno2];
3511 /* The next register is not of the same class or its offset is not
3512 mergeable with the current one into a pair. */
3513 if (!satisfies_constraint_Ump (mem)
3514 || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
3515 || (offset2 - cfun->machine->frame.reg_offset[regno])
3516 != GET_MODE_SIZE (mode))
3517 {
3518 insn = emit_insn (set);
3519 RTX_FRAME_RELATED_P (insn) = 1;
3520 if (prologue_p)
3521 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
3522 else
3523 add_reg_note (insn, REG_CFA_RESTORE, reg);
3524
3525 regno = regno2;
3526 continue;
3527 }
3528
3529 /* REGNO2 can be saved/restored in a pair with REGNO. */
3530 rtx reg2 = gen_rtx_REG (mode, regno2);
3531 if (!frame_pointer_needed)
3532 offset2 += cfun->machine->frame.frame_size
3533 - cfun->machine->frame.hard_fp_offset;
3534 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
3535 rtx mem2 = gen_frame_mem (mode, addr2);
3536 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
3537 : gen_rtx_SET (reg2, mem2);
3538
3539 if (prologue_p)
3540 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
3541 else
3542 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
3543
3544 RTX_FRAME_RELATED_P (insn) = 1;
3545 if (prologue_p)
3546 {
3547 add_reg_note (insn, REG_CFA_OFFSET, set);
3548 add_reg_note (insn, REG_CFA_OFFSET, set2);
3549 }
3550 else
3551 {
3552 add_reg_note (insn, REG_CFA_RESTORE, reg);
3553 add_reg_note (insn, REG_CFA_RESTORE, reg2);
3554 }
3555
3556 regno = aarch64_get_next_set_bit (components, regno2 + 1);
3557 }
3558}
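/* Illustrative example (not from the original file): if COMPONENTS
   contains x22 and x23 and their frame slots are adjacent and reachable
   by a paired access, the loop above emits a single "stp x22, x23, [...]"
   in the prologue (or the corresponding ldp in the epilogue), with one
   REG_CFA_OFFSET (or REG_CFA_RESTORE) note attached per register.  */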
3559
3560/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
3561
3562static void
3563aarch64_emit_prologue_components (sbitmap components)
3564{
3565 aarch64_process_components (components, true);
3566}
3567
3568/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
3569
3570static void
3571aarch64_emit_epilogue_components (sbitmap components)
3572{
3573 aarch64_process_components (components, false);
3574}
3575
3576/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
3577
3578static void
3579aarch64_set_handled_components (sbitmap components)
3580{
3581 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3582 if (bitmap_bit_p (components, regno))
3583 cfun->machine->reg_is_wrapped_separately[regno] = true;
3584}
3585
43e9d192 3586 /* AArch64 stack frames generated by this compiler look like:
3587
3588	+-------------------------------+
3589	|                               |
3590	|  incoming stack arguments     |
3591	|                               |
3592	+-------------------------------+
3593	|                               | <-- incoming stack pointer (aligned)
3594	|  callee-allocated save area   |
3595	|  for register varargs         |
3596	|                               |
3597	+-------------------------------+
3598	|  local variables              | <-- frame_pointer_rtx
3599	|                               |
3600	+-------------------------------+
3601	|  padding0                     | \
3602	+-------------------------------+  |
3603	|  callee-saved registers       |  | frame.saved_regs_size
3604	+-------------------------------+  |
3605	|  LR'                          |  |
3606	+-------------------------------+  |
3607	|  FP'                          | /  <- hard_frame_pointer_rtx (aligned)
3608	+-------------------------------+
3609	|  dynamic allocation           |
3610	+-------------------------------+
3611	|  padding                      |
3612	+-------------------------------+
3613	|  outgoing stack arguments     | <-- arg_pointer
3614	|                               |
3615	+-------------------------------+
3616	|                               | <-- stack_pointer_rtx (aligned)
3617
3618    Dynamic stack allocations via alloca() decrease stack_pointer_rtx
3619    but leave frame_pointer_rtx and hard_frame_pointer_rtx
3620    unchanged.  */
43e9d192
IB
3621
3622/* Generate the prologue instructions for entry into a function.
3623 Establish the stack frame by decreasing the stack pointer with a
3624 properly calculated size and, if necessary, create a frame record
3625 filled with the values of LR and previous frame pointer. The
6991c977 3626 current FP is also set up if it is in use. */
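/* Illustrative sketch (not part of the original file): for a small
   non-leaf function where aarch64_layout_frame folds the whole frame
   into callee_adjust, the expansion below typically produces something
   like:

	stp	x29, x30, [sp, #-32]!	// aarch64_push_regs, callee_adjust = 32
	mov	x29, sp			// set up the frame pointer
	...				// function body
	ldp	x29, x30, [sp], #32	// aarch64_pop_regs in the epilogue
	ret

   The exact sequence depends on the sizes chosen by aarch64_layout_frame
   (initial_adjust, callee_adjust, final_adjust).  */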
43e9d192
IB
3627
3628void
3629aarch64_expand_prologue (void)
3630{
43e9d192 3631 aarch64_layout_frame ();
43e9d192 3632
71bfb77a
WD
3633 HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size;
3634 HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
3635 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
3636 HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
3637 HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
3638 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3639 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3640 rtx_insn *insn;
43e9d192 3641
db58fd89
JW
3642 /* Sign return address for functions. */
3643 if (aarch64_return_address_signing_enabled ())
27169e45
JW
3644 {
3645 insn = emit_insn (gen_pacisp ());
3646 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
3647 RTX_FRAME_RELATED_P (insn) = 1;
3648 }
db58fd89 3649
dd991abb
RH
3650 if (flag_stack_usage_info)
3651 current_function_static_stack_size = frame_size;
43e9d192 3652
a3eb8a52
EB
3653 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3654 {
3655 if (crtl->is_leaf && !cfun->calls_alloca)
3656 {
3657 if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT)
3658 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT,
3659 frame_size - STACK_CHECK_PROTECT);
3660 }
3661 else if (frame_size > 0)
3662 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
3663 }
3664
5be6b295 3665 aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);
43e9d192 3666
71bfb77a
WD
3667 if (callee_adjust != 0)
3668 aarch64_push_regs (reg1, reg2, callee_adjust);
43e9d192 3669
71bfb77a 3670 if (frame_pointer_needed)
43e9d192 3671 {
71bfb77a
WD
3672 if (callee_adjust == 0)
3673 aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM,
3674 R30_REGNUM, false);
3675 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
3676 stack_pointer_rtx,
3677 GEN_INT (callee_offset)));
3678 RTX_FRAME_RELATED_P (insn) = 1;
3679 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192 3680 }
71bfb77a
WD
3681
3682 aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
3683 callee_adjust != 0 || frame_pointer_needed);
3684 aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
3685 callee_adjust != 0 || frame_pointer_needed);
5be6b295 3686 aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
43e9d192
IB
3687}
3688
4f942779
RL
3689/* Return TRUE if we can use a simple_return insn.
3690
3691 This function checks whether the callee-saved stack area is empty, which
3692 means no restore actions are needed. The pro_and_epilogue pass uses
3693 this to check whether the shrink-wrapping optimization is feasible.
3694
3695bool
3696aarch64_use_return_insn_p (void)
3697{
3698 if (!reload_completed)
3699 return false;
3700
3701 if (crtl->profile)
3702 return false;
3703
3704 aarch64_layout_frame ();
3705
3706 return cfun->machine->frame.frame_size == 0;
3707}
3708
71bfb77a
WD
3709/* Generate the epilogue instructions for returning from a function.
3710 This is almost exactly the reverse of the prologue sequence, except
3711 that we need to insert barriers to avoid scheduling loads that read
3712 from a deallocated stack, and we optimize the unwind records by
3713 emitting them all together if possible. */
43e9d192
IB
3714void
3715aarch64_expand_epilogue (bool for_sibcall)
3716{
43e9d192 3717 aarch64_layout_frame ();
43e9d192 3718
71bfb77a
WD
3719 HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
3720 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
3721 HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
3722 HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
3723 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3724 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3725 rtx cfi_ops = NULL;
3726 rtx_insn *insn;
44c0e7b9 3727
71bfb77a
WD
3728 /* We need to add a memory barrier to prevent reads from the deallocated stack. */
3729 bool need_barrier_p = (get_frame_size ()
3730 + cfun->machine->frame.saved_varargs_size) != 0;
43e9d192 3731
71bfb77a 3732 /* Emit a barrier to prevent loads from a deallocated stack. */
8144a493
WD
3733 if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
3734 || crtl->calls_eh_return)
43e9d192 3735 {
71bfb77a
WD
3736 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3737 need_barrier_p = false;
3738 }
7e8c2bd5 3739
71bfb77a
WD
3740 /* Restore the stack pointer from the frame pointer if it may not
3741 be the same as the stack pointer. */
3742 if (frame_pointer_needed && (final_adjust || cfun->calls_alloca))
3743 {
43e9d192
IB
3744 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
3745 hard_frame_pointer_rtx,
71bfb77a
WD
3746 GEN_INT (-callee_offset)));
3747 /* If writeback is used when restoring callee-saves, the CFA
3748 is restored on the instruction doing the writeback. */
3749 RTX_FRAME_RELATED_P (insn) = callee_adjust == 0;
43e9d192 3750 }
71bfb77a 3751 else
5be6b295 3752 aarch64_add_sp (IP1_REGNUM, final_adjust, df_regs_ever_live_p (IP1_REGNUM));
43e9d192 3753
71bfb77a
WD
3754 aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
3755 callee_adjust != 0, &cfi_ops);
3756 aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
3757 callee_adjust != 0, &cfi_ops);
43e9d192 3758
71bfb77a
WD
3759 if (need_barrier_p)
3760 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3761
3762 if (callee_adjust != 0)
3763 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
3764
3765 if (callee_adjust != 0 || initial_adjust > 65536)
3766 {
3767 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
89ac681e 3768 insn = get_last_insn ();
71bfb77a
WD
3769 rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
3770 REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
43e9d192 3771 RTX_FRAME_RELATED_P (insn) = 1;
71bfb77a 3772 cfi_ops = NULL;
43e9d192
IB
3773 }
3774
5be6b295 3775 aarch64_add_sp (IP0_REGNUM, initial_adjust, df_regs_ever_live_p (IP0_REGNUM));
7e8c2bd5 3776
71bfb77a
WD
3777 if (cfi_ops)
3778 {
3779 /* Emit delayed restores and reset the CFA to be SP. */
3780 insn = get_last_insn ();
3781 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
3782 REG_NOTES (insn) = cfi_ops;
3783 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb
RH
3784 }
3785
db58fd89
JW
3786 /* We prefer to emit the combined return/authenticate instruction RETAA,
3787 however there are three cases in which we must instead emit an explicit
3788 authentication instruction.
3789
3790 1) Sibcalls don't return in a normal way, so if we're about to call one
3791 we must authenticate.
3792
3793 2) The RETAA instruction is not available before ARMv8.3-A, so if we are
3794 generating code for !TARGET_ARMV8_3 we can't use it and must
3795 explicitly authenticate.
3796
3797 3) On an eh_return path we make extra stack adjustments to update the
3798 canonical frame address to be the exception handler's CFA. We want
3799 to authenticate using the CFA of the function which calls eh_return.
3800 */
3801 if (aarch64_return_address_signing_enabled ()
3802 && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return))
27169e45
JW
3803 {
3804 insn = emit_insn (gen_autisp ());
3805 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
3806 RTX_FRAME_RELATED_P (insn) = 1;
3807 }
db58fd89 3808
dd991abb
RH
3809 /* Stack adjustment for exception handler. */
3810 if (crtl->calls_eh_return)
3811 {
3812 /* We need to unwind the stack by the offset computed by
3813 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
3814 to be SP; letting the CFA move during this adjustment
3815 is just as correct as retaining the CFA from the body
3816 of the function. Therefore, do nothing special. */
3817 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
3818 }
3819
3820 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
3821 if (!for_sibcall)
3822 emit_jump_insn (ret_rtx);
3823}
3824
8144a493
WD
3825/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
3826 normally or return to a previous frame after unwinding.
1c960e02 3827
8144a493
WD
3828 An EH return uses a single shared return sequence. The epilogue is
3829 exactly like a normal epilogue except that it has an extra input
3830 register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
3831 that must be applied after the frame has been destroyed. An extra label
3832 is inserted before the epilogue which initializes this register to zero,
3833 and this is the entry point for a normal return.
43e9d192 3834
8144a493
WD
3835 An actual EH return updates the return address, initializes the stack
3836 adjustment and jumps directly into the epilogue (bypassing the zeroing
3837 of the adjustment). Since the return address is typically saved on the
3838 stack when a function makes a call, the saved LR must be updated outside
3839 the epilogue.
43e9d192 3840
8144a493
WD
3841 This poses problems as the store is generated well before the epilogue,
3842 so the offset of LR is not known yet. Also optimizations will remove the
3843 store as it appears dead, even after the epilogue is generated (as the
3844 base or offset for loading LR is different in many cases).
43e9d192 3845
8144a493
WD
3846 To avoid these problems this implementation forces the frame pointer
3847 in eh_return functions so that the location of LR is fixed and known early.
3848 It also marks the store volatile, so no optimization is permitted to
3849 remove the store. */
3850rtx
3851aarch64_eh_return_handler_rtx (void)
3852{
3853 rtx tmp = gen_frame_mem (Pmode,
3854 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
43e9d192 3855
8144a493
WD
3856 /* Mark the store volatile, so no optimization is permitted to remove it. */
3857 MEM_VOLATILE_P (tmp) = true;
3858 return tmp;
43e9d192
IB
3859}
3860
43e9d192
IB
3861/* Output code to add DELTA to the first argument, and then jump
3862 to FUNCTION. Used for C++ multiple inheritance. */
3863static void
3864aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
3865 HOST_WIDE_INT delta,
3866 HOST_WIDE_INT vcall_offset,
3867 tree function)
3868{
3869 /* The this pointer is always in x0. Note that this differs from
3870 Arm where the this pointer may be bumped to r1 if r0 is required
3871 to return a pointer to an aggregate. On AArch64 a result value
3872 pointer will be in x8. */
3873 int this_regno = R0_REGNUM;
5d8a22a5
DM
3874 rtx this_rtx, temp0, temp1, addr, funexp;
3875 rtx_insn *insn;
43e9d192 3876
75f1d6fc
SN
3877 reload_completed = 1;
3878 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
3879
3880 if (vcall_offset == 0)
5be6b295 3881 aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
43e9d192
IB
3882 else
3883 {
28514dda 3884 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 3885
75f1d6fc
SN
3886 this_rtx = gen_rtx_REG (Pmode, this_regno);
3887 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
3888 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 3889
75f1d6fc
SN
3890 addr = this_rtx;
3891 if (delta != 0)
3892 {
3893 if (delta >= -256 && delta < 256)
3894 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
3895 plus_constant (Pmode, this_rtx, delta));
3896 else
5be6b295 3897 aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
43e9d192
IB
3898 }
3899
28514dda
YZ
3900 if (Pmode == ptr_mode)
3901 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
3902 else
3903 aarch64_emit_move (temp0,
3904 gen_rtx_ZERO_EXTEND (Pmode,
3905 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 3906
28514dda 3907 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 3908 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
3909 else
3910 {
f43657b4
JW
3911 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
3912 Pmode);
75f1d6fc 3913 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
3914 }
3915
28514dda
YZ
3916 if (Pmode == ptr_mode)
3917 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
3918 else
3919 aarch64_emit_move (temp1,
3920 gen_rtx_SIGN_EXTEND (Pmode,
3921 gen_rtx_MEM (ptr_mode, addr)));
3922
75f1d6fc 3923 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
3924 }
3925
75f1d6fc
SN
3926 /* Generate a tail call to the target function. */
3927 if (!TREE_USED (function))
3928 {
3929 assemble_external (function);
3930 TREE_USED (function) = 1;
3931 }
3932 funexp = XEXP (DECL_RTL (function), 0);
3933 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
3934 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
3935 SIBLING_CALL_P (insn) = 1;
3936
3937 insn = get_insns ();
3938 shorten_branches (insn);
3939 final_start_function (insn, file, 1);
3940 final (insn, file, 1);
43e9d192 3941 final_end_function ();
75f1d6fc
SN
3942
3943 /* Stop pretending to be a post-reload pass. */
3944 reload_completed = 0;
43e9d192
IB
3945}
3946
43e9d192
IB
3947static bool
3948aarch64_tls_referenced_p (rtx x)
3949{
3950 if (!TARGET_HAVE_TLS)
3951 return false;
e7de8563
RS
3952 subrtx_iterator::array_type array;
3953 FOR_EACH_SUBRTX (iter, array, x, ALL)
3954 {
3955 const_rtx x = *iter;
3956 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
3957 return true;
3958 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3959 TLS offsets, not real symbol references. */
3960 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3961 iter.skip_subrtxes ();
3962 }
3963 return false;
43e9d192
IB
3964}
3965
3966
43e9d192
IB
3967/* Return true if val can be encoded as a 12-bit unsigned immediate with
3968 a left shift of 0 or 12 bits. */
3969bool
3970aarch64_uimm12_shift (HOST_WIDE_INT val)
3971{
3972 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3973 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3974 );
3975}
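/* Illustrative examples (not part of the original file): 0xabc and
   0xabc000 both satisfy aarch64_uimm12_shift (the 12-bit field with
   LSL #0 and LSL #12 respectively, as used by ADD/SUB immediates),
   whereas 0xabc001 does not, because its set bits span both fields.  */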
3976
3977
3978/* Return true if val is an immediate that can be loaded into a
3979 register by a MOVZ instruction. */
3980static bool
ef4bddc2 3981aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3982{
3983 if (GET_MODE_SIZE (mode) > 4)
3984 {
3985 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3986 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3987 return 1;
3988 }
3989 else
3990 {
3991 /* Ignore sign extension. */
3992 val &= (HOST_WIDE_INT) 0xffffffff;
3993 }
3994 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3995 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3996}
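/* Illustrative examples (not part of the original file): in DImode,
   0x12340000 is accepted (a single MOVZ with LSL #16), while
   0x0000123400005678 is rejected because two 16-bit chunks are
   nonzero and would need an extra MOVK.  */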
3997
a64c73a2
WD
3998/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
3999
4000static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
4001 {
4002 0x0000000100000001ull,
4003 0x0001000100010001ull,
4004 0x0101010101010101ull,
4005 0x1111111111111111ull,
4006 0x5555555555555555ull,
4007 };
4008
43e9d192
IB
4009
4010/* Return true if val is a valid bitmask immediate. */
a64c73a2 4011
43e9d192 4012bool
a64c73a2 4013aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
43e9d192 4014{
a64c73a2
WD
4015 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
4016 int bits;
4017
4018 /* Check for a single sequence of one bits and return quickly if so.
4019 The special cases of all ones and all zeroes return false. */
4020 val = (unsigned HOST_WIDE_INT) val_in;
4021 tmp = val + (val & -val);
4022
4023 if (tmp == (tmp & -tmp))
4024 return (val + 1) > 1;
4025
4026 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
4027 if (mode == SImode)
4028 val = (val << 32) | (val & 0xffffffff);
4029
4030 /* Invert if the immediate doesn't start with a zero bit - this means we
4031 only need to search for sequences of one bits. */
4032 if (val & 1)
4033 val = ~val;
4034
4035 /* Find the first set bit and set tmp to val with the first sequence of one
4036 bits removed. Return success if there is a single sequence of ones. */
4037 first_one = val & -val;
4038 tmp = val & (val + first_one);
4039
4040 if (tmp == 0)
4041 return true;
4042
4043 /* Find the next set bit and compute the difference in bit position. */
4044 next_one = tmp & -tmp;
4045 bits = clz_hwi (first_one) - clz_hwi (next_one);
4046 mask = val ^ tmp;
4047
4048 /* Check the bit position difference is a power of 2, and that the first
4049 sequence of one bits fits within 'bits' bits. */
4050 if ((mask >> bits) != 0 || bits != (bits & -bits))
4051 return false;
4052
4053 /* Check the sequence of one bits is repeated 64/bits times. */
4054 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
43e9d192
IB
4055}
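/* Worked example (illustrative, not part of the original file): for
   val_in = 0x00ff00ff00ff00ff in DImode the quick single-run test fails,
   so the value is inverted to 0xff00ff00ff00ff00.  Then first_one = 0x100,
   stripping the lowest run of ones gives tmp = 0xff00ff00ff000000,
   next_one = 0x1000000, hence bits = 16 and mask = 0xff00.  Since
   mask >> 16 == 0, 16 is a power of two, and
   0xff00 * 0x0001000100010001 == 0xff00ff00ff00ff00, the function
   returns true: the constant is a valid bitmask immediate.  */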
4056
43fd192f
MC
4057 /* Create a mask of ones covering the range from the lowest to the highest
4058 bit set in VAL_IN. Assumed precondition: VAL_IN is not zero. */
4059
4060unsigned HOST_WIDE_INT
4061aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
4062{
4063 int lowest_bit_set = ctz_hwi (val_in);
4064 int highest_bit_set = floor_log2 (val_in);
4065 gcc_assert (val_in != 0);
4066
4067 return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
4068 (HOST_WIDE_INT_1U << lowest_bit_set));
4069}
4070
4071 /* Create a constant in which all bits outside the range from the lowest to
4072 the highest bit set in VAL_IN are set to 1. */
4073
4074unsigned HOST_WIDE_INT
4075aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
4076{
4077 return val_in | ~aarch64_and_split_imm1 (val_in);
4078}
4079
4080/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
4081
4082bool
4083aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
4084{
4085 if (aarch64_bitmask_imm (val_in, mode))
4086 return false;
4087
4088 if (aarch64_move_imm (val_in, mode))
4089 return false;
4090
4091 unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
4092
4093 return aarch64_bitmask_imm (imm2, mode);
4094}
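/* Worked example (illustrative, not part of the original file): take
   val_in = 0x00ff000000000ff0, which is neither a bitmask immediate nor
   a MOV immediate.  aarch64_and_split_imm1 gives imm1 = 0x0000fffffffffff0
   (ones from bit 4, the lowest set bit, up to bit 47, the highest), and
   aarch64_and_split_imm2 gives imm2 = val_in | ~imm1 = 0xffffff0000000fff.
   Since imm2 is a valid bitmask immediate and imm1 & imm2 == val_in,
   aarch64_and_bitmask_imm returns true, and an AND with val_in can be
   emitted as two ANDs with bitmask immediates (imm1, then imm2).  */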
43e9d192
IB
4095
4096/* Return true if val is an immediate that can be loaded into a
4097 register in a single instruction. */
4098bool
ef4bddc2 4099aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
4100{
4101 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
4102 return 1;
4103 return aarch64_bitmask_imm (val, mode);
4104}
4105
4106static bool
ef4bddc2 4107aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
4108{
4109 rtx base, offset;
7eda14e1 4110
43e9d192
IB
4111 if (GET_CODE (x) == HIGH)
4112 return true;
4113
4114 split_const (x, &base, &offset);
4115 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 4116 {
a6e0bfa7 4117 if (aarch64_classify_symbol (base, offset)
28514dda
YZ
4118 != SYMBOL_FORCE_TO_MEM)
4119 return true;
4120 else
4121 /* Avoid generating a 64-bit relocation in ILP32; leave
4122 to aarch64_expand_mov_immediate to handle it properly. */
4123 return mode != ptr_mode;
4124 }
43e9d192
IB
4125
4126 return aarch64_tls_referenced_p (x);
4127}
4128
e79136e4
WD
4129/* Implement TARGET_CASE_VALUES_THRESHOLD.
4130 The expansion for a table switch is quite expensive due to the number
4131 of instructions, the table lookup and the hard-to-predict indirect jump.
4132 When optimizing for speed with -O3 enabled, use the per-core tuning if
4133 set, otherwise use tables for > 16 cases as a tradeoff between size and
4134 performance. When optimizing for size, use the default setting. */
50487d79
EM
4135
4136static unsigned int
4137aarch64_case_values_threshold (void)
4138{
4139 /* Use the specified limit for the number of cases before using jump
4140 tables at higher optimization levels. */
4141 if (optimize > 2
4142 && selected_cpu->tune->max_case_values != 0)
4143 return selected_cpu->tune->max_case_values;
4144 else
e79136e4 4145 return optimize_size ? default_case_values_threshold () : 17;
50487d79
EM
4146}
4147
43e9d192
IB
4148/* Return true if register REGNO is a valid index register.
4149 STRICT_P is true if REG_OK_STRICT is in effect. */
4150
4151bool
4152aarch64_regno_ok_for_index_p (int regno, bool strict_p)
4153{
4154 if (!HARD_REGISTER_NUM_P (regno))
4155 {
4156 if (!strict_p)
4157 return true;
4158
4159 if (!reg_renumber)
4160 return false;
4161
4162 regno = reg_renumber[regno];
4163 }
4164 return GP_REGNUM_P (regno);
4165}
4166
4167/* Return true if register REGNO is a valid base register for mode MODE.
4168 STRICT_P is true if REG_OK_STRICT is in effect. */
4169
4170bool
4171aarch64_regno_ok_for_base_p (int regno, bool strict_p)
4172{
4173 if (!HARD_REGISTER_NUM_P (regno))
4174 {
4175 if (!strict_p)
4176 return true;
4177
4178 if (!reg_renumber)
4179 return false;
4180
4181 regno = reg_renumber[regno];
4182 }
4183
4184 /* The fake registers will be eliminated to either the stack or
4185 hard frame pointer, both of which are usually valid base registers.
4186 Reload deals with the cases where the eliminated form isn't valid. */
4187 return (GP_REGNUM_P (regno)
4188 || regno == SP_REGNUM
4189 || regno == FRAME_POINTER_REGNUM
4190 || regno == ARG_POINTER_REGNUM);
4191}
4192
4193/* Return true if X is a valid base register for mode MODE.
4194 STRICT_P is true if REG_OK_STRICT is in effect. */
4195
4196static bool
4197aarch64_base_register_rtx_p (rtx x, bool strict_p)
4198{
4199 if (!strict_p && GET_CODE (x) == SUBREG)
4200 x = SUBREG_REG (x);
4201
4202 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
4203}
4204
4205/* Return true if address offset is a valid index. If it is, fill in INFO
4206 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
4207
4208static bool
4209aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 4210 machine_mode mode, bool strict_p)
43e9d192
IB
4211{
4212 enum aarch64_address_type type;
4213 rtx index;
4214 int shift;
4215
4216 /* (reg:P) */
4217 if ((REG_P (x) || GET_CODE (x) == SUBREG)
4218 && GET_MODE (x) == Pmode)
4219 {
4220 type = ADDRESS_REG_REG;
4221 index = x;
4222 shift = 0;
4223 }
4224 /* (sign_extend:DI (reg:SI)) */
4225 else if ((GET_CODE (x) == SIGN_EXTEND
4226 || GET_CODE (x) == ZERO_EXTEND)
4227 && GET_MODE (x) == DImode
4228 && GET_MODE (XEXP (x, 0)) == SImode)
4229 {
4230 type = (GET_CODE (x) == SIGN_EXTEND)
4231 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4232 index = XEXP (x, 0);
4233 shift = 0;
4234 }
4235 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
4236 else if (GET_CODE (x) == MULT
4237 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
4238 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
4239 && GET_MODE (XEXP (x, 0)) == DImode
4240 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
4241 && CONST_INT_P (XEXP (x, 1)))
4242 {
4243 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4244 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4245 index = XEXP (XEXP (x, 0), 0);
4246 shift = exact_log2 (INTVAL (XEXP (x, 1)));
4247 }
4248 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
4249 else if (GET_CODE (x) == ASHIFT
4250 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
4251 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
4252 && GET_MODE (XEXP (x, 0)) == DImode
4253 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
4254 && CONST_INT_P (XEXP (x, 1)))
4255 {
4256 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4257 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4258 index = XEXP (XEXP (x, 0), 0);
4259 shift = INTVAL (XEXP (x, 1));
4260 }
4261 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
4262 else if ((GET_CODE (x) == SIGN_EXTRACT
4263 || GET_CODE (x) == ZERO_EXTRACT)
4264 && GET_MODE (x) == DImode
4265 && GET_CODE (XEXP (x, 0)) == MULT
4266 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4267 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
4268 {
4269 type = (GET_CODE (x) == SIGN_EXTRACT)
4270 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4271 index = XEXP (XEXP (x, 0), 0);
4272 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
4273 if (INTVAL (XEXP (x, 1)) != 32 + shift
4274 || INTVAL (XEXP (x, 2)) != 0)
4275 shift = -1;
4276 }
4277 /* (and:DI (mult:DI (reg:DI) (const_int scale))
4278 (const_int 0xffffffff<<shift)) */
4279 else if (GET_CODE (x) == AND
4280 && GET_MODE (x) == DImode
4281 && GET_CODE (XEXP (x, 0)) == MULT
4282 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4283 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4284 && CONST_INT_P (XEXP (x, 1)))
4285 {
4286 type = ADDRESS_REG_UXTW;
4287 index = XEXP (XEXP (x, 0), 0);
4288 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
4289 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
4290 shift = -1;
4291 }
4292 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
4293 else if ((GET_CODE (x) == SIGN_EXTRACT
4294 || GET_CODE (x) == ZERO_EXTRACT)
4295 && GET_MODE (x) == DImode
4296 && GET_CODE (XEXP (x, 0)) == ASHIFT
4297 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4298 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
4299 {
4300 type = (GET_CODE (x) == SIGN_EXTRACT)
4301 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4302 index = XEXP (XEXP (x, 0), 0);
4303 shift = INTVAL (XEXP (XEXP (x, 0), 1));
4304 if (INTVAL (XEXP (x, 1)) != 32 + shift
4305 || INTVAL (XEXP (x, 2)) != 0)
4306 shift = -1;
4307 }
4308 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
4309 (const_int 0xffffffff<<shift)) */
4310 else if (GET_CODE (x) == AND
4311 && GET_MODE (x) == DImode
4312 && GET_CODE (XEXP (x, 0)) == ASHIFT
4313 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4314 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4315 && CONST_INT_P (XEXP (x, 1)))
4316 {
4317 type = ADDRESS_REG_UXTW;
4318 index = XEXP (XEXP (x, 0), 0);
4319 shift = INTVAL (XEXP (XEXP (x, 0), 1));
4320 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
4321 shift = -1;
4322 }
4323 /* (mult:P (reg:P) (const_int scale)) */
4324 else if (GET_CODE (x) == MULT
4325 && GET_MODE (x) == Pmode
4326 && GET_MODE (XEXP (x, 0)) == Pmode
4327 && CONST_INT_P (XEXP (x, 1)))
4328 {
4329 type = ADDRESS_REG_REG;
4330 index = XEXP (x, 0);
4331 shift = exact_log2 (INTVAL (XEXP (x, 1)));
4332 }
4333 /* (ashift:P (reg:P) (const_int shift)) */
4334 else if (GET_CODE (x) == ASHIFT
4335 && GET_MODE (x) == Pmode
4336 && GET_MODE (XEXP (x, 0)) == Pmode
4337 && CONST_INT_P (XEXP (x, 1)))
4338 {
4339 type = ADDRESS_REG_REG;
4340 index = XEXP (x, 0);
4341 shift = INTVAL (XEXP (x, 1));
4342 }
4343 else
4344 return false;
4345
4346 if (GET_CODE (index) == SUBREG)
4347 index = SUBREG_REG (index);
4348
4349 if ((shift == 0 ||
4350 (shift > 0 && shift <= 3
4351 && (1 << shift) == GET_MODE_SIZE (mode)))
4352 && REG_P (index)
4353 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
4354 {
4355 info->type = type;
4356 info->offset = index;
4357 info->shift = shift;
4358 return true;
4359 }
4360
4361 return false;
4362}
4363
abc52318
KT
4364/* Return true if MODE is one of the modes for which we
4365 support LDP/STP operations. */
4366
4367static bool
4368aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
4369{
4370 return mode == SImode || mode == DImode
4371 || mode == SFmode || mode == DFmode
4372 || (aarch64_vector_mode_supported_p (mode)
4373 && GET_MODE_SIZE (mode) == 8);
4374}
4375
9e0218fc
RH
4376/* Return true if REGNO is a virtual pointer register, or an eliminable
4377 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
4378 include stack_pointer or hard_frame_pointer. */
4379static bool
4380virt_or_elim_regno_p (unsigned regno)
4381{
4382 return ((regno >= FIRST_VIRTUAL_REGISTER
4383 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
4384 || regno == FRAME_POINTER_REGNUM
4385 || regno == ARG_POINTER_REGNUM);
4386}
4387
43e9d192
IB
4388/* Return true if X is a valid address for machine mode MODE. If it is,
4389 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
4390 effect. OUTER_CODE is PARALLEL for a load/store pair. */
4391
4392static bool
4393aarch64_classify_address (struct aarch64_address_info *info,
ef4bddc2 4394 rtx x, machine_mode mode,
43e9d192
IB
4395 RTX_CODE outer_code, bool strict_p)
4396{
4397 enum rtx_code code = GET_CODE (x);
4398 rtx op0, op1;
2d8c6dc1 4399
80d43579
WD
4400 /* On BE, we use load/store pair for all large int mode load/stores.
4401 TI/TFmode may also use a load/store pair. */
2d8c6dc1 4402 bool load_store_pair_p = (outer_code == PARALLEL
80d43579
WD
4403 || mode == TImode
4404 || mode == TFmode
2d8c6dc1
AH
4405 || (BYTES_BIG_ENDIAN
4406 && aarch64_vect_struct_mode_p (mode)));
4407
43e9d192 4408 bool allow_reg_index_p =
2d8c6dc1
AH
4409 !load_store_pair_p
4410 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
4411 && !aarch64_vect_struct_mode_p (mode);
4412
4413 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
4414 REG addressing. */
4415 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
43e9d192
IB
4416 && (code != POST_INC && code != REG))
4417 return false;
4418
4419 switch (code)
4420 {
4421 case REG:
4422 case SUBREG:
4423 info->type = ADDRESS_REG_IMM;
4424 info->base = x;
4425 info->offset = const0_rtx;
4426 return aarch64_base_register_rtx_p (x, strict_p);
4427
4428 case PLUS:
4429 op0 = XEXP (x, 0);
4430 op1 = XEXP (x, 1);
15c0c5c9
JW
4431
4432 if (! strict_p
4aa81c2e 4433 && REG_P (op0)
9e0218fc 4434 && virt_or_elim_regno_p (REGNO (op0))
4aa81c2e 4435 && CONST_INT_P (op1))
15c0c5c9
JW
4436 {
4437 info->type = ADDRESS_REG_IMM;
4438 info->base = op0;
4439 info->offset = op1;
4440
4441 return true;
4442 }
4443
43e9d192
IB
4444 if (GET_MODE_SIZE (mode) != 0
4445 && CONST_INT_P (op1)
4446 && aarch64_base_register_rtx_p (op0, strict_p))
4447 {
4448 HOST_WIDE_INT offset = INTVAL (op1);
4449
4450 info->type = ADDRESS_REG_IMM;
4451 info->base = op0;
4452 info->offset = op1;
4453
4454 /* TImode and TFmode values are allowed in both pairs of X
4455 registers and individual Q registers. The available
4456 address modes are:
4457 X,X: 7-bit signed scaled offset
4458 Q: 9-bit signed offset
4459 We conservatively require an offset representable in either mode.
8ed49fab
KT
4460 When performing the check for pairs of X registers i.e. LDP/STP
4461 pass down DImode since that is the natural size of the LDP/STP
4462 instruction memory accesses. */
43e9d192 4463 if (mode == TImode || mode == TFmode)
8ed49fab 4464 return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
8734dfac
WD
4465 && (offset_9bit_signed_unscaled_p (mode, offset)
4466 || offset_12bit_unsigned_scaled_p (mode, offset)));
43e9d192 4467
2d8c6dc1
AH
4468 /* A 7bit offset check because OImode will emit a ldp/stp
4469 instruction (only big endian will get here).
4470 For ldp/stp instructions, the offset is scaled for the size of a
4471 single element of the pair. */
4472 if (mode == OImode)
4473 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
4474
4475 /* Three 9/12 bit offsets checks because CImode will emit three
4476 ldr/str instructions (only big endian will get here). */
4477 if (mode == CImode)
4478 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
4479 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
4480 || offset_12bit_unsigned_scaled_p (V16QImode,
4481 offset + 32)));
4482
4483 /* Two 7bit offsets checks because XImode will emit two ldp/stp
4484 instructions (only big endian will get here). */
4485 if (mode == XImode)
4486 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
4487 && aarch64_offset_7bit_signed_scaled_p (TImode,
4488 offset + 32));
4489
4490 if (load_store_pair_p)
43e9d192 4491 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 4492 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
4493 else
4494 return (offset_9bit_signed_unscaled_p (mode, offset)
4495 || offset_12bit_unsigned_scaled_p (mode, offset));
4496 }
4497
4498 if (allow_reg_index_p)
4499 {
4500 /* Look for base + (scaled/extended) index register. */
4501 if (aarch64_base_register_rtx_p (op0, strict_p)
4502 && aarch64_classify_index (info, op1, mode, strict_p))
4503 {
4504 info->base = op0;
4505 return true;
4506 }
4507 if (aarch64_base_register_rtx_p (op1, strict_p)
4508 && aarch64_classify_index (info, op0, mode, strict_p))
4509 {
4510 info->base = op1;
4511 return true;
4512 }
4513 }
4514
4515 return false;
4516
4517 case POST_INC:
4518 case POST_DEC:
4519 case PRE_INC:
4520 case PRE_DEC:
4521 info->type = ADDRESS_REG_WB;
4522 info->base = XEXP (x, 0);
4523 info->offset = NULL_RTX;
4524 return aarch64_base_register_rtx_p (info->base, strict_p);
4525
4526 case POST_MODIFY:
4527 case PRE_MODIFY:
4528 info->type = ADDRESS_REG_WB;
4529 info->base = XEXP (x, 0);
4530 if (GET_CODE (XEXP (x, 1)) == PLUS
4531 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
4532 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
4533 && aarch64_base_register_rtx_p (info->base, strict_p))
4534 {
4535 HOST_WIDE_INT offset;
4536 info->offset = XEXP (XEXP (x, 1), 1);
4537 offset = INTVAL (info->offset);
4538
4539 /* TImode and TFmode values are allowed in both pairs of X
4540 registers and individual Q registers. The available
4541 address modes are:
4542 X,X: 7-bit signed scaled offset
4543 Q: 9-bit signed offset
4544 We conservatively require an offset representable in either mode.
4545 */
4546 if (mode == TImode || mode == TFmode)
44707478 4547 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
4548 && offset_9bit_signed_unscaled_p (mode, offset));
4549
2d8c6dc1 4550 if (load_store_pair_p)
43e9d192 4551 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 4552 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
4553 else
4554 return offset_9bit_signed_unscaled_p (mode, offset);
4555 }
4556 return false;
4557
4558 case CONST:
4559 case SYMBOL_REF:
4560 case LABEL_REF:
79517551
SN
4561 /* load literal: pc-relative constant pool entry. Only supported
4562 for SI mode or larger. */
43e9d192 4563 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1
AH
4564
4565 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
4566 {
4567 rtx sym, addend;
4568
4569 split_const (x, &sym, &addend);
b4f50fd4
RR
4570 return ((GET_CODE (sym) == LABEL_REF
4571 || (GET_CODE (sym) == SYMBOL_REF
4572 && CONSTANT_POOL_ADDRESS_P (sym)
9ee6540a 4573 && aarch64_pcrelative_literal_loads)));
43e9d192
IB
4574 }
4575 return false;
4576
4577 case LO_SUM:
4578 info->type = ADDRESS_LO_SUM;
4579 info->base = XEXP (x, 0);
4580 info->offset = XEXP (x, 1);
4581 if (allow_reg_index_p
4582 && aarch64_base_register_rtx_p (info->base, strict_p))
4583 {
4584 rtx sym, offs;
4585 split_const (info->offset, &sym, &offs);
4586 if (GET_CODE (sym) == SYMBOL_REF
a6e0bfa7 4587 && (aarch64_classify_symbol (sym, offs) == SYMBOL_SMALL_ABSOLUTE))
43e9d192
IB
4588 {
4589 /* The symbol and offset must be aligned to the access size. */
4590 unsigned int align;
4591 unsigned int ref_size;
4592
4593 if (CONSTANT_POOL_ADDRESS_P (sym))
4594 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
4595 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
4596 {
4597 tree exp = SYMBOL_REF_DECL (sym);
4598 align = TYPE_ALIGN (TREE_TYPE (exp));
4599 align = CONSTANT_ALIGNMENT (exp, align);
4600 }
4601 else if (SYMBOL_REF_DECL (sym))
4602 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
4603 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
4604 && SYMBOL_REF_BLOCK (sym) != NULL)
4605 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
4606 else
4607 align = BITS_PER_UNIT;
4608
4609 ref_size = GET_MODE_SIZE (mode);
4610 if (ref_size == 0)
4611 ref_size = GET_MODE_SIZE (DImode);
4612
4613 return ((INTVAL (offs) & (ref_size - 1)) == 0
4614 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
4615 }
4616 }
4617 return false;
4618
4619 default:
4620 return false;
4621 }
4622}
4623
9bf2f779
KT
4624/* Return true if the address X is valid for a PRFM instruction.
4625 STRICT_P is true if we should do strict checking with
4626 aarch64_classify_address. */
4627
4628bool
4629aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
4630{
4631 struct aarch64_address_info addr;
4632
4633 /* PRFM accepts the same addresses as DImode... */
4634 bool res = aarch64_classify_address (&addr, x, DImode, MEM, strict_p);
4635 if (!res)
4636 return false;
4637
4638 /* ... except writeback forms. */
4639 return addr.type != ADDRESS_REG_WB;
4640}
4641
43e9d192
IB
4642bool
4643aarch64_symbolic_address_p (rtx x)
4644{
4645 rtx offset;
4646
4647 split_const (x, &x, &offset);
4648 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
4649}
4650
a6e0bfa7 4651/* Classify the base of symbolic expression X. */
da4f13a4
MS
4652
4653enum aarch64_symbol_type
a6e0bfa7 4654aarch64_classify_symbolic_expression (rtx x)
43e9d192
IB
4655{
4656 rtx offset;
da4f13a4 4657
43e9d192 4658 split_const (x, &x, &offset);
a6e0bfa7 4659 return aarch64_classify_symbol (x, offset);
43e9d192
IB
4660}
4661
4662
4663/* Return TRUE if X is a legitimate address for accessing memory in
4664 mode MODE. */
4665static bool
ef4bddc2 4666aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
4667{
4668 struct aarch64_address_info addr;
4669
4670 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
4671}
4672
4673/* Return TRUE if X is a legitimate address for accessing memory in
4674 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
4675 pair operation. */
4676bool
ef4bddc2 4677aarch64_legitimate_address_p (machine_mode mode, rtx x,
aef66c94 4678 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
4679{
4680 struct aarch64_address_info addr;
4681
4682 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
4683}
4684
491ec060
WD
4685/* Split an out-of-range address displacement into a base and offset.
4686 Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
4687 to increase opportunities for sharing the base address among accesses of different sizes.
8734dfac 4688 For unaligned accesses and TI/TF mode use the signed 9-bit range. */
491ec060
WD
4689static bool
4690aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
4691{
8734dfac
WD
4692 HOST_WIDE_INT offset = INTVAL (*disp);
4693 HOST_WIDE_INT base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
491ec060 4694
8734dfac
WD
4695 if (mode == TImode || mode == TFmode
4696 || (offset & (GET_MODE_SIZE (mode) - 1)) != 0)
4697 base = (offset + 0x100) & ~0x1ff;
491ec060 4698
8734dfac
WD
4699 *off = GEN_INT (base);
4700 *disp = GEN_INT (offset - base);
491ec060
WD
4701 return true;
4702}
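/* Worked example (illustrative, not part of the original file): for an
   SImode access at offset 0x4568, the mask above is 0x3ffc, so the
   anchor becomes 0x4000 (returned in *OFF) and the remaining
   displacement 0x568 (returned in *DISP), which fits the unsigned
   scaled 12-bit form.  A neighbouring aligned access at, say, 0x45a0
   splits to the same anchor 0x4000, so the register holding
   sp + 0x4000 can be shared between them.  */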
4703
a2170965
TC
4704/* Return the binary representation of floating point constant VALUE in INTVAL.
4705 If the value cannot be converted, return false without setting INTVAL.
4706 The conversion is done in the given MODE. */
4707bool
4708aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
4709{
4710
4711 /* We make a general exception for 0. */
4712 if (aarch64_float_const_zero_rtx_p (value))
4713 {
4714 *intval = 0;
4715 return true;
4716 }
4717
4718 machine_mode mode = GET_MODE (value);
4719 if (GET_CODE (value) != CONST_DOUBLE
4720 || !SCALAR_FLOAT_MODE_P (mode)
4721 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
4722 /* Only support up to DF mode. */
4723 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
4724 return false;
4725
4726 unsigned HOST_WIDE_INT ival = 0;
4727
4728 long res[2];
4729 real_to_target (res,
4730 CONST_DOUBLE_REAL_VALUE (value),
4731 REAL_MODE_FORMAT (mode));
4732
5c22bb48
TC
4733 if (mode == DFmode)
4734 {
4735 int order = BYTES_BIG_ENDIAN ? 1 : 0;
4736 ival = zext_hwi (res[order], 32);
4737 ival |= (zext_hwi (res[1 - order], 32) << 32);
4738 }
4739 else
4740 ival = zext_hwi (res[0], 32);
a2170965
TC
4741
4742 *intval = ival;
4743 return true;
4744}
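/* For example (illustrative, not part of the original file): the DFmode
   constant 1.0 is returned as ival = 0x3ff0000000000000, and the DFmode
   constant 0.0 takes the early exit and yields 0.  */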
4745
4746/* Return TRUE if rtx X is an immediate constant that can be moved using a
4747 single MOV(+MOVK) followed by an FMOV. */
4748bool
4749aarch64_float_const_rtx_p (rtx x)
4750{
4751 machine_mode mode = GET_MODE (x);
4752 if (mode == VOIDmode)
4753 return false;
4754
4755 /* Determine whether it's cheaper to write float constants as
4756 mov/movk pairs rather than ldr/adrp pairs. */
4757 unsigned HOST_WIDE_INT ival;
4758
4759 if (GET_CODE (x) == CONST_DOUBLE
4760 && SCALAR_FLOAT_MODE_P (mode)
4761 && aarch64_reinterpret_float_as_int (x, &ival))
4762 {
304b9962
RS
4763 machine_mode imode = (mode == HFmode
4764 ? SImode
4765 : int_mode_for_mode (mode).require ());
a2170965
TC
4766 int num_instr = aarch64_internal_mov_immediate
4767 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
4768 return num_instr < 3;
4769 }
4770
4771 return false;
4772}
4773
43e9d192
IB
4774 /* Return TRUE if rtx X is the immediate constant 0.0. */
4775bool
3520f7cc 4776aarch64_float_const_zero_rtx_p (rtx x)
43e9d192 4777{
43e9d192
IB
4778 if (GET_MODE (x) == VOIDmode)
4779 return false;
4780
34a72c33 4781 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
43e9d192 4782 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
34a72c33 4783 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
43e9d192
IB
4784}
4785
a2170965
TC
4786 /* Return TRUE if rtx X is an immediate constant that fits in a single
4787 MOVI immediate operation. */
4788bool
4789aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
4790{
4791 if (!TARGET_SIMD)
4792 return false;
4793
a2170965
TC
4794 machine_mode vmode, imode;
4795 unsigned HOST_WIDE_INT ival;
4796
4797 if (GET_CODE (x) == CONST_DOUBLE
4798 && SCALAR_FLOAT_MODE_P (mode))
4799 {
4800 if (!aarch64_reinterpret_float_as_int (x, &ival))
4801 return false;
4802
35c38fa6
TC
4803 /* We make a general exception for 0. */
4804 if (aarch64_float_const_zero_rtx_p (x))
4805 return true;
4806
304b9962 4807 imode = int_mode_for_mode (mode).require ();
a2170965
TC
4808 }
4809 else if (GET_CODE (x) == CONST_INT
4810 && SCALAR_INT_MODE_P (mode))
4811 {
4812 imode = mode;
4813 ival = INTVAL (x);
4814 }
4815 else
4816 return false;
4817
4818 /* Use a 64-bit mode for everything except DI/DF mode, where we use
4819 a 128-bit vector mode. */
4820 int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
4821
4822 vmode = aarch64_simd_container_mode (imode, width);
4823 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
4824
4825 return aarch64_simd_valid_immediate (v_op, vmode, false, NULL);
4826}
4827
4828
70f09188
AP
4829/* Return the fixed registers used for condition codes. */
4830
4831static bool
4832aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
4833{
4834 *p1 = CC_REGNUM;
4835 *p2 = INVALID_REGNUM;
4836 return true;
4837}
4838
47210a04
RL
4839/* This function is used by the call expanders of the machine description.
4840 RESULT is the register in which the result is returned. It's NULL for
4841 "call" and "sibcall".
4842 MEM is the location of the function call.
4844 SIBCALL indicates whether this function call is a normal call or a sibling call;
4845 a different pattern is generated accordingly. */
4845
4846void
4847aarch64_expand_call (rtx result, rtx mem, bool sibcall)
4848{
4849 rtx call, callee, tmp;
4850 rtvec vec;
4851 machine_mode mode;
4852
4853 gcc_assert (MEM_P (mem));
4854 callee = XEXP (mem, 0);
4855 mode = GET_MODE (callee);
4856 gcc_assert (mode == Pmode);
4857
4858 /* Decide if we should generate indirect calls by loading the
4859 address of the callee into a register before performing
4860 the branch-and-link. */
4861 if (SYMBOL_REF_P (callee)
4862 ? (aarch64_is_long_call_p (callee)
4863 || aarch64_is_noplt_call_p (callee))
4864 : !REG_P (callee))
4865 XEXP (mem, 0) = force_reg (mode, callee);
4866
4867 call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
4868
4869 if (result != NULL_RTX)
4870 call = gen_rtx_SET (result, call);
4871
4872 if (sibcall)
4873 tmp = ret_rtx;
4874 else
4875 tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM));
4876
4877 vec = gen_rtvec (2, call, tmp);
4878 call = gen_rtx_PARALLEL (VOIDmode, vec);
4879
4880 aarch64_emit_call_insn (call);
4881}
4882
78607708
TV
4883/* Emit call insn with PAT and do aarch64-specific handling. */
4884
d07a3fed 4885void
78607708
TV
4886aarch64_emit_call_insn (rtx pat)
4887{
4888 rtx insn = emit_call_insn (pat);
4889
4890 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
4891 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
4892 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
4893}
4894
ef4bddc2 4895machine_mode
43e9d192
IB
4896aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
4897{
4898 /* All floating point compares return CCFP if it is an equality
4899 comparison, and CCFPE otherwise. */
4900 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
4901 {
4902 switch (code)
4903 {
4904 case EQ:
4905 case NE:
4906 case UNORDERED:
4907 case ORDERED:
4908 case UNLT:
4909 case UNLE:
4910 case UNGT:
4911 case UNGE:
4912 case UNEQ:
4913 case LTGT:
4914 return CCFPmode;
4915
4916 case LT:
4917 case LE:
4918 case GT:
4919 case GE:
4920 return CCFPEmode;
4921
4922 default:
4923 gcc_unreachable ();
4924 }
4925 }
4926
2b8568fe
KT
4927 /* Equality comparisons of short modes against zero can be performed
4928 using the TST instruction with the appropriate bitmask. */
4929 if (y == const0_rtx && REG_P (x)
4930 && (code == EQ || code == NE)
4931 && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
4932 return CC_NZmode;
4933
b06335f9
KT
4934 /* Similarly, comparisons of zero_extends from shorter modes can
4935 be performed using an ANDS with an immediate mask. */
4936 if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND
4937 && (GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4938 && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
4939 && (code == EQ || code == NE))
4940 return CC_NZmode;
4941
43e9d192
IB
4942 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4943 && y == const0_rtx
4944 && (code == EQ || code == NE || code == LT || code == GE)
b056c910 4945 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
7325d85a
KT
4946 || GET_CODE (x) == NEG
4947 || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
4948 && CONST_INT_P (XEXP (x, 2)))))
43e9d192
IB
4949 return CC_NZmode;
4950
1c992d1e 4951 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
4952 the comparison will have to be swapped when we emit the assembly
4953 code. */
4954 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
ffa8a921 4955 && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
43e9d192
IB
4956 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
4957 || GET_CODE (x) == LSHIFTRT
1c992d1e 4958 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
4959 return CC_SWPmode;
4960
1c992d1e
RE
4961 /* Similarly for a negated operand, but we can only do this for
4962 equalities. */
4963 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 4964 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
4965 && (code == EQ || code == NE)
4966 && GET_CODE (x) == NEG)
4967 return CC_Zmode;
4968
ef22810a
RH
4969 /* A test for unsigned overflow. */
4970 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
4971 && code == NE
4972 && GET_CODE (x) == PLUS
4973 && GET_CODE (y) == ZERO_EXTEND)
4974 return CC_Cmode;
4975
43e9d192
IB
4976 /* For everything else, return CCmode. */
4977 return CCmode;
4978}
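/* Illustrative example (not from the original file): for a comparison
   whose first operand is a shifted register, e.g.
   (compare (ashift:DI (reg x1) (const_int 3)) (reg x2)), the function
   above selects CC_SWPmode; the comparison is then emitted with the
   operands swapped ("cmp x2, x1, lsl 3") and the condition is taken
   from the swapped table in aarch64_get_condition_code_1 (e.g. GE maps
   to AARCH64_LE).  */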
4979
3dfa7055 4980static int
b8506a8a 4981aarch64_get_condition_code_1 (machine_mode, enum rtx_code);
3dfa7055 4982
cd5660ab 4983int
43e9d192
IB
4984aarch64_get_condition_code (rtx x)
4985{
ef4bddc2 4986 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
4987 enum rtx_code comp_code = GET_CODE (x);
4988
4989 if (GET_MODE_CLASS (mode) != MODE_CC)
4990 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
4991 return aarch64_get_condition_code_1 (mode, comp_code);
4992}
43e9d192 4993
3dfa7055 4994static int
b8506a8a 4995aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
3dfa7055 4996{
43e9d192
IB
4997 switch (mode)
4998 {
4e10a5a7
RS
4999 case E_CCFPmode:
5000 case E_CCFPEmode:
43e9d192
IB
5001 switch (comp_code)
5002 {
5003 case GE: return AARCH64_GE;
5004 case GT: return AARCH64_GT;
5005 case LE: return AARCH64_LS;
5006 case LT: return AARCH64_MI;
5007 case NE: return AARCH64_NE;
5008 case EQ: return AARCH64_EQ;
5009 case ORDERED: return AARCH64_VC;
5010 case UNORDERED: return AARCH64_VS;
5011 case UNLT: return AARCH64_LT;
5012 case UNLE: return AARCH64_LE;
5013 case UNGT: return AARCH64_HI;
5014 case UNGE: return AARCH64_PL;
cd5660ab 5015 default: return -1;
43e9d192
IB
5016 }
5017 break;
5018
4e10a5a7 5019 case E_CCmode:
43e9d192
IB
5020 switch (comp_code)
5021 {
5022 case NE: return AARCH64_NE;
5023 case EQ: return AARCH64_EQ;
5024 case GE: return AARCH64_GE;
5025 case GT: return AARCH64_GT;
5026 case LE: return AARCH64_LE;
5027 case LT: return AARCH64_LT;
5028 case GEU: return AARCH64_CS;
5029 case GTU: return AARCH64_HI;
5030 case LEU: return AARCH64_LS;
5031 case LTU: return AARCH64_CC;
cd5660ab 5032 default: return -1;
43e9d192
IB
5033 }
5034 break;
5035
4e10a5a7 5036 case E_CC_SWPmode:
43e9d192
IB
5037 switch (comp_code)
5038 {
5039 case NE: return AARCH64_NE;
5040 case EQ: return AARCH64_EQ;
5041 case GE: return AARCH64_LE;
5042 case GT: return AARCH64_LT;
5043 case LE: return AARCH64_GE;
5044 case LT: return AARCH64_GT;
5045 case GEU: return AARCH64_LS;
5046 case GTU: return AARCH64_CC;
5047 case LEU: return AARCH64_CS;
5048 case LTU: return AARCH64_HI;
cd5660ab 5049 default: return -1;
43e9d192
IB
5050 }
5051 break;
5052
4e10a5a7 5053 case E_CC_NZmode:
43e9d192
IB
5054 switch (comp_code)
5055 {
5056 case NE: return AARCH64_NE;
5057 case EQ: return AARCH64_EQ;
5058 case GE: return AARCH64_PL;
5059 case LT: return AARCH64_MI;
cd5660ab 5060 default: return -1;
43e9d192
IB
5061 }
5062 break;
5063
4e10a5a7 5064 case E_CC_Zmode:
1c992d1e
RE
5065 switch (comp_code)
5066 {
5067 case NE: return AARCH64_NE;
5068 case EQ: return AARCH64_EQ;
cd5660ab 5069 default: return -1;
1c992d1e
RE
5070 }
5071 break;
5072
4e10a5a7 5073 case E_CC_Cmode:
ef22810a
RH
5074 switch (comp_code)
5075 {
5076 case NE: return AARCH64_CS;
5077 case EQ: return AARCH64_CC;
5078 default: return -1;
5079 }
5080 break;
5081
43e9d192 5082 default:
cd5660ab 5083 return -1;
43e9d192 5084 }
3dfa7055 5085
3dfa7055 5086 return -1;
43e9d192
IB
5087}
5088
ddeabd3e
AL
5089bool
5090aarch64_const_vec_all_same_in_range_p (rtx x,
5091 HOST_WIDE_INT minval,
5092 HOST_WIDE_INT maxval)
5093{
5094 HOST_WIDE_INT firstval;
5095 int count, i;
5096
5097 if (GET_CODE (x) != CONST_VECTOR
5098 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
5099 return false;
5100
5101 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
5102 if (firstval < minval || firstval > maxval)
5103 return false;
5104
5105 count = CONST_VECTOR_NUNITS (x);
5106 for (i = 1; i < count; i++)
5107 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
5108 return false;
5109
5110 return true;
5111}
5112
5113bool
5114aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
5115{
5116 return aarch64_const_vec_all_same_in_range_p (x, val, val);
5117}
5118
43e9d192 5119
cf670503
ZC
5120/* N Z C V. */
5121#define AARCH64_CC_V 1
5122#define AARCH64_CC_C (1 << 1)
5123#define AARCH64_CC_Z (1 << 2)
5124#define AARCH64_CC_N (1 << 3)
5125
c8012fbc
WD
5126/* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
5127static const int aarch64_nzcv_codes[] =
5128{
5129 0, /* EQ, Z == 1. */
5130 AARCH64_CC_Z, /* NE, Z == 0. */
5131 0, /* CS, C == 1. */
5132 AARCH64_CC_C, /* CC, C == 0. */
5133 0, /* MI, N == 1. */
5134 AARCH64_CC_N, /* PL, N == 0. */
5135 0, /* VS, V == 1. */
5136 AARCH64_CC_V, /* VC, V == 0. */
5137 0,		/* HI, C == 1 && Z == 0.  */
5138 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
5139 AARCH64_CC_V, /* GE, N == V. */
5140 0, /* LT, N != V. */
5141 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
5142 0, /* LE, !(Z == 0 && N == V). */
5143 0, /* AL, Any. */
5144 0 /* NV, Any. */
cf670503
ZC
5145};
5146
bcf19844
JW
5147/* Print operand X to file F in a target specific manner according to CODE.
5148 The acceptable formatting commands given by CODE are:
5149 'c': An integer or symbol address without a preceding #
5150 sign.
5151 'e': Print the sign/zero-extend size as a character 8->b,
5152 16->h, 32->w.
5153 'p': Prints N such that 2^N == X (X must be power of 2 and
5154 const int).
5155 'P': Print the number of non-zero bits in X (a const_int).
5156 'H': Print the higher numbered register of a pair (TImode)
5157 of regs.
5158 'm': Print a condition (eq, ne, etc).
5159 'M': Same as 'm', but invert condition.
5160 'b/h/s/d/q': Print a scalar FP/SIMD register name.
5161 'S/T/U/V': Print a FP/SIMD register name for a register list.
5162 The register printed is the FP/SIMD register name
5163 of X + 0/1/2/3 for S/T/U/V.
5164 'R': Print a scalar FP/SIMD register name + 1.
5165 'X': Print bottom 16 bits of integer constant in hex.
5166 'w/x': Print a general register name or the zero register
5167 (32-bit or 64-bit).
5168 '0':		Print a normal operand; if it's a general register,
5169			then we assume DImode.
5170 'k': Print NZCV for conditional compare instructions.
5171 'A': Output address constant representing the first
5172 argument of X, specifying a relocation offset
5173 if appropriate.
5174 'L': Output constant address specified by X
5175 with a relocation offset if appropriate.
5176 'G': Prints address of X, specifying a PC relative
5177 relocation mode if appropriate. */
5178
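/* Illustrative examples of the codes above (assuming operand 0 is in
   register x1): "%w0" prints "w1", "%x0" prints "x1", "%H0" prints the
   pair's upper register "x2", and "%e" applied to the constant 8 prints
   "b".  */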
cc8ca59e
JB
5179static void
5180aarch64_print_operand (FILE *f, rtx x, int code)
43e9d192
IB
5181{
5182 switch (code)
5183 {
f541a481
KT
5184 case 'c':
5185 switch (GET_CODE (x))
5186 {
5187 case CONST_INT:
5188 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5189 break;
5190
5191 case SYMBOL_REF:
5192 output_addr_const (f, x);
5193 break;
5194
5195 case CONST:
5196 if (GET_CODE (XEXP (x, 0)) == PLUS
5197 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5198 {
5199 output_addr_const (f, x);
5200 break;
5201 }
5202 /* Fall through. */
5203
5204 default:
5205 output_operand_lossage ("Unsupported operand for code '%c'", code);
5206 }
5207 break;
5208
43e9d192 5209 case 'e':
43e9d192
IB
5210 {
5211 int n;
5212
4aa81c2e 5213 if (!CONST_INT_P (x)
43e9d192
IB
5214 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
5215 {
5216 output_operand_lossage ("invalid operand for '%%%c'", code);
5217 return;
5218 }
5219
5220 switch (n)
5221 {
5222 case 3:
5223 fputc ('b', f);
5224 break;
5225 case 4:
5226 fputc ('h', f);
5227 break;
5228 case 5:
5229 fputc ('w', f);
5230 break;
5231 default:
5232 output_operand_lossage ("invalid operand for '%%%c'", code);
5233 return;
5234 }
5235 }
5236 break;
5237
5238 case 'p':
5239 {
5240 int n;
5241
4aa81c2e 5242 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
5243 {
5244 output_operand_lossage ("invalid operand for '%%%c'", code);
5245 return;
5246 }
5247
5248 asm_fprintf (f, "%d", n);
5249 }
5250 break;
5251
5252 case 'P':
4aa81c2e 5253 if (!CONST_INT_P (x))
43e9d192
IB
5254 {
5255 output_operand_lossage ("invalid operand for '%%%c'", code);
5256 return;
5257 }
5258
8d55c61b 5259 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
5260 break;
5261
5262 case 'H':
4aa81c2e 5263 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
5264 {
5265 output_operand_lossage ("invalid operand for '%%%c'", code);
5266 return;
5267 }
5268
01a3a324 5269 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
5270 break;
5271
43e9d192 5272 case 'M':
c8012fbc 5273 case 'm':
cd5660ab
KT
5274 {
5275 int cond_code;
c8012fbc
WD
5276 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
5277 if (x == const_true_rtx)
cd5660ab 5278 {
c8012fbc
WD
5279 if (code == 'M')
5280 fputs ("nv", f);
cd5660ab
KT
5281 return;
5282 }
43e9d192 5283
cd5660ab
KT
5284 if (!COMPARISON_P (x))
5285 {
5286 output_operand_lossage ("invalid operand for '%%%c'", code);
5287 return;
5288 }
c8012fbc 5289
cd5660ab
KT
5290 cond_code = aarch64_get_condition_code (x);
5291 gcc_assert (cond_code >= 0);
c8012fbc
WD
5292 if (code == 'M')
5293 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
5294 fputs (aarch64_condition_codes[cond_code], f);
cd5660ab 5295 }
43e9d192
IB
5296 break;
5297
5298 case 'b':
5299 case 'h':
5300 case 's':
5301 case 'd':
5302 case 'q':
43e9d192
IB
5303 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
5304 {
5305 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
5306 return;
5307 }
50ce6f88 5308 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
5309 break;
5310
5311 case 'S':
5312 case 'T':
5313 case 'U':
5314 case 'V':
43e9d192
IB
5315 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
5316 {
5317 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
5318 return;
5319 }
50ce6f88 5320 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
5321 break;
5322
2d8c6dc1 5323 case 'R':
2d8c6dc1
AH
5324 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
5325 {
5326 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
5327 return;
5328 }
5329 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
5330 break;
5331
a05c0ddf 5332 case 'X':
4aa81c2e 5333 if (!CONST_INT_P (x))
a05c0ddf
IB
5334 {
5335 output_operand_lossage ("invalid operand for '%%%c'", code);
5336 return;
5337 }
50d38551 5338 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
5339 break;
5340
43e9d192
IB
5341 case 'w':
5342 case 'x':
3520f7cc
JG
5343 if (x == const0_rtx
5344 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 5345 {
50ce6f88 5346 asm_fprintf (f, "%czr", code);
43e9d192
IB
5347 break;
5348 }
5349
5350 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
5351 {
50ce6f88 5352 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
5353 break;
5354 }
5355
5356 if (REG_P (x) && REGNO (x) == SP_REGNUM)
5357 {
50ce6f88 5358 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
5359 break;
5360 }
5361
5362 /* Fall through */
5363
5364 case 0:
43e9d192
IB
5365 if (x == NULL)
5366 {
5367 output_operand_lossage ("missing operand");
5368 return;
5369 }
5370
5371 switch (GET_CODE (x))
5372 {
5373 case REG:
01a3a324 5374 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
5375 break;
5376
5377 case MEM:
cc8ca59e 5378 output_address (GET_MODE (x), XEXP (x, 0));
00eee3fa
WD
5379 /* Check all memory references are Pmode - even with ILP32. */
5380 gcc_assert (GET_MODE (XEXP (x, 0)) == Pmode);
43e9d192
IB
5381 break;
5382
2af16a7c 5383 case CONST:
43e9d192
IB
5384 case LABEL_REF:
5385 case SYMBOL_REF:
5386 output_addr_const (asm_out_file, x);
5387 break;
5388
5389 case CONST_INT:
5390 asm_fprintf (f, "%wd", INTVAL (x));
5391 break;
5392
5393 case CONST_VECTOR:
3520f7cc
JG
5394 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
5395 {
ddeabd3e
AL
5396 gcc_assert (
5397 aarch64_const_vec_all_same_in_range_p (x,
5398 HOST_WIDE_INT_MIN,
5399 HOST_WIDE_INT_MAX));
3520f7cc
JG
5400 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
5401 }
5402 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
5403 {
5404 fputc ('0', f);
5405 }
5406 else
5407 gcc_unreachable ();
43e9d192
IB
5408 break;
5409
3520f7cc 5410 case CONST_DOUBLE:
2ca5b430
KT
5411 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
5412 be getting CONST_DOUBLEs holding integers. */
5413 gcc_assert (GET_MODE (x) != VOIDmode);
5414 if (aarch64_float_const_zero_rtx_p (x))
3520f7cc
JG
5415 {
5416 fputc ('0', f);
5417 break;
5418 }
5419 else if (aarch64_float_const_representable_p (x))
5420 {
5421#define buf_size 20
5422 char float_buf[buf_size] = {'\0'};
34a72c33
RS
5423 real_to_decimal_for_mode (float_buf,
5424 CONST_DOUBLE_REAL_VALUE (x),
3520f7cc
JG
5425 buf_size, buf_size,
5426 1, GET_MODE (x));
5427 asm_fprintf (asm_out_file, "%s", float_buf);
5428 break;
5429#undef buf_size
5430 }
5431 output_operand_lossage ("invalid constant");
5432 return;
43e9d192
IB
5433 default:
5434 output_operand_lossage ("invalid operand");
5435 return;
5436 }
5437 break;
5438
5439 case 'A':
5440 if (GET_CODE (x) == HIGH)
5441 x = XEXP (x, 0);
5442
a6e0bfa7 5443 switch (aarch64_classify_symbolic_expression (x))
43e9d192 5444 {
6642bdb4 5445 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
5446 asm_fprintf (asm_out_file, ":got:");
5447 break;
5448
5449 case SYMBOL_SMALL_TLSGD:
5450 asm_fprintf (asm_out_file, ":tlsgd:");
5451 break;
5452
5453 case SYMBOL_SMALL_TLSDESC:
5454 asm_fprintf (asm_out_file, ":tlsdesc:");
5455 break;
5456
79496620 5457 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
5458 asm_fprintf (asm_out_file, ":gottprel:");
5459 break;
5460
d18ba284 5461 case SYMBOL_TLSLE24:
43e9d192
IB
5462 asm_fprintf (asm_out_file, ":tprel:");
5463 break;
5464
87dd8ab0
MS
5465 case SYMBOL_TINY_GOT:
5466 gcc_unreachable ();
5467 break;
5468
43e9d192
IB
5469 default:
5470 break;
5471 }
5472 output_addr_const (asm_out_file, x);
5473 break;
5474
5475 case 'L':
a6e0bfa7 5476 switch (aarch64_classify_symbolic_expression (x))
43e9d192 5477 {
6642bdb4 5478 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
5479 asm_fprintf (asm_out_file, ":lo12:");
5480 break;
5481
5482 case SYMBOL_SMALL_TLSGD:
5483 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
5484 break;
5485
5486 case SYMBOL_SMALL_TLSDESC:
5487 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
5488 break;
5489
79496620 5490 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
5491 asm_fprintf (asm_out_file, ":gottprel_lo12:");
5492 break;
5493
cbf5629e
JW
5494 case SYMBOL_TLSLE12:
5495 asm_fprintf (asm_out_file, ":tprel_lo12:");
5496 break;
5497
d18ba284 5498 case SYMBOL_TLSLE24:
43e9d192
IB
5499 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
5500 break;
5501
87dd8ab0
MS
5502 case SYMBOL_TINY_GOT:
5503 asm_fprintf (asm_out_file, ":got:");
5504 break;
5505
5ae7caad
JW
5506 case SYMBOL_TINY_TLSIE:
5507 asm_fprintf (asm_out_file, ":gottprel:");
5508 break;
5509
43e9d192
IB
5510 default:
5511 break;
5512 }
5513 output_addr_const (asm_out_file, x);
5514 break;
5515
5516 case 'G':
a6e0bfa7 5517 switch (aarch64_classify_symbolic_expression (x))
43e9d192 5518 {
d18ba284 5519 case SYMBOL_TLSLE24:
43e9d192
IB
5520 asm_fprintf (asm_out_file, ":tprel_hi12:");
5521 break;
5522 default:
5523 break;
5524 }
5525 output_addr_const (asm_out_file, x);
5526 break;
5527
cf670503
ZC
5528 case 'k':
5529 {
c8012fbc 5530 HOST_WIDE_INT cond_code;
cf670503 5531
c8012fbc 5532 if (!CONST_INT_P (x))
cf670503
ZC
5533 {
5534 output_operand_lossage ("invalid operand for '%%%c'", code);
5535 return;
5536 }
5537
c8012fbc
WD
5538 cond_code = INTVAL (x);
5539 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
5540 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
cf670503
ZC
5541 }
5542 break;
5543
43e9d192
IB
5544 default:
5545 output_operand_lossage ("invalid operand prefix '%%%c'", code);
5546 return;
5547 }
5548}
5549
cc8ca59e
JB
5550static void
5551aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
43e9d192
IB
5552{
5553 struct aarch64_address_info addr;
5554
cc8ca59e 5555 if (aarch64_classify_address (&addr, x, mode, MEM, true))
43e9d192
IB
5556 switch (addr.type)
5557 {
5558 case ADDRESS_REG_IMM:
5559 if (addr.offset == const0_rtx)
01a3a324 5560 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 5561 else
16a3246f 5562 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
5563 INTVAL (addr.offset));
5564 return;
5565
5566 case ADDRESS_REG_REG:
5567 if (addr.shift == 0)
16a3246f 5568 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 5569 reg_names [REGNO (addr.offset)]);
43e9d192 5570 else
16a3246f 5571 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 5572 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
5573 return;
5574
5575 case ADDRESS_REG_UXTW:
5576 if (addr.shift == 0)
16a3246f 5577 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
5578 REGNO (addr.offset) - R0_REGNUM);
5579 else
16a3246f 5580 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
5581 REGNO (addr.offset) - R0_REGNUM, addr.shift);
5582 return;
5583
5584 case ADDRESS_REG_SXTW:
5585 if (addr.shift == 0)
16a3246f 5586 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
5587 REGNO (addr.offset) - R0_REGNUM);
5588 else
16a3246f 5589 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
5590 REGNO (addr.offset) - R0_REGNUM, addr.shift);
5591 return;
5592
5593 case ADDRESS_REG_WB:
5594 switch (GET_CODE (x))
5595 {
5596 case PRE_INC:
16a3246f 5597 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
cc8ca59e 5598 GET_MODE_SIZE (mode));
43e9d192
IB
5599 return;
5600 case POST_INC:
16a3246f 5601 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
cc8ca59e 5602 GET_MODE_SIZE (mode));
43e9d192
IB
5603 return;
5604 case PRE_DEC:
16a3246f 5605 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
cc8ca59e 5606 GET_MODE_SIZE (mode));
43e9d192
IB
5607 return;
5608 case POST_DEC:
16a3246f 5609 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
cc8ca59e 5610 GET_MODE_SIZE (mode));
43e9d192
IB
5611 return;
5612 case PRE_MODIFY:
16a3246f 5613 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
5614 INTVAL (addr.offset));
5615 return;
5616 case POST_MODIFY:
16a3246f 5617 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
5618 INTVAL (addr.offset));
5619 return;
5620 default:
5621 break;
5622 }
5623 break;
5624
5625 case ADDRESS_LO_SUM:
16a3246f 5626 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
5627 output_addr_const (f, addr.offset);
5628 asm_fprintf (f, "]");
5629 return;
5630
5631 case ADDRESS_SYMBOLIC:
5632 break;
5633 }
5634
5635 output_addr_const (f, x);
5636}
5637
43e9d192
IB
5638bool
5639aarch64_label_mentioned_p (rtx x)
5640{
5641 const char *fmt;
5642 int i;
5643
5644 if (GET_CODE (x) == LABEL_REF)
5645 return true;
5646
5647 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
5648 referencing instruction, but they are constant offsets, not
5649 symbols. */
5650 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5651 return false;
5652
5653 fmt = GET_RTX_FORMAT (GET_CODE (x));
5654 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5655 {
5656 if (fmt[i] == 'E')
5657 {
5658 int j;
5659
5660 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5661 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
5662 return 1;
5663 }
5664 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
5665 return 1;
5666 }
5667
5668 return 0;
5669}
5670
5671/* Implement REGNO_REG_CLASS. */
5672
5673enum reg_class
5674aarch64_regno_regclass (unsigned regno)
5675{
5676 if (GP_REGNUM_P (regno))
a4a182c6 5677 return GENERAL_REGS;
43e9d192
IB
5678
5679 if (regno == SP_REGNUM)
5680 return STACK_REG;
5681
5682 if (regno == FRAME_POINTER_REGNUM
5683 || regno == ARG_POINTER_REGNUM)
f24bb080 5684 return POINTER_REGS;
43e9d192
IB
5685
5686 if (FP_REGNUM_P (regno))
5687 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
5688
5689 return NO_REGS;
5690}
5691
0c4ec427 5692static rtx
ef4bddc2 5693aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
5694{
5695 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
5696 where mask is selected by alignment and size of the offset.
5697 We try to pick as large a range for the offset as possible to
5698 maximize the chance of a CSE. However, for aligned addresses
5699 we limit the range to 4k so that structures with different sized
e8426e0a
BC
5700 elements are likely to use the same base. We need to be careful
5701 not to split a CONST for some forms of address expression, otherwise
5702 it will generate sub-optimal code. */
0c4ec427
RE
5703
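  /* Worked example (illustrative only): for an SImode access to
     (plus r1 0x13004), the scaled 12-bit immediate only reaches 0x3ffc,
     so the address is rewritten as (plus (plus r1 0x10000) 0x3004);
     the remaining offset fits the immediate, and other accesses near
     r1 + 0x10000 can CSE the new base.  */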
5704 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
5705 {
9e0218fc 5706 rtx base = XEXP (x, 0);
17d7bdd8 5707 rtx offset_rtx = XEXP (x, 1);
9e0218fc 5708 HOST_WIDE_INT offset = INTVAL (offset_rtx);
0c4ec427 5709
9e0218fc 5710 if (GET_CODE (base) == PLUS)
e8426e0a 5711 {
9e0218fc
RH
5712 rtx op0 = XEXP (base, 0);
5713 rtx op1 = XEXP (base, 1);
5714
5715 /* Force any scaling into a temp for CSE. */
5716 op0 = force_reg (Pmode, op0);
5717 op1 = force_reg (Pmode, op1);
5718
5719 /* Let the pointer register be in op0. */
5720 if (REG_POINTER (op1))
5721 std::swap (op0, op1);
5722
5723 /* If the pointer is virtual or frame related, then we know that
5724 virtual register instantiation or register elimination is going
5725 to apply a second constant. We want the two constants folded
5726 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
5727 if (virt_or_elim_regno_p (REGNO (op0)))
e8426e0a 5728 {
9e0218fc
RH
5729 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
5730 NULL_RTX, true, OPTAB_DIRECT);
5731 return gen_rtx_PLUS (Pmode, base, op1);
e8426e0a 5732 }
e8426e0a 5733
9e0218fc
RH
5734 /* Otherwise, in order to encourage CSE (and thence loop strength
5735 reduce) scaled addresses, emit as (OP0 + OP1) + CONST. */
5736 base = expand_binop (Pmode, add_optab, op0, op1,
5737 NULL_RTX, true, OPTAB_DIRECT);
5738 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
e8426e0a
BC
5739 }
5740
8734dfac 5741 /* Does it look like we'll need a 16-byte load/store-pair operation? */
9e0218fc 5742 HOST_WIDE_INT base_offset;
8734dfac
WD
5743 if (GET_MODE_SIZE (mode) > 16)
5744 base_offset = (offset + 0x400) & ~0x7f0;
0c4ec427
RE
5745    /* For offsets that aren't a multiple of the access size, the limit is
5746 -256...255. */
5747 else if (offset & (GET_MODE_SIZE (mode) - 1))
ff0f3f1c
WD
5748 {
5749 base_offset = (offset + 0x100) & ~0x1ff;
5750
5751 /* BLKmode typically uses LDP of X-registers. */
5752 if (mode == BLKmode)
5753 base_offset = (offset + 512) & ~0x3ff;
5754 }
5755 /* Small negative offsets are supported. */
5756 else if (IN_RANGE (offset, -256, 0))
5757 base_offset = 0;
8734dfac
WD
5758 else if (mode == TImode || mode == TFmode)
5759 base_offset = (offset + 0x100) & ~0x1ff;
ff0f3f1c 5760    /* Use a 12-bit unsigned offset scaled by the access size.  */
0c4ec427 5761 else
ff0f3f1c 5762 base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
0c4ec427 5763
9e0218fc
RH
5764 if (base_offset != 0)
5765 {
5766 base = plus_constant (Pmode, base, base_offset);
5767 base = force_operand (base, NULL_RTX);
5768 return plus_constant (Pmode, base, offset - base_offset);
5769 }
0c4ec427
RE
5770 }
5771
5772 return x;
5773}
5774
b4f50fd4
RR
5775/* Return the reload icode required for a constant pool in mode. */
5776static enum insn_code
5777aarch64_constant_pool_reload_icode (machine_mode mode)
5778{
5779 switch (mode)
5780 {
4e10a5a7 5781 case E_SFmode:
b4f50fd4
RR
5782 return CODE_FOR_aarch64_reload_movcpsfdi;
5783
4e10a5a7 5784 case E_DFmode:
b4f50fd4
RR
5785 return CODE_FOR_aarch64_reload_movcpdfdi;
5786
4e10a5a7 5787 case E_TFmode:
b4f50fd4
RR
5788 return CODE_FOR_aarch64_reload_movcptfdi;
5789
4e10a5a7 5790 case E_V8QImode:
b4f50fd4
RR
5791 return CODE_FOR_aarch64_reload_movcpv8qidi;
5792
4e10a5a7 5793 case E_V16QImode:
b4f50fd4
RR
5794 return CODE_FOR_aarch64_reload_movcpv16qidi;
5795
4e10a5a7 5796 case E_V4HImode:
b4f50fd4
RR
5797 return CODE_FOR_aarch64_reload_movcpv4hidi;
5798
4e10a5a7 5799 case E_V8HImode:
b4f50fd4
RR
5800 return CODE_FOR_aarch64_reload_movcpv8hidi;
5801
4e10a5a7 5802 case E_V2SImode:
b4f50fd4
RR
5803 return CODE_FOR_aarch64_reload_movcpv2sidi;
5804
4e10a5a7 5805 case E_V4SImode:
b4f50fd4
RR
5806 return CODE_FOR_aarch64_reload_movcpv4sidi;
5807
4e10a5a7 5808 case E_V2DImode:
b4f50fd4
RR
5809 return CODE_FOR_aarch64_reload_movcpv2didi;
5810
4e10a5a7 5811 case E_V2DFmode:
b4f50fd4
RR
5812 return CODE_FOR_aarch64_reload_movcpv2dfdi;
5813
5814 default:
5815 gcc_unreachable ();
5816 }
5817
5818 gcc_unreachable ();
5819}
43e9d192
IB
5820static reg_class_t
5821aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
5822 reg_class_t rclass,
ef4bddc2 5823 machine_mode mode,
43e9d192
IB
5824 secondary_reload_info *sri)
5825{
b4f50fd4
RR
5826
5827 /* If we have to disable direct literal pool loads and stores because the
5828 function is too big, then we need a scratch register. */
5829 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
5830 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
5831 || targetm.vector_mode_supported_p (GET_MODE (x)))
9ee6540a 5832 && !aarch64_pcrelative_literal_loads)
b4f50fd4
RR
5833 {
5834 sri->icode = aarch64_constant_pool_reload_icode (mode);
5835 return NO_REGS;
5836 }
5837
43e9d192
IB
5838 /* Without the TARGET_SIMD instructions we cannot move a Q register
5839 to a Q register directly. We need a scratch. */
5840 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
5841 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
5842 && reg_class_subset_p (rclass, FP_REGS))
5843 {
5844 if (mode == TFmode)
5845 sri->icode = CODE_FOR_aarch64_reload_movtf;
5846 else if (mode == TImode)
5847 sri->icode = CODE_FOR_aarch64_reload_movti;
5848 return NO_REGS;
5849 }
5850
5851 /* A TFmode or TImode memory access should be handled via an FP_REGS
5852 because AArch64 has richer addressing modes for LDR/STR instructions
5853 than LDP/STP instructions. */
d5726973 5854 if (TARGET_FLOAT && rclass == GENERAL_REGS
43e9d192
IB
5855 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
5856 return FP_REGS;
5857
5858 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 5859 return GENERAL_REGS;
43e9d192
IB
5860
5861 return NO_REGS;
5862}
5863
5864static bool
5865aarch64_can_eliminate (const int from, const int to)
5866{
5867 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
5868 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
5869
5870 if (frame_pointer_needed)
5871 {
5872 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5873 return true;
5874 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5875 return false;
5876 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
5877 && !cfun->calls_alloca)
5878 return true;
5879 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5880 return true;
0b7f8166
MS
5881
5882 return false;
43e9d192 5883 }
1c923b60
JW
5884 else
5885 {
5886 /* If we decided that we didn't need a leaf frame pointer but then used
5887 LR in the function, then we'll want a frame pointer after all, so
5888 prevent this elimination to ensure a frame pointer is used. */
5889 if (to == STACK_POINTER_REGNUM
5890 && flag_omit_leaf_frame_pointer
5891 && df_regs_ever_live_p (LR_REGNUM))
5892 return false;
5893 }
777e6976 5894
43e9d192
IB
5895 return true;
5896}
5897
5898HOST_WIDE_INT
5899aarch64_initial_elimination_offset (unsigned from, unsigned to)
5900{
43e9d192 5901 aarch64_layout_frame ();
78c29983
MS
5902
5903 if (to == HARD_FRAME_POINTER_REGNUM)
5904 {
5905 if (from == ARG_POINTER_REGNUM)
71bfb77a 5906 return cfun->machine->frame.hard_fp_offset;
78c29983
MS
5907
5908 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
5909 return cfun->machine->frame.hard_fp_offset
5910 - cfun->machine->frame.locals_offset;
78c29983
MS
5911 }
5912
5913 if (to == STACK_POINTER_REGNUM)
5914 {
5915 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
5916 return cfun->machine->frame.frame_size
5917 - cfun->machine->frame.locals_offset;
78c29983
MS
5918 }
5919
1c960e02 5920 return cfun->machine->frame.frame_size;
43e9d192
IB
5921}
5922
43e9d192
IB
5923/* Implement RETURN_ADDR_RTX. We do not support moving back to a
5924 previous frame. */
5925
5926rtx
5927aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
5928{
5929 if (count != 0)
5930 return const0_rtx;
5931 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
5932}
5933
5934
5935static void
5936aarch64_asm_trampoline_template (FILE *f)
5937{
28514dda
YZ
5938 if (TARGET_ILP32)
5939 {
5940 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
5941 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
5942 }
5943 else
5944 {
5945 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
5946 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
5947 }
01a3a324 5948 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 5949 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
5950 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
5951 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
5952}
5953
5954static void
5955aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5956{
5957 rtx fnaddr, mem, a_tramp;
28514dda 5958 const int tramp_code_sz = 16;
43e9d192
IB
5959
5960 /* Don't need to copy the trailing D-words, we fill those in below. */
5961 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
5962 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
5963 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 5964 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
5965 if (GET_MODE (fnaddr) != ptr_mode)
5966 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
5967 emit_move_insn (mem, fnaddr);
5968
28514dda 5969 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
5970 emit_move_insn (mem, chain_value);
5971
5972 /* XXX We should really define a "clear_cache" pattern and use
5973 gen_clear_cache(). */
5974 a_tramp = XEXP (m_tramp, 0);
5975 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
5976 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
5977 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
5978 ptr_mode);
43e9d192
IB
5979}
5980
5981static unsigned char
ef4bddc2 5982aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192
IB
5983{
5984 switch (regclass)
5985 {
fee9ba42 5986 case CALLER_SAVE_REGS:
43e9d192
IB
5987 case POINTER_REGS:
5988 case GENERAL_REGS:
5989 case ALL_REGS:
5990 case FP_REGS:
5991 case FP_LO_REGS:
5992 return
7bd11911
KT
5993 aarch64_vector_mode_p (mode)
5994 ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
5995 : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
43e9d192
IB
5996 case STACK_REG:
5997 return 1;
5998
5999 case NO_REGS:
6000 return 0;
6001
6002 default:
6003 break;
6004 }
6005 gcc_unreachable ();
6006}
6007
6008static reg_class_t
78d8b9f0 6009aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 6010{
51bb310d 6011 if (regclass == POINTER_REGS)
78d8b9f0
IB
6012 return GENERAL_REGS;
6013
51bb310d
MS
6014 if (regclass == STACK_REG)
6015 {
6016 if (REG_P(x)
6017 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
6018 return regclass;
6019
6020 return NO_REGS;
6021 }
6022
27bd251b
IB
6023  /* Register elimination can result in a request for
6024     SP+constant->FP_REGS.  We cannot support such operations, which
6025     use SP as the source and an FP_REG as the destination, so reject
6026     them outright.  */
6027 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
6028 {
6029 rtx lhs = XEXP (x, 0);
6030
6031 /* Look through a possible SUBREG introduced by ILP32. */
6032 if (GET_CODE (lhs) == SUBREG)
6033 lhs = SUBREG_REG (lhs);
6034
6035 gcc_assert (REG_P (lhs));
6036 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
6037 POINTER_REGS));
6038 return NO_REGS;
6039 }
6040
78d8b9f0 6041 return regclass;
43e9d192
IB
6042}
6043
6044void
6045aarch64_asm_output_labelref (FILE* f, const char *name)
6046{
6047 asm_fprintf (f, "%U%s", name);
6048}
6049
6050static void
6051aarch64_elf_asm_constructor (rtx symbol, int priority)
6052{
6053 if (priority == DEFAULT_INIT_PRIORITY)
6054 default_ctor_section_asm_out_constructor (symbol, priority);
6055 else
6056 {
6057 section *s;
53d190c1
AT
6058 /* While priority is known to be in range [0, 65535], so 18 bytes
6059 would be enough, the compiler might not know that. To avoid
6060 -Wformat-truncation false positive, use a larger size. */
6061 char buf[23];
43e9d192
IB
6062 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
6063 s = get_section (buf, SECTION_WRITE, NULL);
6064 switch_to_section (s);
6065 assemble_align (POINTER_SIZE);
28514dda 6066 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
6067 }
6068}
6069
6070static void
6071aarch64_elf_asm_destructor (rtx symbol, int priority)
6072{
6073 if (priority == DEFAULT_INIT_PRIORITY)
6074 default_dtor_section_asm_out_destructor (symbol, priority);
6075 else
6076 {
6077 section *s;
53d190c1
AT
6078 /* While priority is known to be in range [0, 65535], so 18 bytes
6079 would be enough, the compiler might not know that. To avoid
6080 -Wformat-truncation false positive, use a larger size. */
6081 char buf[23];
43e9d192
IB
6082 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
6083 s = get_section (buf, SECTION_WRITE, NULL);
6084 switch_to_section (s);
6085 assemble_align (POINTER_SIZE);
28514dda 6086 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
6087 }
6088}
6089
6090const char*
6091aarch64_output_casesi (rtx *operands)
6092{
6093 char buf[100];
6094 char label[100];
b32d5189 6095 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
6096 int index;
6097 static const char *const patterns[4][2] =
6098 {
6099 {
6100 "ldrb\t%w3, [%0,%w1,uxtw]",
6101 "add\t%3, %4, %w3, sxtb #2"
6102 },
6103 {
6104 "ldrh\t%w3, [%0,%w1,uxtw #1]",
6105 "add\t%3, %4, %w3, sxth #2"
6106 },
6107 {
6108 "ldr\t%w3, [%0,%w1,uxtw #2]",
6109 "add\t%3, %4, %w3, sxtw #2"
6110 },
6111 /* We assume that DImode is only generated when not optimizing and
6112 that we don't really need 64-bit address offsets. That would
6113 imply an object file with 8GB of code in a single function! */
6114 {
6115 "ldr\t%w3, [%0,%w1,uxtw #2]",
6116 "add\t%3, %4, %w3, sxtw #2"
6117 }
6118 };
6119
6120 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
6121
6122 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
6123
6124 gcc_assert (index >= 0 && index <= 3);
6125
6126  /* Need to implement table size reduction, by changing the code below.  */
6127 output_asm_insn (patterns[index][0], operands);
6128 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
6129 snprintf (buf, sizeof (buf),
6130 "adr\t%%4, %s", targetm.strip_name_encoding (label));
6131 output_asm_insn (buf, operands);
6132 output_asm_insn (patterns[index][1], operands);
6133 output_asm_insn ("br\t%3", operands);
6134 assemble_label (asm_out_file, label);
6135 return "";
6136}
6137
6138
6139/* Return size in bits of an arithmetic operand which is shifted/scaled and
6140 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
6141 operator. */
6142
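/* For example (illustrative only): aarch64_uxt_size (1, 0x1fe) returns 8,
   since 0x1fe is 0xff shifted left by one, i.e. a byte mask scaled by 2,
   which suits a UXTB extend operand.  */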
6143int
6144aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
6145{
6146 if (shift >= 0 && shift <= 3)
6147 {
6148 int size;
6149 for (size = 8; size <= 32; size *= 2)
6150 {
6151 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
6152 if (mask == bits << shift)
6153 return size;
6154 }
6155 }
6156 return 0;
6157}
6158
e78d485e
RR
6159/* Constant pools are per function only when PC relative
6160 literal loads are true or we are in the large memory
6161 model. */
6162
6163static inline bool
6164aarch64_can_use_per_function_literal_pools_p (void)
6165{
9ee6540a 6166 return (aarch64_pcrelative_literal_loads
e78d485e
RR
6167 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
6168}
6169
43e9d192 6170static bool
e78d485e 6171aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
43e9d192 6172{
3eece53d
RR
6173  /* FIXME: In an ideal world this would work similarly
6174     to the logic in aarch64_select_rtx_section, but this
6175     breaks bootstrap in gccgo.  For now we work around
6176     this by returning false here.  */
6177 return false;
43e9d192
IB
6178}
6179
e78d485e
RR
6180/* Select appropriate section for constants depending
6181 on where we place literal pools. */
6182
43e9d192 6183static section *
e78d485e
RR
6184aarch64_select_rtx_section (machine_mode mode,
6185 rtx x,
6186 unsigned HOST_WIDE_INT align)
43e9d192 6187{
e78d485e
RR
6188 if (aarch64_can_use_per_function_literal_pools_p ())
6189 return function_section (current_function_decl);
43e9d192 6190
e78d485e
RR
6191 return default_elf_select_rtx_section (mode, x, align);
6192}
43e9d192 6193
5fca7b66
RH
6194/* Implement ASM_OUTPUT_POOL_EPILOGUE. */
6195void
6196aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
6197 HOST_WIDE_INT offset)
6198{
6199 /* When using per-function literal pools, we must ensure that any code
6200 section is aligned to the minimal instruction length, lest we get
6201 errors from the assembler re "unaligned instructions". */
6202 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
6203 ASM_OUTPUT_ALIGN (f, 2);
6204}
6205
43e9d192
IB
6206/* Costs. */
6207
6208/* Helper function for rtx cost calculation. Strip a shift expression
6209 from X. Returns the inner operand if successful, or the original
6210 expression on failure. */
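/* For instance (illustrative only), both (ashift:DI (reg x1) (const_int 3))
   and (mult:DI (reg x1) (const_int 8)) are stripped down to (reg x1), since
   a multiplication by a power of two is really a shift.  */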
6211static rtx
6212aarch64_strip_shift (rtx x)
6213{
6214 rtx op = x;
6215
57b77d46
RE
6216 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
6217 we can convert both to ROR during final output. */
43e9d192
IB
6218 if ((GET_CODE (op) == ASHIFT
6219 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
6220 || GET_CODE (op) == LSHIFTRT
6221 || GET_CODE (op) == ROTATERT
6222 || GET_CODE (op) == ROTATE)
43e9d192
IB
6223 && CONST_INT_P (XEXP (op, 1)))
6224 return XEXP (op, 0);
6225
6226 if (GET_CODE (op) == MULT
6227 && CONST_INT_P (XEXP (op, 1))
6228 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
6229 return XEXP (op, 0);
6230
6231 return x;
6232}
6233
4745e701 6234/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
6235 expression from X. Returns the inner operand if successful, or the
6236 original expression on failure. We deal with a number of possible
b10f1009
AP
6237 canonicalization variations here. If STRIP_SHIFT is true, then
6238 we can strip off a shift also. */
43e9d192 6239static rtx
b10f1009 6240aarch64_strip_extend (rtx x, bool strip_shift)
43e9d192
IB
6241{
6242 rtx op = x;
6243
6244 /* Zero and sign extraction of a widened value. */
6245 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
6246 && XEXP (op, 2) == const0_rtx
4745e701 6247 && GET_CODE (XEXP (op, 0)) == MULT
43e9d192
IB
6248 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
6249 XEXP (op, 1)))
6250 return XEXP (XEXP (op, 0), 0);
6251
6252 /* It can also be represented (for zero-extend) as an AND with an
6253 immediate. */
6254 if (GET_CODE (op) == AND
6255 && GET_CODE (XEXP (op, 0)) == MULT
6256 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
6257 && CONST_INT_P (XEXP (op, 1))
6258 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
6259 INTVAL (XEXP (op, 1))) != 0)
6260 return XEXP (XEXP (op, 0), 0);
6261
6262 /* Now handle extended register, as this may also have an optional
6263 left shift by 1..4. */
b10f1009
AP
6264 if (strip_shift
6265 && GET_CODE (op) == ASHIFT
43e9d192
IB
6266 && CONST_INT_P (XEXP (op, 1))
6267 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
6268 op = XEXP (op, 0);
6269
6270 if (GET_CODE (op) == ZERO_EXTEND
6271 || GET_CODE (op) == SIGN_EXTEND)
6272 op = XEXP (op, 0);
6273
6274 if (op != x)
6275 return op;
6276
4745e701
JG
6277 return x;
6278}
6279
0a78ebe4
KT
6280/* Return true iff CODE is a shift supported in combination
6281 with arithmetic instructions. */
4d1919ed 6282
0a78ebe4
KT
6283static bool
6284aarch64_shift_p (enum rtx_code code)
6285{
6286 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
6287}
6288
b10f1009
AP
6289
6290/* Return true iff X is a cheap shift without a sign extend. */
6291
6292static bool
6293aarch64_cheap_mult_shift_p (rtx x)
6294{
6295 rtx op0, op1;
6296
6297 op0 = XEXP (x, 0);
6298 op1 = XEXP (x, 1);
6299
6300 if (!(aarch64_tune_params.extra_tuning_flags
6301 & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
6302 return false;
6303
6304 if (GET_CODE (op0) == SIGN_EXTEND)
6305 return false;
6306
6307 if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
6308 && UINTVAL (op1) <= 4)
6309 return true;
6310
6311 if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
6312 return false;
6313
6314 HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
6315
6316 if (l2 > 0 && l2 <= 4)
6317 return true;
6318
6319 return false;
6320}
6321
4745e701 6322/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
6323 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
6324   Return the calculated cost of the expression, recursing manually into
4745e701
JG
6325 operands where needed. */
6326
6327static int
e548c9df 6328aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
6329{
6330 rtx op0, op1;
6331 const struct cpu_cost_table *extra_cost
b175b679 6332 = aarch64_tune_params.insn_extra_cost;
4745e701 6333 int cost = 0;
0a78ebe4 6334 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 6335 machine_mode mode = GET_MODE (x);
4745e701
JG
6336
6337 gcc_checking_assert (code == MULT);
6338
6339 op0 = XEXP (x, 0);
6340 op1 = XEXP (x, 1);
6341
6342 if (VECTOR_MODE_P (mode))
6343 mode = GET_MODE_INNER (mode);
6344
6345 /* Integer multiply/fma. */
6346 if (GET_MODE_CLASS (mode) == MODE_INT)
6347 {
6348 /* The multiply will be canonicalized as a shift, cost it as such. */
0a78ebe4
KT
6349 if (aarch64_shift_p (GET_CODE (x))
6350 || (CONST_INT_P (op1)
6351 && exact_log2 (INTVAL (op1)) > 0))
4745e701 6352 {
0a78ebe4
KT
6353 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
6354 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
6355 if (speed)
6356 {
0a78ebe4
KT
6357 if (compound_p)
6358 {
b10f1009
AP
6359 /* If the shift is considered cheap,
6360 then don't add any cost. */
6361 if (aarch64_cheap_mult_shift_p (x))
6362 ;
6363 else if (REG_P (op1))
0a78ebe4
KT
6364 /* ARITH + shift-by-register. */
6365 cost += extra_cost->alu.arith_shift_reg;
6366 else if (is_extend)
6367 /* ARITH + extended register. We don't have a cost field
6368 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
6369 cost += extra_cost->alu.extend_arith;
6370 else
6371 /* ARITH + shift-by-immediate. */
6372 cost += extra_cost->alu.arith_shift;
6373 }
4745e701
JG
6374 else
6375 /* LSL (immediate). */
0a78ebe4
KT
6376 cost += extra_cost->alu.shift;
6377
4745e701 6378 }
0a78ebe4
KT
6379 /* Strip extends as we will have costed them in the case above. */
6380 if (is_extend)
b10f1009 6381 op0 = aarch64_strip_extend (op0, true);
4745e701 6382
e548c9df 6383 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
6384
6385 return cost;
6386 }
6387
d2ac256b
KT
6388 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
6389 compound and let the below cases handle it. After all, MNEG is a
6390 special-case alias of MSUB. */
6391 if (GET_CODE (op0) == NEG)
6392 {
6393 op0 = XEXP (op0, 0);
6394 compound_p = true;
6395 }
6396
4745e701
JG
6397 /* Integer multiplies or FMAs have zero/sign extending variants. */
6398 if ((GET_CODE (op0) == ZERO_EXTEND
6399 && GET_CODE (op1) == ZERO_EXTEND)
6400 || (GET_CODE (op0) == SIGN_EXTEND
6401 && GET_CODE (op1) == SIGN_EXTEND))
6402 {
e548c9df
AM
6403 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
6404 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
6405
6406 if (speed)
6407 {
0a78ebe4 6408 if (compound_p)
d2ac256b 6409 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
6410 cost += extra_cost->mult[0].extend_add;
6411 else
6412 /* MUL/SMULL/UMULL. */
6413 cost += extra_cost->mult[0].extend;
6414 }
6415
6416 return cost;
6417 }
6418
d2ac256b 6419 /* This is either an integer multiply or a MADD. In both cases
4745e701 6420 we want to recurse and cost the operands. */
e548c9df
AM
6421 cost += rtx_cost (op0, mode, MULT, 0, speed);
6422 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
6423
6424 if (speed)
6425 {
0a78ebe4 6426 if (compound_p)
d2ac256b 6427 /* MADD/MSUB. */
4745e701
JG
6428 cost += extra_cost->mult[mode == DImode].add;
6429 else
6430 /* MUL. */
6431 cost += extra_cost->mult[mode == DImode].simple;
6432 }
6433
6434 return cost;
6435 }
6436 else
6437 {
6438 if (speed)
6439 {
3d840f7d 6440 /* Floating-point FMA/FMUL can also support negations of the
d318517d
SN
6441 operands, unless the rounding mode is upward or downward in
6442	     which case FNMUL is different from FMUL with operand negation.  */
6443 bool neg0 = GET_CODE (op0) == NEG;
6444 bool neg1 = GET_CODE (op1) == NEG;
6445 if (compound_p || !flag_rounding_math || (neg0 && neg1))
6446 {
6447 if (neg0)
6448 op0 = XEXP (op0, 0);
6449 if (neg1)
6450 op1 = XEXP (op1, 0);
6451 }
4745e701 6452
0a78ebe4 6453 if (compound_p)
4745e701
JG
6454 /* FMADD/FNMADD/FNMSUB/FMSUB. */
6455 cost += extra_cost->fp[mode == DFmode].fma;
6456 else
3d840f7d 6457 /* FMUL/FNMUL. */
4745e701
JG
6458 cost += extra_cost->fp[mode == DFmode].mult;
6459 }
6460
e548c9df
AM
6461 cost += rtx_cost (op0, mode, MULT, 0, speed);
6462 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
6463 return cost;
6464 }
43e9d192
IB
6465}
6466
67747367
JG
6467static int
6468aarch64_address_cost (rtx x,
ef4bddc2 6469 machine_mode mode,
67747367
JG
6470 addr_space_t as ATTRIBUTE_UNUSED,
6471 bool speed)
6472{
6473 enum rtx_code c = GET_CODE (x);
b175b679 6474 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
6475 struct aarch64_address_info info;
6476 int cost = 0;
6477 info.shift = 0;
6478
6479 if (!aarch64_classify_address (&info, x, mode, c, false))
6480 {
6481 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
6482 {
6483 /* This is a CONST or SYMBOL ref which will be split
6484 in a different way depending on the code model in use.
6485 Cost it through the generic infrastructure. */
e548c9df 6486 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
6487 /* Divide through by the cost of one instruction to
6488 bring it to the same units as the address costs. */
6489 cost_symbol_ref /= COSTS_N_INSNS (1);
6490 /* The cost is then the cost of preparing the address,
6491 followed by an immediate (possibly 0) offset. */
6492 return cost_symbol_ref + addr_cost->imm_offset;
6493 }
6494 else
6495 {
6496 /* This is most likely a jump table from a case
6497 statement. */
6498 return addr_cost->register_offset;
6499 }
6500 }
6501
6502 switch (info.type)
6503 {
6504 case ADDRESS_LO_SUM:
6505 case ADDRESS_SYMBOLIC:
6506 case ADDRESS_REG_IMM:
6507 cost += addr_cost->imm_offset;
6508 break;
6509
6510 case ADDRESS_REG_WB:
6511 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
6512 cost += addr_cost->pre_modify;
6513 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
6514 cost += addr_cost->post_modify;
6515 else
6516 gcc_unreachable ();
6517
6518 break;
6519
6520 case ADDRESS_REG_REG:
6521 cost += addr_cost->register_offset;
6522 break;
6523
67747367 6524 case ADDRESS_REG_SXTW:
783879e6
EM
6525 cost += addr_cost->register_sextend;
6526 break;
6527
6528 case ADDRESS_REG_UXTW:
6529 cost += addr_cost->register_zextend;
67747367
JG
6530 break;
6531
6532 default:
6533 gcc_unreachable ();
6534 }
6535
6536
6537 if (info.shift > 0)
6538 {
6539 /* For the sake of calculating the cost of the shifted register
6540 component, we can treat same sized modes in the same way. */
6541 switch (GET_MODE_BITSIZE (mode))
6542 {
6543 case 16:
6544 cost += addr_cost->addr_scale_costs.hi;
6545 break;
6546
6547 case 32:
6548 cost += addr_cost->addr_scale_costs.si;
6549 break;
6550
6551 case 64:
6552 cost += addr_cost->addr_scale_costs.di;
6553 break;
6554
6555 /* We can't tell, or this is a 128-bit vector. */
6556 default:
6557 cost += addr_cost->addr_scale_costs.ti;
6558 break;
6559 }
6560 }
6561
6562 return cost;
6563}
6564
b9066f5a
MW
6565/* Return the cost of a branch. If SPEED_P is true then the compiler is
6566 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
6567 to be taken. */
6568
6569int
6570aarch64_branch_cost (bool speed_p, bool predictable_p)
6571{
6572 /* When optimizing for speed, use the cost of unpredictable branches. */
6573 const struct cpu_branch_cost *branch_costs =
b175b679 6574 aarch64_tune_params.branch_costs;
b9066f5a
MW
6575
6576 if (!speed_p || predictable_p)
6577 return branch_costs->predictable;
6578 else
6579 return branch_costs->unpredictable;
6580}
6581
7cc2145f
JG
6582/* Return true if the RTX X in mode MODE is a zero or sign extract
6583 usable in an ADD or SUB (extended register) instruction. */
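/* For example (illustrative only), (sign_extend:DI (reg:SI w1)) matches the
   "no shift" case below and corresponds to the extended-register form
   "add x0, x2, w1, sxtw".  */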
6584static bool
ef4bddc2 6585aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
7cc2145f
JG
6586{
6587 /* Catch add with a sign extract.
6588 This is add_<optab><mode>_multp2. */
6589 if (GET_CODE (x) == SIGN_EXTRACT
6590 || GET_CODE (x) == ZERO_EXTRACT)
6591 {
6592 rtx op0 = XEXP (x, 0);
6593 rtx op1 = XEXP (x, 1);
6594 rtx op2 = XEXP (x, 2);
6595
6596 if (GET_CODE (op0) == MULT
6597 && CONST_INT_P (op1)
6598 && op2 == const0_rtx
6599 && CONST_INT_P (XEXP (op0, 1))
6600 && aarch64_is_extend_from_extract (mode,
6601 XEXP (op0, 1),
6602 op1))
6603 {
6604 return true;
6605 }
6606 }
e47c4031
KT
6607 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
6608 No shift. */
6609 else if (GET_CODE (x) == SIGN_EXTEND
6610 || GET_CODE (x) == ZERO_EXTEND)
6611 return REG_P (XEXP (x, 0));
7cc2145f
JG
6612
6613 return false;
6614}
6615
61263118
KT
6616static bool
6617aarch64_frint_unspec_p (unsigned int u)
6618{
6619 switch (u)
6620 {
6621 case UNSPEC_FRINTZ:
6622 case UNSPEC_FRINTP:
6623 case UNSPEC_FRINTM:
6624 case UNSPEC_FRINTA:
6625 case UNSPEC_FRINTN:
6626 case UNSPEC_FRINTX:
6627 case UNSPEC_FRINTI:
6628 return true;
6629
6630 default:
6631 return false;
6632 }
6633}
6634
fb0cb7fa
KT
6635/* Return true iff X is an rtx that will match an extr instruction
6636 i.e. as described in the *extr<mode>5_insn family of patterns.
6637 OP0 and OP1 will be set to the operands of the shifts involved
6638 on success and will be NULL_RTX otherwise. */
6639
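/* For example (illustrative only): (ior:SI (ashift:SI (reg w1) (const_int 24))
   (lshiftrt:SI (reg w2) (const_int 8))) matches, because 24 + 8 == 32, and
   can be emitted as "extr w0, w1, w2, #8".  */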
6640static bool
6641aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
6642{
6643 rtx op0, op1;
6644 machine_mode mode = GET_MODE (x);
6645
6646 *res_op0 = NULL_RTX;
6647 *res_op1 = NULL_RTX;
6648
6649 if (GET_CODE (x) != IOR)
6650 return false;
6651
6652 op0 = XEXP (x, 0);
6653 op1 = XEXP (x, 1);
6654
6655 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
6656 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
6657 {
6658 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
6659 if (GET_CODE (op1) == ASHIFT)
6660 std::swap (op0, op1);
6661
6662 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
6663 return false;
6664
6665 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
6666 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
6667
6668 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
6669 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
6670 {
6671 *res_op0 = XEXP (op0, 0);
6672 *res_op1 = XEXP (op1, 0);
6673 return true;
6674 }
6675 }
6676
6677 return false;
6678}
6679
2d5ffe46
AP
6680/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
6681 storing it in *COST. Result is true if the total cost of the operation
6682 has now been calculated. */
6683static bool
6684aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
6685{
b9e3afe9
AP
6686 rtx inner;
6687 rtx comparator;
6688 enum rtx_code cmpcode;
6689
6690 if (COMPARISON_P (op0))
6691 {
6692 inner = XEXP (op0, 0);
6693 comparator = XEXP (op0, 1);
6694 cmpcode = GET_CODE (op0);
6695 }
6696 else
6697 {
6698 inner = op0;
6699 comparator = const0_rtx;
6700 cmpcode = NE;
6701 }
6702
2d5ffe46
AP
6703 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
6704 {
6705 /* Conditional branch. */
b9e3afe9 6706 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
6707 return true;
6708 else
6709 {
b9e3afe9 6710 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 6711 {
2d5ffe46
AP
6712 if (comparator == const0_rtx)
6713 {
6714 /* TBZ/TBNZ/CBZ/CBNZ. */
6715 if (GET_CODE (inner) == ZERO_EXTRACT)
6716 /* TBZ/TBNZ. */
e548c9df
AM
6717 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
6718 ZERO_EXTRACT, 0, speed);
6719 else
6720 /* CBZ/CBNZ. */
6721 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
6722
6723 return true;
6724 }
6725 }
b9e3afe9 6726 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 6727 {
2d5ffe46
AP
6728 /* TBZ/TBNZ. */
6729 if (comparator == const0_rtx)
6730 return true;
6731 }
6732 }
6733 }
b9e3afe9 6734 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46 6735 {
786298dc 6736 /* CCMP. */
6dfeb7ce 6737 if (GET_CODE (op1) == COMPARE)
786298dc
WD
6738 {
6739 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
6740 if (XEXP (op1, 1) == const0_rtx)
6741 *cost += 1;
6742 if (speed)
6743 {
6744 machine_mode mode = GET_MODE (XEXP (op1, 0));
6745 const struct cpu_cost_table *extra_cost
6746 = aarch64_tune_params.insn_extra_cost;
6747
6748 if (GET_MODE_CLASS (mode) == MODE_INT)
6749 *cost += extra_cost->alu.arith;
6750 else
6751 *cost += extra_cost->fp[mode == DFmode].compare;
6752 }
6753 return true;
6754 }
6755
2d5ffe46
AP
6756 /* It's a conditional operation based on the status flags,
6757 so it must be some flavor of CSEL. */
6758
6759 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
6760 if (GET_CODE (op1) == NEG
6761 || GET_CODE (op1) == NOT
6762 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
6763 op1 = XEXP (op1, 0);
bad00732
KT
6764 else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
6765 {
6766 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
6767 op1 = XEXP (op1, 0);
6768 op2 = XEXP (op2, 0);
6769 }
2d5ffe46 6770
e548c9df
AM
6771 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
6772 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
6773 return true;
6774 }
6775
6776 /* We don't know what this is, cost all operands. */
6777 return false;
6778}
6779
283b6c85
KT
6780/* Check whether X is a bitfield operation of the form shift + extend that
6781 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
6782 operand to which the bitfield operation is applied. Otherwise return
6783 NULL_RTX. */
6784
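/* For example (illustrative only): (zero_extend:SI (lshiftrt:HI (reg:HI w1)
   (const_int 3))) returns the inner (reg:HI w1); the combination is a
   UBFX-style extraction of bits [15:3] of w1.  */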
6785static rtx
6786aarch64_extend_bitfield_pattern_p (rtx x)
6787{
6788 rtx_code outer_code = GET_CODE (x);
6789 machine_mode outer_mode = GET_MODE (x);
6790
6791 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
6792 && outer_mode != SImode && outer_mode != DImode)
6793 return NULL_RTX;
6794
6795 rtx inner = XEXP (x, 0);
6796 rtx_code inner_code = GET_CODE (inner);
6797 machine_mode inner_mode = GET_MODE (inner);
6798 rtx op = NULL_RTX;
6799
6800 switch (inner_code)
6801 {
6802 case ASHIFT:
6803 if (CONST_INT_P (XEXP (inner, 1))
6804 && (inner_mode == QImode || inner_mode == HImode))
6805 op = XEXP (inner, 0);
6806 break;
6807 case LSHIFTRT:
6808 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
6809 && (inner_mode == QImode || inner_mode == HImode))
6810 op = XEXP (inner, 0);
6811 break;
6812 case ASHIFTRT:
6813 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
6814 && (inner_mode == QImode || inner_mode == HImode))
6815 op = XEXP (inner, 0);
6816 break;
6817 default:
6818 break;
6819 }
6820
6821 return op;
6822}
6823
8c83f71d
KT
6824/* Return true if the mask and a shift amount from an RTX of the form
6825 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
6826 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
6827
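/* For example (illustrative only): in SImode, MASK == 0xff0 with
   SHFT_AMNT == 4 is accepted, since (0xff0 >> 4) + 1 is a power of two and
   no mask bits fall below the shift; (x << 4) & 0xff0 can then be emitted
   as "ubfiz w0, w1, 4, 8".  */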
6828bool
6829aarch64_mask_and_shift_for_ubfiz_p (machine_mode mode, rtx mask, rtx shft_amnt)
6830{
6831 return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
6832 && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
6833 && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
6834 && (INTVAL (mask) & ((1 << INTVAL (shft_amnt)) - 1)) == 0;
6835}
6836
43e9d192
IB
6837/* Calculate the cost of calculating X, storing it in *COST. Result
6838 is true if the total cost of the operation has now been calculated. */
6839static bool
e548c9df 6840aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
6841 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
6842{
a8eecd00 6843 rtx op0, op1, op2;
73250c4c 6844 const struct cpu_cost_table *extra_cost
b175b679 6845 = aarch64_tune_params.insn_extra_cost;
e548c9df 6846 int code = GET_CODE (x);
b4206259 6847 scalar_int_mode int_mode;
43e9d192 6848
7fc5ef02
JG
6849 /* By default, assume that everything has equivalent cost to the
6850 cheapest instruction. Any additional costs are applied as a delta
6851 above this default. */
6852 *cost = COSTS_N_INSNS (1);
6853
43e9d192
IB
6854 switch (code)
6855 {
6856 case SET:
ba123b0d
JG
6857 /* The cost depends entirely on the operands to SET. */
6858 *cost = 0;
43e9d192
IB
6859 op0 = SET_DEST (x);
6860 op1 = SET_SRC (x);
6861
6862 switch (GET_CODE (op0))
6863 {
6864 case MEM:
6865 if (speed)
2961177e
JG
6866 {
6867 rtx address = XEXP (op0, 0);
b6875aac
KV
6868 if (VECTOR_MODE_P (mode))
6869 *cost += extra_cost->ldst.storev;
6870 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
6871 *cost += extra_cost->ldst.store;
6872 else if (mode == SFmode)
6873 *cost += extra_cost->ldst.storef;
6874 else if (mode == DFmode)
6875 *cost += extra_cost->ldst.stored;
6876
6877 *cost +=
6878 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6879 0, speed));
6880 }
43e9d192 6881
e548c9df 6882 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
6883 return true;
6884
6885 case SUBREG:
6886 if (! REG_P (SUBREG_REG (op0)))
e548c9df 6887 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 6888
43e9d192
IB
6889 /* Fall through. */
6890 case REG:
b6875aac
KV
6891 /* The cost is one per vector-register copied. */
6892 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
6893 {
6894 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
6895 / GET_MODE_SIZE (V4SImode);
6896 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6897 }
ba123b0d
JG
6898 /* const0_rtx is in general free, but we will use an
6899 instruction to set a register to 0. */
b6875aac
KV
6900 else if (REG_P (op1) || op1 == const0_rtx)
6901 {
6902 /* The cost is 1 per register copied. */
6903 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
ba123b0d 6904 / UNITS_PER_WORD;
b6875aac
KV
6905 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6906 }
ba123b0d
JG
6907 else
6908 /* Cost is just the cost of the RHS of the set. */
e548c9df 6909 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
6910 return true;
6911
ba123b0d 6912 case ZERO_EXTRACT:
43e9d192 6913 case SIGN_EXTRACT:
ba123b0d
JG
6914 /* Bit-field insertion. Strip any redundant widening of
6915 the RHS to meet the width of the target. */
43e9d192
IB
6916 if (GET_CODE (op1) == SUBREG)
6917 op1 = SUBREG_REG (op1);
6918 if ((GET_CODE (op1) == ZERO_EXTEND
6919 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 6920 && CONST_INT_P (XEXP (op0, 1))
43e9d192
IB
6921 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
6922 >= INTVAL (XEXP (op0, 1))))
6923 op1 = XEXP (op1, 0);
ba123b0d
JG
6924
6925 if (CONST_INT_P (op1))
6926 {
6927 /* MOV immediate is assumed to always be cheap. */
6928 *cost = COSTS_N_INSNS (1);
6929 }
6930 else
6931 {
6932 /* BFM. */
6933 if (speed)
6934 *cost += extra_cost->alu.bfi;
e548c9df 6935 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
6936 }
6937
43e9d192
IB
6938 return true;
6939
6940 default:
ba123b0d
JG
6941 /* We can't make sense of this, assume default cost. */
6942 *cost = COSTS_N_INSNS (1);
61263118 6943 return false;
43e9d192
IB
6944 }
6945 return false;
6946
9dfc162c
JG
6947 case CONST_INT:
6948 /* If an instruction can incorporate a constant within the
6949 instruction, the instruction's expression avoids calling
6950 rtx_cost() on the constant. If rtx_cost() is called on a
6951 constant, then it is usually because the constant must be
6952 moved into a register by one or more instructions.
6953
6954 The exception is constant 0, which can be expressed
6955 as XZR/WZR and is therefore free. The exception to this is
6956 if we have (set (reg) (const0_rtx)) in which case we must cost
6957 the move. However, we can catch that when we cost the SET, so
6958 we don't need to consider that here. */
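      /* Illustrative counts: a 16-bit value such as 0x2a is a single MOVZ,
	 a typical 32-bit value needs MOVZ + MOVK, and an arbitrary 64-bit
	 value may need up to MOVZ + three MOVKs (four insns).  */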
6959 if (x == const0_rtx)
6960 *cost = 0;
6961 else
6962 {
6963 /* To an approximation, building any other constant is
6964 proportionally expensive to the number of instructions
6965 required to build that constant. This is true whether we
6966 are compiling for SPEED or otherwise. */
82614948
RR
6967 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
6968 (NULL_RTX, x, false, mode));
9dfc162c
JG
6969 }
6970 return true;
6971
6972 case CONST_DOUBLE:
a2170965
TC
6973
6974 /* First determine number of instructions to do the move
6975 as an integer constant. */
6976 if (!aarch64_float_const_representable_p (x)
6977 && !aarch64_can_const_movi_rtx_p (x, mode)
6978 && aarch64_float_const_rtx_p (x))
6979 {
6980 unsigned HOST_WIDE_INT ival;
6981 bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
6982 gcc_assert (succeed);
6983
304b9962
RS
6984 machine_mode imode = (mode == HFmode
6985 ? SImode
6986 : int_mode_for_mode (mode).require ());
a2170965
TC
6987 int ncost = aarch64_internal_mov_immediate
6988 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
6989 *cost += COSTS_N_INSNS (ncost);
6990 return true;
6991 }
6992
9dfc162c
JG
6993 if (speed)
6994 {
6995 /* mov[df,sf]_aarch64. */
6996 if (aarch64_float_const_representable_p (x))
6997 /* FMOV (scalar immediate). */
6998 *cost += extra_cost->fp[mode == DFmode].fpconst;
6999 else if (!aarch64_float_const_zero_rtx_p (x))
7000 {
7001 /* This will be a load from memory. */
7002 if (mode == DFmode)
7003 *cost += extra_cost->ldst.loadd;
7004 else
7005 *cost += extra_cost->ldst.loadf;
7006 }
7007 else
7008 /* Otherwise this is +0.0. We get this using MOVI d0, #0
7009 or MOV v0.s[0], wzr - neither of which are modeled by the
7010 cost tables. Just use the default cost. */
7011 {
7012 }
7013 }
7014
7015 return true;
7016
43e9d192
IB
7017 case MEM:
7018 if (speed)
2961177e
JG
7019 {
7020 /* For loads we want the base cost of a load, plus an
7021 approximation for the additional cost of the addressing
7022 mode. */
7023 rtx address = XEXP (x, 0);
b6875aac
KV
7024 if (VECTOR_MODE_P (mode))
7025 *cost += extra_cost->ldst.loadv;
7026 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
7027 *cost += extra_cost->ldst.load;
7028 else if (mode == SFmode)
7029 *cost += extra_cost->ldst.loadf;
7030 else if (mode == DFmode)
7031 *cost += extra_cost->ldst.loadd;
7032
7033 *cost +=
7034 COSTS_N_INSNS (aarch64_address_cost (address, mode,
7035 0, speed));
7036 }
43e9d192
IB
7037
7038 return true;
7039
7040 case NEG:
4745e701
JG
7041 op0 = XEXP (x, 0);
7042
b6875aac
KV
7043 if (VECTOR_MODE_P (mode))
7044 {
7045 if (speed)
7046 {
7047 /* FNEG. */
7048 *cost += extra_cost->vect.alu;
7049 }
7050 return false;
7051 }
7052
e548c9df
AM
7053 if (GET_MODE_CLASS (mode) == MODE_INT)
7054 {
4745e701
JG
7055 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
7056 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
7057 {
7058 /* CSETM. */
e548c9df 7059 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
7060 return true;
7061 }
7062
7063 /* Cost this as SUB wzr, X. */
e548c9df 7064 op0 = CONST0_RTX (mode);
4745e701
JG
7065 op1 = XEXP (x, 0);
7066 goto cost_minus;
7067 }
7068
e548c9df 7069 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
7070 {
7071 /* Support (neg(fma...)) as a single instruction only if
7072 sign of zeros is unimportant. This matches the decision
7073 making in aarch64.md. */
7074 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
7075 {
7076 /* FNMADD. */
e548c9df 7077 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
7078 return true;
7079 }
d318517d
SN
7080 if (GET_CODE (op0) == MULT)
7081 {
7082 /* FNMUL. */
7083 *cost = rtx_cost (op0, mode, NEG, 0, speed);
7084 return true;
7085 }
4745e701
JG
7086 if (speed)
7087 /* FNEG. */
7088 *cost += extra_cost->fp[mode == DFmode].neg;
7089 return false;
7090 }
7091
7092 return false;
43e9d192 7093
781aeb73
KT
7094 case CLRSB:
7095 case CLZ:
7096 if (speed)
b6875aac
KV
7097 {
7098 if (VECTOR_MODE_P (mode))
7099 *cost += extra_cost->vect.alu;
7100 else
7101 *cost += extra_cost->alu.clz;
7102 }
781aeb73
KT
7103
7104 return false;
7105
43e9d192
IB
7106 case COMPARE:
7107 op0 = XEXP (x, 0);
7108 op1 = XEXP (x, 1);
7109
7110 if (op1 == const0_rtx
7111 && GET_CODE (op0) == AND)
7112 {
7113 x = op0;
e548c9df 7114 mode = GET_MODE (op0);
43e9d192
IB
7115 goto cost_logic;
7116 }
7117
a8eecd00
JG
7118 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
7119 {
7120 /* TODO: A write to the CC flags possibly costs extra, this
7121 needs encoding in the cost tables. */
7122
e548c9df 7123 mode = GET_MODE (op0);
a8eecd00
JG
7124 /* ANDS. */
7125 if (GET_CODE (op0) == AND)
7126 {
7127 x = op0;
7128 goto cost_logic;
7129 }
7130
7131 if (GET_CODE (op0) == PLUS)
7132 {
7133 /* ADDS (and CMN alias). */
7134 x = op0;
7135 goto cost_plus;
7136 }
7137
7138 if (GET_CODE (op0) == MINUS)
7139 {
7140 /* SUBS. */
7141 x = op0;
7142 goto cost_minus;
7143 }
7144
345854d8
KT
7145 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
7146 && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
7147 && CONST_INT_P (XEXP (op0, 2)))
7148 {
7149 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
7150 Handle it here directly rather than going to cost_logic
7151 since we know the immediate generated for the TST is valid
7152 so we can avoid creating an intermediate rtx for it only
7153 for costing purposes. */
7154 if (speed)
7155 *cost += extra_cost->alu.logical;
7156
7157 *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
7158 ZERO_EXTRACT, 0, speed);
7159 return true;
7160 }
7161
a8eecd00
JG
7162 if (GET_CODE (op1) == NEG)
7163 {
7164 /* CMN. */
7165 if (speed)
7166 *cost += extra_cost->alu.arith;
7167
e548c9df
AM
7168 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
7169 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
7170 return true;
7171 }
7172
7173 /* CMP.
7174
7175 Compare can freely swap the order of operands, and
7176 canonicalization puts the more complex operation first.
7177 But the integer MINUS logic expects the shift/extend
7178 operation in op1. */
7179 if (! (REG_P (op0)
7180 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
7181 {
7182 op0 = XEXP (x, 1);
7183 op1 = XEXP (x, 0);
7184 }
7185 goto cost_minus;
7186 }
7187
7188 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7189 {
7190 /* FCMP. */
7191 if (speed)
7192 *cost += extra_cost->fp[mode == DFmode].compare;
7193
7194 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
7195 {
e548c9df 7196 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
7197 /* FCMP supports constant 0.0 for no extra cost. */
7198 return true;
7199 }
7200 return false;
7201 }
7202
b6875aac
KV
7203 if (VECTOR_MODE_P (mode))
7204 {
7205 /* Vector compare. */
7206 if (speed)
7207 *cost += extra_cost->vect.alu;
7208
7209 if (aarch64_float_const_zero_rtx_p (op1))
7210 {
7211 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
7212 cost. */
7213 return true;
7214 }
7215 return false;
7216 }
a8eecd00 7217 return false;
43e9d192
IB
7218
7219 case MINUS:
4745e701
JG
7220 {
7221 op0 = XEXP (x, 0);
7222 op1 = XEXP (x, 1);
7223
7224cost_minus:
e548c9df 7225 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 7226
4745e701
JG
7227 /* Detect valid immediates. */
7228 if ((GET_MODE_CLASS (mode) == MODE_INT
7229 || (GET_MODE_CLASS (mode) == MODE_CC
7230 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
7231 && CONST_INT_P (op1)
7232 && aarch64_uimm12_shift (INTVAL (op1)))
7233 {
4745e701
JG
7234 if (speed)
7235 /* SUB(S) (immediate). */
7236 *cost += extra_cost->alu.arith;
7237 return true;
4745e701
JG
7238 }
7239
7cc2145f
JG
7240 /* Look for SUB (extended register). */
7241 if (aarch64_rtx_arith_op_extract_p (op1, mode))
7242 {
7243 if (speed)
2533c820 7244 *cost += extra_cost->alu.extend_arith;
7cc2145f 7245
b10f1009 7246 op1 = aarch64_strip_extend (op1, true);
e47c4031 7247 *cost += rtx_cost (op1, VOIDmode,
e548c9df 7248 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
7249 return true;
7250 }
7251
b10f1009 7252 rtx new_op1 = aarch64_strip_extend (op1, false);
4745e701
JG
7253
7254 /* Cost this as an FMA-alike operation. */
7255 if ((GET_CODE (new_op1) == MULT
0a78ebe4 7256 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
7257 && code != COMPARE)
7258 {
7259 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
7260 (enum rtx_code) code,
7261 speed);
4745e701
JG
7262 return true;
7263 }
43e9d192 7264
e548c9df 7265 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 7266
4745e701
JG
7267 if (speed)
7268 {
b6875aac
KV
7269 if (VECTOR_MODE_P (mode))
7270 {
7271 /* Vector SUB. */
7272 *cost += extra_cost->vect.alu;
7273 }
7274 else if (GET_MODE_CLASS (mode) == MODE_INT)
7275 {
7276 /* SUB(S). */
7277 *cost += extra_cost->alu.arith;
7278 }
4745e701 7279 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
7280 {
7281 /* FSUB. */
7282 *cost += extra_cost->fp[mode == DFmode].addsub;
7283 }
4745e701
JG
7284 }
7285 return true;
7286 }
43e9d192
IB
7287
7288 case PLUS:
4745e701
JG
7289 {
7290 rtx new_op0;
43e9d192 7291
4745e701
JG
7292 op0 = XEXP (x, 0);
7293 op1 = XEXP (x, 1);
43e9d192 7294
a8eecd00 7295cost_plus:
4745e701
JG
7296 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
7297 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
7298 {
7299 /* CSINC. */
e548c9df
AM
7300 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
7301 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
7302 return true;
7303 }
43e9d192 7304
4745e701
JG
7305 if (GET_MODE_CLASS (mode) == MODE_INT
7306 && CONST_INT_P (op1)
7307 && aarch64_uimm12_shift (INTVAL (op1)))
7308 {
e548c9df 7309 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 7310
4745e701
JG
7311 if (speed)
7312 /* ADD (immediate). */
7313 *cost += extra_cost->alu.arith;
7314 return true;
7315 }
7316
e548c9df 7317 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 7318
7cc2145f
JG
7319 /* Look for ADD (extended register). */
7320 if (aarch64_rtx_arith_op_extract_p (op0, mode))
7321 {
7322 if (speed)
2533c820 7323 *cost += extra_cost->alu.extend_arith;
7cc2145f 7324
b10f1009 7325 op0 = aarch64_strip_extend (op0, true);
e47c4031 7326 *cost += rtx_cost (op0, VOIDmode,
e548c9df 7327 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
7328 return true;
7329 }
7330
4745e701
JG
7331 /* Strip any extend, leave shifts behind as we will
7332 cost them through mult_cost. */
b10f1009 7333 new_op0 = aarch64_strip_extend (op0, false);
4745e701
JG
7334
7335 if (GET_CODE (new_op0) == MULT
0a78ebe4 7336 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
7337 {
7338 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
7339 speed);
4745e701
JG
7340 return true;
7341 }
7342
e548c9df 7343 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
7344
7345 if (speed)
7346 {
b6875aac
KV
7347 if (VECTOR_MODE_P (mode))
7348 {
7349 /* Vector ADD. */
7350 *cost += extra_cost->vect.alu;
7351 }
7352 else if (GET_MODE_CLASS (mode) == MODE_INT)
7353 {
7354 /* ADD. */
7355 *cost += extra_cost->alu.arith;
7356 }
4745e701 7357 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
7358 {
7359 /* FADD. */
7360 *cost += extra_cost->fp[mode == DFmode].addsub;
7361 }
4745e701
JG
7362 }
7363 return true;
7364 }
43e9d192 7365
18b42b2a
KT
7366 case BSWAP:
7367 *cost = COSTS_N_INSNS (1);
7368
7369 if (speed)
b6875aac
KV
7370 {
7371 if (VECTOR_MODE_P (mode))
7372 *cost += extra_cost->vect.alu;
7373 else
7374 *cost += extra_cost->alu.rev;
7375 }
18b42b2a
KT
7376 return false;
7377
43e9d192 7378 case IOR:
f7d5cf8d
KT
7379 if (aarch_rev16_p (x))
7380 {
7381 *cost = COSTS_N_INSNS (1);
7382
b6875aac
KV
7383 if (speed)
7384 {
7385 if (VECTOR_MODE_P (mode))
7386 *cost += extra_cost->vect.alu;
7387 else
7388 *cost += extra_cost->alu.rev;
7389 }
7390 return true;
f7d5cf8d 7391 }
fb0cb7fa
KT
7392
7393 if (aarch64_extr_rtx_p (x, &op0, &op1))
7394 {
e548c9df
AM
7395 *cost += rtx_cost (op0, mode, IOR, 0, speed);
7396 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
7397 if (speed)
7398 *cost += extra_cost->alu.shift;
7399
7400 return true;
7401 }
f7d5cf8d 7402 /* Fall through. */
43e9d192
IB
7403 case XOR:
7404 case AND:
7405 cost_logic:
7406 op0 = XEXP (x, 0);
7407 op1 = XEXP (x, 1);
7408
b6875aac
KV
7409 if (VECTOR_MODE_P (mode))
7410 {
7411 if (speed)
7412 *cost += extra_cost->vect.alu;
7413 return true;
7414 }
7415
268c3b47
JG
7416 if (code == AND
7417 && GET_CODE (op0) == MULT
7418 && CONST_INT_P (XEXP (op0, 1))
7419 && CONST_INT_P (op1)
7420 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
7421 INTVAL (op1)) != 0)
7422 {
7423 /* This is a UBFM/SBFM. */
e548c9df 7424 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
7425 if (speed)
7426 *cost += extra_cost->alu.bfx;
7427 return true;
7428 }
7429
b4206259 7430 if (is_int_mode (mode, &int_mode))
43e9d192 7431 {
8c83f71d 7432 if (CONST_INT_P (op1))
43e9d192 7433 {
8c83f71d
KT
7434 /* We have a mask + shift version of a UBFIZ
7435 i.e. the *andim_ashift<mode>_bfiz pattern. */
7436 if (GET_CODE (op0) == ASHIFT
b4206259
RS
7437 && aarch64_mask_and_shift_for_ubfiz_p (int_mode, op1,
7438 XEXP (op0, 1)))
8c83f71d 7439 {
b4206259 7440 *cost += rtx_cost (XEXP (op0, 0), int_mode,
8c83f71d
KT
7441 (enum rtx_code) code, 0, speed);
7442 if (speed)
7443 *cost += extra_cost->alu.bfx;
268c3b47 7444
8c83f71d
KT
7445 return true;
7446 }
b4206259 7447 else if (aarch64_bitmask_imm (INTVAL (op1), int_mode))
8c83f71d
KT
7448 {
7449 /* We possibly get the immediate for free, this is not
7450 modelled. */
b4206259
RS
7451 *cost += rtx_cost (op0, int_mode,
7452 (enum rtx_code) code, 0, speed);
8c83f71d
KT
7453 if (speed)
7454 *cost += extra_cost->alu.logical;
268c3b47 7455
8c83f71d
KT
7456 return true;
7457 }
43e9d192
IB
7458 }
7459 else
7460 {
268c3b47
JG
7461 rtx new_op0 = op0;
7462
7463 /* Handle ORN, EON, or BIC. */
43e9d192
IB
7464 if (GET_CODE (op0) == NOT)
7465 op0 = XEXP (op0, 0);
268c3b47
JG
7466
7467 new_op0 = aarch64_strip_shift (op0);
7468
7469 /* If we had a shift on op0 then this is a logical-shift-
7470 by-register/immediate operation. Otherwise, this is just
7471 a logical operation. */
7472 if (speed)
7473 {
7474 if (new_op0 != op0)
7475 {
7476 /* Shift by immediate. */
7477 if (CONST_INT_P (XEXP (op0, 1)))
7478 *cost += extra_cost->alu.log_shift;
7479 else
7480 *cost += extra_cost->alu.log_shift_reg;
7481 }
7482 else
7483 *cost += extra_cost->alu.logical;
7484 }
7485
7486 /* In both cases we want to cost both operands. */
b4206259
RS
7487 *cost += rtx_cost (new_op0, int_mode, (enum rtx_code) code,
7488 0, speed);
7489 *cost += rtx_cost (op1, int_mode, (enum rtx_code) code,
7490 1, speed);
268c3b47
JG
7491
7492 return true;
43e9d192 7493 }
43e9d192
IB
7494 }
7495 return false;
7496
268c3b47 7497 case NOT:
6365da9e
KT
7498 x = XEXP (x, 0);
7499 op0 = aarch64_strip_shift (x);
7500
b6875aac
KV
7501 if (VECTOR_MODE_P (mode))
7502 {
7503 /* Vector NOT. */
7504 *cost += extra_cost->vect.alu;
7505 return false;
7506 }
7507
6365da9e
KT
7508 /* MVN-shifted-reg. */
7509 if (op0 != x)
7510 {
e548c9df 7511 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
7512
7513 if (speed)
7514 *cost += extra_cost->alu.log_shift;
7515
7516 return true;
7517 }
7518 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
7519 Handle the second form here taking care that 'a' in the above can
7520 be a shift. */
7521 else if (GET_CODE (op0) == XOR)
7522 {
7523 rtx newop0 = XEXP (op0, 0);
7524 rtx newop1 = XEXP (op0, 1);
7525 rtx op0_stripped = aarch64_strip_shift (newop0);
7526
e548c9df
AM
7527 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
7528 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
7529
7530 if (speed)
7531 {
7532 if (op0_stripped != newop0)
7533 *cost += extra_cost->alu.log_shift;
7534 else
7535 *cost += extra_cost->alu.logical;
7536 }
7537
7538 return true;
7539 }
268c3b47
JG
7540 /* MVN. */
7541 if (speed)
7542 *cost += extra_cost->alu.logical;
7543
268c3b47
JG
7544 return false;
7545
43e9d192 7546 case ZERO_EXTEND:
b1685e62
JG
7547
7548 op0 = XEXP (x, 0);
7549 /* If a value is written in SI mode, then zero extended to DI
7550 mode, the operation will in general be free as a write to
7551 a 'w' register implicitly zeroes the upper bits of an 'x'
7552 register. However, if this is
7553
7554 (set (reg) (zero_extend (reg)))
7555
7556 we must cost the explicit register move. */
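      /* For example, "add w0, w1, w2" already clears bits 63:32 of x0, so
	 the zero_extend costs nothing beyond the SImode operation itself.  */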
7557 if (mode == DImode
7558 && GET_MODE (op0) == SImode
7559 && outer == SET)
7560 {
e548c9df 7561 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62 7562
dde23f43
KM
7563 /* If OP_COST is non-zero, then the cost of the zero extend
7564 is effectively the cost of the inner operation. Otherwise
7565 we have a MOV instruction and we take the cost from the MOV
7566 itself. This is true independently of whether we are
7567 optimizing for space or time. */
7568 if (op_cost)
b1685e62
JG
7569 *cost = op_cost;
7570
7571 return true;
7572 }
e548c9df 7573 else if (MEM_P (op0))
43e9d192 7574 {
b1685e62 7575 /* All loads can zero extend to any size for free. */
e548c9df 7576 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
7577 return true;
7578 }
b1685e62 7579
283b6c85
KT
7580 op0 = aarch64_extend_bitfield_pattern_p (x);
7581 if (op0)
7582 {
7583 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
7584 if (speed)
7585 *cost += extra_cost->alu.bfx;
7586 return true;
7587 }
7588
b1685e62 7589 if (speed)
b6875aac
KV
7590 {
7591 if (VECTOR_MODE_P (mode))
7592 {
7593 /* UMOV. */
7594 *cost += extra_cost->vect.alu;
7595 }
7596 else
7597 {
63715e5e
WD
7598 /* We generate an AND instead of UXTB/UXTH. */
7599 *cost += extra_cost->alu.logical;
b6875aac
KV
7600 }
7601 }
43e9d192
IB
7602 return false;
7603
7604 case SIGN_EXTEND:
b1685e62 7605 if (MEM_P (XEXP (x, 0)))
43e9d192 7606 {
b1685e62
JG
7607 /* LDRSH. */
7608 if (speed)
7609 {
7610 rtx address = XEXP (XEXP (x, 0), 0);
7611 *cost += extra_cost->ldst.load_sign_extend;
7612
7613 *cost +=
7614 COSTS_N_INSNS (aarch64_address_cost (address, mode,
7615 0, speed));
7616 }
43e9d192
IB
7617 return true;
7618 }
b1685e62 7619
283b6c85
KT
7620 op0 = aarch64_extend_bitfield_pattern_p (x);
7621 if (op0)
7622 {
7623 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
7624 if (speed)
7625 *cost += extra_cost->alu.bfx;
7626 return true;
7627 }
7628
b1685e62 7629 if (speed)
b6875aac
KV
7630 {
7631 if (VECTOR_MODE_P (mode))
7632 *cost += extra_cost->vect.alu;
7633 else
7634 *cost += extra_cost->alu.extend;
7635 }
43e9d192
IB
7636 return false;
7637
ba0cfa17
JG
7638 case ASHIFT:
7639 op0 = XEXP (x, 0);
7640 op1 = XEXP (x, 1);
7641
7642 if (CONST_INT_P (op1))
7643 {
ba0cfa17 7644 if (speed)
b6875aac
KV
7645 {
7646 if (VECTOR_MODE_P (mode))
7647 {
7648 /* Vector shift (immediate). */
7649 *cost += extra_cost->vect.alu;
7650 }
7651 else
7652 {
 7653 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
7654 aliases. */
7655 *cost += extra_cost->alu.shift;
7656 }
7657 }
ba0cfa17
JG
7658
7659 /* We can incorporate zero/sign extend for free. */
7660 if (GET_CODE (op0) == ZERO_EXTEND
7661 || GET_CODE (op0) == SIGN_EXTEND)
7662 op0 = XEXP (op0, 0);
7663
e548c9df 7664 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
7665 return true;
7666 }
7667 else
7668 {
7813b280 7669 if (VECTOR_MODE_P (mode))
b6875aac 7670 {
7813b280
KT
7671 if (speed)
7672 /* Vector shift (register). */
7673 *cost += extra_cost->vect.alu;
7674 }
7675 else
7676 {
7677 if (speed)
7678 /* LSLV. */
7679 *cost += extra_cost->alu.shift_reg;
7680
7681 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
7682 && CONST_INT_P (XEXP (op1, 1))
7683 && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
b6875aac 7684 {
7813b280
KT
7685 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
7686 /* We already demanded XEXP (op1, 0) to be REG_P, so
7687 don't recurse into it. */
7688 return true;
b6875aac
KV
7689 }
7690 }
ba0cfa17
JG
7691 return false; /* All arguments need to be in registers. */
7692 }
7693
43e9d192 7694 case ROTATE:
43e9d192
IB
7695 case ROTATERT:
7696 case LSHIFTRT:
43e9d192 7697 case ASHIFTRT:
ba0cfa17
JG
7698 op0 = XEXP (x, 0);
7699 op1 = XEXP (x, 1);
43e9d192 7700
ba0cfa17
JG
7701 if (CONST_INT_P (op1))
7702 {
7703 /* ASR (immediate) and friends. */
7704 if (speed)
b6875aac
KV
7705 {
7706 if (VECTOR_MODE_P (mode))
7707 *cost += extra_cost->vect.alu;
7708 else
7709 *cost += extra_cost->alu.shift;
7710 }
43e9d192 7711
e548c9df 7712 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
7713 return true;
7714 }
7715 else
7716 {
7813b280 7717 if (VECTOR_MODE_P (mode))
b6875aac 7718 {
7813b280
KT
7719 if (speed)
7720 /* Vector shift (register). */
b6875aac 7721 *cost += extra_cost->vect.alu;
7813b280
KT
7722 }
7723 else
7724 {
7725 if (speed)
7726 /* ASR (register) and friends. */
b6875aac 7727 *cost += extra_cost->alu.shift_reg;
7813b280
KT
7728
7729 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
7730 && CONST_INT_P (XEXP (op1, 1))
7731 && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
7732 {
7733 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
7734 /* We already demanded XEXP (op1, 0) to be REG_P, so
7735 don't recurse into it. */
7736 return true;
7737 }
b6875aac 7738 }
ba0cfa17
JG
7739 return false; /* All arguments need to be in registers. */
7740 }
43e9d192 7741
909734be
JG
7742 case SYMBOL_REF:
7743
1b1e81f8
JW
7744 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
7745 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
7746 {
7747 /* LDR. */
7748 if (speed)
7749 *cost += extra_cost->ldst.load;
7750 }
7751 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
7752 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
7753 {
7754 /* ADRP, followed by ADD. */
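	  /* e.g. "adrp x0, sym" followed by "add x0, x0, :lo12:sym", hence
	     one insn on top of the baseline cost.  */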
7755 *cost += COSTS_N_INSNS (1);
7756 if (speed)
7757 *cost += 2 * extra_cost->alu.arith;
7758 }
7759 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
7760 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
7761 {
7762 /* ADR. */
7763 if (speed)
7764 *cost += extra_cost->alu.arith;
7765 }
7766
7767 if (flag_pic)
7768 {
7769 /* One extra load instruction, after accessing the GOT. */
7770 *cost += COSTS_N_INSNS (1);
7771 if (speed)
7772 *cost += extra_cost->ldst.load;
7773 }
43e9d192
IB
7774 return true;
7775
909734be 7776 case HIGH:
43e9d192 7777 case LO_SUM:
909734be
JG
7778 /* ADRP/ADD (immediate). */
7779 if (speed)
7780 *cost += extra_cost->alu.arith;
43e9d192
IB
7781 return true;
7782
7783 case ZERO_EXTRACT:
7784 case SIGN_EXTRACT:
7cc2145f
JG
7785 /* UBFX/SBFX. */
7786 if (speed)
b6875aac
KV
7787 {
7788 if (VECTOR_MODE_P (mode))
7789 *cost += extra_cost->vect.alu;
7790 else
7791 *cost += extra_cost->alu.bfx;
7792 }
7cc2145f
JG
7793
7794 /* We can trust that the immediates used will be correct (there
7795 are no by-register forms), so we need only cost op0. */
e548c9df 7796 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
7797 return true;
7798
7799 case MULT:
4745e701
JG
7800 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
7801 /* aarch64_rtx_mult_cost always handles recursion to its
7802 operands. */
7803 return true;
43e9d192
IB
7804
7805 case MOD:
4f58fe36
KT
7806 /* We can expand signed mod by power of 2 using a NEGS, two parallel
 7807 ANDs and a CSNEG. Assume here that the cost of a CSNEG is the same as
 7808 that of an unconditional negate. This case should only ever be reached through
7809 the set_smod_pow2_cheap check in expmed.c. */
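      /* Sketch of that expansion: for x % 2^n the result is x & (2^n - 1)
	 when x >= 0 and -((-x) & (2^n - 1)) otherwise; NEGS computes -x and
	 sets the flags, the two parallel ANDs mask x and -x, and CSNEG
	 selects (negating where needed) the appropriate value.  */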
7810 if (CONST_INT_P (XEXP (x, 1))
7811 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
7812 && (mode == SImode || mode == DImode))
7813 {
7814 /* We expand to 4 instructions. Reset the baseline. */
7815 *cost = COSTS_N_INSNS (4);
7816
7817 if (speed)
7818 *cost += 2 * extra_cost->alu.logical
7819 + 2 * extra_cost->alu.arith;
7820
7821 return true;
7822 }
7823
7824 /* Fall-through. */
43e9d192 7825 case UMOD:
43e9d192
IB
7826 if (speed)
7827 {
cb9ac430 7828 /* Slightly prefer UMOD over SMOD. */
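	  /* There is no integer modulo instruction; MOD/UMOD is synthesised
	     as a divide followed by MSUB (x - (x / y) * y), hence the idiv
	     plus multiply-add costs below.  */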
b6875aac
KV
7829 if (VECTOR_MODE_P (mode))
7830 *cost += extra_cost->vect.alu;
e548c9df
AM
7831 else if (GET_MODE_CLASS (mode) == MODE_INT)
7832 *cost += (extra_cost->mult[mode == DImode].add
cb9ac430
TC
7833 + extra_cost->mult[mode == DImode].idiv
7834 + (code == MOD ? 1 : 0));
43e9d192
IB
7835 }
7836 return false; /* All arguments need to be in registers. */
7837
7838 case DIV:
7839 case UDIV:
4105fe38 7840 case SQRT:
43e9d192
IB
7841 if (speed)
7842 {
b6875aac
KV
7843 if (VECTOR_MODE_P (mode))
7844 *cost += extra_cost->vect.alu;
7845 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
7846 /* There is no integer SQRT, so only DIV and UDIV can get
7847 here. */
cb9ac430
TC
7848 *cost += (extra_cost->mult[mode == DImode].idiv
 7849 /* Slightly prefer UDIV over SDIV. */
7850 + (code == DIV ? 1 : 0));
4105fe38
JG
7851 else
7852 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
7853 }
7854 return false; /* All arguments need to be in registers. */
7855
a8eecd00 7856 case IF_THEN_ELSE:
2d5ffe46
AP
7857 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
7858 XEXP (x, 2), cost, speed);
a8eecd00
JG
7859
7860 case EQ:
7861 case NE:
7862 case GT:
7863 case GTU:
7864 case LT:
7865 case LTU:
7866 case GE:
7867 case GEU:
7868 case LE:
7869 case LEU:
7870
7871 return false; /* All arguments must be in registers. */
7872
b292109f
JG
7873 case FMA:
7874 op0 = XEXP (x, 0);
7875 op1 = XEXP (x, 1);
7876 op2 = XEXP (x, 2);
7877
7878 if (speed)
b6875aac
KV
7879 {
7880 if (VECTOR_MODE_P (mode))
7881 *cost += extra_cost->vect.alu;
7882 else
7883 *cost += extra_cost->fp[mode == DFmode].fma;
7884 }
b292109f
JG
7885
7886 /* FMSUB, FNMADD, and FNMSUB are free. */
7887 if (GET_CODE (op0) == NEG)
7888 op0 = XEXP (op0, 0);
7889
7890 if (GET_CODE (op2) == NEG)
7891 op2 = XEXP (op2, 0);
7892
7893 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
7894 and the by-element operand as operand 0. */
7895 if (GET_CODE (op1) == NEG)
7896 op1 = XEXP (op1, 0);
7897
7898 /* Catch vector-by-element operations. The by-element operand can
7899 either be (vec_duplicate (vec_select (x))) or just
7900 (vec_select (x)), depending on whether we are multiplying by
7901 a vector or a scalar.
7902
7903 Canonicalization is not very good in these cases, FMA4 will put the
7904 by-element operand as operand 0, FNMA4 will have it as operand 1. */
7905 if (GET_CODE (op0) == VEC_DUPLICATE)
7906 op0 = XEXP (op0, 0);
7907 else if (GET_CODE (op1) == VEC_DUPLICATE)
7908 op1 = XEXP (op1, 0);
7909
7910 if (GET_CODE (op0) == VEC_SELECT)
7911 op0 = XEXP (op0, 0);
7912 else if (GET_CODE (op1) == VEC_SELECT)
7913 op1 = XEXP (op1, 0);
7914
7915 /* If the remaining parameters are not registers,
7916 get the cost to put them into registers. */
e548c9df
AM
7917 *cost += rtx_cost (op0, mode, FMA, 0, speed);
7918 *cost += rtx_cost (op1, mode, FMA, 1, speed);
7919 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
7920 return true;
7921
5e2a765b
KT
7922 case FLOAT:
7923 case UNSIGNED_FLOAT:
7924 if (speed)
7925 *cost += extra_cost->fp[mode == DFmode].fromint;
7926 return false;
7927
b292109f
JG
7928 case FLOAT_EXTEND:
7929 if (speed)
b6875aac
KV
7930 {
7931 if (VECTOR_MODE_P (mode))
7932 {
 7933 /* Vector widening conversion. */
7934 *cost += extra_cost->vect.alu;
7935 }
7936 else
7937 *cost += extra_cost->fp[mode == DFmode].widen;
7938 }
b292109f
JG
7939 return false;
7940
7941 case FLOAT_TRUNCATE:
7942 if (speed)
b6875aac
KV
7943 {
7944 if (VECTOR_MODE_P (mode))
7945 {
 7946 /* Vector conversion. */
7947 *cost += extra_cost->vect.alu;
7948 }
7949 else
7950 *cost += extra_cost->fp[mode == DFmode].narrow;
7951 }
b292109f
JG
7952 return false;
7953
61263118
KT
7954 case FIX:
7955 case UNSIGNED_FIX:
7956 x = XEXP (x, 0);
7957 /* Strip the rounding part. They will all be implemented
7958 by the fcvt* family of instructions anyway. */
7959 if (GET_CODE (x) == UNSPEC)
7960 {
7961 unsigned int uns_code = XINT (x, 1);
7962
7963 if (uns_code == UNSPEC_FRINTA
7964 || uns_code == UNSPEC_FRINTM
7965 || uns_code == UNSPEC_FRINTN
7966 || uns_code == UNSPEC_FRINTP
7967 || uns_code == UNSPEC_FRINTZ)
7968 x = XVECEXP (x, 0, 0);
7969 }
7970
7971 if (speed)
b6875aac
KV
7972 {
7973 if (VECTOR_MODE_P (mode))
7974 *cost += extra_cost->vect.alu;
7975 else
7976 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
7977 }
39252973
KT
7978
7979 /* We can combine fmul by a power of 2 followed by a fcvt into a single
7980 fixed-point fcvt. */
7981 if (GET_CODE (x) == MULT
7982 && ((VECTOR_MODE_P (mode)
7983 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
7984 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
7985 {
7986 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
7987 0, speed);
7988 return true;
7989 }
7990
e548c9df 7991 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
7992 return true;
7993
b292109f 7994 case ABS:
b6875aac
KV
7995 if (VECTOR_MODE_P (mode))
7996 {
7997 /* ABS (vector). */
7998 if (speed)
7999 *cost += extra_cost->vect.alu;
8000 }
8001 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 8002 {
19261b99
KT
8003 op0 = XEXP (x, 0);
8004
8005 /* FABD, which is analogous to FADD. */
8006 if (GET_CODE (op0) == MINUS)
8007 {
e548c9df
AM
8008 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
8009 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
8010 if (speed)
8011 *cost += extra_cost->fp[mode == DFmode].addsub;
8012
8013 return true;
8014 }
8015 /* Simple FABS is analogous to FNEG. */
b292109f
JG
8016 if (speed)
8017 *cost += extra_cost->fp[mode == DFmode].neg;
8018 }
8019 else
8020 {
8021 /* Integer ABS will either be split to
8022 two arithmetic instructions, or will be an ABS
8023 (scalar), which we don't model. */
8024 *cost = COSTS_N_INSNS (2);
8025 if (speed)
8026 *cost += 2 * extra_cost->alu.arith;
8027 }
8028 return false;
8029
8030 case SMAX:
8031 case SMIN:
8032 if (speed)
8033 {
b6875aac
KV
8034 if (VECTOR_MODE_P (mode))
8035 *cost += extra_cost->vect.alu;
8036 else
8037 {
8038 /* FMAXNM/FMINNM/FMAX/FMIN.
8039 TODO: This may not be accurate for all implementations, but
8040 we do not model this in the cost tables. */
8041 *cost += extra_cost->fp[mode == DFmode].addsub;
8042 }
b292109f
JG
8043 }
8044 return false;
8045
61263118
KT
8046 case UNSPEC:
8047 /* The floating point round to integer frint* instructions. */
8048 if (aarch64_frint_unspec_p (XINT (x, 1)))
8049 {
8050 if (speed)
8051 *cost += extra_cost->fp[mode == DFmode].roundint;
8052
8053 return false;
8054 }
781aeb73
KT
8055
8056 if (XINT (x, 1) == UNSPEC_RBIT)
8057 {
8058 if (speed)
8059 *cost += extra_cost->alu.rev;
8060
8061 return false;
8062 }
61263118
KT
8063 break;
8064
fb620c4a
JG
8065 case TRUNCATE:
8066
8067 /* Decompose <su>muldi3_highpart. */
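      /* That is, the high 64 bits of a 64x64->128-bit widening multiply,
	 which maps to a single UMULH/SMULH instruction.  */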
8068 if (/* (truncate:DI */
8069 mode == DImode
8070 /* (lshiftrt:TI */
8071 && GET_MODE (XEXP (x, 0)) == TImode
8072 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8073 /* (mult:TI */
8074 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8075 /* (ANY_EXTEND:TI (reg:DI))
8076 (ANY_EXTEND:TI (reg:DI))) */
8077 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8078 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
8079 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
8080 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
8081 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
8082 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
8083 /* (const_int 64) */
8084 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8085 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
8086 {
8087 /* UMULH/SMULH. */
8088 if (speed)
8089 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
8090 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
8091 mode, MULT, 0, speed);
8092 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
8093 mode, MULT, 1, speed);
fb620c4a
JG
8094 return true;
8095 }
8096
8097 /* Fall through. */
43e9d192 8098 default:
61263118 8099 break;
43e9d192 8100 }
61263118 8101
c10e3d7f
AP
8102 if (dump_file
8103 && flag_aarch64_verbose_cost)
61263118
KT
8104 fprintf (dump_file,
8105 "\nFailed to cost RTX. Assuming default cost.\n");
8106
8107 return true;
43e9d192
IB
8108}
8109
0ee859b5
JG
8110/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
8111 calculated for X. This cost is stored in *COST. Returns true
8112 if the total cost of X was calculated. */
8113static bool
e548c9df 8114aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
8115 int param, int *cost, bool speed)
8116{
e548c9df 8117 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5 8118
c10e3d7f
AP
8119 if (dump_file
8120 && flag_aarch64_verbose_cost)
0ee859b5
JG
8121 {
8122 print_rtl_single (dump_file, x);
8123 fprintf (dump_file, "\n%s cost: %d (%s)\n",
8124 speed ? "Hot" : "Cold",
8125 *cost, result ? "final" : "partial");
8126 }
8127
8128 return result;
8129}
8130
43e9d192 8131static int
ef4bddc2 8132aarch64_register_move_cost (machine_mode mode,
8a3a7e67 8133 reg_class_t from_i, reg_class_t to_i)
43e9d192 8134{
8a3a7e67
RH
8135 enum reg_class from = (enum reg_class) from_i;
8136 enum reg_class to = (enum reg_class) to_i;
43e9d192 8137 const struct cpu_regmove_cost *regmove_cost
b175b679 8138 = aarch64_tune_params.regmove_cost;
43e9d192 8139
3be07662 8140 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
2876a13f 8141 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
3be07662
WD
8142 to = GENERAL_REGS;
8143
2876a13f 8144 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
3be07662
WD
8145 from = GENERAL_REGS;
8146
6ee70f81
AP
8147 /* Moving between GPR and stack cost is the same as GP2GP. */
8148 if ((from == GENERAL_REGS && to == STACK_REG)
8149 || (to == GENERAL_REGS && from == STACK_REG))
8150 return regmove_cost->GP2GP;
8151
8152 /* To/From the stack register, we move via the gprs. */
8153 if (to == STACK_REG || from == STACK_REG)
8154 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
8155 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
8156
8919453c
WD
8157 if (GET_MODE_SIZE (mode) == 16)
8158 {
8159 /* 128-bit operations on general registers require 2 instructions. */
8160 if (from == GENERAL_REGS && to == GENERAL_REGS)
8161 return regmove_cost->GP2GP * 2;
8162 else if (from == GENERAL_REGS)
8163 return regmove_cost->GP2FP * 2;
8164 else if (to == GENERAL_REGS)
8165 return regmove_cost->FP2GP * 2;
8166
8167 /* When AdvSIMD instructions are disabled it is not possible to move
8168 a 128-bit value directly between Q registers. This is handled in
8169 secondary reload. A general register is used as a scratch to move
8170 the upper DI value and the lower DI value is moved directly,
8171 hence the cost is the sum of three moves. */
8172 if (! TARGET_SIMD)
8173 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
8174
8175 return regmove_cost->FP2FP;
8176 }
8177
43e9d192
IB
8178 if (from == GENERAL_REGS && to == GENERAL_REGS)
8179 return regmove_cost->GP2GP;
8180 else if (from == GENERAL_REGS)
8181 return regmove_cost->GP2FP;
8182 else if (to == GENERAL_REGS)
8183 return regmove_cost->FP2GP;
8184
43e9d192
IB
8185 return regmove_cost->FP2FP;
8186}
8187
8188static int
ef4bddc2 8189aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
8190 reg_class_t rclass ATTRIBUTE_UNUSED,
8191 bool in ATTRIBUTE_UNUSED)
8192{
b175b679 8193 return aarch64_tune_params.memmov_cost;
43e9d192
IB
8194}
8195
0c30e0f3
EM
8196/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
8197 to optimize 1.0/sqrt. */
ee62a5a6
RS
8198
8199static bool
9acc9cbe 8200use_rsqrt_p (machine_mode mode)
ee62a5a6
RS
8201{
8202 return (!flag_trapping_math
8203 && flag_unsafe_math_optimizations
9acc9cbe
EM
8204 && ((aarch64_tune_params.approx_modes->recip_sqrt
8205 & AARCH64_APPROX_MODE (mode))
1a33079e 8206 || flag_mrecip_low_precision_sqrt));
ee62a5a6
RS
8207}
8208
0c30e0f3
EM
8209/* Function to decide when to use the approximate reciprocal square root
8210 builtin. */
a6fc00da
BH
8211
8212static tree
ee62a5a6 8213aarch64_builtin_reciprocal (tree fndecl)
a6fc00da 8214{
9acc9cbe
EM
8215 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
8216
8217 if (!use_rsqrt_p (mode))
a6fc00da 8218 return NULL_TREE;
ee62a5a6 8219 return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
a6fc00da
BH
8220}
8221
8222typedef rtx (*rsqrte_type) (rtx, rtx);
8223
98daafa0
EM
8224/* Select reciprocal square root initial estimate insn depending on machine
8225 mode. */
a6fc00da 8226
98daafa0 8227static rsqrte_type
a6fc00da
BH
8228get_rsqrte_type (machine_mode mode)
8229{
8230 switch (mode)
8231 {
4e10a5a7
RS
8232 case E_DFmode: return gen_aarch64_rsqrtedf;
8233 case E_SFmode: return gen_aarch64_rsqrtesf;
8234 case E_V2DFmode: return gen_aarch64_rsqrtev2df;
8235 case E_V2SFmode: return gen_aarch64_rsqrtev2sf;
8236 case E_V4SFmode: return gen_aarch64_rsqrtev4sf;
a6fc00da
BH
8237 default: gcc_unreachable ();
8238 }
8239}
8240
8241typedef rtx (*rsqrts_type) (rtx, rtx, rtx);
8242
98daafa0 8243/* Select reciprocal square root series step insn depending on machine mode. */
a6fc00da 8244
98daafa0 8245static rsqrts_type
a6fc00da
BH
8246get_rsqrts_type (machine_mode mode)
8247{
8248 switch (mode)
8249 {
4e10a5a7
RS
8250 case E_DFmode: return gen_aarch64_rsqrtsdf;
8251 case E_SFmode: return gen_aarch64_rsqrtssf;
8252 case E_V2DFmode: return gen_aarch64_rsqrtsv2df;
8253 case E_V2SFmode: return gen_aarch64_rsqrtsv2sf;
8254 case E_V4SFmode: return gen_aarch64_rsqrtsv4sf;
a6fc00da
BH
8255 default: gcc_unreachable ();
8256 }
8257}
8258
98daafa0
EM
8259/* Emit instruction sequence to compute either the approximate square root
8260 or its approximate reciprocal, depending on the flag RECP, and return
8261 whether the sequence was emitted or not. */
a6fc00da 8262
98daafa0
EM
8263bool
8264aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
a6fc00da 8265{
98daafa0 8266 machine_mode mode = GET_MODE (dst);
daef0a8c
JW
8267
8268 if (GET_MODE_INNER (mode) == HFmode)
2e19adc8
RE
8269 {
8270 gcc_assert (!recp);
8271 return false;
8272 }
8273
8274 machine_mode mmsk
304b9962 8275 = mode_for_vector (int_mode_for_mode (GET_MODE_INNER (mode)).require (),
2e19adc8
RE
8276 GET_MODE_NUNITS (mode));
8277 if (!recp)
8278 {
8279 if (!(flag_mlow_precision_sqrt
8280 || (aarch64_tune_params.approx_modes->sqrt
8281 & AARCH64_APPROX_MODE (mode))))
8282 return false;
8283
8284 if (flag_finite_math_only
8285 || flag_trapping_math
8286 || !flag_unsafe_math_optimizations
8287 || optimize_function_for_size_p (cfun))
8288 return false;
8289 }
8290 else
8291 /* Caller assumes we cannot fail. */
8292 gcc_assert (use_rsqrt_p (mode));
daef0a8c 8293
a6fc00da 8294
98daafa0
EM
8295 rtx xmsk = gen_reg_rtx (mmsk);
8296 if (!recp)
2e19adc8
RE
8297 /* When calculating the approximate square root, compare the
8298 argument with 0.0 and create a mask. */
8299 emit_insn (gen_rtx_SET (xmsk,
8300 gen_rtx_NEG (mmsk,
8301 gen_rtx_EQ (mmsk, src,
8302 CONST0_RTX (mode)))));
a6fc00da 8303
98daafa0
EM
8304 /* Estimate the approximate reciprocal square root. */
8305 rtx xdst = gen_reg_rtx (mode);
8306 emit_insn ((*get_rsqrte_type (mode)) (xdst, src));
a6fc00da 8307
98daafa0
EM
8308 /* Iterate over the series twice for SF and thrice for DF. */
8309 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
a6fc00da 8310
98daafa0
EM
8311 /* Optionally iterate over the series once less for faster performance
8312 while sacrificing the accuracy. */
8313 if ((recp && flag_mrecip_low_precision_sqrt)
8314 || (!recp && flag_mlow_precision_sqrt))
a6fc00da
BH
8315 iterations--;
8316
98daafa0
EM
8317 /* Iterate over the series to calculate the approximate reciprocal square
8318 root. */
8319 rtx x1 = gen_reg_rtx (mode);
8320 while (iterations--)
a6fc00da 8321 {
a6fc00da 8322 rtx x2 = gen_reg_rtx (mode);
98daafa0
EM
8323 emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
8324
8325 emit_insn ((*get_rsqrts_type (mode)) (x1, src, x2));
a6fc00da 8326
98daafa0
EM
8327 if (iterations > 0)
8328 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
8329 }
8330
8331 if (!recp)
8332 {
8333 /* Qualify the approximate reciprocal square root when the argument is
8334 0.0 by squashing the intermediary result to 0.0. */
8335 rtx xtmp = gen_reg_rtx (mmsk);
8336 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
8337 gen_rtx_SUBREG (mmsk, xdst, 0)));
8338 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
a6fc00da 8339
98daafa0
EM
8340 /* Calculate the approximate square root. */
8341 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
a6fc00da
BH
8342 }
8343
98daafa0
EM
8344 /* Finalize the approximation. */
8345 emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
8346
8347 return true;
a6fc00da
BH
8348}
8349
79a2bc2d
EM
8350typedef rtx (*recpe_type) (rtx, rtx);
8351
8352/* Select reciprocal initial estimate insn depending on machine mode. */
8353
8354static recpe_type
8355get_recpe_type (machine_mode mode)
8356{
8357 switch (mode)
8358 {
4e10a5a7
RS
8359 case E_SFmode: return (gen_aarch64_frecpesf);
8360 case E_V2SFmode: return (gen_aarch64_frecpev2sf);
8361 case E_V4SFmode: return (gen_aarch64_frecpev4sf);
8362 case E_DFmode: return (gen_aarch64_frecpedf);
8363 case E_V2DFmode: return (gen_aarch64_frecpev2df);
8364 default: gcc_unreachable ();
79a2bc2d
EM
8365 }
8366}
8367
8368typedef rtx (*recps_type) (rtx, rtx, rtx);
8369
8370/* Select reciprocal series step insn depending on machine mode. */
8371
8372static recps_type
8373get_recps_type (machine_mode mode)
8374{
8375 switch (mode)
8376 {
4e10a5a7
RS
8377 case E_SFmode: return (gen_aarch64_frecpssf);
8378 case E_V2SFmode: return (gen_aarch64_frecpsv2sf);
8379 case E_V4SFmode: return (gen_aarch64_frecpsv4sf);
8380 case E_DFmode: return (gen_aarch64_frecpsdf);
8381 case E_V2DFmode: return (gen_aarch64_frecpsv2df);
8382 default: gcc_unreachable ();
79a2bc2d
EM
8383 }
8384}
8385
8386/* Emit the instruction sequence to compute the approximation for the division
8387 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
8388
8389bool
8390aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
8391{
8392 machine_mode mode = GET_MODE (quo);
33d72b63
JW
8393
8394 if (GET_MODE_INNER (mode) == HFmode)
8395 return false;
8396
79a2bc2d
EM
8397 bool use_approx_division_p = (flag_mlow_precision_div
8398 || (aarch64_tune_params.approx_modes->division
8399 & AARCH64_APPROX_MODE (mode)));
8400
8401 if (!flag_finite_math_only
8402 || flag_trapping_math
8403 || !flag_unsafe_math_optimizations
8404 || optimize_function_for_size_p (cfun)
8405 || !use_approx_division_p)
8406 return false;
8407
8408 /* Estimate the approximate reciprocal. */
8409 rtx xrcp = gen_reg_rtx (mode);
8410 emit_insn ((*get_recpe_type (mode)) (xrcp, den));
8411
8412 /* Iterate over the series twice for SF and thrice for DF. */
8413 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
8414
8415 /* Optionally iterate over the series once less for faster performance,
8416 while sacrificing the accuracy. */
8417 if (flag_mlow_precision_div)
8418 iterations--;
8419
8420 /* Iterate over the series to calculate the approximate reciprocal. */
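  /* Each pass is a Newton-Raphson step: FRECPS (a, b) computes 2 - a * b,
     so the loop refines xrcp to xrcp * (2 - den * xrcp), converging
     towards 1 / den.  */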
8421 rtx xtmp = gen_reg_rtx (mode);
8422 while (iterations--)
8423 {
8424 emit_insn ((*get_recps_type (mode)) (xtmp, xrcp, den));
8425
8426 if (iterations > 0)
8427 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
8428 }
8429
8430 if (num != CONST1_RTX (mode))
8431 {
8432 /* As the approximate reciprocal of DEN is already calculated, only
8433 calculate the approximate division when NUM is not 1.0. */
8434 rtx xnum = force_reg (mode, num);
8435 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
8436 }
8437
8438 /* Finalize the approximation. */
8439 emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
8440 return true;
8441}
8442
d126a4ae
AP
8443/* Return the number of instructions that can be issued per cycle. */
8444static int
8445aarch64_sched_issue_rate (void)
8446{
b175b679 8447 return aarch64_tune_params.issue_rate;
d126a4ae
AP
8448}
8449
d03f7e44
MK
8450static int
8451aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
8452{
8453 int issue_rate = aarch64_sched_issue_rate ();
8454
8455 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
8456}
8457
2d6bc7fa
KT
8458
8459/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
8460 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
8461 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
8462
8463static int
8464aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
8465 int ready_index)
8466{
8467 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
8468}
8469
8470
8990e73a
TB
8471/* Vectorizer cost model target hooks. */
8472
8473/* Implement targetm.vectorize.builtin_vectorization_cost. */
8474static int
8475aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
8476 tree vectype,
8477 int misalign ATTRIBUTE_UNUSED)
8478{
8479 unsigned elements;
cd8ae5ed
AP
8480 const cpu_vector_cost *costs = aarch64_tune_params.vec_costs;
8481 bool fp = false;
8482
8483 if (vectype != NULL)
8484 fp = FLOAT_TYPE_P (vectype);
8990e73a
TB
8485
8486 switch (type_of_cost)
8487 {
8488 case scalar_stmt:
cd8ae5ed 8489 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
8990e73a
TB
8490
8491 case scalar_load:
cd8ae5ed 8492 return costs->scalar_load_cost;
8990e73a
TB
8493
8494 case scalar_store:
cd8ae5ed 8495 return costs->scalar_store_cost;
8990e73a
TB
8496
8497 case vector_stmt:
cd8ae5ed 8498 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
8499
8500 case vector_load:
cd8ae5ed 8501 return costs->vec_align_load_cost;
8990e73a
TB
8502
8503 case vector_store:
cd8ae5ed 8504 return costs->vec_store_cost;
8990e73a
TB
8505
8506 case vec_to_scalar:
cd8ae5ed 8507 return costs->vec_to_scalar_cost;
8990e73a
TB
8508
8509 case scalar_to_vec:
cd8ae5ed 8510 return costs->scalar_to_vec_cost;
8990e73a
TB
8511
8512 case unaligned_load:
cd8ae5ed 8513 return costs->vec_unalign_load_cost;
8990e73a
TB
8514
8515 case unaligned_store:
cd8ae5ed 8516 return costs->vec_unalign_store_cost;
8990e73a
TB
8517
8518 case cond_branch_taken:
cd8ae5ed 8519 return costs->cond_taken_branch_cost;
8990e73a
TB
8520
8521 case cond_branch_not_taken:
cd8ae5ed 8522 return costs->cond_not_taken_branch_cost;
8990e73a
TB
8523
8524 case vec_perm:
cd8ae5ed 8525 return costs->vec_permute_cost;
c428f91c 8526
8990e73a 8527 case vec_promote_demote:
cd8ae5ed 8528 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
8529
8530 case vec_construct:
8531 elements = TYPE_VECTOR_SUBPARTS (vectype);
8532 return elements / 2 + 1;
8533
8534 default:
8535 gcc_unreachable ();
8536 }
8537}
8538
8539/* Implement targetm.vectorize.add_stmt_cost. */
8540static unsigned
8541aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
8542 struct _stmt_vec_info *stmt_info, int misalign,
8543 enum vect_cost_model_location where)
8544{
8545 unsigned *cost = (unsigned *) data;
8546 unsigned retval = 0;
8547
8548 if (flag_vect_cost_model)
8549 {
8550 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
8551 int stmt_cost =
8552 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
8553
8554 /* Statements in an inner loop relative to the loop being
8555 vectorized are weighted more heavily. The value here is
058e4c71 8556 arbitrary and could potentially be improved with analysis. */
8990e73a 8557 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
058e4c71 8558 count *= 50; /* FIXME */
8990e73a
TB
8559
8560 retval = (unsigned) (count * stmt_cost);
8561 cost[where] += retval;
8562 }
8563
8564 return retval;
8565}
8566
0cfff2a1 8567static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 8568
0cfff2a1
KT
8569/* Parse the TO_PARSE string and put the architecture struct that it
8570 selects into RES and the architectural features into ISA_FLAGS.
8571 Return an aarch64_parse_opt_result describing the parse result.
8572 If there is an error parsing, RES and ISA_FLAGS are left unchanged. */
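/* For example (illustrative), a TO_PARSE string of "armv8.1-a+crypto" is
   split at the '+': "armv8.1-a" is looked up in all_architectures and the
   remaining "+crypto" is handed to aarch64_parse_extension.  */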
43e9d192 8573
0cfff2a1
KT
8574static enum aarch64_parse_opt_result
8575aarch64_parse_arch (const char *to_parse, const struct processor **res,
8576 unsigned long *isa_flags)
43e9d192
IB
8577{
8578 char *ext;
8579 const struct processor *arch;
0cfff2a1 8580 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
8581 size_t len;
8582
0cfff2a1 8583 strcpy (str, to_parse);
43e9d192
IB
8584
8585 ext = strchr (str, '+');
8586
8587 if (ext != NULL)
8588 len = ext - str;
8589 else
8590 len = strlen (str);
8591
8592 if (len == 0)
0cfff2a1
KT
8593 return AARCH64_PARSE_MISSING_ARG;
8594
43e9d192 8595
0cfff2a1 8596 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
8597 for (arch = all_architectures; arch->name != NULL; arch++)
8598 {
8599 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
8600 {
0cfff2a1 8601 unsigned long isa_temp = arch->flags;
43e9d192
IB
8602
8603 if (ext != NULL)
8604 {
0cfff2a1
KT
8605 /* TO_PARSE string contains at least one extension. */
8606 enum aarch64_parse_opt_result ext_res
8607 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 8608
0cfff2a1
KT
8609 if (ext_res != AARCH64_PARSE_OK)
8610 return ext_res;
ffee7aa9 8611 }
0cfff2a1
KT
8612 /* Extension parsing was successful. Confirm the result
8613 arch and ISA flags. */
8614 *res = arch;
8615 *isa_flags = isa_temp;
8616 return AARCH64_PARSE_OK;
43e9d192
IB
8617 }
8618 }
8619
8620 /* ARCH name not found in list. */
0cfff2a1 8621 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
8622}
8623
0cfff2a1
KT
8624/* Parse the TO_PARSE string and put the result tuning in RES and the
8625 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
8626 describing the parse result. If there is an error parsing, RES and
8627 ISA_FLAGS are left unchanged. */
43e9d192 8628
0cfff2a1
KT
8629static enum aarch64_parse_opt_result
8630aarch64_parse_cpu (const char *to_parse, const struct processor **res,
8631 unsigned long *isa_flags)
43e9d192
IB
8632{
8633 char *ext;
8634 const struct processor *cpu;
0cfff2a1 8635 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
8636 size_t len;
8637
0cfff2a1 8638 strcpy (str, to_parse);
43e9d192
IB
8639
8640 ext = strchr (str, '+');
8641
8642 if (ext != NULL)
8643 len = ext - str;
8644 else
8645 len = strlen (str);
8646
8647 if (len == 0)
0cfff2a1
KT
8648 return AARCH64_PARSE_MISSING_ARG;
8649
43e9d192
IB
8650
8651 /* Loop through the list of supported CPUs to find a match. */
8652 for (cpu = all_cores; cpu->name != NULL; cpu++)
8653 {
8654 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
8655 {
0cfff2a1
KT
8656 unsigned long isa_temp = cpu->flags;
8657
43e9d192
IB
8658
8659 if (ext != NULL)
8660 {
0cfff2a1
KT
8661 /* TO_PARSE string contains at least one extension. */
8662 enum aarch64_parse_opt_result ext_res
8663 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 8664
0cfff2a1
KT
8665 if (ext_res != AARCH64_PARSE_OK)
8666 return ext_res;
8667 }
 8668 /* Extension parsing was successful. Confirm the result
8669 cpu and ISA flags. */
8670 *res = cpu;
8671 *isa_flags = isa_temp;
8672 return AARCH64_PARSE_OK;
43e9d192
IB
8673 }
8674 }
8675
8676 /* CPU name not found in list. */
0cfff2a1 8677 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
8678}
8679
0cfff2a1
KT
8680/* Parse the TO_PARSE string and put the cpu it selects into RES.
8681 Return an aarch64_parse_opt_result describing the parse result.
8682 If the parsing fails the RES does not change. */
43e9d192 8683
0cfff2a1
KT
8684static enum aarch64_parse_opt_result
8685aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
8686{
8687 const struct processor *cpu;
0cfff2a1
KT
8688 char *str = (char *) alloca (strlen (to_parse) + 1);
8689
8690 strcpy (str, to_parse);
43e9d192
IB
8691
8692 /* Loop through the list of supported CPUs to find a match. */
8693 for (cpu = all_cores; cpu->name != NULL; cpu++)
8694 {
8695 if (strcmp (cpu->name, str) == 0)
8696 {
0cfff2a1
KT
8697 *res = cpu;
8698 return AARCH64_PARSE_OK;
43e9d192
IB
8699 }
8700 }
8701
8702 /* CPU name not found in list. */
0cfff2a1 8703 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
8704}
8705
8dec06f2
JG
8706/* Parse TOKEN, which has length LENGTH, to see if it is an option
8707 described in FLAG. If it is, return the index bit for that fusion type.
8708 If not, error (printing OPTION_NAME) and return zero. */
8709
8710static unsigned int
8711aarch64_parse_one_option_token (const char *token,
8712 size_t length,
8713 const struct aarch64_flag_desc *flag,
8714 const char *option_name)
8715{
8716 for (; flag->name != NULL; flag++)
8717 {
8718 if (length == strlen (flag->name)
8719 && !strncmp (flag->name, token, length))
8720 return flag->flag;
8721 }
8722
8723 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
8724 return 0;
8725}
8726
8727/* Parse OPTION which is a comma-separated list of flags to enable.
8728 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
8729 default state we inherit from the CPU tuning structures. OPTION_NAME
8730 gives the top-level option we are parsing in the -moverride string,
8731 for use in error messages. */
8732
8733static unsigned int
8734aarch64_parse_boolean_options (const char *option,
8735 const struct aarch64_flag_desc *flags,
8736 unsigned int initial_state,
8737 const char *option_name)
8738{
8739 const char separator = '.';
8740 const char* specs = option;
8741 const char* ntoken = option;
8742 unsigned int found_flags = initial_state;
8743
8744 while ((ntoken = strchr (specs, separator)))
8745 {
8746 size_t token_length = ntoken - specs;
8747 unsigned token_ops = aarch64_parse_one_option_token (specs,
8748 token_length,
8749 flags,
8750 option_name);
8751 /* If we find "none" (or, for simplicity's sake, an error) anywhere
8752 in the token stream, reset the supported operations. So:
8753
8754 adrp+add.cmp+branch.none.adrp+add
8755
8756 would have the result of turning on only adrp+add fusion. */
8757 if (!token_ops)
8758 found_flags = 0;
8759
8760 found_flags |= token_ops;
8761 specs = ++ntoken;
8762 }
8763
 8764  /* The string ended with a trailing separator; complain.  */
8765 if (!(*specs))
8766 {
8767 error ("%s string ill-formed\n", option_name);
8768 return 0;
8769 }
8770
8771 /* We still have one more token to parse. */
8772 size_t token_length = strlen (specs);
8773 unsigned token_ops = aarch64_parse_one_option_token (specs,
8774 token_length,
8775 flags,
8776 option_name);
8777 if (!token_ops)
8778 found_flags = 0;
8779
8780 found_flags |= token_ops;
8781 return found_flags;
8782}
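/* A hypothetical call illustrating the behaviour above:
     aarch64_parse_boolean_options ("adrp+add.cmp+branch",
				    aarch64_fusible_pairs, 0, "fuse=")
   splits the string on '.', maps each token through
   aarch64_parse_one_option_token and ORs the resulting bits together, so it
   would return the union of the adrp+add and cmp+branch fusion flags.  */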
8783
8784/* Support for overriding instruction fusion. */
8785
8786static void
8787aarch64_parse_fuse_string (const char *fuse_string,
8788 struct tune_params *tune)
8789{
8790 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
8791 aarch64_fusible_pairs,
8792 tune->fusible_ops,
8793 "fuse=");
8794}
8795
8796/* Support for overriding other tuning flags. */
8797
8798static void
8799aarch64_parse_tune_string (const char *tune_string,
8800 struct tune_params *tune)
8801{
8802 tune->extra_tuning_flags
8803 = aarch64_parse_boolean_options (tune_string,
8804 aarch64_tuning_flags,
8805 tune->extra_tuning_flags,
8806 "tune=");
8807}
8808
 8809/* Parse TOKEN, which has length LENGTH, to see if it is a tuning option
 8810   we understand.  If it is, extract the option string and hand off to
8811 the appropriate function. */
8812
8813void
8814aarch64_parse_one_override_token (const char* token,
8815 size_t length,
8816 struct tune_params *tune)
8817{
8818 const struct aarch64_tuning_override_function *fn
8819 = aarch64_tuning_override_functions;
8820
8821 const char *option_part = strchr (token, '=');
8822 if (!option_part)
8823 {
8824 error ("tuning string missing in option (%s)", token);
8825 return;
8826 }
8827
8828 /* Get the length of the option name. */
8829 length = option_part - token;
8830 /* Skip the '=' to get to the option string. */
8831 option_part++;
8832
8833 for (; fn->name != NULL; fn++)
8834 {
8835 if (!strncmp (fn->name, token, length))
8836 {
8837 fn->parse_override (option_part, tune);
8838 return;
8839 }
8840 }
8841
8842 error ("unknown tuning option (%s)",token);
8843 return;
8844}
8845
5eee3c34
JW
8846/* A checking mechanism for the implementation of the tls size. */
8847
8848static void
8849initialize_aarch64_tls_size (struct gcc_options *opts)
8850{
8851 if (aarch64_tls_size == 0)
8852 aarch64_tls_size = 24;
8853
8854 switch (opts->x_aarch64_cmodel_var)
8855 {
8856 case AARCH64_CMODEL_TINY:
 8857    /* Both the default and maximum TLS size allowed under tiny are 1M, which
8858 needs two instructions to address, so we clamp the size to 24. */
8859 if (aarch64_tls_size > 24)
8860 aarch64_tls_size = 24;
8861 break;
8862 case AARCH64_CMODEL_SMALL:
8863 /* The maximum TLS size allowed under small is 4G. */
8864 if (aarch64_tls_size > 32)
8865 aarch64_tls_size = 32;
8866 break;
8867 case AARCH64_CMODEL_LARGE:
8868 /* The maximum TLS size allowed under large is 16E.
 8869	 FIXME: 16E should be 64-bit; we only support a 48-bit offset now.  */
8870 if (aarch64_tls_size > 48)
8871 aarch64_tls_size = 48;
8872 break;
8873 default:
8874 gcc_unreachable ();
8875 }
8876
8877 return;
8878}
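/* For example (hypothetically), -mcmodel=tiny combined with -mtls-size=32
   is clamped to 24 above, since the tiny model's two-instruction sequence
   can only address a 1M TLS area.  */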
8879
8dec06f2
JG
8880/* Parse STRING looking for options in the format:
8881 string :: option:string
8882 option :: name=substring
8883 name :: {a-z}
8884 substring :: defined by option. */
8885
8886static void
8887aarch64_parse_override_string (const char* input_string,
8888 struct tune_params* tune)
8889{
8890 const char separator = ':';
8891 size_t string_length = strlen (input_string) + 1;
8892 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
8893 char *string = string_root;
8894 strncpy (string, input_string, string_length);
8895 string[string_length - 1] = '\0';
8896
8897 char* ntoken = string;
8898
8899 while ((ntoken = strchr (string, separator)))
8900 {
8901 size_t token_length = ntoken - string;
8902 /* Make this substring look like a string. */
8903 *ntoken = '\0';
8904 aarch64_parse_one_override_token (string, token_length, tune);
8905 string = ++ntoken;
8906 }
8907
8908 /* One last option to parse. */
8909 aarch64_parse_one_override_token (string, strlen (string), tune);
8910 free (string_root);
8911}
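/* Putting the helpers above together, a hypothetical
     -moverride=fuse=adrp+add.cmp+branch:tune=rename_fma_regs
   is first split on ':' here; each name=substring token is then dispatched
   by aarch64_parse_one_override_token to the matching entry in
   aarch64_tuning_override_functions.  */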
43e9d192 8912
43e9d192
IB
8913
8914static void
0cfff2a1 8915aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 8916{
a3dc8760
NC
8917 /* The logic here is that if we are disabling all frame pointer generation
8918 then we do not need to disable leaf frame pointer generation as a
8919 separate operation. But if we are *only* disabling leaf frame pointer
8920 generation then we set flag_omit_frame_pointer to true, but in
8921 aarch64_frame_pointer_required we return false only for leaf functions.
8922
8923 PR 70044: We have to be careful about being called multiple times for the
8924 same function. Once we have decided to set flag_omit_frame_pointer just
8925 so that we can omit leaf frame pointers, we must then not interpret a
8926 second call as meaning that all frame pointer generation should be
8927 omitted. We do this by setting flag_omit_frame_pointer to a special,
8928 non-zero value. */
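  /* As a concrete (sketched) example: a first call that only sees
     -momit-leaf-frame-pointer sets flag_omit_frame_pointer to 2 below; on a
     second call the check here folds that 2 back to 0 so it is not mistaken
     for a user request to omit all frame pointers.  */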
8929 if (opts->x_flag_omit_frame_pointer == 2)
8930 opts->x_flag_omit_frame_pointer = 0;
8931
0cfff2a1
KT
8932 if (opts->x_flag_omit_frame_pointer)
8933 opts->x_flag_omit_leaf_frame_pointer = false;
8934 else if (opts->x_flag_omit_leaf_frame_pointer)
a3dc8760 8935 opts->x_flag_omit_frame_pointer = 2;
43e9d192 8936
1be34295 8937 /* If not optimizing for size, set the default
0cfff2a1
KT
8938 alignment to what the target wants. */
8939 if (!opts->x_optimize_size)
43e9d192 8940 {
0cfff2a1
KT
8941 if (opts->x_align_loops <= 0)
8942 opts->x_align_loops = aarch64_tune_params.loop_align;
8943 if (opts->x_align_jumps <= 0)
8944 opts->x_align_jumps = aarch64_tune_params.jump_align;
8945 if (opts->x_align_functions <= 0)
8946 opts->x_align_functions = aarch64_tune_params.function_align;
43e9d192 8947 }
b4f50fd4 8948
9ee6540a
WD
8949 /* We default to no pc-relative literal loads. */
8950
8951 aarch64_pcrelative_literal_loads = false;
8952
8953 /* If -mpc-relative-literal-loads is set on the command line, this
b4f50fd4 8954 implies that the user asked for PC relative literal loads. */
9ee6540a
WD
8955 if (opts->x_pcrelative_literal_loads == 1)
8956 aarch64_pcrelative_literal_loads = true;
b4f50fd4 8957
48bb1a55
CL
8958 /* This is PR70113. When building the Linux kernel with
8959 CONFIG_ARM64_ERRATUM_843419, support for relocations
8960 R_AARCH64_ADR_PREL_PG_HI21 and R_AARCH64_ADR_PREL_PG_HI21_NC is
8961 removed from the kernel to avoid loading objects with possibly
9ee6540a 8962 offending sequences. Without -mpc-relative-literal-loads we would
48bb1a55
CL
8963 generate such relocations, preventing the kernel build from
8964 succeeding. */
9ee6540a
WD
8965 if (opts->x_pcrelative_literal_loads == 2
8966 && TARGET_FIX_ERR_A53_843419)
8967 aarch64_pcrelative_literal_loads = true;
8968
8969 /* In the tiny memory model it makes no sense to disallow PC relative
8970 literal pool loads. */
8971 if (aarch64_cmodel == AARCH64_CMODEL_TINY
8972 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
8973 aarch64_pcrelative_literal_loads = true;
98daafa0
EM
8974
8975 /* When enabling the lower precision Newton series for the square root, also
8976 enable it for the reciprocal square root, since the latter is an
8977 intermediary step for the former. */
8978 if (flag_mlow_precision_sqrt)
8979 flag_mrecip_low_precision_sqrt = true;
0cfff2a1 8980}
43e9d192 8981
0cfff2a1
KT
8982/* 'Unpack' up the internal tuning structs and update the options
8983 in OPTS. The caller must have set up selected_tune and selected_arch
8984 as all the other target-specific codegen decisions are
8985 derived from them. */
8986
e4ea20c8 8987void
0cfff2a1
KT
8988aarch64_override_options_internal (struct gcc_options *opts)
8989{
8990 aarch64_tune_flags = selected_tune->flags;
8991 aarch64_tune = selected_tune->sched_core;
8992 /* Make a copy of the tuning parameters attached to the core, which
8993 we may later overwrite. */
8994 aarch64_tune_params = *(selected_tune->tune);
8995 aarch64_architecture_version = selected_arch->architecture_version;
8996
8997 if (opts->x_aarch64_override_tune_string)
8998 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
8999 &aarch64_tune_params);
9000
9001 /* This target defaults to strict volatile bitfields. */
9002 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
9003 opts->x_flag_strict_volatile_bitfields = 1;
9004
0cfff2a1 9005 initialize_aarch64_code_model (opts);
5eee3c34 9006 initialize_aarch64_tls_size (opts);
63892fa2 9007
2d6bc7fa
KT
9008 int queue_depth = 0;
9009 switch (aarch64_tune_params.autoprefetcher_model)
9010 {
9011 case tune_params::AUTOPREFETCHER_OFF:
9012 queue_depth = -1;
9013 break;
9014 case tune_params::AUTOPREFETCHER_WEAK:
9015 queue_depth = 0;
9016 break;
9017 case tune_params::AUTOPREFETCHER_STRONG:
9018 queue_depth = max_insn_queue_index + 1;
9019 break;
9020 default:
9021 gcc_unreachable ();
9022 }
9023
9024 /* We don't mind passing in global_options_set here as we don't use
9025 the *options_set structs anyway. */
9026 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
9027 queue_depth,
9028 opts->x_param_values,
9029 global_options_set.x_param_values);
9030
9d2c6e2e
MK
9031 /* Set up parameters to be used in prefetching algorithm. Do not
9032 override the defaults unless we are tuning for a core we have
9033 researched values for. */
9034 if (aarch64_tune_params.prefetch->num_slots > 0)
9035 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
9036 aarch64_tune_params.prefetch->num_slots,
9037 opts->x_param_values,
9038 global_options_set.x_param_values);
9039 if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
9040 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
9041 aarch64_tune_params.prefetch->l1_cache_size,
9042 opts->x_param_values,
9043 global_options_set.x_param_values);
9044 if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
50487d79 9045 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
9d2c6e2e
MK
9046 aarch64_tune_params.prefetch->l1_cache_line_size,
9047 opts->x_param_values,
9048 global_options_set.x_param_values);
9049 if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
9050 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
9051 aarch64_tune_params.prefetch->l2_cache_size,
50487d79
EM
9052 opts->x_param_values,
9053 global_options_set.x_param_values);
9054
16b2cafd
MK
9055 /* Enable sw prefetching at specified optimization level for
 9056     CPUs that have prefetch.  Lower the optimization level threshold by 1
9057 when profiling is enabled. */
9058 if (opts->x_flag_prefetch_loop_arrays < 0
9059 && !opts->x_optimize_size
9060 && aarch64_tune_params.prefetch->default_opt_level >= 0
9061 && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
9062 opts->x_flag_prefetch_loop_arrays = 1;
9063
0cfff2a1
KT
9064 aarch64_override_options_after_change_1 (opts);
9065}
43e9d192 9066
01f44038
KT
9067/* Print a hint with a suggestion for a core or architecture name that
9068 most closely resembles what the user passed in STR. ARCH is true if
9069 the user is asking for an architecture name. ARCH is false if the user
9070 is asking for a core name. */
9071
9072static void
9073aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
9074{
9075 auto_vec<const char *> candidates;
9076 const struct processor *entry = arch ? all_architectures : all_cores;
9077 for (; entry->name != NULL; entry++)
9078 candidates.safe_push (entry->name);
9079 char *s;
9080 const char *hint = candidates_list_and_hint (str, s, candidates);
9081 if (hint)
9082 inform (input_location, "valid arguments are: %s;"
9083 " did you mean %qs?", s, hint);
9084 XDELETEVEC (s);
9085}
9086
9087/* Print a hint with a suggestion for a core name that most closely resembles
9088 what the user passed in STR. */
9089
9090inline static void
9091aarch64_print_hint_for_core (const char *str)
9092{
9093 aarch64_print_hint_for_core_or_arch (str, false);
9094}
9095
9096/* Print a hint with a suggestion for an architecture name that most closely
9097 resembles what the user passed in STR. */
9098
9099inline static void
9100aarch64_print_hint_for_arch (const char *str)
9101{
9102 aarch64_print_hint_for_core_or_arch (str, true);
9103}
9104
0cfff2a1
KT
9105/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
9106 specified in STR and throw errors if appropriate. Put the results if
361fb3ee
KT
9107 they are valid in RES and ISA_FLAGS. Return whether the option is
9108 valid. */
43e9d192 9109
361fb3ee 9110static bool
0cfff2a1
KT
9111aarch64_validate_mcpu (const char *str, const struct processor **res,
9112 unsigned long *isa_flags)
9113{
9114 enum aarch64_parse_opt_result parse_res
9115 = aarch64_parse_cpu (str, res, isa_flags);
9116
9117 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 9118 return true;
0cfff2a1
KT
9119
9120 switch (parse_res)
9121 {
9122 case AARCH64_PARSE_MISSING_ARG:
fb241da2 9123 error ("missing cpu name in %<-mcpu=%s%>", str);
0cfff2a1
KT
9124 break;
9125 case AARCH64_PARSE_INVALID_ARG:
9126 error ("unknown value %qs for -mcpu", str);
01f44038 9127 aarch64_print_hint_for_core (str);
0cfff2a1
KT
9128 break;
9129 case AARCH64_PARSE_INVALID_FEATURE:
fb241da2 9130 error ("invalid feature modifier in %<-mcpu=%s%>", str);
0cfff2a1
KT
9131 break;
9132 default:
9133 gcc_unreachable ();
9134 }
361fb3ee
KT
9135
9136 return false;
0cfff2a1
KT
9137}
9138
9139/* Validate a command-line -march option. Parse the arch and extensions
9140 (if any) specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
9141 results, if they are valid, in RES and ISA_FLAGS. Return whether the
9142 option is valid. */
0cfff2a1 9143
361fb3ee 9144static bool
0cfff2a1 9145aarch64_validate_march (const char *str, const struct processor **res,
01f44038 9146 unsigned long *isa_flags)
0cfff2a1
KT
9147{
9148 enum aarch64_parse_opt_result parse_res
9149 = aarch64_parse_arch (str, res, isa_flags);
9150
9151 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 9152 return true;
0cfff2a1
KT
9153
9154 switch (parse_res)
9155 {
9156 case AARCH64_PARSE_MISSING_ARG:
fb241da2 9157 error ("missing arch name in %<-march=%s%>", str);
0cfff2a1
KT
9158 break;
9159 case AARCH64_PARSE_INVALID_ARG:
9160 error ("unknown value %qs for -march", str);
01f44038 9161 aarch64_print_hint_for_arch (str);
0cfff2a1
KT
9162 break;
9163 case AARCH64_PARSE_INVALID_FEATURE:
fb241da2 9164 error ("invalid feature modifier in %<-march=%s%>", str);
0cfff2a1
KT
9165 break;
9166 default:
9167 gcc_unreachable ();
9168 }
361fb3ee
KT
9169
9170 return false;
0cfff2a1
KT
9171}
9172
9173/* Validate a command-line -mtune option. Parse the cpu
9174 specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
9175 result, if it is valid, in RES. Return whether the option is
9176 valid. */
0cfff2a1 9177
361fb3ee 9178static bool
0cfff2a1
KT
9179aarch64_validate_mtune (const char *str, const struct processor **res)
9180{
9181 enum aarch64_parse_opt_result parse_res
9182 = aarch64_parse_tune (str, res);
9183
9184 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 9185 return true;
0cfff2a1
KT
9186
9187 switch (parse_res)
9188 {
9189 case AARCH64_PARSE_MISSING_ARG:
fb241da2 9190 error ("missing cpu name in %<-mtune=%s%>", str);
0cfff2a1
KT
9191 break;
9192 case AARCH64_PARSE_INVALID_ARG:
9193 error ("unknown value %qs for -mtune", str);
01f44038 9194 aarch64_print_hint_for_core (str);
0cfff2a1
KT
9195 break;
9196 default:
9197 gcc_unreachable ();
9198 }
361fb3ee
KT
9199 return false;
9200}
9201
9202/* Return the CPU corresponding to the enum CPU.
9203 If it doesn't specify a cpu, return the default. */
9204
9205static const struct processor *
9206aarch64_get_tune_cpu (enum aarch64_processor cpu)
9207{
9208 if (cpu != aarch64_none)
9209 return &all_cores[cpu];
9210
9211 /* The & 0x3f is to extract the bottom 6 bits that encode the
9212 default cpu as selected by the --with-cpu GCC configure option
9213 in config.gcc.
9214 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
9215 flags mechanism should be reworked to make it more sane. */
9216 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
9217}
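/* For instance, if config.gcc encoded TARGET_CPU_DEFAULT as
   (cortexa53 | (default ISA flags << 6)), the "& 0x3f" above would recover
   the cortexa53 index, while aarch64_override_options reads the default ISA
   flags back out with "TARGET_CPU_DEFAULT >> 6" (a hypothetical encoding
   example, not a statement about any particular configuration).  */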
9218
9219/* Return the architecture corresponding to the enum ARCH.
9220 If it doesn't specify a valid architecture, return the default. */
9221
9222static const struct processor *
9223aarch64_get_arch (enum aarch64_arch arch)
9224{
9225 if (arch != aarch64_no_arch)
9226 return &all_architectures[arch];
9227
9228 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
9229
9230 return &all_architectures[cpu->arch];
0cfff2a1
KT
9231}
9232
9233/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
9234 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
9235 tuning structs. In particular it must set selected_tune and
9236 aarch64_isa_flags that define the available ISA features and tuning
9237 decisions. It must also set selected_arch as this will be used to
9238 output the .arch asm tags for each function. */
9239
9240static void
9241aarch64_override_options (void)
9242{
9243 unsigned long cpu_isa = 0;
9244 unsigned long arch_isa = 0;
9245 aarch64_isa_flags = 0;
9246
361fb3ee
KT
9247 bool valid_cpu = true;
9248 bool valid_tune = true;
9249 bool valid_arch = true;
9250
0cfff2a1
KT
9251 selected_cpu = NULL;
9252 selected_arch = NULL;
9253 selected_tune = NULL;
9254
9255 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
9256 If either of -march or -mtune is given, they override their
9257 respective component of -mcpu. */
9258 if (aarch64_cpu_string)
361fb3ee
KT
9259 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
9260 &cpu_isa);
0cfff2a1
KT
9261
9262 if (aarch64_arch_string)
361fb3ee
KT
9263 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
9264 &arch_isa);
0cfff2a1
KT
9265
9266 if (aarch64_tune_string)
361fb3ee 9267 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192
IB
9268
9269 /* If the user did not specify a processor, choose the default
9270 one for them. This will be the CPU set during configuration using
a3cd0246 9271 --with-cpu, otherwise it is "generic". */
43e9d192
IB
9272 if (!selected_cpu)
9273 {
0cfff2a1
KT
9274 if (selected_arch)
9275 {
9276 selected_cpu = &all_cores[selected_arch->ident];
9277 aarch64_isa_flags = arch_isa;
361fb3ee 9278 explicit_arch = selected_arch->arch;
0cfff2a1
KT
9279 }
9280 else
9281 {
361fb3ee
KT
9282 /* Get default configure-time CPU. */
9283 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
0cfff2a1
KT
9284 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
9285 }
361fb3ee
KT
9286
9287 if (selected_tune)
9288 explicit_tune_core = selected_tune->ident;
0cfff2a1
KT
9289 }
9290 /* If both -mcpu and -march are specified check that they are architecturally
9291 compatible, warn if they're not and prefer the -march ISA flags. */
9292 else if (selected_arch)
9293 {
9294 if (selected_arch->arch != selected_cpu->arch)
9295 {
9296 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
9297 all_architectures[selected_cpu->arch].name,
9298 selected_arch->name);
9299 }
9300 aarch64_isa_flags = arch_isa;
361fb3ee
KT
9301 explicit_arch = selected_arch->arch;
9302 explicit_tune_core = selected_tune ? selected_tune->ident
9303 : selected_cpu->ident;
0cfff2a1
KT
9304 }
9305 else
9306 {
9307 /* -mcpu but no -march. */
9308 aarch64_isa_flags = cpu_isa;
361fb3ee
KT
9309 explicit_tune_core = selected_tune ? selected_tune->ident
9310 : selected_cpu->ident;
9311 gcc_assert (selected_cpu);
9312 selected_arch = &all_architectures[selected_cpu->arch];
9313 explicit_arch = selected_arch->arch;
43e9d192
IB
9314 }
9315
0cfff2a1
KT
 9316  /* Set the arch as well, as we will need it when outputting
9317 the .arch directive in assembly. */
9318 if (!selected_arch)
9319 {
9320 gcc_assert (selected_cpu);
9321 selected_arch = &all_architectures[selected_cpu->arch];
9322 }
43e9d192 9323
43e9d192 9324 if (!selected_tune)
3edaf26d 9325 selected_tune = selected_cpu;
43e9d192 9326
0cfff2a1
KT
9327#ifndef HAVE_AS_MABI_OPTION
9328 /* The compiler may have been configured with 2.23.* binutils, which does
9329 not have support for ILP32. */
9330 if (TARGET_ILP32)
9331 error ("Assembler does not support -mabi=ilp32");
9332#endif
43e9d192 9333
db58fd89
JW
9334 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
9335 sorry ("Return address signing is only supported for -mabi=lp64");
9336
361fb3ee
KT
9337 /* Make sure we properly set up the explicit options. */
9338 if ((aarch64_cpu_string && valid_cpu)
9339 || (aarch64_tune_string && valid_tune))
9340 gcc_assert (explicit_tune_core != aarch64_none);
9341
9342 if ((aarch64_cpu_string && valid_cpu)
9343 || (aarch64_arch_string && valid_arch))
9344 gcc_assert (explicit_arch != aarch64_no_arch);
9345
0cfff2a1
KT
9346 aarch64_override_options_internal (&global_options);
9347
9348 /* Save these options as the default ones in case we push and pop them later
9349 while processing functions with potential target attributes. */
9350 target_option_default_node = target_option_current_node
9351 = build_target_option_node (&global_options);
43e9d192
IB
9352}
9353
9354/* Implement targetm.override_options_after_change. */
9355
9356static void
9357aarch64_override_options_after_change (void)
9358{
0cfff2a1 9359 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
9360}
9361
9362static struct machine_function *
9363aarch64_init_machine_status (void)
9364{
9365 struct machine_function *machine;
766090c2 9366 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
9367 return machine;
9368}
9369
9370void
9371aarch64_init_expanders (void)
9372{
9373 init_machine_status = aarch64_init_machine_status;
9374}
9375
9376/* A checking mechanism for the implementation of the various code models. */
9377static void
0cfff2a1 9378initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 9379{
0cfff2a1 9380 if (opts->x_flag_pic)
43e9d192 9381 {
0cfff2a1 9382 switch (opts->x_aarch64_cmodel_var)
43e9d192
IB
9383 {
9384 case AARCH64_CMODEL_TINY:
9385 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
9386 break;
9387 case AARCH64_CMODEL_SMALL:
34ecdb0f 9388#ifdef HAVE_AS_SMALL_PIC_RELOCS
1b1e81f8
JW
9389 aarch64_cmodel = (flag_pic == 2
9390 ? AARCH64_CMODEL_SMALL_PIC
9391 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f
JW
9392#else
9393 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
9394#endif
43e9d192
IB
9395 break;
9396 case AARCH64_CMODEL_LARGE:
9397 sorry ("code model %qs with -f%s", "large",
0cfff2a1 9398 opts->x_flag_pic > 1 ? "PIC" : "pic");
1c652781 9399 break;
43e9d192
IB
9400 default:
9401 gcc_unreachable ();
9402 }
9403 }
9404 else
0cfff2a1 9405 aarch64_cmodel = opts->x_aarch64_cmodel_var;
43e9d192
IB
9406}
9407
361fb3ee
KT
9408/* Implement TARGET_OPTION_SAVE. */
9409
9410static void
9411aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
9412{
9413 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
9414}
9415
9416/* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
9417 using the information saved in PTR. */
9418
9419static void
9420aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
9421{
9422 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
9423 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
9424 opts->x_explicit_arch = ptr->x_explicit_arch;
9425 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
9426 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
9427
9428 aarch64_override_options_internal (opts);
9429}
9430
9431/* Implement TARGET_OPTION_PRINT. */
9432
9433static void
9434aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
9435{
9436 const struct processor *cpu
9437 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
9438 unsigned long isa_flags = ptr->x_aarch64_isa_flags;
9439 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
054b4005 9440 std::string extension
04a99ebe 9441 = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
361fb3ee
KT
9442
9443 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
054b4005
JG
9444 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
9445 arch->name, extension.c_str ());
361fb3ee
KT
9446}
9447
d78006d9
KT
9448static GTY(()) tree aarch64_previous_fndecl;
9449
e4ea20c8
KT
9450void
9451aarch64_reset_previous_fndecl (void)
9452{
9453 aarch64_previous_fndecl = NULL;
9454}
9455
acfc1ac1
KT
9456/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
9457 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
9458 make sure optab availability predicates are recomputed when necessary. */
9459
9460void
9461aarch64_save_restore_target_globals (tree new_tree)
9462{
9463 if (TREE_TARGET_GLOBALS (new_tree))
9464 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
9465 else if (new_tree == target_option_default_node)
9466 restore_target_globals (&default_target_globals);
9467 else
9468 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
9469}
9470
d78006d9
KT
9471/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
9472 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
9473 of the function, if such exists. This function may be called multiple
9474 times on a single function so use aarch64_previous_fndecl to avoid
9475 setting up identical state. */
9476
9477static void
9478aarch64_set_current_function (tree fndecl)
9479{
acfc1ac1
KT
9480 if (!fndecl || fndecl == aarch64_previous_fndecl)
9481 return;
9482
d78006d9
KT
9483 tree old_tree = (aarch64_previous_fndecl
9484 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
9485 : NULL_TREE);
9486
acfc1ac1 9487 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
d78006d9 9488
acfc1ac1
KT
9489 /* If current function has no attributes but the previous one did,
9490 use the default node. */
9491 if (!new_tree && old_tree)
9492 new_tree = target_option_default_node;
d78006d9 9493
acfc1ac1
KT
9494 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
9495 the default have been handled by aarch64_save_restore_target_globals from
9496 aarch64_pragma_target_parse. */
9497 if (old_tree == new_tree)
9498 return;
d78006d9 9499
acfc1ac1 9500 aarch64_previous_fndecl = fndecl;
6e17a23b 9501
acfc1ac1
KT
9502 /* First set the target options. */
9503 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6e17a23b 9504
acfc1ac1 9505 aarch64_save_restore_target_globals (new_tree);
d78006d9 9506}
361fb3ee 9507
5a2c8331
KT
9508/* Enum describing the various ways we can handle attributes.
9509 In many cases we can reuse the generic option handling machinery. */
9510
9511enum aarch64_attr_opt_type
9512{
9513 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
9514 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
9515 aarch64_attr_enum, /* Attribute sets an enum variable. */
9516 aarch64_attr_custom /* Attribute requires a custom handling function. */
9517};
9518
9519/* All the information needed to handle a target attribute.
9520 NAME is the name of the attribute.
9c582551 9521 ATTR_TYPE specifies the type of behavior of the attribute as described
5a2c8331
KT
9522 in the definition of enum aarch64_attr_opt_type.
9523 ALLOW_NEG is true if the attribute supports a "no-" form.
9524 HANDLER is the function that takes the attribute string and whether
9525 it is a pragma or attribute and handles the option. It is needed only
9526 when the ATTR_TYPE is aarch64_attr_custom.
9527 OPT_NUM is the enum specifying the option that the attribute modifies.
9c582551 9528 This is needed for attributes that mirror the behavior of a command-line
5a2c8331
KT
9529 option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
9530 aarch64_attr_enum. */
9531
9532struct aarch64_attribute_info
9533{
9534 const char *name;
9535 enum aarch64_attr_opt_type attr_type;
9536 bool allow_neg;
9537 bool (*handler) (const char *, const char *);
9538 enum opt_code opt_num;
9539};
9540
9541/* Handle the ARCH_STR argument to the arch= target attribute.
9542 PRAGMA_OR_ATTR is used in potential error messages. */
9543
9544static bool
9545aarch64_handle_attr_arch (const char *str, const char *pragma_or_attr)
9546{
9547 const struct processor *tmp_arch = NULL;
9548 enum aarch64_parse_opt_result parse_res
9549 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags);
9550
9551 if (parse_res == AARCH64_PARSE_OK)
9552 {
9553 gcc_assert (tmp_arch);
9554 selected_arch = tmp_arch;
9555 explicit_arch = selected_arch->arch;
9556 return true;
9557 }
9558
9559 switch (parse_res)
9560 {
9561 case AARCH64_PARSE_MISSING_ARG:
9562 error ("missing architecture name in 'arch' target %s", pragma_or_attr);
9563 break;
9564 case AARCH64_PARSE_INVALID_ARG:
9565 error ("unknown value %qs for 'arch' target %s", str, pragma_or_attr);
01f44038 9566 aarch64_print_hint_for_arch (str);
5a2c8331
KT
9567 break;
9568 case AARCH64_PARSE_INVALID_FEATURE:
9569 error ("invalid feature modifier %qs for 'arch' target %s",
9570 str, pragma_or_attr);
9571 break;
9572 default:
9573 gcc_unreachable ();
9574 }
9575
9576 return false;
9577}
9578
9579/* Handle the argument CPU_STR to the cpu= target attribute.
9580 PRAGMA_OR_ATTR is used in potential error messages. */
9581
9582static bool
9583aarch64_handle_attr_cpu (const char *str, const char *pragma_or_attr)
9584{
9585 const struct processor *tmp_cpu = NULL;
9586 enum aarch64_parse_opt_result parse_res
9587 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags);
9588
9589 if (parse_res == AARCH64_PARSE_OK)
9590 {
9591 gcc_assert (tmp_cpu);
9592 selected_tune = tmp_cpu;
9593 explicit_tune_core = selected_tune->ident;
9594
9595 selected_arch = &all_architectures[tmp_cpu->arch];
9596 explicit_arch = selected_arch->arch;
9597 return true;
9598 }
9599
9600 switch (parse_res)
9601 {
9602 case AARCH64_PARSE_MISSING_ARG:
9603 error ("missing cpu name in 'cpu' target %s", pragma_or_attr);
9604 break;
9605 case AARCH64_PARSE_INVALID_ARG:
9606 error ("unknown value %qs for 'cpu' target %s", str, pragma_or_attr);
01f44038 9607 aarch64_print_hint_for_core (str);
5a2c8331
KT
9608 break;
9609 case AARCH64_PARSE_INVALID_FEATURE:
9610 error ("invalid feature modifier %qs for 'cpu' target %s",
9611 str, pragma_or_attr);
9612 break;
9613 default:
9614 gcc_unreachable ();
9615 }
9616
9617 return false;
9618}
9619
9620/* Handle the argument STR to the tune= target attribute.
9621 PRAGMA_OR_ATTR is used in potential error messages. */
9622
9623static bool
9624aarch64_handle_attr_tune (const char *str, const char *pragma_or_attr)
9625{
9626 const struct processor *tmp_tune = NULL;
9627 enum aarch64_parse_opt_result parse_res
9628 = aarch64_parse_tune (str, &tmp_tune);
9629
9630 if (parse_res == AARCH64_PARSE_OK)
9631 {
9632 gcc_assert (tmp_tune);
9633 selected_tune = tmp_tune;
9634 explicit_tune_core = selected_tune->ident;
9635 return true;
9636 }
9637
9638 switch (parse_res)
9639 {
9640 case AARCH64_PARSE_INVALID_ARG:
9641 error ("unknown value %qs for 'tune' target %s", str, pragma_or_attr);
01f44038 9642 aarch64_print_hint_for_core (str);
5a2c8331
KT
9643 break;
9644 default:
9645 gcc_unreachable ();
9646 }
9647
9648 return false;
9649}
9650
9651/* Parse an architecture extensions target attribute string specified in STR.
9652 For example "+fp+nosimd". Show any errors if needed. Return TRUE
9653 if successful. Update aarch64_isa_flags to reflect the ISA features
9654 modified.
9655 PRAGMA_OR_ATTR is used in potential error messages. */
9656
9657static bool
9658aarch64_handle_attr_isa_flags (char *str, const char *pragma_or_attr)
9659{
9660 enum aarch64_parse_opt_result parse_res;
9661 unsigned long isa_flags = aarch64_isa_flags;
9662
e4ea20c8
KT
9663 /* We allow "+nothing" in the beginning to clear out all architectural
9664 features if the user wants to handpick specific features. */
9665 if (strncmp ("+nothing", str, 8) == 0)
9666 {
9667 isa_flags = 0;
9668 str += 8;
9669 }
9670
5a2c8331
KT
9671 parse_res = aarch64_parse_extension (str, &isa_flags);
9672
9673 if (parse_res == AARCH64_PARSE_OK)
9674 {
9675 aarch64_isa_flags = isa_flags;
9676 return true;
9677 }
9678
9679 switch (parse_res)
9680 {
9681 case AARCH64_PARSE_MISSING_ARG:
9682 error ("missing feature modifier in target %s %qs",
9683 pragma_or_attr, str);
9684 break;
9685
9686 case AARCH64_PARSE_INVALID_FEATURE:
9687 error ("invalid feature modifier in target %s %qs",
9688 pragma_or_attr, str);
9689 break;
9690
9691 default:
9692 gcc_unreachable ();
9693 }
9694
9695 return false;
9696}
9697
9698/* The target attributes that we support. On top of these we also support just
9699 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
9700 handled explicitly in aarch64_process_one_target_attr. */
9701
9702static const struct aarch64_attribute_info aarch64_attributes[] =
9703{
9704 { "general-regs-only", aarch64_attr_mask, false, NULL,
9705 OPT_mgeneral_regs_only },
9706 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
9707 OPT_mfix_cortex_a53_835769 },
48bb1a55
CL
9708 { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
9709 OPT_mfix_cortex_a53_843419 },
5a2c8331
KT
9710 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
9711 { "strict-align", aarch64_attr_mask, false, NULL, OPT_mstrict_align },
9712 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
9713 OPT_momit_leaf_frame_pointer },
9714 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
9715 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
9716 OPT_march_ },
9717 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
9718 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
9719 OPT_mtune_ },
db58fd89
JW
9720 { "sign-return-address", aarch64_attr_enum, false, NULL,
9721 OPT_msign_return_address_ },
5a2c8331
KT
9722 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
9723};
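/* A hypothetical declaration exercising several entries from the table
   above:
     __attribute__ ((target ("arch=armv8-a+crc,no-omit-leaf-frame-pointer")))
   The string is split on ',' by aarch64_process_target_attr and each token
   is then matched against aarch64_attributes by
   aarch64_process_one_target_attr below.  */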
9724
9725/* Parse ARG_STR which contains the definition of one target attribute.
9726 Show appropriate errors if any or return true if the attribute is valid.
9727 PRAGMA_OR_ATTR holds the string to use in error messages about whether
9728 we're processing a target attribute or pragma. */
9729
9730static bool
9731aarch64_process_one_target_attr (char *arg_str, const char* pragma_or_attr)
9732{
9733 bool invert = false;
9734
9735 size_t len = strlen (arg_str);
9736
9737 if (len == 0)
9738 {
9739 error ("malformed target %s", pragma_or_attr);
9740 return false;
9741 }
9742
9743 char *str_to_check = (char *) alloca (len + 1);
9744 strcpy (str_to_check, arg_str);
9745
9746 /* Skip leading whitespace. */
9747 while (*str_to_check == ' ' || *str_to_check == '\t')
9748 str_to_check++;
9749
9750 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
9751 It is easier to detect and handle it explicitly here rather than going
9752 through the machinery for the rest of the target attributes in this
9753 function. */
9754 if (*str_to_check == '+')
9755 return aarch64_handle_attr_isa_flags (str_to_check, pragma_or_attr);
9756
9757 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
9758 {
9759 invert = true;
9760 str_to_check += 3;
9761 }
9762 char *arg = strchr (str_to_check, '=');
9763
9764 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
9765 and point ARG to "foo". */
9766 if (arg)
9767 {
9768 *arg = '\0';
9769 arg++;
9770 }
9771 const struct aarch64_attribute_info *p_attr;
16d12992 9772 bool found = false;
5a2c8331
KT
9773 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
9774 {
9775 /* If the names don't match up, or the user has given an argument
9776 to an attribute that doesn't accept one, or didn't give an argument
9777 to an attribute that expects one, fail to match. */
9778 if (strcmp (str_to_check, p_attr->name) != 0)
9779 continue;
9780
16d12992 9781 found = true;
5a2c8331
KT
9782 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
9783 || p_attr->attr_type == aarch64_attr_enum;
9784
9785 if (attr_need_arg_p ^ (arg != NULL))
9786 {
9787 error ("target %s %qs does not accept an argument",
9788 pragma_or_attr, str_to_check);
9789 return false;
9790 }
9791
9792 /* If the name matches but the attribute does not allow "no-" versions
9793 then we can't match. */
9794 if (invert && !p_attr->allow_neg)
9795 {
9796 error ("target %s %qs does not allow a negated form",
9797 pragma_or_attr, str_to_check);
9798 return false;
9799 }
9800
9801 switch (p_attr->attr_type)
9802 {
9803 /* Has a custom handler registered.
9804 For example, cpu=, arch=, tune=. */
9805 case aarch64_attr_custom:
9806 gcc_assert (p_attr->handler);
9807 if (!p_attr->handler (arg, pragma_or_attr))
9808 return false;
9809 break;
9810
9811 /* Either set or unset a boolean option. */
9812 case aarch64_attr_bool:
9813 {
9814 struct cl_decoded_option decoded;
9815
9816 generate_option (p_attr->opt_num, NULL, !invert,
9817 CL_TARGET, &decoded);
9818 aarch64_handle_option (&global_options, &global_options_set,
9819 &decoded, input_location);
9820 break;
9821 }
9822 /* Set or unset a bit in the target_flags. aarch64_handle_option
9823 should know what mask to apply given the option number. */
9824 case aarch64_attr_mask:
9825 {
9826 struct cl_decoded_option decoded;
9827 /* We only need to specify the option number.
9828 aarch64_handle_option will know which mask to apply. */
9829 decoded.opt_index = p_attr->opt_num;
9830 decoded.value = !invert;
9831 aarch64_handle_option (&global_options, &global_options_set,
9832 &decoded, input_location);
9833 break;
9834 }
9835 /* Use the option setting machinery to set an option to an enum. */
9836 case aarch64_attr_enum:
9837 {
9838 gcc_assert (arg);
9839 bool valid;
9840 int value;
9841 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
9842 &value, CL_TARGET);
9843 if (valid)
9844 {
9845 set_option (&global_options, NULL, p_attr->opt_num, value,
9846 NULL, DK_UNSPECIFIED, input_location,
9847 global_dc);
9848 }
9849 else
9850 {
9851 error ("target %s %s=%s is not valid",
9852 pragma_or_attr, str_to_check, arg);
9853 }
9854 break;
9855 }
9856 default:
9857 gcc_unreachable ();
9858 }
9859 }
9860
16d12992
KT
9861 /* If we reached here we either have found an attribute and validated
9862 it or didn't match any. If we matched an attribute but its arguments
9863 were malformed we will have returned false already. */
9864 return found;
5a2c8331
KT
9865}
9866
9867/* Count how many times the character C appears in
9868 NULL-terminated string STR. */
9869
9870static unsigned int
9871num_occurences_in_str (char c, char *str)
9872{
9873 unsigned int res = 0;
9874 while (*str != '\0')
9875 {
9876 if (*str == c)
9877 res++;
9878
9879 str++;
9880 }
9881
9882 return res;
9883}
9884
9885/* Parse the tree in ARGS that contains the target attribute information
9886 and update the global target options space. PRAGMA_OR_ATTR is a string
9887 to be used in error messages, specifying whether this is processing
9888 a target attribute or a target pragma. */
9889
9890bool
9891aarch64_process_target_attr (tree args, const char* pragma_or_attr)
9892{
9893 if (TREE_CODE (args) == TREE_LIST)
9894 {
9895 do
9896 {
9897 tree head = TREE_VALUE (args);
9898 if (head)
9899 {
9900 if (!aarch64_process_target_attr (head, pragma_or_attr))
9901 return false;
9902 }
9903 args = TREE_CHAIN (args);
9904 } while (args);
9905
9906 return true;
9907 }
3b6cb9e3
ML
9908
9909 if (TREE_CODE (args) != STRING_CST)
9910 {
9911 error ("attribute %<target%> argument not a string");
9912 return false;
9913 }
5a2c8331
KT
9914
9915 size_t len = strlen (TREE_STRING_POINTER (args));
9916 char *str_to_check = (char *) alloca (len + 1);
9917 strcpy (str_to_check, TREE_STRING_POINTER (args));
9918
9919 if (len == 0)
9920 {
9921 error ("malformed target %s value", pragma_or_attr);
9922 return false;
9923 }
9924
 9925  /* Used to catch empty entries between commas, i.e.
9926 attribute ((target ("attr1,,attr2"))). */
9927 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
9928
9929 /* Handle multiple target attributes separated by ','. */
9930 char *token = strtok (str_to_check, ",");
9931
9932 unsigned int num_attrs = 0;
9933 while (token)
9934 {
9935 num_attrs++;
9936 if (!aarch64_process_one_target_attr (token, pragma_or_attr))
9937 {
9938 error ("target %s %qs is invalid", pragma_or_attr, token);
9939 return false;
9940 }
9941
9942 token = strtok (NULL, ",");
9943 }
9944
9945 if (num_attrs != num_commas + 1)
9946 {
9947 error ("malformed target %s list %qs",
9948 pragma_or_attr, TREE_STRING_POINTER (args));
9949 return false;
9950 }
9951
9952 return true;
9953}
9954
9955/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
9956 process attribute ((target ("..."))). */
9957
9958static bool
9959aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
9960{
9961 struct cl_target_option cur_target;
9962 bool ret;
9963 tree old_optimize;
9964 tree new_target, new_optimize;
9965 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
91d0e8de
KT
9966
9967 /* If what we're processing is the current pragma string then the
9968 target option node is already stored in target_option_current_node
9969 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
9970 having to re-parse the string. This is especially useful to keep
9971 arm_neon.h compile times down since that header contains a lot
9972 of intrinsics enclosed in pragmas. */
9973 if (!existing_target && args == current_target_pragma)
9974 {
9975 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
9976 return true;
9977 }
5a2c8331
KT
9978 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
9979
9980 old_optimize = build_optimization_node (&global_options);
9981 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
9982
9983 /* If the function changed the optimization levels as well as setting
9984 target options, start with the optimizations specified. */
9985 if (func_optimize && func_optimize != old_optimize)
9986 cl_optimization_restore (&global_options,
9987 TREE_OPTIMIZATION (func_optimize));
9988
9989 /* Save the current target options to restore at the end. */
9990 cl_target_option_save (&cur_target, &global_options);
9991
9992 /* If fndecl already has some target attributes applied to it, unpack
9993 them so that we add this attribute on top of them, rather than
9994 overwriting them. */
9995 if (existing_target)
9996 {
9997 struct cl_target_option *existing_options
9998 = TREE_TARGET_OPTION (existing_target);
9999
10000 if (existing_options)
10001 cl_target_option_restore (&global_options, existing_options);
10002 }
10003 else
10004 cl_target_option_restore (&global_options,
10005 TREE_TARGET_OPTION (target_option_current_node));
10006
10007
10008 ret = aarch64_process_target_attr (args, "attribute");
10009
10010 /* Set up any additional state. */
10011 if (ret)
10012 {
10013 aarch64_override_options_internal (&global_options);
e95a988a
KT
10014 /* Initialize SIMD builtins if we haven't already.
10015 Set current_target_pragma to NULL for the duration so that
10016 the builtin initialization code doesn't try to tag the functions
10017 being built with the attributes specified by any current pragma, thus
10018 going into an infinite recursion. */
10019 if (TARGET_SIMD)
10020 {
10021 tree saved_current_target_pragma = current_target_pragma;
10022 current_target_pragma = NULL;
10023 aarch64_init_simd_builtins ();
10024 current_target_pragma = saved_current_target_pragma;
10025 }
5a2c8331
KT
10026 new_target = build_target_option_node (&global_options);
10027 }
10028 else
10029 new_target = NULL;
10030
10031 new_optimize = build_optimization_node (&global_options);
10032
10033 if (fndecl && ret)
10034 {
10035 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
10036
10037 if (old_optimize != new_optimize)
10038 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
10039 }
10040
10041 cl_target_option_restore (&global_options, &cur_target);
10042
10043 if (old_optimize != new_optimize)
10044 cl_optimization_restore (&global_options,
10045 TREE_OPTIMIZATION (old_optimize));
10046 return ret;
10047}
10048
1fd8d40c
KT
10049/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
10050 tri-bool options (yes, no, don't care) and the default value is
10051 DEF, determine whether to reject inlining. */
10052
10053static bool
10054aarch64_tribools_ok_for_inlining_p (int caller, int callee,
10055 int dont_care, int def)
10056{
10057 /* If the callee doesn't care, always allow inlining. */
10058 if (callee == dont_care)
10059 return true;
10060
10061 /* If the caller doesn't care, always allow inlining. */
10062 if (caller == dont_care)
10063 return true;
10064
10065 /* Otherwise, allow inlining if either the callee and caller values
10066 agree, or if the callee is using the default value. */
10067 return (callee == caller || callee == def);
10068}
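/* For example (hypothetically), with DONT_CARE == 2 and DEF == 1 a callee
   value of 2 always permits inlining, whereas a callee compiled with an
   explicit 0 can only be inlined into a caller whose value is also 0 or
   which does not care.  */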
10069
10070/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
10071 to inline CALLEE into CALLER based on target-specific info.
10072 Make sure that the caller and callee have compatible architectural
10073 features. Then go through the other possible target attributes
10074 and see if they can block inlining. Try not to reject always_inline
10075 callees unless they are incompatible architecturally. */
10076
10077static bool
10078aarch64_can_inline_p (tree caller, tree callee)
10079{
10080 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
10081 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
10082
10083 /* If callee has no option attributes, then it is ok to inline. */
10084 if (!callee_tree)
10085 return true;
10086
10087 struct cl_target_option *caller_opts
10088 = TREE_TARGET_OPTION (caller_tree ? caller_tree
10089 : target_option_default_node);
10090
10091 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
10092
10093
10094 /* Callee's ISA flags should be a subset of the caller's. */
10095 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
10096 != callee_opts->x_aarch64_isa_flags)
10097 return false;
10098
10099 /* Allow non-strict aligned functions inlining into strict
10100 aligned ones. */
10101 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
10102 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
10103 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
10104 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
10105 return false;
10106
10107 bool always_inline = lookup_attribute ("always_inline",
10108 DECL_ATTRIBUTES (callee));
10109
10110 /* If the architectural features match up and the callee is always_inline
10111 then the other attributes don't matter. */
10112 if (always_inline)
10113 return true;
10114
10115 if (caller_opts->x_aarch64_cmodel_var
10116 != callee_opts->x_aarch64_cmodel_var)
10117 return false;
10118
10119 if (caller_opts->x_aarch64_tls_dialect
10120 != callee_opts->x_aarch64_tls_dialect)
10121 return false;
10122
10123 /* Honour explicit requests to workaround errata. */
10124 if (!aarch64_tribools_ok_for_inlining_p (
10125 caller_opts->x_aarch64_fix_a53_err835769,
10126 callee_opts->x_aarch64_fix_a53_err835769,
10127 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
10128 return false;
10129
48bb1a55
CL
10130 if (!aarch64_tribools_ok_for_inlining_p (
10131 caller_opts->x_aarch64_fix_a53_err843419,
10132 callee_opts->x_aarch64_fix_a53_err843419,
10133 2, TARGET_FIX_ERR_A53_843419))
10134 return false;
10135
1fd8d40c
KT
10136 /* If the user explicitly specified -momit-leaf-frame-pointer for the
 10137     caller and callee and they don't match up, reject inlining.  */
10138 if (!aarch64_tribools_ok_for_inlining_p (
10139 caller_opts->x_flag_omit_leaf_frame_pointer,
10140 callee_opts->x_flag_omit_leaf_frame_pointer,
10141 2, 1))
10142 return false;
10143
10144 /* If the callee has specific tuning overrides, respect them. */
10145 if (callee_opts->x_aarch64_override_tune_string != NULL
10146 && caller_opts->x_aarch64_override_tune_string == NULL)
10147 return false;
10148
10149 /* If the user specified tuning override strings for the
10150 caller and callee and they don't match up, reject inlining.
10151 We just do a string compare here, we don't analyze the meaning
10152 of the string, as it would be too costly for little gain. */
10153 if (callee_opts->x_aarch64_override_tune_string
10154 && caller_opts->x_aarch64_override_tune_string
10155 && (strcmp (callee_opts->x_aarch64_override_tune_string,
10156 caller_opts->x_aarch64_override_tune_string) != 0))
10157 return false;
10158
10159 return true;
10160}
10161
43e9d192
IB
10162/* Return true if SYMBOL_REF X binds locally. */
10163
10164static bool
10165aarch64_symbol_binds_local_p (const_rtx x)
10166{
10167 return (SYMBOL_REF_DECL (x)
10168 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
10169 : SYMBOL_REF_LOCAL_P (x));
10170}
10171
 10172/* Return true if SYMBOL_REF X is thread local.  */
10173static bool
10174aarch64_tls_symbol_p (rtx x)
10175{
10176 if (! TARGET_HAVE_TLS)
10177 return false;
10178
10179 if (GET_CODE (x) != SYMBOL_REF)
10180 return false;
10181
10182 return SYMBOL_REF_TLS_MODEL (x) != 0;
10183}
10184
10185/* Classify a TLS symbol into one of the TLS kinds. */
10186enum aarch64_symbol_type
10187aarch64_classify_tls_symbol (rtx x)
10188{
10189 enum tls_model tls_kind = tls_symbolic_operand_type (x);
10190
10191 switch (tls_kind)
10192 {
10193 case TLS_MODEL_GLOBAL_DYNAMIC:
10194 case TLS_MODEL_LOCAL_DYNAMIC:
10195 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
10196
10197 case TLS_MODEL_INITIAL_EXEC:
5ae7caad
JW
10198 switch (aarch64_cmodel)
10199 {
10200 case AARCH64_CMODEL_TINY:
10201 case AARCH64_CMODEL_TINY_PIC:
10202 return SYMBOL_TINY_TLSIE;
10203 default:
79496620 10204 return SYMBOL_SMALL_TLSIE;
5ae7caad 10205 }
43e9d192
IB
10206
10207 case TLS_MODEL_LOCAL_EXEC:
cbf5629e
JW
10208 if (aarch64_tls_size == 12)
10209 return SYMBOL_TLSLE12;
10210 else if (aarch64_tls_size == 24)
10211 return SYMBOL_TLSLE24;
10212 else if (aarch64_tls_size == 32)
10213 return SYMBOL_TLSLE32;
10214 else if (aarch64_tls_size == 48)
10215 return SYMBOL_TLSLE48;
10216 else
10217 gcc_unreachable ();
43e9d192
IB
10218
10219 case TLS_MODEL_EMULATED:
10220 case TLS_MODEL_NONE:
10221 return SYMBOL_FORCE_TO_MEM;
10222
10223 default:
10224 gcc_unreachable ();
10225 }
10226}
10227
10228/* Return the method that should be used to access SYMBOL_REF or
a6e0bfa7 10229 LABEL_REF X. */
17f4d4bf 10230
43e9d192 10231enum aarch64_symbol_type
a6e0bfa7 10232aarch64_classify_symbol (rtx x, rtx offset)
43e9d192
IB
10233{
10234 if (GET_CODE (x) == LABEL_REF)
10235 {
10236 switch (aarch64_cmodel)
10237 {
10238 case AARCH64_CMODEL_LARGE:
10239 return SYMBOL_FORCE_TO_MEM;
10240
10241 case AARCH64_CMODEL_TINY_PIC:
10242 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
10243 return SYMBOL_TINY_ABSOLUTE;
10244
1b1e81f8 10245 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
10246 case AARCH64_CMODEL_SMALL_PIC:
10247 case AARCH64_CMODEL_SMALL:
10248 return SYMBOL_SMALL_ABSOLUTE;
10249
10250 default:
10251 gcc_unreachable ();
10252 }
10253 }
10254
17f4d4bf 10255 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 10256 {
43e9d192
IB
10257 if (aarch64_tls_symbol_p (x))
10258 return aarch64_classify_tls_symbol (x);
10259
17f4d4bf
CSS
10260 switch (aarch64_cmodel)
10261 {
10262 case AARCH64_CMODEL_TINY:
15f6e0da 10263 /* When we retrieve symbol + offset address, we have to make sure
f8b756b7
TB
10264 the offset does not cause overflow of the final address. But
10265 we have no way of knowing the address of symbol at compile time
10266 so we can't accurately say if the distance between the PC and
 10267	     symbol + offset is outside the addressable range of +/-1M in the
10268 TINY code model. So we rely on images not being greater than
10269 1M and cap the offset at 1M and anything beyond 1M will have to
15f6e0da
RR
10270 be loaded using an alternative mechanism. Furthermore if the
10271 symbol is a weak reference to something that isn't known to
10272 resolve to a symbol in this module, then force to memory. */
10273 if ((SYMBOL_REF_WEAK (x)
10274 && !aarch64_symbol_binds_local_p (x))
f8b756b7 10275 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
a5350ddc
CSS
10276 return SYMBOL_FORCE_TO_MEM;
10277 return SYMBOL_TINY_ABSOLUTE;
10278
17f4d4bf 10279 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
10280 /* Same reasoning as the tiny code model, but the offset cap here is
10281 4G. */
15f6e0da
RR
10282 if ((SYMBOL_REF_WEAK (x)
10283 && !aarch64_symbol_binds_local_p (x))
3ff5d1f0
TB
10284 || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
10285 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
10286 return SYMBOL_FORCE_TO_MEM;
10287 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 10288
17f4d4bf 10289 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 10290 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 10291 return SYMBOL_TINY_GOT;
38e6c9a6
MS
10292 return SYMBOL_TINY_ABSOLUTE;
10293
1b1e81f8 10294 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
10295 case AARCH64_CMODEL_SMALL_PIC:
10296 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
10297 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
10298 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 10299 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 10300
9ee6540a
WD
10301 case AARCH64_CMODEL_LARGE:
10302 /* This is alright even in PIC code as the constant
10303 pool reference is always PC relative and within
10304 the same translation unit. */
d47d34bb 10305 if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x))
9ee6540a
WD
10306 return SYMBOL_SMALL_ABSOLUTE;
10307 else
10308 return SYMBOL_FORCE_TO_MEM;
10309
17f4d4bf
CSS
10310 default:
10311 gcc_unreachable ();
10312 }
43e9d192 10313 }
17f4d4bf 10314
43e9d192
IB
10315 /* By default push everything into the constant pool. */
10316 return SYMBOL_FORCE_TO_MEM;
10317}
10318
43e9d192
IB
10319bool
10320aarch64_constant_address_p (rtx x)
10321{
10322 return (CONSTANT_P (x) && memory_address_p (DImode, x));
10323}
10324
10325bool
10326aarch64_legitimate_pic_operand_p (rtx x)
10327{
10328 if (GET_CODE (x) == SYMBOL_REF
10329 || (GET_CODE (x) == CONST
10330 && GET_CODE (XEXP (x, 0)) == PLUS
10331 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
10332 return false;
10333
10334 return true;
10335}
10336
3520f7cc
JG
10337/* Return true if X holds either a quarter-precision or
10338 floating-point +0.0 constant. */
10339static bool
a2170965 10340aarch64_valid_floating_const (rtx x)
3520f7cc
JG
10341{
10342 if (!CONST_DOUBLE_P (x))
10343 return false;
10344
a2170965
TC
10345 /* This call determines which constants can be used in mov<mode>
10346 as integer moves instead of constant loads. */
10347 if (aarch64_float_const_rtx_p (x))
6a0f8c01
JW
10348 return true;
10349
3520f7cc
JG
10350 return aarch64_float_const_representable_p (x);
10351}
10352
43e9d192 10353static bool
ef4bddc2 10354aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192
IB
10355{
10356 /* Do not allow vector struct mode constants. We could support
10357 0 and -1 easily, but they need support in aarch64-simd.md. */
10358 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
10359 return false;
10360
a2170965
TC
10361 /* For these cases we never want to use a literal load.
10362 As such we have to prevent the compiler from forcing these
10363 to memory. */
43e9d192 10364 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 10365 && aarch64_simd_valid_immediate (x, mode, false, NULL))
a2170965
TC
10366 || CONST_INT_P (x)
10367 || aarch64_valid_floating_const (x)
10368 || aarch64_can_const_movi_rtx_p (x, mode)
10369 || aarch64_float_const_rtx_p (x))
3520f7cc 10370 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
10371
10372 if (GET_CODE (x) == HIGH
10373 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
10374 return true;
10375
f28e54bd
WD
10376 /* Treat symbols as constants. Avoid TLS symbols as they are complex,
10377 so spilling them is better than rematerialization. */
10378 if (SYMBOL_REF_P (x) && !SYMBOL_REF_TLS_MODEL (x))
10379 return true;
10380
43e9d192
IB
10381 return aarch64_constant_address_p (x);
10382}
10383
a5bc806c 10384rtx
43e9d192
IB
10385aarch64_load_tp (rtx target)
10386{
10387 if (!target
10388 || GET_MODE (target) != Pmode
10389 || !register_operand (target, Pmode))
10390 target = gen_reg_rtx (Pmode);
10391
10392 /* Can return in any reg. */
10393 emit_insn (gen_aarch64_load_tp_hard (target));
10394 return target;
10395}
10396
43e9d192
IB
10397/* On AAPCS systems, this is the "struct __va_list". */
10398static GTY(()) tree va_list_type;
10399
10400/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
10401 Return the type to use as __builtin_va_list.
10402
10403 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
10404
10405 struct __va_list
10406 {
10407 void *__stack;
10408 void *__gr_top;
10409 void *__vr_top;
10410 int __gr_offs;
10411 int __vr_offs;
10412 }; */
10413
10414static tree
10415aarch64_build_builtin_va_list (void)
10416{
10417 tree va_list_name;
10418 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
10419
10420 /* Create the type. */
10421 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
10422 /* Give it the required name. */
10423 va_list_name = build_decl (BUILTINS_LOCATION,
10424 TYPE_DECL,
10425 get_identifier ("__va_list"),
10426 va_list_type);
10427 DECL_ARTIFICIAL (va_list_name) = 1;
10428 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 10429 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
10430
10431 /* Create the fields. */
10432 f_stack = build_decl (BUILTINS_LOCATION,
10433 FIELD_DECL, get_identifier ("__stack"),
10434 ptr_type_node);
10435 f_grtop = build_decl (BUILTINS_LOCATION,
10436 FIELD_DECL, get_identifier ("__gr_top"),
10437 ptr_type_node);
10438 f_vrtop = build_decl (BUILTINS_LOCATION,
10439 FIELD_DECL, get_identifier ("__vr_top"),
10440 ptr_type_node);
10441 f_groff = build_decl (BUILTINS_LOCATION,
10442 FIELD_DECL, get_identifier ("__gr_offs"),
10443 integer_type_node);
10444 f_vroff = build_decl (BUILTINS_LOCATION,
10445 FIELD_DECL, get_identifier ("__vr_offs"),
10446 integer_type_node);
10447
88e3bdd1 10448 /* Tell tree-stdarg pass about our internal offset fields.
3fd6b9cc
JW
 10449	   NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
 10450	   purposes, to identify whether the code is updating va_list internal
 10451	   offset fields in an irregular way.  */
10452 va_list_gpr_counter_field = f_groff;
10453 va_list_fpr_counter_field = f_vroff;
10454
43e9d192
IB
10455 DECL_ARTIFICIAL (f_stack) = 1;
10456 DECL_ARTIFICIAL (f_grtop) = 1;
10457 DECL_ARTIFICIAL (f_vrtop) = 1;
10458 DECL_ARTIFICIAL (f_groff) = 1;
10459 DECL_ARTIFICIAL (f_vroff) = 1;
10460
10461 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
10462 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
10463 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
10464 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
10465 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
10466
10467 TYPE_FIELDS (va_list_type) = f_stack;
10468 DECL_CHAIN (f_stack) = f_grtop;
10469 DECL_CHAIN (f_grtop) = f_vrtop;
10470 DECL_CHAIN (f_vrtop) = f_groff;
10471 DECL_CHAIN (f_groff) = f_vroff;
10472
10473 /* Compute its layout. */
10474 layout_type (va_list_type);
10475
10476 return va_list_type;
10477}
10478
10479/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
10480static void
10481aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
10482{
10483 const CUMULATIVE_ARGS *cum;
10484 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
10485 tree stack, grtop, vrtop, groff, vroff;
10486 tree t;
88e3bdd1
JW
10487 int gr_save_area_size = cfun->va_list_gpr_size;
10488 int vr_save_area_size = cfun->va_list_fpr_size;
43e9d192
IB
10489 int vr_offset;
10490
10491 cum = &crtl->args.info;
88e3bdd1
JW
10492 if (cfun->va_list_gpr_size)
10493 gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
10494 cfun->va_list_gpr_size);
10495 if (cfun->va_list_fpr_size)
10496 vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
10497 * UNITS_PER_VREG, cfun->va_list_fpr_size);
43e9d192 10498
d5726973 10499 if (!TARGET_FLOAT)
43e9d192 10500 {
261fb553 10501 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
10502 vr_save_area_size = 0;
10503 }
10504
10505 f_stack = TYPE_FIELDS (va_list_type_node);
10506 f_grtop = DECL_CHAIN (f_stack);
10507 f_vrtop = DECL_CHAIN (f_grtop);
10508 f_groff = DECL_CHAIN (f_vrtop);
10509 f_vroff = DECL_CHAIN (f_groff);
10510
10511 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
10512 NULL_TREE);
10513 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
10514 NULL_TREE);
10515 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
10516 NULL_TREE);
10517 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
10518 NULL_TREE);
10519 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
10520 NULL_TREE);
10521
10522 /* Emit code to initialize STACK, which points to the next varargs stack
10523 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
10524 by named arguments. STACK is 8-byte aligned. */
10525 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
10526 if (cum->aapcs_stack_size > 0)
10527 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
10528 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
10529 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10530
10531 /* Emit code to initialize GRTOP, the top of the GR save area.
 10532	     virtual_incoming_args_rtx should have been 16-byte aligned.  */
10533 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
10534 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
10535 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10536
10537 /* Emit code to initialize VRTOP, the top of the VR save area.
10538 This address is gr_save_area_bytes below GRTOP, rounded
10539 down to the next 16-byte boundary. */
10540 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
4f59f9f2
UB
10541 vr_offset = ROUND_UP (gr_save_area_size,
10542 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
10543
10544 if (vr_offset)
10545 t = fold_build_pointer_plus_hwi (t, -vr_offset);
10546 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
10547 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10548
10549 /* Emit code to initialize GROFF, the offset from GRTOP of the
10550 next GPR argument. */
10551 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
10552 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
10553 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10554
 10555	  /* Likewise emit code to initialize VROFF, the offset from VRTOP
10556 of the next VR argument. */
10557 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
10558 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
10559 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10560}
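/* Illustrative values (a sketch, not emitted code): assuming a function
   "int f (int a, int b, ...)" whose named arguments use only GP registers,
   and assuming the tree-stdarg pass has not shrunk the save areas, the
   expansion above leaves __gr_offs = -(8 - 2) * 8 = -48 and
   __vr_offs = -8 * 16 = -128, with __gr_top just past the GR save area,
   __vr_top ROUND_UP (48, 16) = 48 bytes below it, and __stack pointing at
   the first anonymous argument passed on the stack.  */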
10561
10562/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
10563
10564static tree
10565aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
10566 gimple_seq *post_p ATTRIBUTE_UNUSED)
10567{
10568 tree addr;
10569 bool indirect_p;
10570 bool is_ha; /* is HFA or HVA. */
10571 bool dw_align; /* double-word align. */
ef4bddc2 10572 machine_mode ag_mode = VOIDmode;
43e9d192 10573 int nregs;
ef4bddc2 10574 machine_mode mode;
43e9d192
IB
10575
10576 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
10577 tree stack, f_top, f_off, off, arg, roundup, on_stack;
10578 HOST_WIDE_INT size, rsize, adjust, align;
10579 tree t, u, cond1, cond2;
10580
10581 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
10582 if (indirect_p)
10583 type = build_pointer_type (type);
10584
10585 mode = TYPE_MODE (type);
10586
10587 f_stack = TYPE_FIELDS (va_list_type_node);
10588 f_grtop = DECL_CHAIN (f_stack);
10589 f_vrtop = DECL_CHAIN (f_grtop);
10590 f_groff = DECL_CHAIN (f_vrtop);
10591 f_vroff = DECL_CHAIN (f_groff);
10592
10593 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
10594 f_stack, NULL_TREE);
10595 size = int_size_in_bytes (type);
985b8393 10596 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
43e9d192
IB
10597
10598 dw_align = false;
10599 adjust = 0;
10600 if (aarch64_vfp_is_call_or_return_candidate (mode,
10601 type,
10602 &ag_mode,
10603 &nregs,
10604 &is_ha))
10605 {
10606 /* TYPE passed in fp/simd registers. */
d5726973 10607 if (!TARGET_FLOAT)
261fb553 10608 aarch64_err_no_fpadvsimd (mode, "varargs");
43e9d192
IB
10609
10610 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
10611 unshare_expr (valist), f_vrtop, NULL_TREE);
10612 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
10613 unshare_expr (valist), f_vroff, NULL_TREE);
10614
10615 rsize = nregs * UNITS_PER_VREG;
10616
10617 if (is_ha)
10618 {
10619 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
10620 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
10621 }
10622 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
10623 && size < UNITS_PER_VREG)
10624 {
10625 adjust = UNITS_PER_VREG - size;
10626 }
10627 }
10628 else
10629 {
10630 /* TYPE passed in general registers. */
10631 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
10632 unshare_expr (valist), f_grtop, NULL_TREE);
10633 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
10634 unshare_expr (valist), f_groff, NULL_TREE);
4f59f9f2 10635 rsize = ROUND_UP (size, UNITS_PER_WORD);
43e9d192
IB
10636 nregs = rsize / UNITS_PER_WORD;
10637
10638 if (align > 8)
10639 dw_align = true;
10640
10641 if (BLOCK_REG_PADDING (mode, type, 1) == downward
10642 && size < UNITS_PER_WORD)
10643 {
10644 adjust = UNITS_PER_WORD - size;
10645 }
10646 }
10647
10648 /* Get a local temporary for the field value. */
10649 off = get_initialized_tmp_var (f_off, pre_p, NULL);
10650
10651 /* Emit code to branch if off >= 0. */
10652 t = build2 (GE_EXPR, boolean_type_node, off,
10653 build_int_cst (TREE_TYPE (off), 0));
10654 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
10655
10656 if (dw_align)
10657 {
10658 /* Emit: offs = (offs + 15) & -16. */
10659 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
10660 build_int_cst (TREE_TYPE (off), 15));
10661 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
10662 build_int_cst (TREE_TYPE (off), -16));
10663 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
10664 }
10665 else
10666 roundup = NULL;
10667
10668 /* Update ap.__[g|v]r_offs */
10669 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
10670 build_int_cst (TREE_TYPE (off), rsize));
10671 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
10672
10673 /* String up. */
10674 if (roundup)
10675 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
10676
10677 /* [cond2] if (ap.__[g|v]r_offs > 0) */
10678 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
10679 build_int_cst (TREE_TYPE (f_off), 0));
10680 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
10681
10682 /* String up: make sure the assignment happens before the use. */
10683 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
10684 COND_EXPR_ELSE (cond1) = t;
10685
10686 /* Prepare the trees handling the argument that is passed on the stack;
10687 the top level node will store in ON_STACK. */
10688 arg = get_initialized_tmp_var (stack, pre_p, NULL);
10689 if (align > 8)
10690 {
10691 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
10692 t = fold_convert (intDI_type_node, arg);
10693 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
10694 build_int_cst (TREE_TYPE (t), 15));
10695 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10696 build_int_cst (TREE_TYPE (t), -16));
10697 t = fold_convert (TREE_TYPE (arg), t);
10698 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
10699 }
10700 else
10701 roundup = NULL;
10702 /* Advance ap.__stack */
10703 t = fold_convert (intDI_type_node, arg);
10704 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
10705 build_int_cst (TREE_TYPE (t), size + 7));
10706 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10707 build_int_cst (TREE_TYPE (t), -8));
10708 t = fold_convert (TREE_TYPE (arg), t);
10709 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
10710 /* String up roundup and advance. */
10711 if (roundup)
10712 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
10713 /* String up with arg */
10714 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
10715 /* Big-endianness related address adjustment. */
10716 if (BLOCK_REG_PADDING (mode, type, 1) == downward
10717 && size < UNITS_PER_WORD)
10718 {
10719 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
10720 size_int (UNITS_PER_WORD - size));
10721 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
10722 }
10723
10724 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
10725 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
10726
10727 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
10728 t = off;
10729 if (adjust)
10730 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
10731 build_int_cst (TREE_TYPE (off), adjust));
10732
10733 t = fold_convert (sizetype, t);
10734 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
10735
10736 if (is_ha)
10737 {
10738 /* type ha; // treat as "struct {ftype field[n];}"
10739 ... [computing offs]
 10740	 for (i = 0; i < nregs; ++i, offs += 16)
10741 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
10742 return ha; */
10743 int i;
10744 tree tmp_ha, field_t, field_ptr_t;
10745
10746 /* Declare a local variable. */
10747 tmp_ha = create_tmp_var_raw (type, "ha");
10748 gimple_add_tmp_var (tmp_ha);
10749
10750 /* Establish the base type. */
10751 switch (ag_mode)
10752 {
4e10a5a7 10753 case E_SFmode:
43e9d192
IB
10754 field_t = float_type_node;
10755 field_ptr_t = float_ptr_type_node;
10756 break;
4e10a5a7 10757 case E_DFmode:
43e9d192
IB
10758 field_t = double_type_node;
10759 field_ptr_t = double_ptr_type_node;
10760 break;
4e10a5a7 10761 case E_TFmode:
43e9d192
IB
10762 field_t = long_double_type_node;
10763 field_ptr_t = long_double_ptr_type_node;
10764 break;
4e10a5a7 10765 case E_HFmode:
1b62ed4f
JG
10766 field_t = aarch64_fp16_type_node;
10767 field_ptr_t = aarch64_fp16_ptr_type_node;
43e9d192 10768 break;
4e10a5a7
RS
10769 case E_V2SImode:
10770 case E_V4SImode:
43e9d192
IB
10771 {
10772 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
10773 field_t = build_vector_type_for_mode (innertype, ag_mode);
10774 field_ptr_t = build_pointer_type (field_t);
10775 }
10776 break;
10777 default:
10778 gcc_assert (0);
10779 }
10780
 10781	      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area)  */
10782 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
10783 addr = t;
10784 t = fold_convert (field_ptr_t, addr);
10785 t = build2 (MODIFY_EXPR, field_t,
10786 build1 (INDIRECT_REF, field_t, tmp_ha),
10787 build1 (INDIRECT_REF, field_t, t));
10788
10789 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
10790 for (i = 1; i < nregs; ++i)
10791 {
10792 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
10793 u = fold_convert (field_ptr_t, addr);
10794 u = build2 (MODIFY_EXPR, field_t,
10795 build2 (MEM_REF, field_t, tmp_ha,
10796 build_int_cst (field_ptr_t,
10797 (i *
10798 int_size_in_bytes (field_t)))),
10799 build1 (INDIRECT_REF, field_t, u));
10800 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
10801 }
10802
10803 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
10804 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
10805 }
10806
10807 COND_EXPR_ELSE (cond2) = t;
10808 addr = fold_convert (build_pointer_type (type), cond1);
10809 addr = build_va_arg_indirect_ref (addr);
10810
10811 if (indirect_p)
10812 addr = build_va_arg_indirect_ref (addr);
10813
10814 return addr;
10815}
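/* Rough shape of the gimplified va_arg for a plain 'int', as a sketch of the
   trees built above (the names are illustrative, not emitted identifiers):

     off = ap.__gr_offs;
     if (off >= 0)
       addr = ap.__stack, ap.__stack += 8;        // already on the stack
     else
       {
         ap.__gr_offs = off + 8;
         if (ap.__gr_offs > 0)
           addr = ap.__stack, ap.__stack += 8;    // save area exhausted
         else
           addr = ap.__gr_top + off;              // from the GR save area
       }
     result = *(int *) addr;

   Big-endian padding adjustments and the homogeneous-aggregate copy loop
   are omitted from this sketch.  */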
10816
10817/* Implement TARGET_SETUP_INCOMING_VARARGS. */
10818
10819static void
ef4bddc2 10820aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
10821 tree type, int *pretend_size ATTRIBUTE_UNUSED,
10822 int no_rtl)
10823{
10824 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10825 CUMULATIVE_ARGS local_cum;
88e3bdd1
JW
10826 int gr_saved = cfun->va_list_gpr_size;
10827 int vr_saved = cfun->va_list_fpr_size;
43e9d192
IB
10828
10829 /* The caller has advanced CUM up to, but not beyond, the last named
10830 argument. Advance a local copy of CUM past the last "real" named
10831 argument, to find out how many registers are left over. */
10832 local_cum = *cum;
10833 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
10834
88e3bdd1
JW
 10835	  /* Find out how many registers we need to save.
 10836	     Honor tree-stdarg analysis results.  */
10837 if (cfun->va_list_gpr_size)
10838 gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
10839 cfun->va_list_gpr_size / UNITS_PER_WORD);
10840 if (cfun->va_list_fpr_size)
10841 vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
10842 cfun->va_list_fpr_size / UNITS_PER_VREG);
43e9d192 10843
d5726973 10844 if (!TARGET_FLOAT)
43e9d192 10845 {
261fb553 10846 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
10847 vr_saved = 0;
10848 }
10849
10850 if (!no_rtl)
10851 {
10852 if (gr_saved > 0)
10853 {
10854 rtx ptr, mem;
10855
10856 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
10857 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
10858 - gr_saved * UNITS_PER_WORD);
10859 mem = gen_frame_mem (BLKmode, ptr);
10860 set_mem_alias_set (mem, get_varargs_alias_set ());
10861
10862 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
10863 mem, gr_saved);
10864 }
10865 if (vr_saved > 0)
10866 {
10867 /* We can't use move_block_from_reg, because it will use
10868 the wrong mode, storing D regs only. */
ef4bddc2 10869 machine_mode mode = TImode;
88e3bdd1 10870 int off, i, vr_start;
43e9d192
IB
10871
10872 /* Set OFF to the offset from virtual_incoming_args_rtx of
10873 the first vector register. The VR save area lies below
10874 the GR one, and is aligned to 16 bytes. */
4f59f9f2
UB
10875 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
10876 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
10877 off -= vr_saved * UNITS_PER_VREG;
10878
88e3bdd1
JW
10879 vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
10880 for (i = 0; i < vr_saved; ++i)
43e9d192
IB
10881 {
10882 rtx ptr, mem;
10883
10884 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
10885 mem = gen_frame_mem (mode, ptr);
10886 set_mem_alias_set (mem, get_varargs_alias_set ());
88e3bdd1 10887 aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
43e9d192
IB
10888 off += UNITS_PER_VREG;
10889 }
10890 }
10891 }
10892
10893 /* We don't save the size into *PRETEND_SIZE because we want to avoid
10894 any complication of having crtl->args.pretend_args_size changed. */
8799637a 10895 cfun->machine->frame.saved_varargs_size
4f59f9f2
UB
10896 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
10897 STACK_BOUNDARY / BITS_PER_UNIT)
43e9d192
IB
10898 + vr_saved * UNITS_PER_VREG);
10899}
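/* Illustrative layout (a sketch, assuming cfun->va_list_gpr_size and
   cfun->va_list_fpr_size do not limit the save areas): for
   "int f (int a, ...)" the code above spills x1..x7 immediately below
   virtual_incoming_args_rtx (7 * 8 = 56 bytes) and q0..q7 as TImode slots
   below a 16-byte-aligned boundary under that, giving
   saved_varargs_size = ROUND_UP (56, 16) + 8 * 16 = 192 bytes.  */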
10900
10901static void
10902aarch64_conditional_register_usage (void)
10903{
10904 int i;
10905 if (!TARGET_FLOAT)
10906 {
10907 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
10908 {
10909 fixed_regs[i] = 1;
10910 call_used_regs[i] = 1;
10911 }
10912 }
10913}
10914
10915/* Walk down the type tree of TYPE counting consecutive base elements.
10916 If *MODEP is VOIDmode, then set it to the first valid floating point
10917 type. If a non-floating point type is found, or if a floating point
10918 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
10919 otherwise return the count in the sub-tree. */
10920static int
ef4bddc2 10921aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 10922{
ef4bddc2 10923 machine_mode mode;
43e9d192
IB
10924 HOST_WIDE_INT size;
10925
10926 switch (TREE_CODE (type))
10927 {
10928 case REAL_TYPE:
10929 mode = TYPE_MODE (type);
1b62ed4f
JG
10930 if (mode != DFmode && mode != SFmode
10931 && mode != TFmode && mode != HFmode)
43e9d192
IB
10932 return -1;
10933
10934 if (*modep == VOIDmode)
10935 *modep = mode;
10936
10937 if (*modep == mode)
10938 return 1;
10939
10940 break;
10941
10942 case COMPLEX_TYPE:
10943 mode = TYPE_MODE (TREE_TYPE (type));
1b62ed4f
JG
10944 if (mode != DFmode && mode != SFmode
10945 && mode != TFmode && mode != HFmode)
43e9d192
IB
10946 return -1;
10947
10948 if (*modep == VOIDmode)
10949 *modep = mode;
10950
10951 if (*modep == mode)
10952 return 2;
10953
10954 break;
10955
10956 case VECTOR_TYPE:
10957 /* Use V2SImode and V4SImode as representatives of all 64-bit
10958 and 128-bit vector types. */
10959 size = int_size_in_bytes (type);
10960 switch (size)
10961 {
10962 case 8:
10963 mode = V2SImode;
10964 break;
10965 case 16:
10966 mode = V4SImode;
10967 break;
10968 default:
10969 return -1;
10970 }
10971
10972 if (*modep == VOIDmode)
10973 *modep = mode;
10974
10975 /* Vector modes are considered to be opaque: two vectors are
10976 equivalent for the purposes of being homogeneous aggregates
10977 if they are the same size. */
10978 if (*modep == mode)
10979 return 1;
10980
10981 break;
10982
10983 case ARRAY_TYPE:
10984 {
10985 int count;
10986 tree index = TYPE_DOMAIN (type);
10987
807e902e
KZ
10988 /* Can't handle incomplete types nor sizes that are not
10989 fixed. */
10990 if (!COMPLETE_TYPE_P (type)
10991 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
10992 return -1;
10993
10994 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
10995 if (count == -1
10996 || !index
10997 || !TYPE_MAX_VALUE (index)
cc269bb6 10998 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 10999 || !TYPE_MIN_VALUE (index)
cc269bb6 11000 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
11001 || count < 0)
11002 return -1;
11003
ae7e9ddd
RS
11004 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
11005 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
11006
11007 /* There must be no padding. */
807e902e 11008 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
11009 return -1;
11010
11011 return count;
11012 }
11013
11014 case RECORD_TYPE:
11015 {
11016 int count = 0;
11017 int sub_count;
11018 tree field;
11019
807e902e
KZ
11020 /* Can't handle incomplete types nor sizes that are not
11021 fixed. */
11022 if (!COMPLETE_TYPE_P (type)
11023 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
11024 return -1;
11025
11026 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11027 {
11028 if (TREE_CODE (field) != FIELD_DECL)
11029 continue;
11030
11031 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
11032 if (sub_count < 0)
11033 return -1;
11034 count += sub_count;
11035 }
11036
11037 /* There must be no padding. */
807e902e 11038 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
11039 return -1;
11040
11041 return count;
11042 }
11043
11044 case UNION_TYPE:
11045 case QUAL_UNION_TYPE:
11046 {
11047 /* These aren't very interesting except in a degenerate case. */
11048 int count = 0;
11049 int sub_count;
11050 tree field;
11051
807e902e
KZ
11052 /* Can't handle incomplete types nor sizes that are not
11053 fixed. */
11054 if (!COMPLETE_TYPE_P (type)
11055 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
11056 return -1;
11057
11058 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11059 {
11060 if (TREE_CODE (field) != FIELD_DECL)
11061 continue;
11062
11063 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
11064 if (sub_count < 0)
11065 return -1;
11066 count = count > sub_count ? count : sub_count;
11067 }
11068
11069 /* There must be no padding. */
807e902e 11070 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
11071 return -1;
11072
11073 return count;
11074 }
11075
11076 default:
11077 break;
11078 }
11079
11080 return -1;
11081}
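/* Worked example (illustrative): for
       struct s1 { double d[2]; float f; };
   the walk above fails, because the array sets *MODEP to DFmode and the
   trailing float is SFmode, so -1 is returned; whereas
       struct s2 { double d[2]; double e; };
   yields a count of 3 with *MODEP == DFmode, i.e. a candidate homogeneous
   floating-point aggregate.  */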
11082
b6ec6215
KT
11083/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
11084 type as described in AAPCS64 \S 4.1.2.
11085
11086 See the comment above aarch64_composite_type_p for the notes on MODE. */
11087
11088static bool
11089aarch64_short_vector_p (const_tree type,
11090 machine_mode mode)
11091{
11092 HOST_WIDE_INT size = -1;
11093
11094 if (type && TREE_CODE (type) == VECTOR_TYPE)
11095 size = int_size_in_bytes (type);
11096 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
11097 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11098 size = GET_MODE_SIZE (mode);
11099
11100 return (size == 8 || size == 16);
11101}
11102
43e9d192
IB
11103/* Return TRUE if the type, as described by TYPE and MODE, is a composite
11104 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
11105 array types. The C99 floating-point complex types are also considered
11106 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
11107 types, which are GCC extensions and out of the scope of AAPCS64, are
11108 treated as composite types here as well.
11109
11110 Note that MODE itself is not sufficient in determining whether a type
11111 is such a composite type or not. This is because
11112 stor-layout.c:compute_record_mode may have already changed the MODE
11113 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
11114 structure with only one field may have its MODE set to the mode of the
11115 field. Also an integer mode whose size matches the size of the
11116 RECORD_TYPE type may be used to substitute the original mode
11117 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
11118 solely relied on. */
11119
11120static bool
11121aarch64_composite_type_p (const_tree type,
ef4bddc2 11122 machine_mode mode)
43e9d192 11123{
b6ec6215
KT
11124 if (aarch64_short_vector_p (type, mode))
11125 return false;
11126
43e9d192
IB
11127 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
11128 return true;
11129
11130 if (mode == BLKmode
11131 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
11132 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
11133 return true;
11134
11135 return false;
11136}
11137
43e9d192
IB
11138/* Return TRUE if an argument, whose type is described by TYPE and MODE,
11139 shall be passed or returned in simd/fp register(s) (providing these
11140 parameter passing registers are available).
11141
11142 Upon successful return, *COUNT returns the number of needed registers,
 11143	   *BASE_MODE returns the mode of the individual register and when IS_HA
11144 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
11145 floating-point aggregate or a homogeneous short-vector aggregate. */
11146
11147static bool
ef4bddc2 11148aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 11149 const_tree type,
ef4bddc2 11150 machine_mode *base_mode,
43e9d192
IB
11151 int *count,
11152 bool *is_ha)
11153{
ef4bddc2 11154 machine_mode new_mode = VOIDmode;
43e9d192
IB
11155 bool composite_p = aarch64_composite_type_p (type, mode);
11156
11157 if (is_ha != NULL) *is_ha = false;
11158
11159 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
11160 || aarch64_short_vector_p (type, mode))
11161 {
11162 *count = 1;
11163 new_mode = mode;
11164 }
11165 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
11166 {
11167 if (is_ha != NULL) *is_ha = true;
11168 *count = 2;
11169 new_mode = GET_MODE_INNER (mode);
11170 }
11171 else if (type && composite_p)
11172 {
11173 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
11174
11175 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
11176 {
11177 if (is_ha != NULL) *is_ha = true;
11178 *count = ag_count;
11179 }
11180 else
11181 return false;
11182 }
11183 else
11184 return false;
11185
11186 *base_mode = new_mode;
11187 return true;
11188}
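/* Quick examples (illustrative): a plain "double" gives *COUNT == 1 with
   *BASE_MODE == DFmode; "_Complex double" gives *COUNT == 2, DFmode and
   *IS_HA == true; "struct { float f[3]; }" is a composite handled by
   aapcs_vfp_sub_candidate, giving *COUNT == 3, SFmode and *IS_HA == true.  */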
11189
11190/* Implement TARGET_STRUCT_VALUE_RTX. */
11191
11192static rtx
11193aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
11194 int incoming ATTRIBUTE_UNUSED)
11195{
11196 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
11197}
11198
11199/* Implements target hook vector_mode_supported_p. */
11200static bool
ef4bddc2 11201aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192
IB
11202{
11203 if (TARGET_SIMD
11204 && (mode == V4SImode || mode == V8HImode
11205 || mode == V16QImode || mode == V2DImode
11206 || mode == V2SImode || mode == V4HImode
11207 || mode == V8QImode || mode == V2SFmode
ad7d90cc 11208 || mode == V4SFmode || mode == V2DFmode
71a11456 11209 || mode == V4HFmode || mode == V8HFmode
ad7d90cc 11210 || mode == V1DFmode))
43e9d192
IB
11211 return true;
11212
11213 return false;
11214}
11215
b7342d25
IB
11216/* Return appropriate SIMD container
11217 for MODE within a vector of WIDTH bits. */
ef4bddc2
RS
11218static machine_mode
11219aarch64_simd_container_mode (machine_mode mode, unsigned width)
43e9d192 11220{
b7342d25 11221 gcc_assert (width == 64 || width == 128);
43e9d192 11222 if (TARGET_SIMD)
b7342d25
IB
11223 {
11224 if (width == 128)
11225 switch (mode)
11226 {
4e10a5a7 11227 case E_DFmode:
b7342d25 11228 return V2DFmode;
4e10a5a7 11229 case E_SFmode:
b7342d25 11230 return V4SFmode;
4e10a5a7 11231 case E_HFmode:
b719f884 11232 return V8HFmode;
4e10a5a7 11233 case E_SImode:
b7342d25 11234 return V4SImode;
4e10a5a7 11235 case E_HImode:
b7342d25 11236 return V8HImode;
4e10a5a7 11237 case E_QImode:
b7342d25 11238 return V16QImode;
4e10a5a7 11239 case E_DImode:
b7342d25
IB
11240 return V2DImode;
11241 default:
11242 break;
11243 }
11244 else
11245 switch (mode)
11246 {
4e10a5a7 11247 case E_SFmode:
b7342d25 11248 return V2SFmode;
4e10a5a7 11249 case E_HFmode:
b719f884 11250 return V4HFmode;
4e10a5a7 11251 case E_SImode:
b7342d25 11252 return V2SImode;
4e10a5a7 11253 case E_HImode:
b7342d25 11254 return V4HImode;
4e10a5a7 11255 case E_QImode:
b7342d25
IB
11256 return V8QImode;
11257 default:
11258 break;
11259 }
11260 }
43e9d192
IB
11261 return word_mode;
11262}
11263
b7342d25 11264/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2
RS
11265static machine_mode
11266aarch64_preferred_simd_mode (machine_mode mode)
b7342d25
IB
11267{
11268 return aarch64_simd_container_mode (mode, 128);
11269}
11270
3b357264
JG
11271/* Return the bitmask of possible vector sizes for the vectorizer
11272 to iterate over. */
11273static unsigned int
11274aarch64_autovectorize_vector_sizes (void)
11275{
11276 return (16 | 8);
11277}
11278
ac2b960f
YZ
11279/* Implement TARGET_MANGLE_TYPE. */
11280
6f549691 11281static const char *
ac2b960f
YZ
11282aarch64_mangle_type (const_tree type)
11283{
11284 /* The AArch64 ABI documents say that "__va_list" has to be
 11285	     mangled as if it is in the "std" namespace.  */
11286 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
11287 return "St9__va_list";
11288
c2ec330c
AL
11289 /* Half-precision float. */
11290 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
11291 return "Dh";
11292
f9d53c27
TB
11293 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
11294 builtin types. */
11295 if (TYPE_NAME (type) != NULL)
11296 return aarch64_mangle_builtin_type (type);
c6fc9e43 11297
ac2b960f
YZ
11298 /* Use the default mangling. */
11299 return NULL;
11300}
11301
75cf1494
KT
11302/* Find the first rtx_insn before insn that will generate an assembly
11303 instruction. */
11304
11305static rtx_insn *
11306aarch64_prev_real_insn (rtx_insn *insn)
11307{
11308 if (!insn)
11309 return NULL;
11310
11311 do
11312 {
11313 insn = prev_real_insn (insn);
11314 }
11315 while (insn && recog_memoized (insn) < 0);
11316
11317 return insn;
11318}
11319
11320static bool
11321is_madd_op (enum attr_type t1)
11322{
11323 unsigned int i;
11324 /* A number of these may be AArch32 only. */
11325 enum attr_type mlatypes[] = {
11326 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
11327 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
 11328	    TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
11329 };
11330
11331 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
11332 {
11333 if (t1 == mlatypes[i])
11334 return true;
11335 }
11336
11337 return false;
11338}
11339
11340/* Check if there is a register dependency between a load and the insn
11341 for which we hold recog_data. */
11342
11343static bool
11344dep_between_memop_and_curr (rtx memop)
11345{
11346 rtx load_reg;
11347 int opno;
11348
8baff86e 11349 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
11350
11351 if (!REG_P (SET_DEST (memop)))
11352 return false;
11353
11354 load_reg = SET_DEST (memop);
8baff86e 11355 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
11356 {
11357 rtx operand = recog_data.operand[opno];
11358 if (REG_P (operand)
11359 && reg_overlap_mentioned_p (load_reg, operand))
11360 return true;
11361
11362 }
11363 return false;
11364}
11365
8baff86e
KT
11366
11367/* When working around the Cortex-A53 erratum 835769,
11368 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
11369 instruction and has a preceding memory instruction such that a NOP
11370 should be inserted between them. */
11371
75cf1494
KT
11372bool
11373aarch64_madd_needs_nop (rtx_insn* insn)
11374{
11375 enum attr_type attr_type;
11376 rtx_insn *prev;
11377 rtx body;
11378
b32c1043 11379 if (!TARGET_FIX_ERR_A53_835769)
75cf1494
KT
11380 return false;
11381
e322d6e3 11382 if (!INSN_P (insn) || recog_memoized (insn) < 0)
75cf1494
KT
11383 return false;
11384
11385 attr_type = get_attr_type (insn);
11386 if (!is_madd_op (attr_type))
11387 return false;
11388
11389 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
11390 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
11391 Restore recog state to INSN to avoid state corruption. */
11392 extract_constrain_insn_cached (insn);
11393
550e2205 11394 if (!prev || !contains_mem_rtx_p (PATTERN (prev)))
75cf1494
KT
11395 return false;
11396
11397 body = single_set (prev);
11398
11399 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
11400 it and the DImode madd, emit a NOP between them. If body is NULL then we
11401 have a complex memory operation, probably a load/store pair.
11402 Be conservative for now and emit a NOP. */
11403 if (GET_MODE (recog_data.operand[0]) == DImode
11404 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
11405 return true;
11406
11407 return false;
11408
11409}
11410
8baff86e
KT
11411
11412/* Implement FINAL_PRESCAN_INSN. */
11413
75cf1494
KT
11414void
11415aarch64_final_prescan_insn (rtx_insn *insn)
11416{
11417 if (aarch64_madd_needs_nop (insn))
11418 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
11419}
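/* Illustrative effect of the erratum 835769 workaround (a sketch, assuming
   the workaround is enabled, e.g. via -mfix-cortex-a53-835769): for a 64-bit
   multiply-accumulate that immediately follows an unrelated load, such as

       ldr  x1, [x2]
       madd x0, x3, x4, x0

   the prescan hook above emits

       ldr  x1, [x2]
       nop // between mem op and mult-accumulate
       madd x0, x3, x4, x0

   No NOP is emitted when the madd consumes the loaded register, because
   dep_between_memop_and_curr detects the dependency.  */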
11420
11421
43e9d192 11422/* Return the equivalent letter for size. */
81c2dfb9 11423static char
43e9d192
IB
11424sizetochar (int size)
11425{
11426 switch (size)
11427 {
11428 case 64: return 'd';
11429 case 32: return 's';
11430 case 16: return 'h';
11431 case 8 : return 'b';
11432 default: gcc_unreachable ();
11433 }
11434}
11435
3520f7cc
JG
11436/* Return true iff x is a uniform vector of floating-point
11437 constants, and the constant can be represented in
 11438	   quarter-precision form.  Note that, as aarch64_float_const_representable_p
11439 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
11440static bool
11441aarch64_vect_float_const_representable_p (rtx x)
11442{
92695fbb
RS
11443 rtx elt;
11444 return (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
11445 && const_vec_duplicate_p (x, &elt)
11446 && aarch64_float_const_representable_p (elt));
3520f7cc
JG
11447}
11448
d8edd899 11449/* Return true for valid and false for invalid. */
3ea63f60 11450bool
ef4bddc2 11451aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
48063b9d 11452 struct simd_immediate_info *info)
43e9d192
IB
11453{
11454#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
11455 matches = 1; \
11456 for (i = 0; i < idx; i += (STRIDE)) \
11457 if (!(TEST)) \
11458 matches = 0; \
11459 if (matches) \
11460 { \
11461 immtype = (CLASS); \
11462 elsize = (ELSIZE); \
43e9d192
IB
11463 eshift = (SHIFT); \
11464 emvn = (NEG); \
11465 break; \
11466 }
11467
11468 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
cb5ca315 11469 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
43e9d192 11470 unsigned char bytes[16];
43e9d192
IB
11471 int immtype = -1, matches;
11472 unsigned int invmask = inverse ? 0xff : 0;
11473 int eshift, emvn;
11474
43e9d192 11475 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 11476 {
81c2dfb9
IB
11477 if (! (aarch64_simd_imm_zero_p (op, mode)
11478 || aarch64_vect_float_const_representable_p (op)))
d8edd899 11479 return false;
3520f7cc 11480
48063b9d
IB
11481 if (info)
11482 {
e386a52f
RS
11483 rtx elt = CONST_VECTOR_ELT (op, 0);
11484 scalar_float_mode elt_mode
11485 = as_a <scalar_float_mode> (GET_MODE (elt));
11486
11487 info->value = elt;
11488 info->element_width = GET_MODE_BITSIZE (elt_mode);
48063b9d
IB
11489 info->mvn = false;
11490 info->shift = 0;
11491 }
3520f7cc 11492
d8edd899 11493 return true;
3520f7cc 11494 }
43e9d192
IB
11495
11496 /* Splat vector constant out into a byte vector. */
11497 for (i = 0; i < n_elts; i++)
11498 {
4b1e108c
AL
11499 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
11500 it must be laid out in the vector register in reverse order. */
11501 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192 11502 unsigned HOST_WIDE_INT elpart;
43e9d192 11503
ee78df47
KT
11504 gcc_assert (CONST_INT_P (el));
11505 elpart = INTVAL (el);
11506
11507 for (unsigned int byte = 0; byte < innersize; byte++)
11508 {
11509 bytes[idx++] = (elpart & 0xff) ^ invmask;
11510 elpart >>= BITS_PER_UNIT;
11511 }
43e9d192 11512
43e9d192
IB
11513 }
11514
11515 /* Sanity check. */
11516 gcc_assert (idx == GET_MODE_SIZE (mode));
11517
11518 do
11519 {
11520 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11521 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
11522
11523 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11524 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
11525
11526 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11527 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
11528
11529 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11530 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
11531
11532 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
11533
11534 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
11535
11536 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11537 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
11538
11539 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11540 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
11541
11542 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11543 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
11544
11545 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11546 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
11547
11548 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
11549
11550 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
11551
11552 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 11553 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
11554
11555 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 11556 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
11557
11558 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 11559 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
11560
11561 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 11562 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
11563
11564 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
11565
11566 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11567 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
11568 }
11569 while (0);
11570
e4f0f84d 11571 if (immtype == -1)
d8edd899 11572 return false;
43e9d192 11573
48063b9d 11574 if (info)
43e9d192 11575 {
48063b9d 11576 info->element_width = elsize;
48063b9d
IB
11577 info->mvn = emvn != 0;
11578 info->shift = eshift;
11579
43e9d192
IB
11580 unsigned HOST_WIDE_INT imm = 0;
11581
e4f0f84d
TB
11582 if (immtype >= 12 && immtype <= 15)
11583 info->msl = true;
11584
43e9d192
IB
11585 /* Un-invert bytes of recognized vector, if necessary. */
11586 if (invmask != 0)
11587 for (i = 0; i < idx; i++)
11588 bytes[i] ^= invmask;
11589
11590 if (immtype == 17)
11591 {
11592 /* FIXME: Broken on 32-bit H_W_I hosts. */
11593 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11594
11595 for (i = 0; i < 8; i++)
11596 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11597 << (i * BITS_PER_UNIT);
11598
43e9d192 11599
48063b9d
IB
11600 info->value = GEN_INT (imm);
11601 }
11602 else
11603 {
11604 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11605 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
11606
11607 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
11608 generic constants. */
11609 if (info->mvn)
43e9d192 11610 imm = ~imm;
48063b9d
IB
11611 imm = (imm >> info->shift) & 0xff;
11612 info->value = GEN_INT (imm);
11613 }
43e9d192
IB
11614 }
11615
48063b9d 11616 return true;
43e9d192
IB
11617#undef CHECK
11618}
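/* Worked example (illustrative): a V4SImode vector whose elements all equal
   0x00ab0000 splats to the per-element byte pattern 00 00 ab 00, which matches
   the CHECK (4, 32, 2, ..., 16, 0) case above.  The returned info then holds
   an 8-bit value of 0xab, an element width of 32 and a shift of 16, i.e.
   something the backend can emit roughly as "movi v0.4s, #0xab, lsl #16".  */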
11619
43e9d192
IB
 11620/* Check if immediate shift constants are within range.  */
11621bool
ef4bddc2 11622aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
11623{
11624 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
11625 if (left)
ddeabd3e 11626 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 11627 else
ddeabd3e 11628 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
11629}
11630
3520f7cc
JG
11631/* Return true if X is a uniform vector where all elements
11632 are either the floating-point constant 0.0 or the
11633 integer constant 0. */
43e9d192 11634bool
ef4bddc2 11635aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
43e9d192 11636{
3520f7cc 11637 return x == CONST0_RTX (mode);
43e9d192
IB
11638}
11639
7325d85a
KT
11640
11641/* Return the bitmask CONST_INT to select the bits required by a zero extract
11642 operation of width WIDTH at bit position POS. */
11643
11644rtx
11645aarch64_mask_from_zextract_ops (rtx width, rtx pos)
11646{
11647 gcc_assert (CONST_INT_P (width));
11648 gcc_assert (CONST_INT_P (pos));
11649
11650 unsigned HOST_WIDE_INT mask
11651 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
11652 return GEN_INT (mask << UINTVAL (pos));
11653}
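/* For example (illustrative), a zero_extract of WIDTH 8 at POS 16 gives
   ((1 << 8) - 1) << 16 == 0xff0000, i.e. the mask selecting bits 16..23.  */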
11654
83f8c414 11655bool
a6e0bfa7 11656aarch64_mov_operand_p (rtx x, machine_mode mode)
83f8c414 11657{
83f8c414
CSS
11658 if (GET_CODE (x) == HIGH
11659 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
11660 return true;
11661
82614948 11662 if (CONST_INT_P (x))
83f8c414
CSS
11663 return true;
11664
11665 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
11666 return true;
11667
a6e0bfa7 11668 return aarch64_classify_symbolic_expression (x)
a5350ddc 11669 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
11670}
11671
43e9d192
IB
11672/* Return a const_int vector of VAL. */
11673rtx
ab014eb3 11674aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
43e9d192
IB
11675{
11676 int nunits = GET_MODE_NUNITS (mode);
11677 rtvec v = rtvec_alloc (nunits);
11678 int i;
11679
ab014eb3
TC
11680 rtx cache = GEN_INT (val);
11681
43e9d192 11682 for (i=0; i < nunits; i++)
ab014eb3 11683 RTVEC_ELT (v, i) = cache;
43e9d192
IB
11684
11685 return gen_rtx_CONST_VECTOR (mode, v);
11686}
11687
051d0e2f
SN
11688/* Check OP is a legal scalar immediate for the MOVI instruction. */
11689
11690bool
ef4bddc2 11691aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
051d0e2f 11692{
ef4bddc2 11693 machine_mode vmode;
051d0e2f
SN
11694
11695 gcc_assert (!VECTOR_MODE_P (mode));
11696 vmode = aarch64_preferred_simd_mode (mode);
11697 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 11698 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
11699}
11700
988fa693
JG
11701/* Construct and return a PARALLEL RTX vector with elements numbering the
11702 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
11703 the vector - from the perspective of the architecture. This does not
11704 line up with GCC's perspective on lane numbers, so we end up with
11705 different masks depending on our target endian-ness. The diagram
11706 below may help. We must draw the distinction when building masks
11707 which select one half of the vector. An instruction selecting
11708 architectural low-lanes for a big-endian target, must be described using
11709 a mask selecting GCC high-lanes.
11710
11711 Big-Endian Little-Endian
11712
11713GCC 0 1 2 3 3 2 1 0
11714 | x | x | x | x | | x | x | x | x |
11715Architecture 3 2 1 0 3 2 1 0
11716
11717Low Mask: { 2, 3 } { 0, 1 }
11718High Mask: { 0, 1 } { 2, 3 }
11719*/
11720
43e9d192 11721rtx
ef4bddc2 11722aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
43e9d192
IB
11723{
11724 int nunits = GET_MODE_NUNITS (mode);
11725 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
11726 int high_base = nunits / 2;
11727 int low_base = 0;
11728 int base;
43e9d192
IB
11729 rtx t1;
11730 int i;
11731
988fa693
JG
11732 if (BYTES_BIG_ENDIAN)
11733 base = high ? low_base : high_base;
11734 else
11735 base = high ? high_base : low_base;
11736
11737 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
11738 RTVEC_ELT (v, i) = GEN_INT (base + i);
11739
11740 t1 = gen_rtx_PARALLEL (mode, v);
11741 return t1;
11742}
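/* For example (V4SImode, following the diagram above): HIGH == true yields
   (parallel [2 3]) on little-endian but (parallel [0 1]) on big-endian,
   because the architectural high lanes correspond to GCC's low lane numbers
   when BYTES_BIG_ENDIAN.  */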
11743
988fa693
JG
11744/* Check OP for validity as a PARALLEL RTX vector with elements
11745 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
11746 from the perspective of the architecture. See the diagram above
11747 aarch64_simd_vect_par_cnst_half for more details. */
11748
11749bool
ef4bddc2 11750aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
11751 bool high)
11752{
11753 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
11754 HOST_WIDE_INT count_op = XVECLEN (op, 0);
11755 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
11756 int i = 0;
11757
11758 if (!VECTOR_MODE_P (mode))
11759 return false;
11760
11761 if (count_op != count_ideal)
11762 return false;
11763
11764 for (i = 0; i < count_ideal; i++)
11765 {
11766 rtx elt_op = XVECEXP (op, 0, i);
11767 rtx elt_ideal = XVECEXP (ideal, 0, i);
11768
4aa81c2e 11769 if (!CONST_INT_P (elt_op)
988fa693
JG
11770 || INTVAL (elt_ideal) != INTVAL (elt_op))
11771 return false;
11772 }
11773 return true;
11774}
11775
43e9d192
IB
11776/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
11777 HIGH (exclusive). */
11778void
46ed6024
CB
11779aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
11780 const_tree exp)
43e9d192
IB
11781{
11782 HOST_WIDE_INT lane;
4aa81c2e 11783 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
11784 lane = INTVAL (operand);
11785
11786 if (lane < low || lane >= high)
46ed6024
CB
11787 {
11788 if (exp)
cf0c27ef 11789 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 11790 else
cf0c27ef 11791 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 11792 }
43e9d192
IB
11793}
11794
43e9d192
IB
11795/* Return TRUE if OP is a valid vector addressing mode. */
11796bool
11797aarch64_simd_mem_operand_p (rtx op)
11798{
11799 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 11800 || REG_P (XEXP (op, 0)));
43e9d192
IB
11801}
11802
2d8c6dc1
AH
11803/* Emit a register copy from operand to operand, taking care not to
11804 early-clobber source registers in the process.
43e9d192 11805
2d8c6dc1
AH
11806 COUNT is the number of components into which the copy needs to be
11807 decomposed. */
43e9d192 11808void
b8506a8a 11809aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode,
2d8c6dc1 11810 unsigned int count)
43e9d192
IB
11811{
11812 unsigned int i;
2d8c6dc1
AH
11813 int rdest = REGNO (operands[0]);
11814 int rsrc = REGNO (operands[1]);
43e9d192
IB
11815
11816 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
11817 || rdest < rsrc)
11818 for (i = 0; i < count; i++)
11819 emit_move_insn (gen_rtx_REG (mode, rdest + i),
11820 gen_rtx_REG (mode, rsrc + i));
43e9d192 11821 else
2d8c6dc1
AH
11822 for (i = 0; i < count; i++)
11823 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
11824 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
11825}
11826
668046d1 11827/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
6ec0e5b9 11828 one of VSTRUCT modes: OI, CI, or XI. */
668046d1 11829int
b8506a8a 11830aarch64_simd_attr_length_rglist (machine_mode mode)
668046d1
DS
11831{
11832 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
11833}
11834
db0253a4
TB
11835/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
11836 alignment of a vector to 128 bits. */
11837static HOST_WIDE_INT
11838aarch64_simd_vector_alignment (const_tree type)
11839{
9439e9a1 11840 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
11841 return MIN (align, 128);
11842}
11843
11844/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
11845static bool
11846aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
11847{
11848 if (is_packed)
11849 return false;
11850
11851 /* We guarantee alignment for vectors up to 128-bits. */
11852 if (tree_int_cst_compare (TYPE_SIZE (type),
11853 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
11854 return false;
11855
11856 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
11857 return true;
11858}
11859
7df76747
N
11860/* Return true if the vector misalignment factor is supported by the
11861 target. */
11862static bool
11863aarch64_builtin_support_vector_misalignment (machine_mode mode,
11864 const_tree type, int misalignment,
11865 bool is_packed)
11866{
11867 if (TARGET_SIMD && STRICT_ALIGNMENT)
11868 {
11869 /* Return if movmisalign pattern is not supported for this mode. */
11870 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
11871 return false;
11872
11873 if (misalignment == -1)
11874 {
11875 /* Misalignment factor is unknown at compile time but we know
11876 it's word aligned. */
11877 if (aarch64_simd_vector_alignment_reachable (type, is_packed))
11878 {
11879 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
11880
11881 if (element_size != 64)
11882 return true;
11883 }
11884 return false;
11885 }
11886 }
11887 return default_builtin_support_vector_misalignment (mode, type, misalignment,
11888 is_packed);
11889}
11890
4369c11e
TB
11891/* If VALS is a vector constant that can be loaded into a register
11892 using DUP, generate instructions to do so and return an RTX to
11893 assign to the register. Otherwise return NULL_RTX. */
11894static rtx
11895aarch64_simd_dup_constant (rtx vals)
11896{
ef4bddc2
RS
11897 machine_mode mode = GET_MODE (vals);
11898 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 11899 rtx x;
4369c11e 11900
92695fbb 11901 if (!const_vec_duplicate_p (vals, &x))
4369c11e
TB
11902 return NULL_RTX;
11903
11904 /* We can load this constant by using DUP and a constant in a
11905 single ARM register. This will be cheaper than a vector
11906 load. */
92695fbb 11907 x = copy_to_mode_reg (inner_mode, x);
4369c11e
TB
11908 return gen_rtx_VEC_DUPLICATE (mode, x);
11909}
11910
11911
11912/* Generate code to load VALS, which is a PARALLEL containing only
11913 constants (for vec_init) or CONST_VECTOR, efficiently into a
11914 register. Returns an RTX to copy into the register, or NULL_RTX
11915 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 11916static rtx
4369c11e
TB
11917aarch64_simd_make_constant (rtx vals)
11918{
ef4bddc2 11919 machine_mode mode = GET_MODE (vals);
4369c11e
TB
11920 rtx const_dup;
11921 rtx const_vec = NULL_RTX;
11922 int n_elts = GET_MODE_NUNITS (mode);
11923 int n_const = 0;
11924 int i;
11925
11926 if (GET_CODE (vals) == CONST_VECTOR)
11927 const_vec = vals;
11928 else if (GET_CODE (vals) == PARALLEL)
11929 {
11930 /* A CONST_VECTOR must contain only CONST_INTs and
11931 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11932 Only store valid constants in a CONST_VECTOR. */
11933 for (i = 0; i < n_elts; ++i)
11934 {
11935 rtx x = XVECEXP (vals, 0, i);
11936 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11937 n_const++;
11938 }
11939 if (n_const == n_elts)
11940 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
11941 }
11942 else
11943 gcc_unreachable ();
11944
11945 if (const_vec != NULL_RTX
48063b9d 11946 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
11947 /* Load using MOVI/MVNI. */
11948 return const_vec;
11949 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
11950 /* Loaded using DUP. */
11951 return const_dup;
11952 else if (const_vec != NULL_RTX)
11953 /* Load from constant pool. We can not take advantage of single-cycle
11954 LD1 because we need a PC-relative addressing mode. */
11955 return const_vec;
11956 else
11957 /* A PARALLEL containing something not valid inside CONST_VECTOR.
11958 We can not construct an initializer. */
11959 return NULL_RTX;
11960}
11961
35a093b6
JG
11962/* Expand a vector initialisation sequence, such that TARGET is
11963 initialised to contain VALS. */
11964
4369c11e
TB
11965void
11966aarch64_expand_vector_init (rtx target, rtx vals)
11967{
ef4bddc2
RS
11968 machine_mode mode = GET_MODE (target);
11969 machine_mode inner_mode = GET_MODE_INNER (mode);
35a093b6 11970 /* The number of vector elements. */
4369c11e 11971 int n_elts = GET_MODE_NUNITS (mode);
35a093b6 11972 /* The number of vector elements which are not constant. */
8b66a2d4
AL
11973 int n_var = 0;
11974 rtx any_const = NULL_RTX;
35a093b6
JG
11975 /* The first element of vals. */
11976 rtx v0 = XVECEXP (vals, 0, 0);
4369c11e 11977 bool all_same = true;
4369c11e 11978
35a093b6 11979 /* Count the number of variable elements to initialise. */
8b66a2d4 11980 for (int i = 0; i < n_elts; ++i)
4369c11e 11981 {
8b66a2d4 11982 rtx x = XVECEXP (vals, 0, i);
35a093b6 11983 if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
8b66a2d4
AL
11984 ++n_var;
11985 else
11986 any_const = x;
4369c11e 11987
35a093b6 11988 all_same &= rtx_equal_p (x, v0);
4369c11e
TB
11989 }
11990
35a093b6
JG
11991 /* No variable elements, hand off to aarch64_simd_make_constant which knows
11992 how best to handle this. */
4369c11e
TB
11993 if (n_var == 0)
11994 {
11995 rtx constant = aarch64_simd_make_constant (vals);
11996 if (constant != NULL_RTX)
11997 {
11998 emit_move_insn (target, constant);
11999 return;
12000 }
12001 }
12002
12003 /* Splat a single non-constant element if we can. */
12004 if (all_same)
12005 {
35a093b6 12006 rtx x = copy_to_mode_reg (inner_mode, v0);
4369c11e
TB
12007 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
12008 return;
12009 }
12010
85c1b6d7
AP
12011 enum insn_code icode = optab_handler (vec_set_optab, mode);
12012 gcc_assert (icode != CODE_FOR_nothing);
12013
12014 /* If there are only variable elements, try to optimize
12015 the insertion using dup for the most common element
12016 followed by insertions. */
12017
12018 /* The algorithm will fill matches[*][0] with the earliest matching element,
12019 and matches[X][1] with the count of duplicate elements (if X is the
12020 earliest element which has duplicates). */
12021
12022 if (n_var == n_elts && n_elts <= 16)
12023 {
12024 int matches[16][2] = {0};
12025 for (int i = 0; i < n_elts; i++)
12026 {
12027 for (int j = 0; j <= i; j++)
12028 {
12029 if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
12030 {
12031 matches[i][0] = j;
12032 matches[j][1]++;
12033 break;
12034 }
12035 }
12036 }
12037 int maxelement = 0;
12038 int maxv = 0;
12039 for (int i = 0; i < n_elts; i++)
12040 if (matches[i][1] > maxv)
12041 {
12042 maxelement = i;
12043 maxv = matches[i][1];
12044 }
12045
12046 /* Create a duplicate of the most common element. */
12047 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
12048 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
12049
12050 /* Insert the rest. */
12051 for (int i = 0; i < n_elts; i++)
12052 {
12053 rtx x = XVECEXP (vals, 0, i);
12054 if (matches[i][0] == maxelement)
12055 continue;
12056 x = copy_to_mode_reg (inner_mode, x);
12057 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
12058 }
12059 return;
12060 }
12061
35a093b6
JG
12062 /* Initialise a vector which is part-variable. We want to first try
12063 to build those lanes which are constant in the most efficient way we
12064 can. */
12065 if (n_var != n_elts)
4369c11e
TB
12066 {
12067 rtx copy = copy_rtx (vals);
4369c11e 12068
8b66a2d4
AL
12069 /* Load constant part of vector. We really don't care what goes into the
12070 parts we will overwrite, but we're more likely to be able to load the
12071 constant efficiently if it has fewer, larger, repeating parts
12072 (see aarch64_simd_valid_immediate). */
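	 /* A hypothetical walk-through of the substitution below: with V4SImode
	    VALS { c0, x, c2, c3 } the variable lane 1 first checks its XOR
	    neighbour at index 1 ^ 2 == 3 and borrows c3 (it would fall back to
	    index 1 ^ 1 == 0 if that lane were variable too).  The borrowed
	    constant is only a placeholder and is overwritten when the variable
	    lanes are inserted further down.  */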
12073 for (int i = 0; i < n_elts; i++)
12074 {
12075 rtx x = XVECEXP (vals, 0, i);
12076 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12077 continue;
12078 rtx subst = any_const;
12079 for (int bit = n_elts / 2; bit > 0; bit /= 2)
12080 {
12081 /* Look in the copied vector, as more elements are const. */
12082 rtx test = XVECEXP (copy, 0, i ^ bit);
12083 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
12084 {
12085 subst = test;
12086 break;
12087 }
12088 }
12089 XVECEXP (copy, 0, i) = subst;
12090 }
4369c11e 12091 aarch64_expand_vector_init (target, copy);
35a093b6 12092 }
4369c11e 12093
35a093b6 12094 /* Insert the variable lanes directly. */
8b66a2d4 12095 for (int i = 0; i < n_elts; i++)
35a093b6
JG
12096 {
12097 rtx x = XVECEXP (vals, 0, i);
12098 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12099 continue;
12100 x = copy_to_mode_reg (inner_mode, x);
12101 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
12102 }
4369c11e
TB
12103}
12104
43e9d192 12105static unsigned HOST_WIDE_INT
ef4bddc2 12106aarch64_shift_truncation_mask (machine_mode mode)
43e9d192
IB
12107{
12108 return
ac59ad4e
KT
12109 (!SHIFT_COUNT_TRUNCATED
12110 || aarch64_vector_mode_supported_p (mode)
43e9d192
IB
12111 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
12112}
12113
43e9d192
IB
12114/* Select a format to encode pointers in exception handling data. */
12115int
12116aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
12117{
12118 int type;
12119 switch (aarch64_cmodel)
12120 {
12121 case AARCH64_CMODEL_TINY:
12122 case AARCH64_CMODEL_TINY_PIC:
12123 case AARCH64_CMODEL_SMALL:
12124 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 12125 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
12126 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
12127 for everything. */
12128 type = DW_EH_PE_sdata4;
12129 break;
12130 default:
12131 /* No assumptions here. 8-byte relocs required. */
12132 type = DW_EH_PE_sdata8;
12133 break;
12134 }
12135 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
12136}
12137
e1c1ecb0
KT
12138/* The last .arch and .tune assembly strings that we printed. */
12139static std::string aarch64_last_printed_arch_string;
12140static std::string aarch64_last_printed_tune_string;
12141
361fb3ee
KT
12142/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
12143 by the function fndecl. */
12144
12145void
12146aarch64_declare_function_name (FILE *stream, const char* name,
12147 tree fndecl)
12148{
12149 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
12150
12151 struct cl_target_option *targ_options;
12152 if (target_parts)
12153 targ_options = TREE_TARGET_OPTION (target_parts);
12154 else
12155 targ_options = TREE_TARGET_OPTION (target_option_current_node);
12156 gcc_assert (targ_options);
12157
12158 const struct processor *this_arch
12159 = aarch64_get_arch (targ_options->x_explicit_arch);
12160
054b4005
JG
12161 unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
12162 std::string extension
04a99ebe
JG
12163 = aarch64_get_extension_string_for_isa_flags (isa_flags,
12164 this_arch->flags);
e1c1ecb0
KT
12165 /* Only update the assembler .arch string if it is distinct from the last
12166 such string we printed. */
12167 std::string to_print = this_arch->name + extension;
12168 if (to_print != aarch64_last_printed_arch_string)
12169 {
12170 asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
12171 aarch64_last_printed_arch_string = to_print;
12172 }
361fb3ee
KT
12173
12174 /* Print the cpu name we're tuning for in the comments; it might be
e1c1ecb0
KT
12175 useful to readers of the generated asm. Do it only when it changes
12176 from function to function and verbose assembly is requested. */
361fb3ee
KT
12177 const struct processor *this_tune
12178 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
12179
e1c1ecb0
KT
12180 if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
12181 {
12182 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
12183 this_tune->name);
12184 aarch64_last_printed_tune_string = this_tune->name;
12185 }
361fb3ee
KT
12186
12187 /* Don't forget the type directive for ELF. */
12188 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
12189 ASM_OUTPUT_LABEL (stream, name);
12190}
12191
e1c1ecb0
KT
12192/* Implements TARGET_ASM_FILE_START. Output the assembly header. */
12193
12194static void
12195aarch64_start_file (void)
12196{
12197 struct cl_target_option *default_options
12198 = TREE_TARGET_OPTION (target_option_default_node);
12199
12200 const struct processor *default_arch
12201 = aarch64_get_arch (default_options->x_explicit_arch);
12202 unsigned long default_isa_flags = default_options->x_aarch64_isa_flags;
12203 std::string extension
04a99ebe
JG
12204 = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
12205 default_arch->flags);
e1c1ecb0
KT
12206
12207 aarch64_last_printed_arch_string = default_arch->name + extension;
12208 aarch64_last_printed_tune_string = "";
12209 asm_fprintf (asm_out_file, "\t.arch %s\n",
12210 aarch64_last_printed_arch_string.c_str ());
12211
12212 default_file_start ();
12213}
12214
0462169c
SN
12215/* Emit load exclusive. */
12216
12217static void
ef4bddc2 12218aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
12219 rtx mem, rtx model_rtx)
12220{
12221 rtx (*gen) (rtx, rtx, rtx);
12222
12223 switch (mode)
12224 {
4e10a5a7
RS
12225 case E_QImode: gen = gen_aarch64_load_exclusiveqi; break;
12226 case E_HImode: gen = gen_aarch64_load_exclusivehi; break;
12227 case E_SImode: gen = gen_aarch64_load_exclusivesi; break;
12228 case E_DImode: gen = gen_aarch64_load_exclusivedi; break;
0462169c
SN
12229 default:
12230 gcc_unreachable ();
12231 }
12232
12233 emit_insn (gen (rval, mem, model_rtx));
12234}
12235
12236/* Emit store exclusive. */
12237
12238static void
ef4bddc2 12239aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
12240 rtx rval, rtx mem, rtx model_rtx)
12241{
12242 rtx (*gen) (rtx, rtx, rtx, rtx);
12243
12244 switch (mode)
12245 {
4e10a5a7
RS
12246 case E_QImode: gen = gen_aarch64_store_exclusiveqi; break;
12247 case E_HImode: gen = gen_aarch64_store_exclusivehi; break;
12248 case E_SImode: gen = gen_aarch64_store_exclusivesi; break;
12249 case E_DImode: gen = gen_aarch64_store_exclusivedi; break;
0462169c
SN
12250 default:
12251 gcc_unreachable ();
12252 }
12253
12254 emit_insn (gen (bval, rval, mem, model_rtx));
12255}
12256
12257/* Mark the previous jump instruction as unlikely. */
12258
12259static void
12260aarch64_emit_unlikely_jump (rtx insn)
12261{
f370536c 12262 rtx_insn *jump = emit_jump_insn (insn);
5fa396ad 12263 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
0462169c
SN
12264}
12265
12266/* Expand a compare and swap pattern. */
12267
12268void
12269aarch64_expand_compare_and_swap (rtx operands[])
12270{
12271 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
ef4bddc2 12272 machine_mode mode, cmp_mode;
b0770c0f
MW
12273 typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
12274 int idx;
12275 gen_cas_fn gen;
12276 const gen_cas_fn split_cas[] =
12277 {
12278 gen_aarch64_compare_and_swapqi,
12279 gen_aarch64_compare_and_swaphi,
12280 gen_aarch64_compare_and_swapsi,
12281 gen_aarch64_compare_and_swapdi
12282 };
12283 const gen_cas_fn atomic_cas[] =
12284 {
12285 gen_aarch64_compare_and_swapqi_lse,
12286 gen_aarch64_compare_and_swaphi_lse,
12287 gen_aarch64_compare_and_swapsi_lse,
12288 gen_aarch64_compare_and_swapdi_lse
12289 };
0462169c
SN
12290
12291 bval = operands[0];
12292 rval = operands[1];
12293 mem = operands[2];
12294 oldval = operands[3];
12295 newval = operands[4];
12296 is_weak = operands[5];
12297 mod_s = operands[6];
12298 mod_f = operands[7];
12299 mode = GET_MODE (mem);
12300 cmp_mode = mode;
12301
12302 /* Normally the succ memory model must be stronger than fail, but in the
12303 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
12304 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
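  /* As an illustration (not from the original source), a call such as
     __atomic_compare_exchange_n (p, &expected, desired, 0,
				  __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)
     hits this case and is expanded as if the success ordering had been
     __ATOMIC_ACQ_REL.  */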
12305
46b35980
AM
12306 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
12307 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
12308 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
12309
12310 switch (mode)
12311 {
4e10a5a7
RS
12312 case E_QImode:
12313 case E_HImode:
0462169c
SN
12314 /* For short modes, we're going to perform the comparison in SImode,
12315 so do the zero-extension now. */
12316 cmp_mode = SImode;
12317 rval = gen_reg_rtx (SImode);
12318 oldval = convert_modes (SImode, mode, oldval, true);
12319 /* Fall through. */
12320
4e10a5a7
RS
12321 case E_SImode:
12322 case E_DImode:
0462169c
SN
12323 /* Force the value into a register if needed. */
12324 if (!aarch64_plus_operand (oldval, mode))
12325 oldval = force_reg (cmp_mode, oldval);
12326 break;
12327
12328 default:
12329 gcc_unreachable ();
12330 }
12331
12332 switch (mode)
12333 {
4e10a5a7
RS
12334 case E_QImode: idx = 0; break;
12335 case E_HImode: idx = 1; break;
12336 case E_SImode: idx = 2; break;
12337 case E_DImode: idx = 3; break;
0462169c
SN
12338 default:
12339 gcc_unreachable ();
12340 }
b0770c0f
MW
12341 if (TARGET_LSE)
12342 gen = atomic_cas[idx];
12343 else
12344 gen = split_cas[idx];
0462169c
SN
12345
12346 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
12347
12348 if (mode == QImode || mode == HImode)
12349 emit_move_insn (operands[1], gen_lowpart (mode, rval));
12350
12351 x = gen_rtx_REG (CCmode, CC_REGNUM);
12352 x = gen_rtx_EQ (SImode, x, const0_rtx);
f7df4a84 12353 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
12354}
12355
641c2f8b
MW
12356/* Test whether the target supports using an atomic load-operate instruction.
12357 CODE is the operation and AFTER is TRUE if the data in memory after the
12358 operation should be returned and FALSE if the data before the operation
12359 should be returned. Returns FALSE if the operation isn't supported by the
12360 architecture. */
12361
12362bool
12363aarch64_atomic_ldop_supported_p (enum rtx_code code)
12364{
12365 if (!TARGET_LSE)
12366 return false;
12367
12368 switch (code)
12369 {
12370 case SET:
12371 case AND:
12372 case IOR:
12373 case XOR:
12374 case MINUS:
12375 case PLUS:
12376 return true;
12377 default:
12378 return false;
12379 }
12380}
12381
f70fb3b6
MW
12382/* Emit a barrier appropriate for memory model MODEL at the end of a
12383 sequence implementing an atomic operation. */
12384
12385static void
12386aarch64_emit_post_barrier (enum memmodel model)
12387{
12388 const enum memmodel base_model = memmodel_base (model);
12389
12390 if (is_mm_sync (model)
12391 && (base_model == MEMMODEL_ACQUIRE
12392 || base_model == MEMMODEL_ACQ_REL
12393 || base_model == MEMMODEL_SEQ_CST))
12394 {
12395 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
12396 }
12397}
12398
b0770c0f
MW
12399/* Emit an atomic compare-and-swap operation. RVAL is the destination register
12400 for the data in memory. EXPECTED is the value expected to be in memory.
12401 DESIRED is the value to store to memory. MEM is the memory location. MODEL
12402 is the memory ordering to use. */
12403
12404void
12405aarch64_gen_atomic_cas (rtx rval, rtx mem,
12406 rtx expected, rtx desired,
12407 rtx model)
12408{
12409 rtx (*gen) (rtx, rtx, rtx, rtx);
12410 machine_mode mode;
12411
12412 mode = GET_MODE (mem);
12413
12414 switch (mode)
12415 {
4e10a5a7
RS
12416 case E_QImode: gen = gen_aarch64_atomic_casqi; break;
12417 case E_HImode: gen = gen_aarch64_atomic_cashi; break;
12418 case E_SImode: gen = gen_aarch64_atomic_cassi; break;
12419 case E_DImode: gen = gen_aarch64_atomic_casdi; break;
b0770c0f
MW
12420 default:
12421 gcc_unreachable ();
12422 }
12423
12424 /* Move the expected value into the CAS destination register. */
12425 emit_insn (gen_rtx_SET (rval, expected));
12426
12427 /* Emit the CAS. */
12428 emit_insn (gen (rval, mem, desired, model));
12429
12430 /* Compare the expected value with the value loaded by the CAS, to establish
12431 whether the swap was made. */
12432 aarch64_gen_compare_reg (EQ, rval, expected);
12433}
12434
0462169c
SN
12435/* Split a compare and swap pattern. */
12436
12437void
12438aarch64_split_compare_and_swap (rtx operands[])
12439{
12440 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 12441 machine_mode mode;
0462169c 12442 bool is_weak;
5d8a22a5
DM
12443 rtx_code_label *label1, *label2;
12444 rtx x, cond;
ab876106
MW
12445 enum memmodel model;
12446 rtx model_rtx;
0462169c
SN
12447
12448 rval = operands[0];
12449 mem = operands[1];
12450 oldval = operands[2];
12451 newval = operands[3];
12452 is_weak = (operands[4] != const0_rtx);
ab876106 12453 model_rtx = operands[5];
0462169c
SN
12454 scratch = operands[7];
12455 mode = GET_MODE (mem);
ab876106 12456 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 12457
17f47f86
KT
12458 /* When OLDVAL is zero and we want the strong version we can emit a tighter
12459 loop:
12460 .label1:
12461 LD[A]XR rval, [mem]
12462 CBNZ rval, .label2
12463 ST[L]XR scratch, newval, [mem]
12464 CBNZ scratch, .label1
12465 .label2:
12466 CMP rval, 0. */
12467 bool strong_zero_p = !is_weak && oldval == const0_rtx;
12468
5d8a22a5 12469 label1 = NULL;
0462169c
SN
12470 if (!is_weak)
12471 {
12472 label1 = gen_label_rtx ();
12473 emit_label (label1);
12474 }
12475 label2 = gen_label_rtx ();
12476
ab876106
MW
12477 /* The initial load can be relaxed for a __sync operation since a final
12478 barrier will be emitted to stop code hoisting. */
12479 if (is_mm_sync (model))
12480 aarch64_emit_load_exclusive (mode, rval, mem,
12481 GEN_INT (MEMMODEL_RELAXED));
12482 else
12483 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c 12484
17f47f86
KT
12485 if (strong_zero_p)
12486 {
12487 x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
12488 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12489 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
12490 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
12491 }
12492 else
12493 {
12494 cond = aarch64_gen_compare_reg (NE, rval, oldval);
12495 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
12496 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12497 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
12498 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
12499 }
0462169c 12500
ab876106 12501 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
12502
12503 if (!is_weak)
12504 {
12505 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
12506 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12507 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 12508 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
12509 }
12510 else
12511 {
12512 cond = gen_rtx_REG (CCmode, CC_REGNUM);
12513 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
f7df4a84 12514 emit_insn (gen_rtx_SET (cond, x));
0462169c
SN
12515 }
12516
12517 emit_label (label2);
17f47f86
KT
12518 /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
12519 to set the condition flags. If this is not used it will be removed by
12520 later passes. */
12521 if (strong_zero_p)
12522 {
12523 cond = gen_rtx_REG (CCmode, CC_REGNUM);
12524 x = gen_rtx_COMPARE (CCmode, rval, const0_rtx);
12525 emit_insn (gen_rtx_SET (cond, x));
12526 }
ab876106
MW
12527 /* Emit any final barrier needed for a __sync operation. */
12528 if (is_mm_sync (model))
12529 aarch64_emit_post_barrier (model);
0462169c
SN
12530}
12531
68729b06
MW
12532/* Emit a BIC instruction. */
12533
12534static void
12535aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
12536{
12537 rtx shift_rtx = GEN_INT (shift);
12538 rtx (*gen) (rtx, rtx, rtx, rtx);
12539
12540 switch (mode)
12541 {
4e10a5a7
RS
12542 case E_SImode: gen = gen_and_one_cmpl_lshrsi3; break;
12543 case E_DImode: gen = gen_and_one_cmpl_lshrdi3; break;
68729b06
MW
12544 default:
12545 gcc_unreachable ();
12546 }
12547
12548 emit_insn (gen (dst, s2, shift_rtx, s1));
12549}
12550
9cd7b720
MW
12551/* Emit an atomic swap. */
12552
12553static void
12554aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
12555 rtx mem, rtx model)
12556{
12557 rtx (*gen) (rtx, rtx, rtx, rtx);
12558
12559 switch (mode)
12560 {
4e10a5a7
RS
12561 case E_QImode: gen = gen_aarch64_atomic_swpqi; break;
12562 case E_HImode: gen = gen_aarch64_atomic_swphi; break;
12563 case E_SImode: gen = gen_aarch64_atomic_swpsi; break;
12564 case E_DImode: gen = gen_aarch64_atomic_swpdi; break;
9cd7b720
MW
12565 default:
12566 gcc_unreachable ();
12567 }
12568
12569 emit_insn (gen (dst, mem, value, model));
12570}
12571
641c2f8b
MW
12572/* Operations supported by aarch64_emit_atomic_load_op. */
12573
12574enum aarch64_atomic_load_op_code
12575{
12576 AARCH64_LDOP_PLUS, /* A + B */
12577 AARCH64_LDOP_XOR, /* A ^ B */
12578 AARCH64_LDOP_OR, /* A | B */
12579 AARCH64_LDOP_BIC /* A & ~B */
12580};
12581
12582/* Emit an atomic load-operate. */
12583
12584static void
12585aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
12586 machine_mode mode, rtx dst, rtx src,
12587 rtx mem, rtx model)
12588{
12589 typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
12590 const aarch64_atomic_load_op_fn plus[] =
12591 {
12592 gen_aarch64_atomic_loadaddqi,
12593 gen_aarch64_atomic_loadaddhi,
12594 gen_aarch64_atomic_loadaddsi,
12595 gen_aarch64_atomic_loadadddi
12596 };
12597 const aarch64_atomic_load_op_fn eor[] =
12598 {
12599 gen_aarch64_atomic_loadeorqi,
12600 gen_aarch64_atomic_loadeorhi,
12601 gen_aarch64_atomic_loadeorsi,
12602 gen_aarch64_atomic_loadeordi
12603 };
12604 const aarch64_atomic_load_op_fn ior[] =
12605 {
12606 gen_aarch64_atomic_loadsetqi,
12607 gen_aarch64_atomic_loadsethi,
12608 gen_aarch64_atomic_loadsetsi,
12609 gen_aarch64_atomic_loadsetdi
12610 };
12611 const aarch64_atomic_load_op_fn bic[] =
12612 {
12613 gen_aarch64_atomic_loadclrqi,
12614 gen_aarch64_atomic_loadclrhi,
12615 gen_aarch64_atomic_loadclrsi,
12616 gen_aarch64_atomic_loadclrdi
12617 };
12618 aarch64_atomic_load_op_fn gen;
12619 int idx = 0;
12620
12621 switch (mode)
12622 {
4e10a5a7
RS
12623 case E_QImode: idx = 0; break;
12624 case E_HImode: idx = 1; break;
12625 case E_SImode: idx = 2; break;
12626 case E_DImode: idx = 3; break;
641c2f8b
MW
12627 default:
12628 gcc_unreachable ();
12629 }
12630
12631 switch (code)
12632 {
12633 case AARCH64_LDOP_PLUS: gen = plus[idx]; break;
12634 case AARCH64_LDOP_XOR: gen = eor[idx]; break;
12635 case AARCH64_LDOP_OR: gen = ior[idx]; break;
12636 case AARCH64_LDOP_BIC: gen = bic[idx]; break;
12637 default:
12638 gcc_unreachable ();
12639 }
12640
12641 emit_insn (gen (dst, mem, src, model));
12642}
12643
12644/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
68729b06
MW
12645 location to store the data read from memory. OUT_RESULT is the location to
12646 store the result of the operation. MEM is the memory location to read and
12647 modify. MODEL_RTX is the memory ordering to use. VALUE is the second
12648 operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
12649 be NULL. */
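/* For example (an assumed reading of the code below, not original text): an
   atomic fetch-and-sub is handled by negating VALUE and emitting the LDADD
   form, and an atomic fetch-and-and by inverting VALUE and emitting the
   LDCLR (bit-clear) form of the LSE load-operate instructions.  */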
9cd7b720
MW
12650
12651void
68729b06 12652aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
9cd7b720
MW
12653 rtx mem, rtx value, rtx model_rtx)
12654{
12655 machine_mode mode = GET_MODE (mem);
641c2f8b
MW
12656 machine_mode wmode = (mode == DImode ? DImode : SImode);
12657 const bool short_mode = (mode < SImode);
12658 aarch64_atomic_load_op_code ldop_code;
12659 rtx src;
12660 rtx x;
12661
12662 if (out_data)
12663 out_data = gen_lowpart (mode, out_data);
9cd7b720 12664
68729b06
MW
12665 if (out_result)
12666 out_result = gen_lowpart (mode, out_result);
12667
641c2f8b
MW
12668 /* Make sure the value is in a register, putting it into a destination
12669 register if it needs to be manipulated. */
12670 if (!register_operand (value, mode)
12671 || code == AND || code == MINUS)
12672 {
68729b06 12673 src = out_result ? out_result : out_data;
641c2f8b
MW
12674 emit_move_insn (src, gen_lowpart (mode, value));
12675 }
12676 else
12677 src = value;
12678 gcc_assert (register_operand (src, mode));
9cd7b720 12679
641c2f8b
MW
12680 /* Preprocess the data for the operation as necessary. If the operation is
12681 a SET then emit a swap instruction and finish. */
9cd7b720
MW
12682 switch (code)
12683 {
12684 case SET:
641c2f8b 12685 aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
9cd7b720
MW
12686 return;
12687
641c2f8b
MW
12688 case MINUS:
12689 /* Negate the value and treat it as a PLUS. */
12690 {
12691 rtx neg_src;
12692
12693 /* Resize the value if necessary. */
12694 if (short_mode)
12695 src = gen_lowpart (wmode, src);
12696
12697 neg_src = gen_rtx_NEG (wmode, src);
12698 emit_insn (gen_rtx_SET (src, neg_src));
12699
12700 if (short_mode)
12701 src = gen_lowpart (mode, src);
12702 }
12703 /* Fall-through. */
12704 case PLUS:
12705 ldop_code = AARCH64_LDOP_PLUS;
12706 break;
12707
12708 case IOR:
12709 ldop_code = AARCH64_LDOP_OR;
12710 break;
12711
12712 case XOR:
12713 ldop_code = AARCH64_LDOP_XOR;
12714 break;
12715
12716 case AND:
12717 {
12718 rtx not_src;
12719
12720 /* Resize the value if necessary. */
12721 if (short_mode)
12722 src = gen_lowpart (wmode, src);
12723
12724 not_src = gen_rtx_NOT (wmode, src);
12725 emit_insn (gen_rtx_SET (src, not_src));
12726
12727 if (short_mode)
12728 src = gen_lowpart (mode, src);
12729 }
12730 ldop_code = AARCH64_LDOP_BIC;
12731 break;
12732
9cd7b720
MW
12733 default:
12734 /* The operation can't be done with atomic instructions. */
12735 gcc_unreachable ();
12736 }
641c2f8b
MW
12737
12738 aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
68729b06
MW
12739
12740 /* If necessary, calculate the data in memory after the update by redoing the
12741 operation from values in registers. */
12742 if (!out_result)
12743 return;
12744
12745 if (short_mode)
12746 {
12747 src = gen_lowpart (wmode, src);
12748 out_data = gen_lowpart (wmode, out_data);
12749 out_result = gen_lowpart (wmode, out_result);
12750 }
12751
12752 x = NULL_RTX;
12753
12754 switch (code)
12755 {
12756 case MINUS:
12757 case PLUS:
12758 x = gen_rtx_PLUS (wmode, out_data, src);
12759 break;
12760 case IOR:
12761 x = gen_rtx_IOR (wmode, out_data, src);
12762 break;
12763 case XOR:
12764 x = gen_rtx_XOR (wmode, out_data, src);
12765 break;
12766 case AND:
12767 aarch64_emit_bic (wmode, out_result, out_data, src, 0);
12768 return;
12769 default:
12770 gcc_unreachable ();
12771 }
12772
12773 emit_set_insn (out_result, x);
12774
12775 return;
9cd7b720
MW
12776}
12777
0462169c
SN
12778/* Split an atomic operation. */
12779
12780void
12781aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9cd7b720 12782 rtx value, rtx model_rtx, rtx cond)
0462169c 12783{
ef4bddc2
RS
12784 machine_mode mode = GET_MODE (mem);
12785 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
12786 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
12787 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
12788 rtx_code_label *label;
12789 rtx x;
0462169c 12790
9cd7b720 12791 /* Split the atomic operation into a sequence. */
0462169c
SN
12792 label = gen_label_rtx ();
12793 emit_label (label);
12794
12795 if (new_out)
12796 new_out = gen_lowpart (wmode, new_out);
12797 if (old_out)
12798 old_out = gen_lowpart (wmode, old_out);
12799 else
12800 old_out = new_out;
12801 value = simplify_gen_subreg (wmode, value, mode, 0);
12802
f70fb3b6
MW
12803 /* The initial load can be relaxed for a __sync operation since a final
12804 barrier will be emitted to stop code hoisting. */
12805 if (is_sync)
12806 aarch64_emit_load_exclusive (mode, old_out, mem,
12807 GEN_INT (MEMMODEL_RELAXED));
12808 else
12809 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
12810
12811 switch (code)
12812 {
12813 case SET:
12814 new_out = value;
12815 break;
12816
12817 case NOT:
12818 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 12819 emit_insn (gen_rtx_SET (new_out, x));
0462169c 12820 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 12821 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
12822 break;
12823
12824 case MINUS:
12825 if (CONST_INT_P (value))
12826 {
12827 value = GEN_INT (-INTVAL (value));
12828 code = PLUS;
12829 }
12830 /* Fall through. */
12831
12832 default:
12833 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 12834 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
12835 break;
12836 }
12837
12838 aarch64_emit_store_exclusive (mode, cond, mem,
12839 gen_lowpart (mode, new_out), model_rtx);
12840
12841 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
12842 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12843 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 12844 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
12845
12846 /* Emit any final barrier needed for a __sync operation. */
12847 if (is_sync)
12848 aarch64_emit_post_barrier (model);
0462169c
SN
12849}
12850
c2ec330c
AL
12851static void
12852aarch64_init_libfuncs (void)
12853{
12854 /* Half-precision float operations. The compiler handles all operations
12855 with NULL libfuncs by converting to SFmode. */
12856
12857 /* Conversions. */
12858 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
12859 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
12860
12861 /* Arithmetic. */
12862 set_optab_libfunc (add_optab, HFmode, NULL);
12863 set_optab_libfunc (sdiv_optab, HFmode, NULL);
12864 set_optab_libfunc (smul_optab, HFmode, NULL);
12865 set_optab_libfunc (neg_optab, HFmode, NULL);
12866 set_optab_libfunc (sub_optab, HFmode, NULL);
12867
12868 /* Comparisons. */
12869 set_optab_libfunc (eq_optab, HFmode, NULL);
12870 set_optab_libfunc (ne_optab, HFmode, NULL);
12871 set_optab_libfunc (lt_optab, HFmode, NULL);
12872 set_optab_libfunc (le_optab, HFmode, NULL);
12873 set_optab_libfunc (ge_optab, HFmode, NULL);
12874 set_optab_libfunc (gt_optab, HFmode, NULL);
12875 set_optab_libfunc (unord_optab, HFmode, NULL);
12876}
12877
43e9d192 12878/* Target hook for c_mode_for_suffix. */
ef4bddc2 12879static machine_mode
43e9d192
IB
12880aarch64_c_mode_for_suffix (char suffix)
12881{
12882 if (suffix == 'q')
12883 return TFmode;
12884
12885 return VOIDmode;
12886}
12887
3520f7cc
JG
12888/* We can only represent floating point constants which will fit in
12889 "quarter-precision" values. These values are characterised by
12890 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
12891 by:
12892
12893 (-1)^s * (n/16) * 2^r
12894
12895 Where:
12896 's' is the sign bit.
12897 'n' is an integer in the range 16 <= n <= 31.
12898 'r' is an integer in the range -3 <= r <= 4. */
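/* Some illustrative values (added for clarity, not exhaustive): 1.0 is
   (n, r) = (16, 0); the smallest positive value is 16/16 * 2^-3 == 0.125 and
   the largest is 31/16 * 2^4 == 31.0, so constants such as 0.1 or 33.0 are
   rejected below.  */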
12899
12900/* Return true iff X can be represented by a quarter-precision
12901 floating point immediate operand. Note, we cannot represent 0.0. */
12902bool
12903aarch64_float_const_representable_p (rtx x)
12904{
12905 /* This represents our current view of how many bits
12906 make up the mantissa. */
12907 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 12908 int exponent;
3520f7cc 12909 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 12910 REAL_VALUE_TYPE r, m;
807e902e 12911 bool fail;
3520f7cc
JG
12912
12913 if (!CONST_DOUBLE_P (x))
12914 return false;
12915
c2ec330c
AL
12916 /* We don't support HFmode constants yet. */
12917 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
94bfa2da
TV
12918 return false;
12919
34a72c33 12920 r = *CONST_DOUBLE_REAL_VALUE (x);
3520f7cc
JG
12921
12922 /* We cannot represent infinities, NaNs or +/-zero. We won't
12923 know if we have +zero until we analyse the mantissa, but we
12924 can reject the other invalid values. */
12925 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
12926 || REAL_VALUE_MINUS_ZERO (r))
12927 return false;
12928
ba96cdfb 12929 /* Extract exponent. */
3520f7cc
JG
12930 r = real_value_abs (&r);
12931 exponent = REAL_EXP (&r);
12932
12933 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12934 highest (sign) bit, with a fixed binary point at bit point_pos.
12935 m1 holds the low part of the mantissa, m2 the high part.
12936 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
12937 bits for the mantissa, this can fail (low bits will be lost). */
12938 real_ldexp (&m, &r, point_pos - exponent);
807e902e 12939 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
12940
12941 /* If the low part of the mantissa has bits set we cannot represent
12942 the value. */
d9074b29 12943 if (w.ulow () != 0)
3520f7cc
JG
12944 return false;
12945 /* We have rejected the lower HOST_WIDE_INT, so update our
12946 understanding of how many bits lie in the mantissa and
12947 look only at the high HOST_WIDE_INT. */
807e902e 12948 mantissa = w.elt (1);
3520f7cc
JG
12949 point_pos -= HOST_BITS_PER_WIDE_INT;
12950
12951 /* We can only represent values with a mantissa of the form 1.xxxx. */
12952 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12953 if ((mantissa & mask) != 0)
12954 return false;
12955
12956 /* Having filtered unrepresentable values, we may now remove all
12957 but the highest 5 bits. */
12958 mantissa >>= point_pos - 5;
12959
12960 /* We cannot represent the value 0.0, so reject it. This is handled
12961 elsewhere. */
12962 if (mantissa == 0)
12963 return false;
12964
12965 /* Then, as bit 4 is always set, we can mask it off, leaving
12966 the mantissa in the range [0, 15]. */
12967 mantissa &= ~(1 << 4);
12968 gcc_assert (mantissa <= 15);
12969
12970 /* GCC internally does not use IEEE754-like encoding (where normalized
12971 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
12972 Our mantissa values are shifted 4 places to the left relative to
12973 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
12974 by 5 places to correct for GCC's representation. */
12975 exponent = 5 - exponent;
12976
12977 return (exponent >= 0 && exponent <= 7);
12978}
12979
12980char*
81c2dfb9 12981aarch64_output_simd_mov_immediate (rtx const_vector,
ef4bddc2 12982 machine_mode mode,
3520f7cc
JG
12983 unsigned width)
12984{
3ea63f60 12985 bool is_valid;
3520f7cc 12986 static char templ[40];
3520f7cc 12987 const char *mnemonic;
e4f0f84d 12988 const char *shift_op;
3520f7cc 12989 unsigned int lane_count = 0;
81c2dfb9 12990 char element_char;
3520f7cc 12991
e4f0f84d 12992 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
12993
12994 /* This will return true to show const_vector is legal for use as either
12995 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
12996 also update INFO to show how the immediate should be generated. */
81c2dfb9 12997 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
12998 gcc_assert (is_valid);
12999
81c2dfb9 13000 element_char = sizetochar (info.element_width);
48063b9d
IB
13001 lane_count = width / info.element_width;
13002
3520f7cc 13003 mode = GET_MODE_INNER (mode);
0d8e1702 13004 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
3520f7cc 13005 {
48063b9d 13006 gcc_assert (info.shift == 0 && ! info.mvn);
0d8e1702
KT
13007 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
13008 move immediate path. */
48063b9d
IB
13009 if (aarch64_float_const_zero_rtx_p (info.value))
13010 info.value = GEN_INT (0);
13011 else
13012 {
83faf7d0 13013 const unsigned int buf_size = 20;
48063b9d 13014 char float_buf[buf_size] = {'\0'};
34a72c33
RS
13015 real_to_decimal_for_mode (float_buf,
13016 CONST_DOUBLE_REAL_VALUE (info.value),
13017 buf_size, buf_size, 1, mode);
48063b9d
IB
13018
13019 if (lane_count == 1)
13020 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
13021 else
13022 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 13023 lane_count, element_char, float_buf);
48063b9d
IB
13024 return templ;
13025 }
3520f7cc 13026 }
3520f7cc 13027
48063b9d 13028 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 13029 shift_op = info.msl ? "msl" : "lsl";
3520f7cc 13030
0d8e1702 13031 gcc_assert (CONST_INT_P (info.value));
3520f7cc 13032 if (lane_count == 1)
48063b9d
IB
13033 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
13034 mnemonic, UINTVAL (info.value));
13035 else if (info.shift)
13036 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
13037 ", %s %d", mnemonic, lane_count, element_char,
13038 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 13039 else
48063b9d 13040 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 13041 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
13042 return templ;
13043}
13044
b7342d25 13045char*
a2170965 13046aarch64_output_scalar_simd_mov_immediate (rtx immediate, machine_mode mode)
b7342d25 13047{
a2170965
TC
13048
13049 /* If a floating point number was passed and we desire to use it in an
13050 integer mode, do the conversion to integer. */
13051 if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
13052 {
13053 unsigned HOST_WIDE_INT ival;
13054 if (!aarch64_reinterpret_float_as_int (immediate, &ival))
13055 gcc_unreachable ();
13056 immediate = gen_int_mode (ival, mode);
13057 }
13058
ef4bddc2 13059 machine_mode vmode;
a2170965
TC
13060 /* Use a 64-bit mode for everything except DI/DF mode, where we use
13061 a 128-bit vector mode. */
13062 int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
b7342d25
IB
13063
13064 gcc_assert (!VECTOR_MODE_P (mode));
a2170965 13065 vmode = aarch64_simd_container_mode (mode, width);
b7342d25 13066 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
a2170965 13067 return aarch64_output_simd_mov_immediate (v_op, vmode, width);
b7342d25
IB
13068}
13069
88b08073
JG
13070/* Split operands into moves from op[1] + op[2] into op[0]. */
13071
13072void
13073aarch64_split_combinev16qi (rtx operands[3])
13074{
13075 unsigned int dest = REGNO (operands[0]);
13076 unsigned int src1 = REGNO (operands[1]);
13077 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 13078 machine_mode halfmode = GET_MODE (operands[1]);
88b08073
JG
13079 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
13080 rtx destlo, desthi;
13081
13082 gcc_assert (halfmode == V16QImode);
13083
13084 if (src1 == dest && src2 == dest + halfregs)
13085 {
13086 /* No-op move. Can't split to nothing; emit something. */
13087 emit_note (NOTE_INSN_DELETED);
13088 return;
13089 }
13090
13091 /* Preserve register attributes for variable tracking. */
13092 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
13093 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
13094 GET_MODE_SIZE (halfmode));
13095
13096 /* Special case of reversed high/low parts. */
13097 if (reg_overlap_mentioned_p (operands[2], destlo)
13098 && reg_overlap_mentioned_p (operands[1], desthi))
13099 {
13100 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
13101 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
13102 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
13103 }
13104 else if (!reg_overlap_mentioned_p (operands[2], destlo))
13105 {
13106 /* Try to avoid unnecessary moves if part of the result
13107 is in the right place already. */
13108 if (src1 != dest)
13109 emit_move_insn (destlo, operands[1]);
13110 if (src2 != dest + halfregs)
13111 emit_move_insn (desthi, operands[2]);
13112 }
13113 else
13114 {
13115 if (src2 != dest + halfregs)
13116 emit_move_insn (desthi, operands[2]);
13117 if (src1 != dest)
13118 emit_move_insn (destlo, operands[1]);
13119 }
13120}
13121
13122/* vec_perm support. */
13123
13124#define MAX_VECT_LEN 16
13125
13126struct expand_vec_perm_d
13127{
13128 rtx target, op0, op1;
13129 unsigned char perm[MAX_VECT_LEN];
ef4bddc2 13130 machine_mode vmode;
88b08073
JG
13131 unsigned char nelt;
13132 bool one_vector_p;
13133 bool testing_p;
13134};
13135
13136/* Generate a variable permutation. */
13137
13138static void
13139aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
13140{
ef4bddc2 13141 machine_mode vmode = GET_MODE (target);
88b08073
JG
13142 bool one_vector_p = rtx_equal_p (op0, op1);
13143
13144 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
13145 gcc_checking_assert (GET_MODE (op0) == vmode);
13146 gcc_checking_assert (GET_MODE (op1) == vmode);
13147 gcc_checking_assert (GET_MODE (sel) == vmode);
13148 gcc_checking_assert (TARGET_SIMD);
13149
13150 if (one_vector_p)
13151 {
13152 if (vmode == V8QImode)
13153 {
13154 /* Expand the argument to a V16QI mode by duplicating it. */
13155 rtx pair = gen_reg_rtx (V16QImode);
13156 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
13157 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
13158 }
13159 else
13160 {
13161 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
13162 }
13163 }
13164 else
13165 {
13166 rtx pair;
13167
13168 if (vmode == V8QImode)
13169 {
13170 pair = gen_reg_rtx (V16QImode);
13171 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
13172 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
13173 }
13174 else
13175 {
13176 pair = gen_reg_rtx (OImode);
13177 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
13178 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
13179 }
13180 }
13181}
13182
13183void
13184aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
13185{
ef4bddc2 13186 machine_mode vmode = GET_MODE (target);
c9d1a16a 13187 unsigned int nelt = GET_MODE_NUNITS (vmode);
88b08073 13188 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 13189 rtx mask;
88b08073
JG
13190
13191 /* The TBL instruction does not use a modulo index, so we must take care
13192 of that ourselves. */
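  /* For instance (illustrative only), with a single V16QImode input an index
     value of 18 must behave as 18 & 15 == 2, which is exactly what the AND
     with the (nelt - 1) mask below implements.  */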
f7c4e5b8
AL
13193 mask = aarch64_simd_gen_const_vector_dup (vmode,
13194 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
13195 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
13196
f7c4e5b8
AL
13197 /* For big-endian, we also need to reverse the index within the vector
13198 (but not which vector). */
13199 if (BYTES_BIG_ENDIAN)
13200 {
13201 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
13202 if (!one_vector_p)
13203 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
13204 sel = expand_simple_binop (vmode, XOR, sel, mask,
13205 NULL, 0, OPTAB_LIB_WIDEN);
13206 }
88b08073
JG
13207 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
13208}
13209
cc4d934f
JG
13210/* Recognize patterns suitable for the TRN instructions. */
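/* For example (illustrative, assuming V4SImode): the permutation
   { 0, 4, 2, 6 } is matched below as TRN1 and { 1, 5, 3, 7 } as TRN2.  */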
13211static bool
13212aarch64_evpc_trn (struct expand_vec_perm_d *d)
13213{
13214 unsigned int i, odd, mask, nelt = d->nelt;
13215 rtx out, in0, in1, x;
13216 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 13217 machine_mode vmode = d->vmode;
cc4d934f
JG
13218
13219 if (GET_MODE_UNIT_SIZE (vmode) > 8)
13220 return false;
13221
13222 /* Note that these are little-endian tests.
13223 We correct for big-endian later. */
13224 if (d->perm[0] == 0)
13225 odd = 0;
13226 else if (d->perm[0] == 1)
13227 odd = 1;
13228 else
13229 return false;
13230 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
13231
13232 for (i = 0; i < nelt; i += 2)
13233 {
13234 if (d->perm[i] != i + odd)
13235 return false;
13236 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
13237 return false;
13238 }
13239
13240 /* Success! */
13241 if (d->testing_p)
13242 return true;
13243
13244 in0 = d->op0;
13245 in1 = d->op1;
13246 if (BYTES_BIG_ENDIAN)
13247 {
13248 x = in0, in0 = in1, in1 = x;
13249 odd = !odd;
13250 }
13251 out = d->target;
13252
13253 if (odd)
13254 {
13255 switch (vmode)
13256 {
4e10a5a7
RS
13257 case E_V16QImode: gen = gen_aarch64_trn2v16qi; break;
13258 case E_V8QImode: gen = gen_aarch64_trn2v8qi; break;
13259 case E_V8HImode: gen = gen_aarch64_trn2v8hi; break;
13260 case E_V4HImode: gen = gen_aarch64_trn2v4hi; break;
13261 case E_V4SImode: gen = gen_aarch64_trn2v4si; break;
13262 case E_V2SImode: gen = gen_aarch64_trn2v2si; break;
13263 case E_V2DImode: gen = gen_aarch64_trn2v2di; break;
13264 case E_V4HFmode: gen = gen_aarch64_trn2v4hf; break;
13265 case E_V8HFmode: gen = gen_aarch64_trn2v8hf; break;
13266 case E_V4SFmode: gen = gen_aarch64_trn2v4sf; break;
13267 case E_V2SFmode: gen = gen_aarch64_trn2v2sf; break;
13268 case E_V2DFmode: gen = gen_aarch64_trn2v2df; break;
cc4d934f
JG
13269 default:
13270 return false;
13271 }
13272 }
13273 else
13274 {
13275 switch (vmode)
13276 {
4e10a5a7
RS
13277 case E_V16QImode: gen = gen_aarch64_trn1v16qi; break;
13278 case E_V8QImode: gen = gen_aarch64_trn1v8qi; break;
13279 case E_V8HImode: gen = gen_aarch64_trn1v8hi; break;
13280 case E_V4HImode: gen = gen_aarch64_trn1v4hi; break;
13281 case E_V4SImode: gen = gen_aarch64_trn1v4si; break;
13282 case E_V2SImode: gen = gen_aarch64_trn1v2si; break;
13283 case E_V2DImode: gen = gen_aarch64_trn1v2di; break;
13284 case E_V4HFmode: gen = gen_aarch64_trn1v4hf; break;
13285 case E_V8HFmode: gen = gen_aarch64_trn1v8hf; break;
13286 case E_V4SFmode: gen = gen_aarch64_trn1v4sf; break;
13287 case E_V2SFmode: gen = gen_aarch64_trn1v2sf; break;
13288 case E_V2DFmode: gen = gen_aarch64_trn1v2df; break;
cc4d934f
JG
13289 default:
13290 return false;
13291 }
13292 }
13293
13294 emit_insn (gen (out, in0, in1));
13295 return true;
13296}
13297
13298/* Recognize patterns suitable for the UZP instructions. */
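/* For example (illustrative, assuming V4SImode): { 0, 2, 4, 6 } selects the
   even elements of the concatenated inputs and is matched as UZP1, while
   { 1, 3, 5, 7 } is matched as UZP2.  */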
13299static bool
13300aarch64_evpc_uzp (struct expand_vec_perm_d *d)
13301{
13302 unsigned int i, odd, mask, nelt = d->nelt;
13303 rtx out, in0, in1, x;
13304 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 13305 machine_mode vmode = d->vmode;
cc4d934f
JG
13306
13307 if (GET_MODE_UNIT_SIZE (vmode) > 8)
13308 return false;
13309
13310 /* Note that these are little-endian tests.
13311 We correct for big-endian later. */
13312 if (d->perm[0] == 0)
13313 odd = 0;
13314 else if (d->perm[0] == 1)
13315 odd = 1;
13316 else
13317 return false;
13318 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
13319
13320 for (i = 0; i < nelt; i++)
13321 {
13322 unsigned elt = (i * 2 + odd) & mask;
13323 if (d->perm[i] != elt)
13324 return false;
13325 }
13326
13327 /* Success! */
13328 if (d->testing_p)
13329 return true;
13330
13331 in0 = d->op0;
13332 in1 = d->op1;
13333 if (BYTES_BIG_ENDIAN)
13334 {
13335 x = in0, in0 = in1, in1 = x;
13336 odd = !odd;
13337 }
13338 out = d->target;
13339
13340 if (odd)
13341 {
13342 switch (vmode)
13343 {
4e10a5a7
RS
13344 case E_V16QImode: gen = gen_aarch64_uzp2v16qi; break;
13345 case E_V8QImode: gen = gen_aarch64_uzp2v8qi; break;
13346 case E_V8HImode: gen = gen_aarch64_uzp2v8hi; break;
13347 case E_V4HImode: gen = gen_aarch64_uzp2v4hi; break;
13348 case E_V4SImode: gen = gen_aarch64_uzp2v4si; break;
13349 case E_V2SImode: gen = gen_aarch64_uzp2v2si; break;
13350 case E_V2DImode: gen = gen_aarch64_uzp2v2di; break;
13351 case E_V4HFmode: gen = gen_aarch64_uzp2v4hf; break;
13352 case E_V8HFmode: gen = gen_aarch64_uzp2v8hf; break;
13353 case E_V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
13354 case E_V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
13355 case E_V2DFmode: gen = gen_aarch64_uzp2v2df; break;
cc4d934f
JG
13356 default:
13357 return false;
13358 }
13359 }
13360 else
13361 {
13362 switch (vmode)
13363 {
4e10a5a7
RS
13364 case E_V16QImode: gen = gen_aarch64_uzp1v16qi; break;
13365 case E_V8QImode: gen = gen_aarch64_uzp1v8qi; break;
13366 case E_V8HImode: gen = gen_aarch64_uzp1v8hi; break;
13367 case E_V4HImode: gen = gen_aarch64_uzp1v4hi; break;
13368 case E_V4SImode: gen = gen_aarch64_uzp1v4si; break;
13369 case E_V2SImode: gen = gen_aarch64_uzp1v2si; break;
13370 case E_V2DImode: gen = gen_aarch64_uzp1v2di; break;
13371 case E_V4HFmode: gen = gen_aarch64_uzp1v4hf; break;
13372 case E_V8HFmode: gen = gen_aarch64_uzp1v8hf; break;
13373 case E_V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
13374 case E_V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
13375 case E_V2DFmode: gen = gen_aarch64_uzp1v2df; break;
cc4d934f
JG
13376 default:
13377 return false;
13378 }
13379 }
13380
13381 emit_insn (gen (out, in0, in1));
13382 return true;
13383}
13384
13385/* Recognize patterns suitable for the ZIP instructions. */
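/* For example (illustrative, assuming V4SImode): { 0, 4, 1, 5 } interleaves
   the low halves of the inputs and is matched as ZIP1, while { 2, 6, 3, 7 }
   interleaves the high halves and is matched as ZIP2.  */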
13386static bool
13387aarch64_evpc_zip (struct expand_vec_perm_d *d)
13388{
13389 unsigned int i, high, mask, nelt = d->nelt;
13390 rtx out, in0, in1, x;
13391 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 13392 machine_mode vmode = d->vmode;
cc4d934f
JG
13393
13394 if (GET_MODE_UNIT_SIZE (vmode) > 8)
13395 return false;
13396
13397 /* Note that these are little-endian tests.
13398 We correct for big-endian later. */
13399 high = nelt / 2;
13400 if (d->perm[0] == high)
13401 /* Do Nothing. */
13402 ;
13403 else if (d->perm[0] == 0)
13404 high = 0;
13405 else
13406 return false;
13407 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
13408
13409 for (i = 0; i < nelt / 2; i++)
13410 {
13411 unsigned elt = (i + high) & mask;
13412 if (d->perm[i * 2] != elt)
13413 return false;
13414 elt = (elt + nelt) & mask;
13415 if (d->perm[i * 2 + 1] != elt)
13416 return false;
13417 }
13418
13419 /* Success! */
13420 if (d->testing_p)
13421 return true;
13422
13423 in0 = d->op0;
13424 in1 = d->op1;
13425 if (BYTES_BIG_ENDIAN)
13426 {
13427 x = in0, in0 = in1, in1 = x;
13428 high = !high;
13429 }
13430 out = d->target;
13431
13432 if (high)
13433 {
13434 switch (vmode)
13435 {
4e10a5a7
RS
13436 case E_V16QImode: gen = gen_aarch64_zip2v16qi; break;
13437 case E_V8QImode: gen = gen_aarch64_zip2v8qi; break;
13438 case E_V8HImode: gen = gen_aarch64_zip2v8hi; break;
13439 case E_V4HImode: gen = gen_aarch64_zip2v4hi; break;
13440 case E_V4SImode: gen = gen_aarch64_zip2v4si; break;
13441 case E_V2SImode: gen = gen_aarch64_zip2v2si; break;
13442 case E_V2DImode: gen = gen_aarch64_zip2v2di; break;
13443 case E_V4HFmode: gen = gen_aarch64_zip2v4hf; break;
13444 case E_V8HFmode: gen = gen_aarch64_zip2v8hf; break;
13445 case E_V4SFmode: gen = gen_aarch64_zip2v4sf; break;
13446 case E_V2SFmode: gen = gen_aarch64_zip2v2sf; break;
13447 case E_V2DFmode: gen = gen_aarch64_zip2v2df; break;
cc4d934f
JG
13448 default:
13449 return false;
13450 }
13451 }
13452 else
13453 {
13454 switch (vmode)
13455 {
4e10a5a7
RS
13456 case E_V16QImode: gen = gen_aarch64_zip1v16qi; break;
13457 case E_V8QImode: gen = gen_aarch64_zip1v8qi; break;
13458 case E_V8HImode: gen = gen_aarch64_zip1v8hi; break;
13459 case E_V4HImode: gen = gen_aarch64_zip1v4hi; break;
13460 case E_V4SImode: gen = gen_aarch64_zip1v4si; break;
13461 case E_V2SImode: gen = gen_aarch64_zip1v2si; break;
13462 case E_V2DImode: gen = gen_aarch64_zip1v2di; break;
13463 case E_V4HFmode: gen = gen_aarch64_zip1v4hf; break;
13464 case E_V8HFmode: gen = gen_aarch64_zip1v8hf; break;
13465 case E_V4SFmode: gen = gen_aarch64_zip1v4sf; break;
13466 case E_V2SFmode: gen = gen_aarch64_zip1v2sf; break;
13467 case E_V2DFmode: gen = gen_aarch64_zip1v2df; break;
cc4d934f
JG
13468 default:
13469 return false;
13470 }
13471 }
13472
13473 emit_insn (gen (out, in0, in1));
13474 return true;
13475}
13476
ae0533da
AL
13477/* Recognize patterns for the EXT insn. */
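/* For example (illustrative, assuming V4SImode with two inputs): the
   permutation { 1, 2, 3, 4 } is a single EXT of the two operands with an
   offset of one element.  */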
13478
13479static bool
13480aarch64_evpc_ext (struct expand_vec_perm_d *d)
13481{
13482 unsigned int i, nelt = d->nelt;
13483 rtx (*gen) (rtx, rtx, rtx, rtx);
13484 rtx offset;
13485
13486 unsigned int location = d->perm[0]; /* Always < nelt. */
13487
13488 /* Check if the extracted indices are increasing by one. */
13489 for (i = 1; i < nelt; i++)
13490 {
13491 unsigned int required = location + i;
13492 if (d->one_vector_p)
13493 {
13494 /* We'll pass the same vector in twice, so allow indices to wrap. */
13495 required &= (nelt - 1);
13496 }
13497 if (d->perm[i] != required)
13498 return false;
13499 }
13500
ae0533da
AL
13501 switch (d->vmode)
13502 {
4e10a5a7
RS
13503 case E_V16QImode: gen = gen_aarch64_extv16qi; break;
13504 case E_V8QImode: gen = gen_aarch64_extv8qi; break;
13505 case E_V4HImode: gen = gen_aarch64_extv4hi; break;
13506 case E_V8HImode: gen = gen_aarch64_extv8hi; break;
13507 case E_V2SImode: gen = gen_aarch64_extv2si; break;
13508 case E_V4SImode: gen = gen_aarch64_extv4si; break;
13509 case E_V4HFmode: gen = gen_aarch64_extv4hf; break;
13510 case E_V8HFmode: gen = gen_aarch64_extv8hf; break;
13511 case E_V2SFmode: gen = gen_aarch64_extv2sf; break;
13512 case E_V4SFmode: gen = gen_aarch64_extv4sf; break;
13513 case E_V2DImode: gen = gen_aarch64_extv2di; break;
13514 case E_V2DFmode: gen = gen_aarch64_extv2df; break;
ae0533da
AL
13515 default:
13516 return false;
13517 }
13518
13519 /* Success! */
13520 if (d->testing_p)
13521 return true;
13522
b31e65bb
AL
13523 /* The case where (location == 0) is a no-op for both big- and little-endian,
13524 and is removed by the mid-end at optimization levels -O1 and higher. */
13525
13526 if (BYTES_BIG_ENDIAN && (location != 0))
ae0533da
AL
13527 {
13528 /* After setup, we want the high elements of the first vector (stored
13529 at the LSB end of the register), and the low elements of the second
13530 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 13531 std::swap (d->op0, d->op1);
ae0533da
AL
13532 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
13533 location = nelt - location;
13534 }
13535
13536 offset = GEN_INT (location);
13537 emit_insn (gen (d->target, d->op0, d->op1, offset));
13538 return true;
13539}
13540
923fcec3
AL
13541/* Recognize patterns for the REV insns. */
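/* For example (illustrative, assuming V8QImode): { 7, 6, 5, 4, 3, 2, 1, 0 }
   (diff == 7) maps to REV64, { 3, 2, 1, 0, 7, 6, 5, 4 } (diff == 3) to REV32
   and { 1, 0, 3, 2, 5, 4, 7, 6 } (diff == 1) to REV16.  */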
13542
13543static bool
13544aarch64_evpc_rev (struct expand_vec_perm_d *d)
13545{
13546 unsigned int i, j, diff, nelt = d->nelt;
13547 rtx (*gen) (rtx, rtx);
13548
13549 if (!d->one_vector_p)
13550 return false;
13551
13552 diff = d->perm[0];
13553 switch (diff)
13554 {
13555 case 7:
13556 switch (d->vmode)
13557 {
4e10a5a7
RS
13558 case E_V16QImode: gen = gen_aarch64_rev64v16qi; break;
13559 case E_V8QImode: gen = gen_aarch64_rev64v8qi; break;
923fcec3
AL
13560 default:
13561 return false;
13562 }
13563 break;
13564 case 3:
13565 switch (d->vmode)
13566 {
4e10a5a7
RS
13567 case E_V16QImode: gen = gen_aarch64_rev32v16qi; break;
13568 case E_V8QImode: gen = gen_aarch64_rev32v8qi; break;
13569 case E_V8HImode: gen = gen_aarch64_rev64v8hi; break;
13570 case E_V4HImode: gen = gen_aarch64_rev64v4hi; break;
923fcec3
AL
13571 default:
13572 return false;
13573 }
13574 break;
13575 case 1:
13576 switch (d->vmode)
13577 {
4e10a5a7
RS
13578 case E_V16QImode: gen = gen_aarch64_rev16v16qi; break;
13579 case E_V8QImode: gen = gen_aarch64_rev16v8qi; break;
13580 case E_V8HImode: gen = gen_aarch64_rev32v8hi; break;
13581 case E_V4HImode: gen = gen_aarch64_rev32v4hi; break;
13582 case E_V4SImode: gen = gen_aarch64_rev64v4si; break;
13583 case E_V2SImode: gen = gen_aarch64_rev64v2si; break;
13584 case E_V4SFmode: gen = gen_aarch64_rev64v4sf; break;
13585 case E_V2SFmode: gen = gen_aarch64_rev64v2sf; break;
13586 case E_V8HFmode: gen = gen_aarch64_rev64v8hf; break;
13587 case E_V4HFmode: gen = gen_aarch64_rev64v4hf; break;
923fcec3
AL
13588 default:
13589 return false;
13590 }
13591 break;
13592 default:
13593 return false;
13594 }
13595
13596 for (i = 0; i < nelt ; i += diff + 1)
13597 for (j = 0; j <= diff; j += 1)
13598 {
13599 /* This is guaranteed to be true as the value of diff
13600 is 7, 3 or 1 and we should have enough elements in the
13601 queue to generate this. Getting a vector mask with a
13602 value of diff other than these values implies that
13603 something is wrong by the time we get here. */
13604 gcc_assert (i + j < nelt);
13605 if (d->perm[i + j] != i + diff - j)
13606 return false;
13607 }
13608
13609 /* Success! */
13610 if (d->testing_p)
13611 return true;
13612
13613 emit_insn (gen (d->target, d->op0));
13614 return true;
13615}
13616
91bd4114
JG
13617static bool
13618aarch64_evpc_dup (struct expand_vec_perm_d *d)
13619{
13620 rtx (*gen) (rtx, rtx, rtx);
13621 rtx out = d->target;
13622 rtx in0;
ef4bddc2 13623 machine_mode vmode = d->vmode;
91bd4114
JG
13624 unsigned int i, elt, nelt = d->nelt;
13625 rtx lane;
13626
91bd4114
JG
13627 elt = d->perm[0];
13628 for (i = 1; i < nelt; i++)
13629 {
13630 if (elt != d->perm[i])
13631 return false;
13632 }
13633
13634 /* The generic preparation in aarch64_expand_vec_perm_const_1
13635 swaps the operand order and the permute indices if it finds
13636 d->perm[0] to be in the second operand. Thus, we can always
13637 use d->op0 and need not do any extra arithmetic to get the
13638 correct lane number. */
13639 in0 = d->op0;
f901401e 13640 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114
JG
13641
13642 switch (vmode)
13643 {
4e10a5a7
RS
13644 case E_V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
13645 case E_V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
13646 case E_V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
13647 case E_V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
13648 case E_V4SImode: gen = gen_aarch64_dup_lanev4si; break;
13649 case E_V2SImode: gen = gen_aarch64_dup_lanev2si; break;
13650 case E_V2DImode: gen = gen_aarch64_dup_lanev2di; break;
13651 case E_V8HFmode: gen = gen_aarch64_dup_lanev8hf; break;
13652 case E_V4HFmode: gen = gen_aarch64_dup_lanev4hf; break;
13653 case E_V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
13654 case E_V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
13655 case E_V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
91bd4114
JG
13656 default:
13657 return false;
13658 }
13659
13660 emit_insn (gen (out, in0, lane));
13661 return true;
13662}
13663
88b08073
JG
13664static bool
13665aarch64_evpc_tbl (struct expand_vec_perm_d *d)
13666{
13667 rtx rperm[MAX_VECT_LEN], sel;
ef4bddc2 13668 machine_mode vmode = d->vmode;
88b08073
JG
13669 unsigned int i, nelt = d->nelt;
13670
88b08073
JG
13671 if (d->testing_p)
13672 return true;
13673
13674 /* Generic code will try constant permutation twice: once with the
13675 original mode and again with the elements lowered to QImode.
13676 So wait and don't do the selector expansion ourselves. */
13677 if (vmode != V8QImode && vmode != V16QImode)
13678 return false;
13679
13680 for (i = 0; i < nelt; ++i)
bbcc9c00
TB
13681 {
13682 int nunits = GET_MODE_NUNITS (vmode);
13683
13684 /* If big-endian and two vectors we end up with a weird mixed-endian
13685 mode on NEON. Reverse the index within each word but not the word
13686 itself. */
13687 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
13688 : d->perm[i]);
13689 }
88b08073
JG
13690 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
13691 sel = force_reg (vmode, sel);
13692
13693 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
13694 return true;
13695}
13696
13697static bool
13698aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
13699{
13700 /* The pattern matching functions above are written to look for a small
13701 number to begin the sequence (0, 1, N/2). If we begin with an index
13702 from the second operand, we can swap the operands. */
13703 if (d->perm[0] >= d->nelt)
13704 {
13705 unsigned i, nelt = d->nelt;
88b08073 13706
0696116a 13707 gcc_assert (nelt == (nelt & -nelt));
88b08073 13708 for (i = 0; i < nelt; ++i)
0696116a 13709 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
88b08073 13710
cb5c6c29 13711 std::swap (d->op0, d->op1);
88b08073
JG
13712 }
13713
13714 if (TARGET_SIMD)
cc4d934f 13715 {
923fcec3
AL
13716 if (aarch64_evpc_rev (d))
13717 return true;
13718 else if (aarch64_evpc_ext (d))
ae0533da 13719 return true;
f901401e
AL
13720 else if (aarch64_evpc_dup (d))
13721 return true;
ae0533da 13722 else if (aarch64_evpc_zip (d))
cc4d934f
JG
13723 return true;
13724 else if (aarch64_evpc_uzp (d))
13725 return true;
13726 else if (aarch64_evpc_trn (d))
13727 return true;
13728 return aarch64_evpc_tbl (d);
13729 }
88b08073
JG
13730 return false;
13731}
13732
13733/* Expand a vec_perm_const pattern. */
13734
13735bool
13736aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
13737{
13738 struct expand_vec_perm_d d;
13739 int i, nelt, which;
13740
13741 d.target = target;
13742 d.op0 = op0;
13743 d.op1 = op1;
13744
13745 d.vmode = GET_MODE (target);
13746 gcc_assert (VECTOR_MODE_P (d.vmode));
13747 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
13748 d.testing_p = false;
13749
13750 for (i = which = 0; i < nelt; ++i)
13751 {
13752 rtx e = XVECEXP (sel, 0, i);
13753 int ei = INTVAL (e) & (2 * nelt - 1);
13754 which |= (ei < nelt ? 1 : 2);
13755 d.perm[i] = ei;
13756 }
13757
13758 switch (which)
13759 {
13760 default:
13761 gcc_unreachable ();
13762
13763 case 3:
13764 d.one_vector_p = false;
13765 if (!rtx_equal_p (op0, op1))
13766 break;
13767
13768      /* The elements of PERM reference both operands, but the two
13769	 operands are in fact identical.  Allow easier matching of the
13770	 permutation by folding the permutation into the single
13771	 input vector.  */
13772 /* Fall Through. */
13773 case 2:
13774 for (i = 0; i < nelt; ++i)
13775 d.perm[i] &= nelt - 1;
13776 d.op0 = op1;
13777 d.one_vector_p = true;
13778 break;
13779
13780 case 1:
13781 d.op1 = op0;
13782 d.one_vector_p = true;
13783 break;
13784 }
13785
13786 return aarch64_expand_vec_perm_const_1 (&d);
13787}
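/* Illustrative note (not part of the original source): for a V4SImode
   permutation with selector {4, 5, 6, 7}, every index refers to the second
   operand, so WHICH becomes 2 above; the indices are then masked down to
   {0, 1, 2, 3}, OP1 becomes the single input and ONE_VECTOR_P is set before
   the matchers in aarch64_expand_vec_perm_const_1 are tried.  */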
13788
13789static bool
ef4bddc2 13790aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
88b08073
JG
13791 const unsigned char *sel)
13792{
13793 struct expand_vec_perm_d d;
13794 unsigned int i, nelt, which;
13795 bool ret;
13796
13797 d.vmode = vmode;
13798 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
13799 d.testing_p = true;
13800 memcpy (d.perm, sel, nelt);
13801
13802 /* Calculate whether all elements are in one vector. */
13803 for (i = which = 0; i < nelt; ++i)
13804 {
13805 unsigned char e = d.perm[i];
13806 gcc_assert (e < 2 * nelt);
13807 which |= (e < nelt ? 1 : 2);
13808 }
13809
13810 /* If all elements are from the second vector, reindex as if from the
13811 first vector. */
13812 if (which == 2)
13813 for (i = 0; i < nelt; ++i)
13814 d.perm[i] -= nelt;
13815
13816 /* Check whether the mask can be applied to a single vector. */
13817 d.one_vector_p = (which != 3);
13818
13819 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
13820 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
13821 if (!d.one_vector_p)
13822 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
13823
13824 start_sequence ();
13825 ret = aarch64_expand_vec_perm_const_1 (&d);
13826 end_sequence ();
13827
13828 return ret;
13829}
13830
668046d1 13831rtx
b8506a8a 13832aarch64_reverse_mask (machine_mode mode)
668046d1
DS
13833{
13834 /* We have to reverse each vector because we don't have
13835    a permuted load that can reverse-load according to ABI rules.  */
13836 rtx mask;
13837 rtvec v = rtvec_alloc (16);
13838 int i, j;
13839 int nunits = GET_MODE_NUNITS (mode);
13840 int usize = GET_MODE_UNIT_SIZE (mode);
13841
13842 gcc_assert (BYTES_BIG_ENDIAN);
13843 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
13844
13845 for (i = 0; i < nunits; i++)
13846 for (j = 0; j < usize; j++)
13847 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
13848 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
13849 return force_reg (V16QImode, mask);
13850}
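/* Worked example (illustrative only): for V4SImode, NUNITS is 4 and USIZE
   is 4, so the loop above builds the byte mask
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
   i.e. the byte order is reversed within each 32-bit element while the
   elements themselves stay in place.  */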
13851
61f17a5c
WD
13852/* Implement MODES_TIEABLE_P.  In principle we should always return true.
13853   However, due to issues with register allocation it is preferable to avoid
13854   tying integer scalar and FP scalar modes.  Executing integer operations
13855   in general registers is better than treating them as scalar vector
13856   operations.  This reduces latency and avoids redundant int<->FP moves.
13857   So tie modes if they are either the same class, or vector modes paired
13858   with other vector modes, vector structs or any scalar mode.
13859*/
97e1ad78
JG
13860
13861bool
ef4bddc2 13862aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
13863{
13864 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
13865 return true;
13866
13867 /* We specifically want to allow elements of "structure" modes to
13868 be tieable to the structure. This more general condition allows
13869 other rarer situations too. */
61f17a5c
WD
13870 if (aarch64_vector_mode_p (mode1) && aarch64_vector_mode_p (mode2))
13871 return true;
13872
13873 /* Also allow any scalar modes with vectors. */
13874 if (aarch64_vector_mode_supported_p (mode1)
13875 || aarch64_vector_mode_supported_p (mode2))
97e1ad78
JG
13876 return true;
13877
13878 return false;
13879}
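/* A few illustrative cases (not exhaustive): DImode ties with SImode (same
   class), V4SImode ties with V2DFmode (both vector modes), and DImode ties
   with V2DImode (scalar paired with a supported vector mode), but DImode
   does not tie with DFmode, which is precisely the int<->FP case the
   comment above wants to avoid.  */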
13880
e2c75eea
JG
13881/* Return a new RTX holding the result of moving POINTER forward by
13882 AMOUNT bytes. */
13883
13884static rtx
13885aarch64_move_pointer (rtx pointer, int amount)
13886{
13887 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
13888
13889 return adjust_automodify_address (pointer, GET_MODE (pointer),
13890 next, amount);
13891}
13892
13893/* Return a new RTX holding the result of moving POINTER forward by the
13894 size of the mode it points to. */
13895
13896static rtx
13897aarch64_progress_pointer (rtx pointer)
13898{
13899 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
13900
13901 return aarch64_move_pointer (pointer, amount);
13902}
13903
13904/* Copy one MODE-sized block from SRC to DST, then advance SRC and DST by
13905   the size of MODE in bytes.  */
13906
13907static void
13908aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 13909 machine_mode mode)
e2c75eea
JG
13910{
13911 rtx reg = gen_reg_rtx (mode);
13912
13913 /* "Cast" the pointers to the correct mode. */
13914 *src = adjust_address (*src, mode, 0);
13915 *dst = adjust_address (*dst, mode, 0);
13916 /* Emit the memcpy. */
13917 emit_move_insn (reg, *src);
13918 emit_move_insn (*dst, reg);
13919 /* Move the pointers forward. */
13920 *src = aarch64_progress_pointer (*src);
13921 *dst = aarch64_progress_pointer (*dst);
13922}
13923
13924/* Expand movmem, as if from a __builtin_memcpy. Return true if
13925 we succeed, otherwise return false. */
13926
13927bool
13928aarch64_expand_movmem (rtx *operands)
13929{
13930 unsigned int n;
13931 rtx dst = operands[0];
13932 rtx src = operands[1];
13933 rtx base;
13934 bool speed_p = !optimize_function_for_size_p (cfun);
13935
13936 /* When optimizing for size, give a better estimate of the length of a
13937 memcpy call, but use the default otherwise. */
13938 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
13939
13940 /* We can't do anything smart if the amount to copy is not constant. */
13941 if (!CONST_INT_P (operands[2]))
13942 return false;
13943
13944 n = UINTVAL (operands[2]);
13945
13946 /* Try to keep the number of instructions low. For cases below 16 bytes we
13947 need to make at most two moves. For cases above 16 bytes it will be one
13948 move for each 16 byte chunk, then at most two additional moves. */
13949 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
13950 return false;
13951
13952 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13953 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
13954
13955 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
13956 src = adjust_automodify_address (src, VOIDmode, base, 0);
13957
13958 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
13959 1-byte chunk. */
13960 if (n < 4)
13961 {
13962 if (n >= 2)
13963 {
13964 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
13965 n -= 2;
13966 }
13967
13968 if (n == 1)
13969 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
13970
13971 return true;
13972 }
13973
13974 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
13975 4-byte chunk, partially overlapping with the previously copied chunk. */
13976 if (n < 8)
13977 {
13978 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13979 n -= 4;
13980 if (n > 0)
13981 {
13982 int move = n - 4;
13983
13984 src = aarch64_move_pointer (src, move);
13985 dst = aarch64_move_pointer (dst, move);
13986 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13987 }
13988 return true;
13989 }
13990
13991 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
13992 them, then (if applicable) an 8-byte chunk. */
13993 while (n >= 8)
13994 {
13995 if (n / 16)
13996 {
13997 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
13998 n -= 16;
13999 }
14000 else
14001 {
14002 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
14003 n -= 8;
14004 }
14005 }
14006
14007 /* Finish the final bytes of the copy. We can always do this in one
14008 instruction. We either copy the exact amount we need, or partially
14009     overlap with the previous chunk we copied and copy 8 bytes.  */
14010 if (n == 0)
14011 return true;
14012 else if (n == 1)
14013 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
14014 else if (n == 2)
14015 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
14016 else if (n == 4)
14017 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
14018 else
14019 {
14020 if (n == 3)
14021 {
14022 src = aarch64_move_pointer (src, -1);
14023 dst = aarch64_move_pointer (dst, -1);
14024 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
14025 }
14026 else
14027 {
14028 int move = n - 8;
14029
14030 src = aarch64_move_pointer (src, move);
14031 dst = aarch64_move_pointer (dst, move);
14032 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
14033 }
14034 }
14035
14036 return true;
14037}
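/* Worked example (illustrative): for a 6-byte copy the code above emits an
   SImode load/store covering bytes 0-3, then, with 2 bytes left, moves both
   pointers back by 2 and emits a second SImode load/store covering bytes
   2-5, so the two accesses overlap instead of falling back to HImode and
   QImode chunks.  */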
14038
141a3ccf
KT
14039/* Split a DImode store of a CONST_INT SRC to MEM DST as two
14040 SImode stores. Handle the case when the constant has identical
14041 bottom and top halves. This is beneficial when the two stores can be
14042 merged into an STP and we avoid synthesising potentially expensive
14043 immediates twice. Return true if such a split is possible. */
14044
14045bool
14046aarch64_split_dimode_const_store (rtx dst, rtx src)
14047{
14048 rtx lo = gen_lowpart (SImode, src);
14049 rtx hi = gen_highpart_mode (SImode, DImode, src);
14050
14051 bool size_p = optimize_function_for_size_p (cfun);
14052
14053 if (!rtx_equal_p (lo, hi))
14054 return false;
14055
14056 unsigned int orig_cost
14057 = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
14058 unsigned int lo_cost
14059 = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
14060
14061 /* We want to transform:
14062 MOV x1, 49370
14063 MOVK x1, 0x140, lsl 16
14064 MOVK x1, 0xc0da, lsl 32
14065 MOVK x1, 0x140, lsl 48
14066 STR x1, [x0]
14067 into:
14068 MOV w1, 49370
14069 MOVK w1, 0x140, lsl 16
14070 STP w1, w1, [x0]
14071 So we want to perform this only when we save two instructions
14072 or more. When optimizing for size, however, accept any code size
14073 savings we can. */
14074 if (size_p && orig_cost <= lo_cost)
14075 return false;
14076
14077 if (!size_p
14078 && (orig_cost <= lo_cost + 1))
14079 return false;
14080
14081 rtx mem_lo = adjust_address (dst, SImode, 0);
14082 if (!aarch64_mem_pair_operand (mem_lo, SImode))
14083 return false;
14084
14085 rtx tmp_reg = gen_reg_rtx (SImode);
14086 aarch64_expand_mov_immediate (tmp_reg, lo);
14087 rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
14088  /* Don't emit an explicit store pair as this may not always be profitable.
14089 Let the sched-fusion logic decide whether to merge them. */
14090 emit_move_insn (mem_lo, tmp_reg);
14091 emit_move_insn (mem_hi, tmp_reg);
14092
14093 return true;
14094}
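/* Illustrative note: in the example above the DImode constant being stored
   is 0x0140c0da0140c0da (49370 == 0xc0da), whose low and high SImode halves
   are both 0x0140c0da, which is what makes the two-instruction SImode
   synthesis plus STP cheaper than the four-instruction DImode one.  */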
14095
a3125fc2
CL
14096/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
14097
14098static unsigned HOST_WIDE_INT
14099aarch64_asan_shadow_offset (void)
14100{
14101 return (HOST_WIDE_INT_1 << 36);
14102}
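/* Illustrative note (assuming the default ASan mapping of
   shadow = (address >> 3) + offset with 8-byte shadow granularity):
   returning 1 << 36 here places the shadow region at a fixed 64 GiB
   offset in the AArch64 address space.  */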
14103
d3006da6 14104static bool
445d7826 14105aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
d3006da6
JG
14106 unsigned int align,
14107 enum by_pieces_operation op,
14108 bool speed_p)
14109{
14110 /* STORE_BY_PIECES can be used when copying a constant string, but
14111 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
14112 For now we always fail this and let the move_by_pieces code copy
14113 the string from read-only memory. */
14114 if (op == STORE_BY_PIECES)
14115 return false;
14116
14117 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
14118}
14119
5f3bc026 14120static rtx
cb4347e8 14121aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
5f3bc026
ZC
14122 int code, tree treeop0, tree treeop1)
14123{
c8012fbc
WD
14124 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
14125 rtx op0, op1;
5f3bc026 14126 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 14127 insn_code icode;
5f3bc026
ZC
14128 struct expand_operand ops[4];
14129
5f3bc026
ZC
14130 start_sequence ();
14131 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
14132
14133 op_mode = GET_MODE (op0);
14134 if (op_mode == VOIDmode)
14135 op_mode = GET_MODE (op1);
14136
14137 switch (op_mode)
14138 {
4e10a5a7
RS
14139 case E_QImode:
14140 case E_HImode:
14141 case E_SImode:
5f3bc026
ZC
14142 cmp_mode = SImode;
14143 icode = CODE_FOR_cmpsi;
14144 break;
14145
4e10a5a7 14146 case E_DImode:
5f3bc026
ZC
14147 cmp_mode = DImode;
14148 icode = CODE_FOR_cmpdi;
14149 break;
14150
4e10a5a7 14151 case E_SFmode:
786e3c06
WD
14152 cmp_mode = SFmode;
14153 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
14154 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
14155 break;
14156
4e10a5a7 14157 case E_DFmode:
786e3c06
WD
14158 cmp_mode = DFmode;
14159 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
14160 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
14161 break;
14162
5f3bc026
ZC
14163 default:
14164 end_sequence ();
14165 return NULL_RTX;
14166 }
14167
c8012fbc
WD
14168 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
14169 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
5f3bc026
ZC
14170 if (!op0 || !op1)
14171 {
14172 end_sequence ();
14173 return NULL_RTX;
14174 }
14175 *prep_seq = get_insns ();
14176 end_sequence ();
14177
c8012fbc
WD
14178 create_fixed_operand (&ops[0], op0);
14179 create_fixed_operand (&ops[1], op1);
5f3bc026
ZC
14180
14181 start_sequence ();
c8012fbc 14182 if (!maybe_expand_insn (icode, 2, ops))
5f3bc026
ZC
14183 {
14184 end_sequence ();
14185 return NULL_RTX;
14186 }
14187 *gen_seq = get_insns ();
14188 end_sequence ();
14189
c8012fbc
WD
14190 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
14191 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
5f3bc026
ZC
14192}
14193
14194static rtx
cb4347e8
TS
14195aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
14196 int cmp_code, tree treeop0, tree treeop1, int bit_code)
5f3bc026 14197{
c8012fbc
WD
14198 rtx op0, op1, target;
14199 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
5f3bc026 14200 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 14201 insn_code icode;
5f3bc026 14202 struct expand_operand ops[6];
c8012fbc 14203 int aarch64_cond;
5f3bc026 14204
cb4347e8 14205 push_to_sequence (*prep_seq);
5f3bc026
ZC
14206 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
14207
14208 op_mode = GET_MODE (op0);
14209 if (op_mode == VOIDmode)
14210 op_mode = GET_MODE (op1);
14211
14212 switch (op_mode)
14213 {
4e10a5a7
RS
14214 case E_QImode:
14215 case E_HImode:
14216 case E_SImode:
5f3bc026 14217 cmp_mode = SImode;
c8012fbc 14218 icode = CODE_FOR_ccmpsi;
5f3bc026
ZC
14219 break;
14220
4e10a5a7 14221 case E_DImode:
5f3bc026 14222 cmp_mode = DImode;
c8012fbc 14223 icode = CODE_FOR_ccmpdi;
5f3bc026
ZC
14224 break;
14225
4e10a5a7 14226 case E_SFmode:
786e3c06
WD
14227 cmp_mode = SFmode;
14228 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
14229 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf;
14230 break;
14231
4e10a5a7 14232 case E_DFmode:
786e3c06
WD
14233 cmp_mode = DFmode;
14234 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
14235 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf;
14236 break;
14237
5f3bc026
ZC
14238 default:
14239 end_sequence ();
14240 return NULL_RTX;
14241 }
14242
14243 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
14244 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
14245 if (!op0 || !op1)
14246 {
14247 end_sequence ();
14248 return NULL_RTX;
14249 }
14250 *prep_seq = get_insns ();
14251 end_sequence ();
14252
14253 target = gen_rtx_REG (cc_mode, CC_REGNUM);
c8012fbc 14254 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
5f3bc026 14255
c8012fbc
WD
14256 if (bit_code != AND)
14257 {
14258 prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev),
14259 GET_MODE (XEXP (prev, 0))),
14260 VOIDmode, XEXP (prev, 0), const0_rtx);
14261 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
14262 }
14263
14264 create_fixed_operand (&ops[0], XEXP (prev, 0));
5f3bc026
ZC
14265 create_fixed_operand (&ops[1], target);
14266 create_fixed_operand (&ops[2], op0);
14267 create_fixed_operand (&ops[3], op1);
c8012fbc
WD
14268 create_fixed_operand (&ops[4], prev);
14269 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
5f3bc026 14270
cb4347e8 14271 push_to_sequence (*gen_seq);
5f3bc026
ZC
14272 if (!maybe_expand_insn (icode, 6, ops))
14273 {
14274 end_sequence ();
14275 return NULL_RTX;
14276 }
14277
14278 *gen_seq = get_insns ();
14279 end_sequence ();
14280
c8012fbc 14281 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
5f3bc026
ZC
14282}
14283
14284#undef TARGET_GEN_CCMP_FIRST
14285#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
14286
14287#undef TARGET_GEN_CCMP_NEXT
14288#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
14289
6a569cdd
KT
14290/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
14291 instruction fusion of some sort. */
14292
14293static bool
14294aarch64_macro_fusion_p (void)
14295{
b175b679 14296 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
14297}
14298
14299
14300/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
14301 should be kept together during scheduling. */
14302
14303static bool
14304aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
14305{
14306 rtx set_dest;
14307 rtx prev_set = single_set (prev);
14308 rtx curr_set = single_set (curr);
14309 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
14310 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
14311
14312 if (!aarch64_macro_fusion_p ())
14313 return false;
14314
d7b03373 14315 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
14316 {
14317 /* We are trying to match:
14318 prev (mov) == (set (reg r0) (const_int imm16))
14319 curr (movk) == (set (zero_extract (reg r0)
14320 (const_int 16)
14321 (const_int 16))
14322 (const_int imm16_1)) */
14323
14324 set_dest = SET_DEST (curr_set);
14325
14326 if (GET_CODE (set_dest) == ZERO_EXTRACT
14327 && CONST_INT_P (SET_SRC (curr_set))
14328 && CONST_INT_P (SET_SRC (prev_set))
14329 && CONST_INT_P (XEXP (set_dest, 2))
14330 && INTVAL (XEXP (set_dest, 2)) == 16
14331 && REG_P (XEXP (set_dest, 0))
14332 && REG_P (SET_DEST (prev_set))
14333 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
14334 {
14335 return true;
14336 }
14337 }
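      /* Illustrative example of a pair that the check above accepts:
	   mov  w0, #0x1234
	   movk w0, #0x5678, lsl 16
	 Both instructions write the same register and the MOVK updates
	 bits 16-31, matching the zero_extract pattern, so the two may be
	 fused on cores that support MOV/MOVK fusion.  */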
14338
d7b03373 14339 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
14340 {
14341
14342 /* We're trying to match:
14343 prev (adrp) == (set (reg r1)
14344 (high (symbol_ref ("SYM"))))
14345 curr (add) == (set (reg r0)
14346 (lo_sum (reg r1)
14347 (symbol_ref ("SYM"))))
14348 Note that r0 need not necessarily be the same as r1, especially
14349 during pre-regalloc scheduling. */
14350
14351 if (satisfies_constraint_Ush (SET_SRC (prev_set))
14352 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
14353 {
14354 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
14355 && REG_P (XEXP (SET_SRC (curr_set), 0))
14356 && REGNO (XEXP (SET_SRC (curr_set), 0))
14357 == REGNO (SET_DEST (prev_set))
14358 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
14359 XEXP (SET_SRC (curr_set), 1)))
14360 return true;
14361 }
14362 }
14363
d7b03373 14364 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
14365 {
14366
14367 /* We're trying to match:
14368 prev (movk) == (set (zero_extract (reg r0)
14369 (const_int 16)
14370 (const_int 32))
14371 (const_int imm16_1))
14372 curr (movk) == (set (zero_extract (reg r0)
14373 (const_int 16)
14374 (const_int 48))
14375 (const_int imm16_2)) */
14376
14377 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
14378 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
14379 && REG_P (XEXP (SET_DEST (prev_set), 0))
14380 && REG_P (XEXP (SET_DEST (curr_set), 0))
14381 && REGNO (XEXP (SET_DEST (prev_set), 0))
14382 == REGNO (XEXP (SET_DEST (curr_set), 0))
14383 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
14384 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
14385 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
14386 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
14387 && CONST_INT_P (SET_SRC (prev_set))
14388 && CONST_INT_P (SET_SRC (curr_set)))
14389 return true;
14390
14391 }
d7b03373 14392 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
14393 {
14394 /* We're trying to match:
14395 prev (adrp) == (set (reg r0)
14396 (high (symbol_ref ("SYM"))))
14397 curr (ldr) == (set (reg r1)
14398 (mem (lo_sum (reg r0)
14399 (symbol_ref ("SYM")))))
14400 or
14401 curr (ldr) == (set (reg r1)
14402 (zero_extend (mem
14403 (lo_sum (reg r0)
14404 (symbol_ref ("SYM")))))) */
14405 if (satisfies_constraint_Ush (SET_SRC (prev_set))
14406 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
14407 {
14408 rtx curr_src = SET_SRC (curr_set);
14409
14410 if (GET_CODE (curr_src) == ZERO_EXTEND)
14411 curr_src = XEXP (curr_src, 0);
14412
14413 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
14414 && REG_P (XEXP (XEXP (curr_src, 0), 0))
14415 && REGNO (XEXP (XEXP (curr_src, 0), 0))
14416 == REGNO (SET_DEST (prev_set))
14417 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
14418 XEXP (SET_SRC (prev_set), 0)))
14419 return true;
14420 }
14421 }
cd0cb232 14422
d7b03373 14423 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
00a8574a
WD
14424 && aarch_crypto_can_dual_issue (prev, curr))
14425 return true;
14426
d7b03373 14427 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
3759108f
AP
14428 && any_condjump_p (curr))
14429 {
14430 enum attr_type prev_type = get_attr_type (prev);
14431
509f819a
N
14432 unsigned int condreg1, condreg2;
14433 rtx cc_reg_1;
14434 aarch64_fixed_condition_code_regs (&condreg1, &condreg2);
14435 cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
14436
14437 if (reg_referenced_p (cc_reg_1, PATTERN (curr))
14438 && prev
14439 && modified_in_p (cc_reg_1, prev))
14440 {
14441	  /* FIXME: this misses some cases that are considered simple arithmetic
14442	     instructions for ThunderX.  Simple shifts are missed here.  */
14443 if (prev_type == TYPE_ALUS_SREG
14444 || prev_type == TYPE_ALUS_IMM
14445 || prev_type == TYPE_LOGICS_REG
14446 || prev_type == TYPE_LOGICS_IMM)
14447 return true;
14448 }
3759108f
AP
14449 }
14450
bee7e0fc
AP
14451 if (prev_set
14452 && curr_set
14453 && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
00c7c57f
JB
14454 && any_condjump_p (curr))
14455 {
14456 /* We're trying to match:
14457	 prev (alu_insn) == (set (r0) (plus (r0) (r1/imm)))
14458	 curr (cbz) == (set (pc) (if_then_else (eq/ne (r0)
14459	 (const_int 0))
14460	 (label_ref ("SYM"))
14461	 (pc))) */
14462 if (SET_DEST (curr_set) == (pc_rtx)
14463 && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
14464 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
14465 && REG_P (SET_DEST (prev_set))
14466 && REGNO (SET_DEST (prev_set))
14467 == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
14468 {
14469	  /* Fuse ALU operations followed by a conditional branch instruction.  */
14470 switch (get_attr_type (prev))
14471 {
14472 case TYPE_ALU_IMM:
14473 case TYPE_ALU_SREG:
14474 case TYPE_ADC_REG:
14475 case TYPE_ADC_IMM:
14476 case TYPE_ADCS_REG:
14477 case TYPE_ADCS_IMM:
14478 case TYPE_LOGIC_REG:
14479 case TYPE_LOGIC_IMM:
14480 case TYPE_CSEL:
14481 case TYPE_ADR:
14482 case TYPE_MOV_IMM:
14483 case TYPE_SHIFT_REG:
14484 case TYPE_SHIFT_IMM:
14485 case TYPE_BFM:
14486 case TYPE_RBIT:
14487 case TYPE_REV:
14488 case TYPE_EXTEND:
14489 return true;
14490
14491 default:;
14492 }
14493 }
14494 }
14495
6a569cdd
KT
14496 return false;
14497}
14498
f2879a90
KT
14499/* Return true iff the instruction fusion described by OP is enabled. */
14500
14501bool
14502aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
14503{
14504 return (aarch64_tune_params.fusible_ops & op) != 0;
14505}
14506
350013bc
BC
14507/* If MEM is in the form of [base+offset], extract the two parts
14508   of the address into BASE and OFFSET; otherwise return false
14509   after clearing BASE and OFFSET.  */
14510
14511bool
14512extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
14513{
14514 rtx addr;
14515
14516 gcc_assert (MEM_P (mem));
14517
14518 addr = XEXP (mem, 0);
14519
14520 if (REG_P (addr))
14521 {
14522 *base = addr;
14523 *offset = const0_rtx;
14524 return true;
14525 }
14526
14527 if (GET_CODE (addr) == PLUS
14528 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
14529 {
14530 *base = XEXP (addr, 0);
14531 *offset = XEXP (addr, 1);
14532 return true;
14533 }
14534
14535 *base = NULL_RTX;
14536 *offset = NULL_RTX;
14537
14538 return false;
14539}
14540
14541/* Types for scheduling fusion. */
14542enum sched_fusion_type
14543{
14544 SCHED_FUSION_NONE = 0,
14545 SCHED_FUSION_LD_SIGN_EXTEND,
14546 SCHED_FUSION_LD_ZERO_EXTEND,
14547 SCHED_FUSION_LD,
14548 SCHED_FUSION_ST,
14549 SCHED_FUSION_NUM
14550};
14551
14552/* If INSN is a load or store with an address in the form of [base+offset],
14553   extract the two parts into BASE and OFFSET.  Return the scheduling
14554   fusion type of this INSN.  */
14555
14556static enum sched_fusion_type
14557fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
14558{
14559 rtx x, dest, src;
14560 enum sched_fusion_type fusion = SCHED_FUSION_LD;
14561
14562 gcc_assert (INSN_P (insn));
14563 x = PATTERN (insn);
14564 if (GET_CODE (x) != SET)
14565 return SCHED_FUSION_NONE;
14566
14567 src = SET_SRC (x);
14568 dest = SET_DEST (x);
14569
abc52318
KT
14570 machine_mode dest_mode = GET_MODE (dest);
14571
14572 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
350013bc
BC
14573 return SCHED_FUSION_NONE;
14574
14575 if (GET_CODE (src) == SIGN_EXTEND)
14576 {
14577 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
14578 src = XEXP (src, 0);
14579 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
14580 return SCHED_FUSION_NONE;
14581 }
14582 else if (GET_CODE (src) == ZERO_EXTEND)
14583 {
14584 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
14585 src = XEXP (src, 0);
14586 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
14587 return SCHED_FUSION_NONE;
14588 }
14589
14590 if (GET_CODE (src) == MEM && REG_P (dest))
14591 extract_base_offset_in_addr (src, base, offset);
14592 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
14593 {
14594 fusion = SCHED_FUSION_ST;
14595 extract_base_offset_in_addr (dest, base, offset);
14596 }
14597 else
14598 return SCHED_FUSION_NONE;
14599
14600 if (*base == NULL_RTX || *offset == NULL_RTX)
14601 fusion = SCHED_FUSION_NONE;
14602
14603 return fusion;
14604}
14605
14606/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
14607
14608   Currently we only support fusing ldr and str instructions, so FUSION_PRI
14609   and PRI are only calculated for these instructions.  For other instructions,
14610   FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the future, other
14611   types of instruction fusion can be added by returning different priorities.
14612
14613 It's important that irrelevant instructions get the largest FUSION_PRI. */
14614
14615static void
14616aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
14617 int *fusion_pri, int *pri)
14618{
14619 int tmp, off_val;
14620 rtx base, offset;
14621 enum sched_fusion_type fusion;
14622
14623 gcc_assert (INSN_P (insn));
14624
14625 tmp = max_pri - 1;
14626 fusion = fusion_load_store (insn, &base, &offset);
14627 if (fusion == SCHED_FUSION_NONE)
14628 {
14629 *pri = tmp;
14630 *fusion_pri = tmp;
14631 return;
14632 }
14633
14634 /* Set FUSION_PRI according to fusion type and base register. */
14635 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
14636
14637 /* Calculate PRI. */
14638 tmp /= 2;
14639
14640 /* INSN with smaller offset goes first. */
14641 off_val = (int)(INTVAL (offset));
14642 if (off_val >= 0)
14643 tmp -= (off_val & 0xfffff);
14644 else
14645 tmp += ((- off_val) & 0xfffff);
14646
14647 *pri = tmp;
14648 return;
14649}
14650
9bca63d4
WD
14651/* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
14652 Adjust priority of sha1h instructions so they are scheduled before
14653 other SHA1 instructions. */
14654
14655static int
14656aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
14657{
14658 rtx x = PATTERN (insn);
14659
14660 if (GET_CODE (x) == SET)
14661 {
14662 x = SET_SRC (x);
14663
14664 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SHA1H)
14665 return priority + 10;
14666 }
14667
14668 return priority;
14669}
14670
350013bc
BC
14671/* Given OPERANDS of consecutive load/store, check if we can merge
14672 them into ldp/stp. LOAD is true if they are load instructions.
14673 MODE is the mode of memory operands. */
14674
14675bool
14676aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
b8506a8a 14677 machine_mode mode)
350013bc
BC
14678{
14679 HOST_WIDE_INT offval_1, offval_2, msize;
14680 enum reg_class rclass_1, rclass_2;
14681 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
14682
14683 if (load)
14684 {
14685 mem_1 = operands[1];
14686 mem_2 = operands[3];
14687 reg_1 = operands[0];
14688 reg_2 = operands[2];
14689 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
14690 if (REGNO (reg_1) == REGNO (reg_2))
14691 return false;
14692 }
14693 else
14694 {
14695 mem_1 = operands[0];
14696 mem_2 = operands[2];
14697 reg_1 = operands[1];
14698 reg_2 = operands[3];
14699 }
14700
bf84ac44
AP
14701 /* The mems cannot be volatile. */
14702 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
14703 return false;
14704
54700e2e
AP
14705 /* If we have SImode and slow unaligned ldp,
14706     check that the alignment is at least 8 bytes.  */
14707 if (mode == SImode
14708 && (aarch64_tune_params.extra_tuning_flags
14709 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
14710 && !optimize_size
14711 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
14712 return false;
14713
350013bc
BC
14714 /* Check if the addresses are in the form of [base+offset]. */
14715 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
14716 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
14717 return false;
14718 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
14719 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
14720 return false;
14721
14722  /* Check if the bases are the same.  */
14723 if (!rtx_equal_p (base_1, base_2))
14724 return false;
14725
14726 offval_1 = INTVAL (offset_1);
14727 offval_2 = INTVAL (offset_2);
14728 msize = GET_MODE_SIZE (mode);
14729 /* Check if the offsets are consecutive. */
14730 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
14731 return false;
14732
14733 /* Check if the addresses are clobbered by load. */
14734 if (load)
14735 {
14736 if (reg_mentioned_p (reg_1, mem_1))
14737 return false;
14738
14739 /* In increasing order, the last load can clobber the address. */
14740 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
14741 return false;
14742 }
14743
14744 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
14745 rclass_1 = FP_REGS;
14746 else
14747 rclass_1 = GENERAL_REGS;
14748
14749 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
14750 rclass_2 = FP_REGS;
14751 else
14752 rclass_2 = GENERAL_REGS;
14753
14754  /* Check if the registers are of the same class.  */
14755 if (rclass_1 != rclass_2)
14756 return false;
14757
14758 return true;
14759}
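/* Illustrative example: the pair
     ldr  w0, [x2]
     ldr  w1, [x2, 4]
   satisfies the checks above (same base register, consecutive SImode
   offsets, distinct destination registers of the same class, addresses not
   clobbered), so it is a candidate for a single ldp w0, w1, [x2].  */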
14760
14761/* Given OPERANDS of consecutive load/store, check if we can merge
14762 them into ldp/stp by adjusting the offset. LOAD is true if they
14763 are load instructions. MODE is the mode of memory operands.
14764
14765   Given the following consecutive stores:
14766
14767 str w1, [xb, 0x100]
14768 str w1, [xb, 0x104]
14769 str w1, [xb, 0x108]
14770 str w1, [xb, 0x10c]
14771
14772 Though the offsets are out of the range supported by stp, we can
14773 still pair them after adjusting the offset, like:
14774
14775 add scratch, xb, 0x100
14776 stp w1, w1, [scratch]
14777 stp w1, w1, [scratch, 0x8]
14778
14779 The peephole patterns detecting this opportunity should guarantee
14780   the scratch register is available.  */
14781
14782bool
14783aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
b8506a8a 14784 machine_mode mode)
350013bc
BC
14785{
14786 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
14787 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
14788 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
14789 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
14790
14791 if (load)
14792 {
14793 reg_1 = operands[0];
14794 mem_1 = operands[1];
14795 reg_2 = operands[2];
14796 mem_2 = operands[3];
14797 reg_3 = operands[4];
14798 mem_3 = operands[5];
14799 reg_4 = operands[6];
14800 mem_4 = operands[7];
14801 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
14802 && REG_P (reg_3) && REG_P (reg_4));
14803 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
14804 return false;
14805 }
14806 else
14807 {
14808 mem_1 = operands[0];
14809 reg_1 = operands[1];
14810 mem_2 = operands[2];
14811 reg_2 = operands[3];
14812 mem_3 = operands[4];
14813 reg_3 = operands[5];
14814 mem_4 = operands[6];
14815 reg_4 = operands[7];
14816 }
14817  /* Skip if the memory operand is by itself valid for ldp/stp.  */
14818 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
14819 return false;
14820
bf84ac44
AP
14821 /* The mems cannot be volatile. */
14822 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
14823 || MEM_VOLATILE_P (mem_3) ||MEM_VOLATILE_P (mem_4))
14824 return false;
14825
350013bc
BC
14826 /* Check if the addresses are in the form of [base+offset]. */
14827 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
14828 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
14829 return false;
14830 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
14831 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
14832 return false;
14833 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
14834 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
14835 return false;
14836 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
14837 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
14838 return false;
14839
14840  /* Check if the bases are the same.  */
14841 if (!rtx_equal_p (base_1, base_2)
14842 || !rtx_equal_p (base_2, base_3)
14843 || !rtx_equal_p (base_3, base_4))
14844 return false;
14845
14846 offval_1 = INTVAL (offset_1);
14847 offval_2 = INTVAL (offset_2);
14848 offval_3 = INTVAL (offset_3);
14849 offval_4 = INTVAL (offset_4);
14850 msize = GET_MODE_SIZE (mode);
14851 /* Check if the offsets are consecutive. */
14852 if ((offval_1 != (offval_2 + msize)
14853 || offval_1 != (offval_3 + msize * 2)
14854 || offval_1 != (offval_4 + msize * 3))
14855 && (offval_4 != (offval_3 + msize)
14856 || offval_4 != (offval_2 + msize * 2)
14857 || offval_4 != (offval_1 + msize * 3)))
14858 return false;
14859
14860 /* Check if the addresses are clobbered by load. */
14861 if (load)
14862 {
14863 if (reg_mentioned_p (reg_1, mem_1)
14864 || reg_mentioned_p (reg_2, mem_2)
14865 || reg_mentioned_p (reg_3, mem_3))
14866 return false;
14867
14868 /* In increasing order, the last load can clobber the address. */
14869 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
14870 return false;
14871 }
14872
54700e2e
AP
14873 /* If we have SImode and slow unaligned ldp,
14874     check that the alignment is at least 8 bytes.  */
14875 if (mode == SImode
14876 && (aarch64_tune_params.extra_tuning_flags
14877 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
14878 && !optimize_size
14879 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
14880 return false;
14881
350013bc
BC
14882 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
14883 rclass_1 = FP_REGS;
14884 else
14885 rclass_1 = GENERAL_REGS;
14886
14887 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
14888 rclass_2 = FP_REGS;
14889 else
14890 rclass_2 = GENERAL_REGS;
14891
14892 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
14893 rclass_3 = FP_REGS;
14894 else
14895 rclass_3 = GENERAL_REGS;
14896
14897 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
14898 rclass_4 = FP_REGS;
14899 else
14900 rclass_4 = GENERAL_REGS;
14901
14902  /* Check if the registers are of the same class.  */
14903 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
14904 return false;
14905
14906 return true;
14907}
14908
14909/* Given OPERANDS of consecutive load/store, this function pairs them
14910 into ldp/stp after adjusting the offset. It depends on the fact
14911 that addresses of load/store instructions are in increasing order.
14912 MODE is the mode of memory operands. CODE is the rtl operator
14913   which should be applied to all memory operands; it is SIGN_EXTEND,
14914 ZERO_EXTEND or UNKNOWN. */
14915
14916bool
14917aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
b8506a8a 14918 machine_mode mode, RTX_CODE code)
350013bc
BC
14919{
14920 rtx base, offset, t1, t2;
14921 rtx mem_1, mem_2, mem_3, mem_4;
14922 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
14923
14924 if (load)
14925 {
14926 mem_1 = operands[1];
14927 mem_2 = operands[3];
14928 mem_3 = operands[5];
14929 mem_4 = operands[7];
14930 }
14931 else
14932 {
14933 mem_1 = operands[0];
14934 mem_2 = operands[2];
14935 mem_3 = operands[4];
14936 mem_4 = operands[6];
14937 gcc_assert (code == UNKNOWN);
14938 }
14939
14940 extract_base_offset_in_addr (mem_1, &base, &offset);
14941 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
14942
14943  /* Adjust the offset so that it can fit in an ldp/stp instruction.  */
14944 msize = GET_MODE_SIZE (mode);
14945 stp_off_limit = msize * 0x40;
14946 off_val = INTVAL (offset);
14947 abs_off = (off_val < 0) ? -off_val : off_val;
14948 new_off = abs_off % stp_off_limit;
14949 adj_off = abs_off - new_off;
14950
14951 /* Further adjust to make sure all offsets are OK. */
14952 if ((new_off + msize * 2) >= stp_off_limit)
14953 {
14954 adj_off += stp_off_limit;
14955 new_off -= stp_off_limit;
14956 }
14957
14958 /* Make sure the adjustment can be done with ADD/SUB instructions. */
14959 if (adj_off >= 0x1000)
14960 return false;
14961
14962 if (off_val < 0)
14963 {
14964 adj_off = -adj_off;
14965 new_off = -new_off;
14966 }
14967
14968 /* Create new memory references. */
14969 mem_1 = change_address (mem_1, VOIDmode,
14970 plus_constant (DImode, operands[8], new_off));
14971
14972 /* Check if the adjusted address is OK for ldp/stp. */
14973 if (!aarch64_mem_pair_operand (mem_1, mode))
14974 return false;
14975
14976 msize = GET_MODE_SIZE (mode);
14977 mem_2 = change_address (mem_2, VOIDmode,
14978 plus_constant (DImode,
14979 operands[8],
14980 new_off + msize));
14981 mem_3 = change_address (mem_3, VOIDmode,
14982 plus_constant (DImode,
14983 operands[8],
14984 new_off + msize * 2));
14985 mem_4 = change_address (mem_4, VOIDmode,
14986 plus_constant (DImode,
14987 operands[8],
14988 new_off + msize * 3));
14989
14990 if (code == ZERO_EXTEND)
14991 {
14992 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
14993 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
14994 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
14995 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
14996 }
14997 else if (code == SIGN_EXTEND)
14998 {
14999 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
15000 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
15001 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
15002 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
15003 }
15004
15005 if (load)
15006 {
15007 operands[1] = mem_1;
15008 operands[3] = mem_2;
15009 operands[5] = mem_3;
15010 operands[7] = mem_4;
15011 }
15012 else
15013 {
15014 operands[0] = mem_1;
15015 operands[2] = mem_2;
15016 operands[4] = mem_3;
15017 operands[6] = mem_4;
15018 }
15019
15020 /* Emit adjusting instruction. */
f7df4a84 15021 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, adj_off)));
350013bc 15022 /* Emit ldp/stp instructions. */
f7df4a84
RS
15023 t1 = gen_rtx_SET (operands[0], operands[1]);
15024 t2 = gen_rtx_SET (operands[2], operands[3]);
350013bc 15025 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
f7df4a84
RS
15026 t1 = gen_rtx_SET (operands[4], operands[5]);
15027 t2 = gen_rtx_SET (operands[6], operands[7]);
350013bc
BC
15028 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
15029 return true;
15030}
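/* Worked numbers (illustrative) for the example in the comment before
   aarch64_operands_adjust_ok_for_ldpstp: with SImode stores at xb+0x100
   through xb+0x10c, MSIZE is 4, STP_OFF_LIMIT is 4 * 0x40 = 0x100 and
   OFF_VAL is 0x100, giving NEW_OFF = 0 and ADJ_OFF = 0x100.  The scratch
   register is set to xb + 0x100 and the four stores are emitted as two
   stp instructions at offsets 0 and 8 from it.  */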
15031
1b1e81f8
JW
15032/* Return true if a pseudo register should be created and used to hold
15033   the GOT address for PIC code.  */
15034
15035bool
15036aarch64_use_pseudo_pic_reg (void)
15037{
15038 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
15039}
15040
7b841a12
JW
15041/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
15042
15043static int
15044aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
15045{
15046 switch (XINT (x, 1))
15047 {
15048 case UNSPEC_GOTSMALLPIC:
15049 case UNSPEC_GOTSMALLPIC28K:
15050 case UNSPEC_GOTTINYPIC:
15051 return 0;
15052 default:
15053 break;
15054 }
15055
15056 return default_unspec_may_trap_p (x, flags);
15057}
15058
39252973
KT
15059
15060/* If X is a positive CONST_DOUBLE with a value that is a power of 2
15061 return the log2 of that value. Otherwise return -1. */
15062
15063int
15064aarch64_fpconst_pow_of_2 (rtx x)
15065{
15066 const REAL_VALUE_TYPE *r;
15067
15068 if (!CONST_DOUBLE_P (x))
15069 return -1;
15070
15071 r = CONST_DOUBLE_REAL_VALUE (x);
15072
15073 if (REAL_VALUE_NEGATIVE (*r)
15074 || REAL_VALUE_ISNAN (*r)
15075 || REAL_VALUE_ISINF (*r)
15076 || !real_isinteger (r, DFmode))
15077 return -1;
15078
15079 return exact_log2 (real_to_integer (r));
15080}
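/* For example (illustrative): 8.0 yields 3 and 1.0 yields 0, while 0.5,
   -4.0 and non-powers-of-2 such as 3.0 all yield -1.  */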
15081
15082/* If X is a vector of equal CONST_DOUBLE values and that value is
15083 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
15084
15085int
15086aarch64_vec_fpconst_pow_of_2 (rtx x)
15087{
15088 if (GET_CODE (x) != CONST_VECTOR)
15089 return -1;
15090
15091 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
15092 return -1;
15093
15094 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
15095 if (firstval <= 0)
15096 return -1;
15097
15098 for (int i = 1; i < CONST_VECTOR_NUNITS (x); i++)
15099 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
15100 return -1;
15101
15102 return firstval;
15103}
15104
11e554b3
JG
15105/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
15106 to float.
15107
15108 __fp16 always promotes through this hook.
15109 _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
15110 through the generic excess precision logic rather than here. */
15111
c2ec330c
AL
15112static tree
15113aarch64_promoted_type (const_tree t)
15114{
11e554b3
JG
15115 if (SCALAR_FLOAT_TYPE_P (t)
15116 && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
c2ec330c 15117 return float_type_node;
11e554b3 15118
c2ec330c
AL
15119 return NULL_TREE;
15120}
ee62a5a6
RS
15121
15122/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
15123
15124static bool
9acc9cbe 15125aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
ee62a5a6
RS
15126 optimization_type opt_type)
15127{
15128 switch (op)
15129 {
15130 case rsqrt_optab:
9acc9cbe 15131 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
ee62a5a6
RS
15132
15133 default:
15134 return true;
15135 }
15136}
15137
11e554b3
JG
15138/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
15139 if MODE is HFmode, and punt to the generic implementation otherwise. */
15140
15141static bool
7c5bd57a 15142aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
11e554b3
JG
15143{
15144 return (mode == HFmode
15145 ? true
15146 : default_libgcc_floating_mode_supported_p (mode));
15147}
15148
2e5f8203
JG
15149/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
15150 if MODE is HFmode, and punt to the generic implementation otherwise. */
15151
15152static bool
15153aarch64_scalar_mode_supported_p (machine_mode mode)
15154{
15155 return (mode == HFmode
15156 ? true
15157 : default_scalar_mode_supported_p (mode));
15158}
15159
11e554b3
JG
15160/* Set the value of FLT_EVAL_METHOD.
15161 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
15162
15163 0: evaluate all operations and constants, whose semantic type has at
15164 most the range and precision of type float, to the range and
15165 precision of float; evaluate all other operations and constants to
15166 the range and precision of the semantic type;
15167
15168   N, where _FloatN is a supported interchange floating type:
15169 evaluate all operations and constants, whose semantic type has at
15170 most the range and precision of _FloatN type, to the range and
15171 precision of the _FloatN type; evaluate all other operations and
15172 constants to the range and precision of the semantic type;
15173
15174 If we have the ARMv8.2-A extensions then we support _Float16 in native
15175 precision, so we should set this to 16. Otherwise, we support the type,
15176 but want to evaluate expressions in float precision, so set this to
15177 0. */
15178
15179static enum flt_eval_method
15180aarch64_excess_precision (enum excess_precision_type type)
15181{
15182 switch (type)
15183 {
15184 case EXCESS_PRECISION_TYPE_FAST:
15185 case EXCESS_PRECISION_TYPE_STANDARD:
15186 /* We can calculate either in 16-bit range and precision or
15187 32-bit range and precision. Make that decision based on whether
15188 we have native support for the ARMv8.2-A 16-bit floating-point
15189 instructions or not. */
15190 return (TARGET_FP_F16INST
15191 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
15192 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
15193 case EXCESS_PRECISION_TYPE_IMPLICIT:
15194 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
15195 default:
15196 gcc_unreachable ();
15197 }
15198 return FLT_EVAL_METHOD_UNPREDICTABLE;
15199}
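/* Illustrative example (not part of the original source): given
     _Float16 a, b, c;
     c = a * b + c;
   the arithmetic is carried out in _Float16 range and precision when
   TARGET_FP_F16INST is available (FLT_EVAL_METHOD of 16), and is promoted
   to and evaluated in float otherwise (FLT_EVAL_METHOD of 0).  */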
15200
b48d6421
KT
15201/* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
15202 scheduled for speculative execution. Reject the long-running division
15203 and square-root instructions. */
15204
15205static bool
15206aarch64_sched_can_speculate_insn (rtx_insn *insn)
15207{
15208 switch (get_attr_type (insn))
15209 {
15210 case TYPE_SDIV:
15211 case TYPE_UDIV:
15212 case TYPE_FDIVS:
15213 case TYPE_FDIVD:
15214 case TYPE_FSQRTS:
15215 case TYPE_FSQRTD:
15216 case TYPE_NEON_FP_SQRT_S:
15217 case TYPE_NEON_FP_SQRT_D:
15218 case TYPE_NEON_FP_SQRT_S_Q:
15219 case TYPE_NEON_FP_SQRT_D_Q:
15220 case TYPE_NEON_FP_DIV_S:
15221 case TYPE_NEON_FP_DIV_D:
15222 case TYPE_NEON_FP_DIV_S_Q:
15223 case TYPE_NEON_FP_DIV_D_Q:
15224 return false;
15225 default:
15226 return true;
15227 }
15228}
15229
51b86113
DM
15230/* Target-specific selftests. */
15231
15232#if CHECKING_P
15233
15234namespace selftest {
15235
15236/* Selftest for the RTL loader.
15237 Verify that the RTL loader copes with a dump from
15238 print_rtx_function. This is essentially just a test that class
15239 function_reader can handle a real dump, but it also verifies
15240 that lookup_reg_by_dump_name correctly handles hard regs.
15241 The presence of hard reg names in the dump means that the test is
15242 target-specific, hence it is in this file. */
15243
15244static void
15245aarch64_test_loading_full_dump ()
15246{
15247 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("aarch64/times-two.rtl"));
15248
15249 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
15250
15251 rtx_insn *insn_1 = get_insn_by_uid (1);
15252 ASSERT_EQ (NOTE, GET_CODE (insn_1));
15253
15254 rtx_insn *insn_15 = get_insn_by_uid (15);
15255 ASSERT_EQ (INSN, GET_CODE (insn_15));
15256 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
15257
15258 /* Verify crtl->return_rtx. */
15259 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
15260 ASSERT_EQ (0, REGNO (crtl->return_rtx));
15261 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
15262}
15263
15264/* Run all target-specific selftests. */
15265
15266static void
15267aarch64_run_selftests (void)
15268{
15269 aarch64_test_loading_full_dump ();
15270}
15271
15272} // namespace selftest
15273
15274#endif /* #if CHECKING_P */
15275
43e9d192
IB
15276#undef TARGET_ADDRESS_COST
15277#define TARGET_ADDRESS_COST aarch64_address_cost
15278
15279/* This hook determines whether unnamed bitfields affect the alignment
15280 of the containing structure. The hook returns true if the structure
15281 should inherit the alignment requirements of an unnamed bitfield's
15282 type. */
15283#undef TARGET_ALIGN_ANON_BITFIELD
15284#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
15285
15286#undef TARGET_ASM_ALIGNED_DI_OP
15287#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
15288
15289#undef TARGET_ASM_ALIGNED_HI_OP
15290#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
15291
15292#undef TARGET_ASM_ALIGNED_SI_OP
15293#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
15294
15295#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
15296#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
15297 hook_bool_const_tree_hwi_hwi_const_tree_true
15298
e1c1ecb0
KT
15299#undef TARGET_ASM_FILE_START
15300#define TARGET_ASM_FILE_START aarch64_start_file
15301
43e9d192
IB
15302#undef TARGET_ASM_OUTPUT_MI_THUNK
15303#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
15304
15305#undef TARGET_ASM_SELECT_RTX_SECTION
15306#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
15307
15308#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
15309#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
15310
15311#undef TARGET_BUILD_BUILTIN_VA_LIST
15312#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
15313
15314#undef TARGET_CALLEE_COPIES
15315#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
15316
15317#undef TARGET_CAN_ELIMINATE
15318#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
15319
1fd8d40c
KT
15320#undef TARGET_CAN_INLINE_P
15321#define TARGET_CAN_INLINE_P aarch64_can_inline_p
15322
43e9d192
IB
15323#undef TARGET_CANNOT_FORCE_CONST_MEM
15324#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
15325
50487d79
EM
15326#undef TARGET_CASE_VALUES_THRESHOLD
15327#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
15328
43e9d192
IB
15329#undef TARGET_CONDITIONAL_REGISTER_USAGE
15330#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
15331
15332/* Only the least significant bit is used for initialization guard
15333 variables. */
15334#undef TARGET_CXX_GUARD_MASK_BIT
15335#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
15336
15337#undef TARGET_C_MODE_FOR_SUFFIX
15338#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
15339
15340#ifdef TARGET_BIG_ENDIAN_DEFAULT
15341#undef TARGET_DEFAULT_TARGET_FLAGS
15342#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
15343#endif
15344
15345#undef TARGET_CLASS_MAX_NREGS
15346#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
15347
119103ca
JG
15348#undef TARGET_BUILTIN_DECL
15349#define TARGET_BUILTIN_DECL aarch64_builtin_decl
15350
a6fc00da
BH
15351#undef TARGET_BUILTIN_RECIPROCAL
15352#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
15353
11e554b3
JG
15354#undef TARGET_C_EXCESS_PRECISION
15355#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
15356
43e9d192
IB
15357#undef TARGET_EXPAND_BUILTIN
15358#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
15359
15360#undef TARGET_EXPAND_BUILTIN_VA_START
15361#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
15362
9697e620
JG
15363#undef TARGET_FOLD_BUILTIN
15364#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
15365
43e9d192
IB
15366#undef TARGET_FUNCTION_ARG
15367#define TARGET_FUNCTION_ARG aarch64_function_arg
15368
15369#undef TARGET_FUNCTION_ARG_ADVANCE
15370#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
15371
15372#undef TARGET_FUNCTION_ARG_BOUNDARY
15373#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
15374
15375#undef TARGET_FUNCTION_OK_FOR_SIBCALL
15376#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
15377
15378#undef TARGET_FUNCTION_VALUE
15379#define TARGET_FUNCTION_VALUE aarch64_function_value
15380
15381#undef TARGET_FUNCTION_VALUE_REGNO_P
15382#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
15383
15384#undef TARGET_FRAME_POINTER_REQUIRED
15385#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
15386
fc72cba7
AL
15387#undef TARGET_GIMPLE_FOLD_BUILTIN
15388#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 15389
43e9d192
IB
15390#undef TARGET_GIMPLIFY_VA_ARG_EXPR
15391#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
15392
15393#undef TARGET_INIT_BUILTINS
15394#define TARGET_INIT_BUILTINS aarch64_init_builtins
15395
c64f7d37
WD
15396#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
15397#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
15398 aarch64_ira_change_pseudo_allocno_class
15399
43e9d192
IB
15400#undef TARGET_LEGITIMATE_ADDRESS_P
15401#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
15402
15403#undef TARGET_LEGITIMATE_CONSTANT_P
15404#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
15405
491ec060
WD
15406#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
15407#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
15408 aarch64_legitimize_address_displacement
15409
43e9d192
IB
15410#undef TARGET_LIBGCC_CMP_RETURN_MODE
15411#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
15412
11e554b3
JG
15413#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
15414#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
15415aarch64_libgcc_floating_mode_supported_p
15416
ac2b960f
YZ
15417#undef TARGET_MANGLE_TYPE
15418#define TARGET_MANGLE_TYPE aarch64_mangle_type
15419
43e9d192
IB
15420#undef TARGET_MEMORY_MOVE_COST
15421#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
15422
26e0ff94
WD
15423#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
15424#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
15425
43e9d192
IB
15426#undef TARGET_MUST_PASS_IN_STACK
15427#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
15428
15429/* This target hook should return true if accesses to volatile bitfields
15430 should use the narrowest mode possible. It should return false if these
15431 accesses should use the bitfield container type. */
15432#undef TARGET_NARROW_VOLATILE_BITFIELD
15433#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
15434
15435#undef TARGET_OPTION_OVERRIDE
15436#define TARGET_OPTION_OVERRIDE aarch64_override_options
15437
15438#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
15439#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
15440 aarch64_override_options_after_change
15441
361fb3ee
KT
15442#undef TARGET_OPTION_SAVE
15443#define TARGET_OPTION_SAVE aarch64_option_save
15444
15445#undef TARGET_OPTION_RESTORE
15446#define TARGET_OPTION_RESTORE aarch64_option_restore
15447
15448#undef TARGET_OPTION_PRINT
15449#define TARGET_OPTION_PRINT aarch64_option_print
15450
5a2c8331
KT
15451#undef TARGET_OPTION_VALID_ATTRIBUTE_P
15452#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
15453
d78006d9
KT
15454#undef TARGET_SET_CURRENT_FUNCTION
15455#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function
15456
43e9d192
IB
15457#undef TARGET_PASS_BY_REFERENCE
15458#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
15459
15460#undef TARGET_PREFERRED_RELOAD_CLASS
15461#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
15462
cee66c68
WD
15463#undef TARGET_SCHED_REASSOCIATION_WIDTH
15464#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
15465
c2ec330c
AL
15466#undef TARGET_PROMOTED_TYPE
15467#define TARGET_PROMOTED_TYPE aarch64_promoted_type
15468
43e9d192
IB
15469#undef TARGET_SECONDARY_RELOAD
15470#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
15471
15472#undef TARGET_SHIFT_TRUNCATION_MASK
15473#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
15474
15475#undef TARGET_SETUP_INCOMING_VARARGS
15476#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
15477
15478#undef TARGET_STRUCT_VALUE_RTX
15479#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
15480
15481#undef TARGET_REGISTER_MOVE_COST
15482#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
15483
15484#undef TARGET_RETURN_IN_MEMORY
15485#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
15486
15487#undef TARGET_RETURN_IN_MSB
15488#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
15489
15490#undef TARGET_RTX_COSTS
7cc2145f 15491#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 15492
2e5f8203
JG
15493#undef TARGET_SCALAR_MODE_SUPPORTED_P
15494#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p
15495
d126a4ae
AP
15496#undef TARGET_SCHED_ISSUE_RATE
15497#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
15498
d03f7e44
MK
15499#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
15500#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
15501 aarch64_sched_first_cycle_multipass_dfa_lookahead
15502
2d6bc7fa
KT
15503#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
15504#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
15505 aarch64_first_cycle_multipass_dfa_lookahead_guard
15506
827ab47a
KT
15507#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
15508#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
15509 aarch64_get_separate_components
15510
15511#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
15512#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
15513 aarch64_components_for_bb
15514
15515#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
15516#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
15517 aarch64_disqualify_components
15518
15519#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
15520#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
15521 aarch64_emit_prologue_components
15522
15523#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
15524#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
15525 aarch64_emit_epilogue_components
15526
15527#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
15528#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
15529 aarch64_set_handled_components
15530
43e9d192
IB
15531#undef TARGET_TRAMPOLINE_INIT
15532#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
15533
15534#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
15535#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
15536
15537#undef TARGET_VECTOR_MODE_SUPPORTED_P
15538#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
15539
7df76747
N
15540#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
15541#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
15542 aarch64_builtin_support_vector_misalignment
15543
43e9d192
IB
15544#undef TARGET_ARRAY_MODE_SUPPORTED_P
15545#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
15546
8990e73a
TB
15547#undef TARGET_VECTORIZE_ADD_STMT_COST
15548#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
15549
15550#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
15551#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
15552 aarch64_builtin_vectorization_cost
15553
43e9d192
IB
15554#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
15555#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
15556
42fc9a7f
JG
15557#undef TARGET_VECTORIZE_BUILTINS
15558#define TARGET_VECTORIZE_BUILTINS
15559
15560#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
15561#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
15562 aarch64_builtin_vectorized_function
15563
3b357264
JG
15564#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
15565#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
15566 aarch64_autovectorize_vector_sizes
15567
aa87aced
KV
15568#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
15569#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
15570 aarch64_atomic_assign_expand_fenv
15571
43e9d192
IB
15572/* Section anchor support. */
15573
15574#undef TARGET_MIN_ANCHOR_OFFSET
15575#define TARGET_MIN_ANCHOR_OFFSET -256
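/* -256 appears to correspond to the most negative offset accepted by the
   unscaled (signed 9-bit) LDUR/STUR addressing mode, the only plain
   base-plus-immediate form that allows negative offsets.  */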
15576
15577/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
15578 byte offset; we can do much more for larger data types, but have no way
15579 to determine the size of the access. We assume accesses are aligned. */
15580#undef TARGET_MAX_ANCHOR_OFFSET
15581#define TARGET_MAX_ANCHOR_OFFSET 4095
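/* 4095 is the largest unsigned scaled immediate for a byte load/store
   (LDRB/STRB); wider accesses scale the 12-bit immediate by the access
   size, hence the note above about larger data types.  */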
15582
db0253a4
TB
15583#undef TARGET_VECTOR_ALIGNMENT
15584#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
15585
15586#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
15587#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
15588 aarch64_simd_vector_alignment_reachable
15589
88b08073
JG
15590/* vec_perm support. */
15591
15592#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
15593#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
15594 aarch64_vectorize_vec_perm_const_ok
15595
c2ec330c
AL
15596#undef TARGET_INIT_LIBFUNCS
15597#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
70f09188 15598
706b2314 15599#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
15600#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
15601
5cb74e90
RR
15602#undef TARGET_FLAGS_REGNUM
15603#define TARGET_FLAGS_REGNUM CC_REGNUM
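/* Identifies CC_REGNUM as the flags register for generic passes
   (e.g. post-reload compare elimination) that need to know which hard
   register holds the condition codes.  */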
15604
78607708
TV
15605#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
15606#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
15607
a3125fc2
CL
15608#undef TARGET_ASAN_SHADOW_OFFSET
15609#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
15610
0c4ec427
RE
15611#undef TARGET_LEGITIMIZE_ADDRESS
15612#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
15613
d3006da6
JG
15614#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
15615#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
15616 aarch64_use_by_pieces_infrastructure_p
15617
b48d6421
KT
15618#undef TARGET_SCHED_CAN_SPECULATE_INSN
15619#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn
15620
594bdd53
FY
15621#undef TARGET_CAN_USE_DOLOOP_P
15622#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
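/* can_use_doloop_if_innermost is the generic helper that, as its name
   suggests, permits the low-overhead-loop transformation only for
   innermost loops.  */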
15623
9bca63d4
WD
15624#undef TARGET_SCHED_ADJUST_PRIORITY
15625#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority
15626
6a569cdd
KT
15627#undef TARGET_SCHED_MACRO_FUSION_P
15628#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
15629
15630#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
15631#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
15632
350013bc
BC
15633#undef TARGET_SCHED_FUSION_PRIORITY
15634#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
15635
7b841a12
JW
15636#undef TARGET_UNSPEC_MAY_TRAP_P
15637#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p
15638
1b1e81f8
JW
15639#undef TARGET_USE_PSEUDO_PIC_REG
15640#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
15641
cc8ca59e
JB
15642#undef TARGET_PRINT_OPERAND
15643#define TARGET_PRINT_OPERAND aarch64_print_operand
15644
15645#undef TARGET_PRINT_OPERAND_ADDRESS
15646#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address
15647
ee62a5a6
RS
15648#undef TARGET_OPTAB_SUPPORTED_P
15649#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p
15650
43203dea
RR
15651#undef TARGET_OMIT_STRUCT_RETURN_REG
15652#define TARGET_OMIT_STRUCT_RETURN_REG true
15653
f46fe37e
EB
15654/* The architecture reserves bits 0 and 1 so use bit 2 for descriptors. */
15655#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
15656#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4
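/* The value 4 selects bit 2 as the flag marking pointers to such
   descriptors, keeping clear of the reserved bits 0 and 1 noted above.  */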
15657
51b86113
DM
15658#if CHECKING_P
15659#undef TARGET_RUN_TARGET_SELFTESTS
15660#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
15661#endif /* #if CHECKING_P */
15662
43e9d192
IB
15663struct gcc_target targetm = TARGET_INITIALIZER;
15664
15665#include "gt-aarch64.h"