/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2017 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "flags.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "langhooks.h"
#include "opts.h"
#include "params.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "target-globals.h"
#include "common/common-target.h"
#include "selftest.h"
#include "selftest-rtl.h"

/* This file should be included last.  */
#include "target-def.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};
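
/* Illustrative assembly forms for the classifications above (an informal
   summary for readers, not something the compiler relies on):

     ADDRESS_REG_IMM    ldr x0, [x1, #16]
     ADDRESS_REG_WB     ldr x0, [x1, #16]!   or   ldr x0, [x1], #16
     ADDRESS_REG_REG    ldr x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   ldr x0, [x1, w2, uxtw #2]
     ADDRESS_REG_SXTW   ldr x0, [x1, w2, sxtw #2]
     ADDRESS_LO_SUM     ldr x0, [x1, #:lo12:foo]
     ADDRESS_SYMBOLIC   ldr x0, .LC0  (pc-relative literal load)  */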

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
						     const_tree,
						     machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
						 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
							 const_tree type,
							 int misalignment,
							 bool is_packed);
static machine_mode
aarch64_simd_container_mode (scalar_mode mode, unsigned width);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Global flag for PC relative loads.  */
bool aarch64_pcrelative_literal_loads;

/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
  {
    1, /* hi */
    0, /* si */
    0, /* di */
    1, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  0, /* register_offset */
  0, /* register_sextend */
  0, /* register_zextend */
  0 /* imm_offset */
};

static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
  {
    0, /* hi */
    0, /* si */
    0, /* di */
    2, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  1, /* register_offset */
  1, /* register_sextend */
  2, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
  {
    1, /* hi */
    0, /* si */
    0, /* di */
    1, /* ti */
  },
  1, /* pre_modify */
  0, /* post_modify */
  0, /* register_offset */
  1, /* register_sextend */
  1, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
{
  {
    1, /* hi */
    1, /* si */
    1, /* di */
    2, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  2, /* register_offset */
  3, /* register_sextend */
  3, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost exynosm1_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost (actual, 4 and 9).  */
  9, /* GP2FP */
  9, /* FP2GP */
  1 /* FP2FP */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP */
  2, /* GP2FP */
  6, /* FP2GP */
  4 /* FP2FP */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP */
  8, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost qdf24xx_regmove_cost =
{
  2, /* GP2GP */
  /* Avoid the use of int<->fp moves for spilling.  */
  6, /* GP2FP */
  6, /* FP2GP */
  4 /* FP2FP */
};

static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of int<->fp moves for spilling.  */
  8, /* GP2FP */
  8, /* FP2GP */
  4 /* FP2FP */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  1, /* scalar_load_cost */
  1, /* scalar_store_cost */
  1, /* vec_int_stmt_cost */
  1, /* vec_fp_stmt_cost */
  2, /* vec_permute_cost */
  1, /* vec_to_scalar_cost */
  1, /* scalar_to_vec_cost */
  1, /* vec_align_load_cost */
  1, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  3, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* ThunderX costs for vector insn classes.  */
static const struct cpu_vector_cost thunderx_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  3, /* scalar_load_cost */
  1, /* scalar_store_cost */
  4, /* vec_int_stmt_cost */
  1, /* vec_fp_stmt_cost */
  4, /* vec_permute_cost */
  2, /* vec_to_scalar_cost */
  2, /* scalar_to_vec_cost */
  3, /* vec_align_load_cost */
  5, /* vec_unalign_load_cost */
  5, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  3, /* cond_taken_branch_cost */
  3 /* cond_not_taken_branch_cost */
};

/* Costs for vector insn classes for Cortex-A57.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  4, /* scalar_load_cost */
  1, /* scalar_store_cost */
  2, /* vec_int_stmt_cost */
  2, /* vec_fp_stmt_cost */
  3, /* vec_permute_cost */
  8, /* vec_to_scalar_cost */
  8, /* scalar_to_vec_cost */
  4, /* vec_align_load_cost */
  4, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  1, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

static const struct cpu_vector_cost exynosm1_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  5, /* scalar_load_cost */
  1, /* scalar_store_cost */
  3, /* vec_int_stmt_cost */
  3, /* vec_fp_stmt_cost */
  3, /* vec_permute_cost */
  3, /* vec_to_scalar_cost */
  3, /* scalar_to_vec_cost */
  5, /* vec_align_load_cost */
  5, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  1, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* Costs for vector insn classes for X-Gene 1.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  5, /* scalar_load_cost */
  1, /* scalar_store_cost */
  2, /* vec_int_stmt_cost */
  2, /* vec_fp_stmt_cost */
  2, /* vec_permute_cost */
  4, /* vec_to_scalar_cost */
  4, /* scalar_to_vec_cost */
  10, /* vec_align_load_cost */
  10, /* vec_unalign_load_cost */
  2, /* vec_unalign_store_cost */
  2, /* vec_store_cost */
  2, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* Costs for vector insn classes for ThunderX2 T99 (formerly Vulcan).  */
static const struct cpu_vector_cost thunderx2t99_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  6, /* scalar_fp_stmt_cost */
  4, /* scalar_load_cost */
  1, /* scalar_store_cost */
  5, /* vec_int_stmt_cost */
  6, /* vec_fp_stmt_cost */
  3, /* vec_permute_cost */
  6, /* vec_to_scalar_cost */
  5, /* scalar_to_vec_cost */
  8, /* vec_align_load_cost */
  8, /* vec_unalign_load_cost */
  4, /* vec_unalign_store_cost */
  4, /* vec_store_cost */
  2, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  1,  /* Predictable.  */
  3   /* Unpredictable.  */
};

/* Generic approximation modes.  */
static const cpu_approx_modes generic_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_NONE, /* sqrt */
  AARCH64_APPROX_NONE  /* recip_sqrt */
};

/* Approximation modes for Exynos M1.  */
static const cpu_approx_modes exynosm1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_ALL,  /* sqrt */
  AARCH64_APPROX_ALL   /* recip_sqrt */
};

/* Approximation modes for X-Gene 1.  */
static const cpu_approx_modes xgene1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_NONE, /* sqrt */
  AARCH64_APPROX_ALL   /* recip_sqrt */
};

/* Generic prefetch settings (which disable prefetch).  */
static const cpu_prefetch_tune generic_prefetch_tune =
{
  0,   /* num_slots */
  -1,  /* l1_cache_size */
  -1,  /* l1_cache_line_size */
  -1,  /* l2_cache_size */
  -1   /* default_opt_level */
};

static const cpu_prefetch_tune exynosm1_prefetch_tune =
{
  0,   /* num_slots */
  -1,  /* l1_cache_size */
  64,  /* l1_cache_line_size */
  -1,  /* l2_cache_size */
  -1   /* default_opt_level */
};

static const cpu_prefetch_tune qdf24xx_prefetch_tune =
{
  4,     /* num_slots */
  32,    /* l1_cache_size */
  64,    /* l1_cache_line_size */
  1024,  /* l2_cache_size */
  3      /* default_opt_level */
};

static const cpu_prefetch_tune thunderxt88_prefetch_tune =
{
  8,        /* num_slots */
  32,       /* l1_cache_size */
  128,      /* l1_cache_line_size */
  16*1024,  /* l2_cache_size */
  3         /* default_opt_level */
};

static const cpu_prefetch_tune thunderx_prefetch_tune =
{
  8,    /* num_slots */
  32,   /* l1_cache_size */
  128,  /* l1_cache_line_size */
  -1,   /* l2_cache_size */
  -1    /* default_opt_level */
};

static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
{
  8,    /* num_slots */
  32,   /* l1_cache_size */
  64,   /* l1_cache_line_size */
  256,  /* l2_cache_size */
  -1    /* default_opt_level */
};

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  2, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
  8,  /* function_align.  */
  4,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa35_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  1, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  16, /* function_align.  */
  4,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  2, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  16, /* function_align.  */
  4,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  16, /* function_align.  */
  4,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  16, /* function_align.  */
  4,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params cortexa73_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  2, /* issue_rate.  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  16, /* function_align.  */
  4,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params exynosm1_tunings =
{
  &exynosm1_extra_costs,
  &exynosm1_addrcost_table,
  &exynosm1_regmove_cost,
  &exynosm1_vector_cost,
  &generic_branch_cost,
  &exynosm1_approx_modes,
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
  4,  /* function_align.  */
  4,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  48, /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &exynosm1_prefetch_tune
};

static const struct tune_params thunderxt88_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  6, /* memmov_cost */
  2, /* issue_rate */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags.  */
  &thunderxt88_prefetch_tune
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  6, /* memmov_cost */
  2, /* issue_rate */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags.  */
  &thunderx_prefetch_tune
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  6, /* memmov_cost */
  4, /* issue_rate */
  AARCH64_FUSE_NOTHING, /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &generic_prefetch_tune
};

static const struct tune_params qdf24xx_tunings =
{
  &qdf24xx_extra_costs,
  &generic_addrcost_table,
  &qdf24xx_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  4, /* issue_rate */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &qdf24xx_prefetch_tune
};

static const struct tune_params thunderx2t99_tunings =
{
  &thunderx2t99_extra_costs,
  &thunderx2t99_addrcost_table,
  &thunderx2t99_regmove_cost,
  &thunderx2t99_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  4, /* issue_rate.  */
  (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  3,  /* int_reassoc_width.  */
  2,  /* fp_reassoc_width.  */
  2,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
  &thunderx2t99_prefetch_tune
};

/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { NULL, NULL }
};
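
/* For illustration only: a "name=value" piece of the -moverride option
   string, e.g. fuse=aes_aesmc or tune=rename_fma_regs, is dispatched
   through this table to the matching parse_override routine, which then
   looks the value up in aarch64_fusible_pairs or aarch64_tuning_flags
   above.  The example value names are taken from the fusion/tuning .def
   files and are shown here only as a hint to the reader.  */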

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor ident;
  enum aarch64_processor sched_core;
  enum aarch64_arch arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
  {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
#include "aarch64-arches.def"
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
  {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
   all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
   FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
  {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
   AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};


/* Target specification.  These are populated by the -march, -mtune, -mcpu
   handling code or by target attributes.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
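
/* For example, AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ) is AARCH64_NE:
   each even/odd pair in the table above ("eq"/"ne", "cs"/"cc", ...) maps
   to its logical inverse simply by flipping the low bit of the code.  */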

/* Generate code to enable conditional branches in functions over 1 MiB.  */
const char *
aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
			const char * branch_format)
{
  rtx_code_label * tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];
  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
			       CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = targetm.strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);
  return "";
}

void
aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
{
  const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
  if (TARGET_GENERAL_REGS_ONLY)
    error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
  else
    error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg);
}

/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
   The register allocator chooses ALL_REGS if FP_REGS and GENERAL_REGS have
   the same cost even if ALL_REGS has a much larger cost.  ALL_REGS is also
   used if the cost of both FP_REGS and GENERAL_REGS is lower than the memory
   cost (in this case the best class is the lowest cost one).  Using ALL_REGS
   irrespective of its cost results in bad allocations with many redundant
   int<->FP moves which are expensive on various cores.
   To avoid this we don't allow ALL_REGS as the allocno class, but force a
   decision between FP_REGS and GENERAL_REGS.  We use the allocno class if it
   isn't ALL_REGS.  Similarly, use the best class if it isn't ALL_REGS.
   Otherwise set the allocno class depending on the mode.
   The result of this is that it is no longer inefficient to have a higher
   memory move cost than the register move cost.  */

static reg_class_t
aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
					 reg_class_t best_class)
{
  machine_mode mode;

  if (allocno_class != ALL_REGS)
    return allocno_class;

  if (best_class != ALL_REGS)
    return best_class;

  mode = PSEUDO_REGNO_MODE (regno);
  return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
}
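
/* For example, a DFmode pseudo whose allocno class and best class are both
   ALL_REGS is narrowed to FP_REGS by the code above, while an SImode pseudo
   in the same situation is narrowed to GENERAL_REGS.  */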

static unsigned int
aarch64_min_divisions_for_recip_mul (machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}

static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
			     machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params.vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params.int_reassoc_width;
  if (FLOAT_MODE_P (mode))
    return aarch64_tune_params.fp_reassoc_width;
  return 1;
}

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}
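
/* Thus x0..x30 map to DWARF registers 0..30, sp to 31 and v0..v31 to
   64..95 (assuming the usual AARCH64_DWARF_R0 = 0, AARCH64_DWARF_SP = 31
   and AARCH64_DWARF_V0 = 64 definitions from aarch64.h); any other
   register is reported as having no DWARF equivalent.  */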

/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && (AARCH64_VALID_SIMD_QREG_MODE (mode)
	  || AARCH64_VALID_SIMD_DREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
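
/* For example, assuming this port's usual UNITS_PER_VREG of 16 and
   UNITS_PER_WORD of 8, a 16-byte mode such as TImode or V4SImode occupies
   a single FP/SIMD register but two X registers, and a 32-byte OImode
   value needs two vector registers or four X registers.  */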

/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return true;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return end_hard_regno (mode, regno) - 1 <= V31_REGNUM;
      else
	return true;
    }

  return false;
}

/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The callee only saves
   the lower 64 bits of a 128-bit register.  Tell the compiler the callee
   clobbers the top 64 bits when restoring the bottom 64 bits.  */

static bool
aarch64_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
{
  return FP_REGNUM_P (regno) && GET_MODE_SIZE (mode) > 8;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
				     machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
	return mode;
      else
	return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}
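
/* For example, a 2-byte HFmode value that fits in one register is saved
   and restored in SImode, while 4-byte and wider single-register values
   keep their own mode; multi-register values fall back to
   choose_hard_reg_mode.  */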

/* Return true if calls to DECL should be treated as
   long-calls (ie called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (ie called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if calls to symbol-ref SYM should not go through
   plt stubs.  */

bool
aarch64_is_noplt_call_p (rtx sym)
{
  const_tree decl = SYMBOL_REF_DECL (sym);

  if (flag_pic
      && decl
      && (!flag_plt
	  || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
      && !targetm.binds_local_p (decl))
    return true;

  return false;
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
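
/* Worked example: with MODE == DImode, MULT_IMM == 4 and EXTRACT_IMM == 34
   the test above succeeds: 34 & ~7 == 32 is a power of two, the low three
   bits give a shift of 2, and 1 << 2 matches the multiplier.  The extract
   therefore describes a 32-bit value extended and shifted left by two, the
   shape of a typical scaled-index address calculation.  */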

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as :
   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo
                                     nop

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                     bl   __tls_get_addr
                                     nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp
   mrs  tp, tpidr_el0
   add  dest, dest, tp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm, lsl #12
   add  t0, t0, #:tprel_lo12_nc:imm
*/

static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode.  */
	rtx tmp_reg = dest;
	machine_mode mode = GET_MODE (dest);

	gcc_assert (mode == Pmode || mode == ptr_mode);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (dest, imm));
      return;

    case SYMBOL_SMALL_GOT_28K:
      {
	machine_mode mode = GET_MODE (dest);
	rtx gp_rtx = pic_offset_table_rtx;
	rtx insn;
	rtx mem;

	/* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
	   here before rtl expand.  Tree IVOPT will generate rtl patterns to
	   decide rtx costs, in which case pic_offset_table_rtx is not
	   initialized.  In that case there is no need to generate the first
	   adrp instruction, as the final cost of a global variable access is
	   one instruction.  */
	if (gp_rtx != NULL)
	  {
	    /* -fpic for -mcmodel=small allows a 32K GOT table size (but since
	       we use the page base as the GOT base, the first page may be
	       wasted; in the worst case there is only 28K of space for the
	       GOT).

	       The generated instruction sequence for accessing a global
	       variable is:

		 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]

	       Only one instruction is needed.  But we must initialize
	       pic_offset_table_rtx properly.  We generate an initializing
	       insn for every global access, and allow CSE to remove all
	       redundant ones.

	       The final instruction sequence will look like the following
	       for multiple global variable accesses:

		 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_

		 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
		 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
		 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
		 ...  */

	    rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
	    crtl->uses_pic_offset_table = 1;
	    emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));

	    if (mode != GET_MODE (gp_rtx))
	      gp_rtx = gen_lowpart (mode, gp_rtx);

	  }

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
	    else
	      insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);

	    mem = XVECEXP (SET_SRC (insn), 0, 0);
	  }
	else
	  {
	    gcc_assert (mode == Pmode);

	    insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
	    mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
	  }

	/* The operand is expected to be MEM.  Whenever the related insn
	   pattern changes, the code above that calculates MEM should be
	   updated as well.  */
	gcc_assert (GET_CODE (mem) == MEM);
	MEM_READONLY_P (mem) = 1;
	MEM_NOTRAP_P (mem) = 1;
	emit_insn (insn);
	return;
      }

    case SYMBOL_SMALL_GOT_4G:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in memory), it has SImode; it may have DImode
	   if dest is dereferenced to access memory.  This is why we
	   have to handle three different ldr_got_small patterns here
	   (two patterns for ILP32).  */

	rtx insn;
	rtx mem;
	rtx tmp_reg = dest;
	machine_mode mode = GET_MODE (dest);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
	    else
	      insn = gen_ldr_got_small_si (dest, tmp_reg, imm);

	    mem = XVECEXP (SET_SRC (insn), 0, 0);
	  }
	else
	  {
	    gcc_assert (mode == Pmode);

	    insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
	    mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
	  }

	gcc_assert (GET_CODE (mem) == MEM);
	MEM_READONLY_P (mem) = 1;
	MEM_NOTRAP_P (mem) = 1;
	emit_insn (insn);
	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx_insn *insns;
	machine_mode mode = GET_MODE (dest);
	rtx result = gen_rtx_REG (mode, R0_REGNUM);

	start_sequence ();
	if (TARGET_ILP32)
	  aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
	else
	  aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	machine_mode mode = GET_MODE (dest);
	rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
	rtx tp;

	gcc_assert (mode == Pmode || mode == ptr_mode);

	/* In ILP32, the got entry is always of SImode size.  Unlike
	   small GOT, the dest is fixed at reg 0.  */
	if (TARGET_ILP32)
	  emit_insn (gen_tlsdesc_small_si (imm));
	else
	  emit_insn (gen_tlsdesc_small_di (imm));
	tp = aarch64_load_tp (NULL);

	if (mode != Pmode)
	  tp = gen_lowpart (mode, tp);

	emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TLSIE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in memory), it has SImode; it may have DImode
	   if dest is dereferenced to access memory.  This is why we
	   have to handle three different tlsie_small patterns here
	   (two patterns for ILP32).  */
	machine_mode mode = GET_MODE (dest);
	rtx tmp_reg = gen_reg_rtx (mode);
	rtx tp = aarch64_load_tp (NULL);

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_tlsie_small_di (tmp_reg, imm));
	    else
	      {
		emit_insn (gen_tlsie_small_si (tmp_reg, imm));
		tp = gen_lowpart (mode, tp);
	      }
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
	  }

	emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TLSLE12:
    case SYMBOL_TLSLE24:
    case SYMBOL_TLSLE32:
    case SYMBOL_TLSLE48:
      {
	machine_mode mode = GET_MODE (dest);
	rtx tp = aarch64_load_tp (NULL);

	if (mode != Pmode)
	  tp = gen_lowpart (mode, tp);

	switch (type)
	  {
	  case SYMBOL_TLSLE12:
	    emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
			(dest, tp, imm));
	    break;
	  case SYMBOL_TLSLE24:
	    emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
			(dest, tp, imm));
	    break;
	  case SYMBOL_TLSLE32:
	    emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
			(dest, imm));
	    emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
			(dest, dest, tp));
	    break;
	  case SYMBOL_TLSLE48:
	    emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
			(dest, imm));
	    emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
			(dest, dest, tp));
	    break;
	  default:
	    gcc_unreachable ();
	  }

	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    case SYMBOL_TINY_TLSIE:
      {
	machine_mode mode = GET_MODE (dest);
	rtx tp = aarch64_load_tp (NULL);

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
	    else
	      {
		tp = gen_lowpart (mode, tp);
		emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
	      }
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
	  }

	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    default:
      gcc_unreachable ();
    }
}

/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}

/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  src_lo = gen_lowpart (word_mode, src);
	  src_hi = gen_highpart (word_mode, src);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
	    }
	  return;
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  dst_lo = gen_lowpart (word_mode, dst);
	  dst_hi = gen_highpart (word_mode, dst);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
	    }
	  return;
	}
    }

  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
}
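
/* For example, copying a TImode value from the pair {x0,x1} to {x1,x2}
   makes dst_lo (x1) overlap src_hi (x1), so the code above emits the high
   half first (x2 = x1) and then the low half (x1 = x0); when there is no
   overlap the halves are emitted low half first.  */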

bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}

/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  machine_mode src_mode = GET_MODE (src1);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));
  gcc_assert (register_operand (dst, dst_mode)
	      && register_operand (src1, src_mode)
	      && register_operand (src2, src_mode));

  rtx (*gen) (rtx, rtx, rtx);

  switch (src_mode)
    {
    case E_V8QImode:
      gen = gen_aarch64_simd_combinev8qi;
      break;
    case E_V4HImode:
      gen = gen_aarch64_simd_combinev4hi;
      break;
    case E_V2SImode:
      gen = gen_aarch64_simd_combinev2si;
      break;
    case E_V4HFmode:
      gen = gen_aarch64_simd_combinev4hf;
      break;
    case E_V2SFmode:
      gen = gen_aarch64_simd_combinev2sf;
      break;
    case E_DImode:
      gen = gen_aarch64_simd_combinedi;
      break;
    case E_DFmode:
      gen = gen_aarch64_simd_combinedf;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (dst, src1, src2));
  return;
}

/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  machine_mode src_mode = GET_MODE (src);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case E_V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case E_V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case E_V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case E_V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case E_V8HFmode:
	  gen = gen_aarch64_split_simd_movv8hf;
	  break;
	case E_V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case E_V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
      return;
    }
}

bool
aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
			      machine_mode ymode, rtx y)
{
  rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
  gcc_assert (r != NULL);
  return rtx_equal_p (x, r);
}


static rtx
aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}


static rtx
aarch64_add_offset (scalar_int_mode mode, rtx temp, rtx reg,
		    HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
	 might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}

static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
				scalar_int_mode mode)
{
  int i;
  unsigned HOST_WIDE_INT val, val2, mask;
  int one_match, zero_match;
  int num_insns;

  val = INTVAL (imm);

  if (aarch64_move_imm (val, mode))
    {
      if (generate)
	emit_insn (gen_rtx_SET (dest, imm));
f7df4a84 1842 emit_insn (gen_rtx_SET (dest, imm));
9a4865db 1843 return 1;
43e9d192
IB
1844 }
1845
9de00935
TC
1846 /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
1847 (with XXXX non-zero). In that case check to see if the move can be done in
1848 a smaller mode. */
1849 val2 = val & 0xffffffff;
1850 if (mode == DImode
1851 && aarch64_move_imm (val2, SImode)
1852 && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
1853 {
1854 if (generate)
1855 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
1856
1857 /* Check if we have to emit a second instruction by checking to see
1858 if any of the upper 32 bits of the original DI mode value is set. */
1859 if (val == val2)
1860 return 1;
1861
1862 i = (val >> 48) ? 48 : 32;
1863
1864 if (generate)
1865 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1866 GEN_INT ((val >> i) & 0xffff)));
1867
1868 return 2;
1869 }
1870
9a4865db 1871 if ((val >> 32) == 0 || mode == SImode)
43e9d192 1872 {
82614948
RR
1873 if (generate)
1874 {
9a4865db
WD
1875 emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
1876 if (mode == SImode)
1877 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1878 GEN_INT ((val >> 16) & 0xffff)));
1879 else
1880 emit_insn (gen_insv_immdi (dest, GEN_INT (16),
1881 GEN_INT ((val >> 16) & 0xffff)));
82614948 1882 }
9a4865db 1883 return 2;
43e9d192
IB
1884 }
1885
1886 /* Remaining cases are all for DImode. */
1887
43e9d192 1888 mask = 0xffff;
9a4865db
WD
1889 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
1890 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
1891 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
1892 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
43e9d192 1893
62c8d76c 1894 if (zero_match != 2 && one_match != 2)
43e9d192 1895 {
62c8d76c
WD
1896 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
1897 For a 64-bit bitmask try whether changing 16 bits to all ones or
1898 zeroes creates a valid bitmask. To check any repeated bitmask,
1899 try using 16 bits from the other 32-bit half of val. */
43e9d192 1900
62c8d76c 1901 for (i = 0; i < 64; i += 16, mask <<= 16)
43e9d192 1902 {
62c8d76c
WD
1903 val2 = val & ~mask;
1904 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1905 break;
1906 val2 = val | mask;
1907 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1908 break;
1909 val2 = val2 & ~mask;
1910 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
1911 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1912 break;
43e9d192 1913 }
62c8d76c 1914 if (i != 64)
43e9d192 1915 {
62c8d76c 1916 if (generate)
43e9d192 1917 {
62c8d76c
WD
1918 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
1919 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
9a4865db 1920 GEN_INT ((val >> i) & 0xffff)));
43e9d192 1921 }
1312b1ba 1922 return 2;
43e9d192
IB
1923 }
1924 }
1925
9a4865db
WD
1926 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
1927 are emitted by the initial mov. If one_match > zero_match, skip set bits,
1928 otherwise skip zero bits. */
2c274197 1929
9a4865db 1930 num_insns = 1;
43e9d192 1931 mask = 0xffff;
9a4865db
WD
1932 val2 = one_match > zero_match ? ~val : val;
1933 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
1934
1935 if (generate)
1936 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
1937 ? (val | ~(mask << i))
1938 : (val & (mask << i)))));
1939 for (i += 16; i < 64; i += 16)
43e9d192 1940 {
9a4865db
WD
1941 if ((val2 & (mask << i)) == 0)
1942 continue;
1943 if (generate)
1944 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1945 GEN_INT ((val >> i) & 0xffff)));
1946 num_insns ++;
82614948
RR
1947 }
1948
1949 return num_insns;
1950}
1951
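As a rough illustration of the final counting step above, the standalone sketch below (not GCC code) tallies the MOVZ/MOVN-plus-MOVK sequence length for a 64-bit immediate, ignoring the bitmask-immediate and 32-bit shortcuts handled earlier in the function.

#include <stdio.h>

static int
count_mov_insns (unsigned long long val)
{
  int zero_match = 0, one_match = 0, num_insns = 0, i;

  for (i = 0; i < 64; i += 16)
    {
      unsigned chunk = (val >> i) & 0xffff;
      zero_match += (chunk == 0);
      one_match += (chunk == 0xffff);
    }

  /* Start from MOVN if more 16-bit chunks are all-ones than all-zeroes;
     every chunk of the (possibly inverted) value that is non-zero then
     needs either the initial mov or a MOVK.  */
  unsigned long long val2 = one_match > zero_match ? ~val : val;
  for (i = 0; i < 64; i += 16)
    num_insns += ((val2 >> i) & 0xffff) != 0;

  return num_insns ? num_insns : 1;
}

int
main (void)
{
  printf ("%d\n", count_mov_insns (0x1234000000000000ULL)); /* 1: movz */
  printf ("%d\n", count_mov_insns (0x0000123400005678ULL)); /* 2: movz + movk */
  printf ("%d\n", count_mov_insns (0xffff00001234ffffULL)); /* 2: movn + movk */
  return 0;
}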
1952
1953void
1954aarch64_expand_mov_immediate (rtx dest, rtx imm)
1955{
1956 machine_mode mode = GET_MODE (dest);
1957
1958 gcc_assert (mode == SImode || mode == DImode);
1959
1960 /* Check on what type of symbol it is. */
77e994c9
RS
1961 scalar_int_mode int_mode;
1962 if ((GET_CODE (imm) == SYMBOL_REF
1963 || GET_CODE (imm) == LABEL_REF
1964 || GET_CODE (imm) == CONST)
1965 && is_a <scalar_int_mode> (mode, &int_mode))
82614948
RR
1966 {
1967 rtx mem, base, offset;
1968 enum aarch64_symbol_type sty;
1969
1970 /* If we have (const (plus symbol offset)), separate out the offset
1971 before we start classifying the symbol. */
1972 split_const (imm, &base, &offset);
1973
a6e0bfa7 1974 sty = aarch64_classify_symbol (base, offset);
82614948
RR
1975 switch (sty)
1976 {
1977 case SYMBOL_FORCE_TO_MEM:
1978 if (offset != const0_rtx
77e994c9 1979 && targetm.cannot_force_const_mem (int_mode, imm))
82614948
RR
1980 {
1981 gcc_assert (can_create_pseudo_p ());
77e994c9
RS
1982 base = aarch64_force_temporary (int_mode, dest, base);
1983 base = aarch64_add_offset (int_mode, NULL, base,
1984 INTVAL (offset));
82614948
RR
1985 aarch64_emit_move (dest, base);
1986 return;
1987 }
b4f50fd4 1988
82614948
RR
1989 mem = force_const_mem (ptr_mode, imm);
1990 gcc_assert (mem);
b4f50fd4
RR
1991
1992 /* If we aren't generating PC relative literals, then
1993 we need to expand the literal pool access carefully.
1994 This is something that needs to be done in a number
1995 of places, so could well live as a separate function. */
9ee6540a 1996 if (!aarch64_pcrelative_literal_loads)
b4f50fd4
RR
1997 {
1998 gcc_assert (can_create_pseudo_p ());
1999 base = gen_reg_rtx (ptr_mode);
2000 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
00eee3fa
WD
2001 if (ptr_mode != Pmode)
2002 base = convert_memory_address (Pmode, base);
b4f50fd4
RR
2003 mem = gen_rtx_MEM (ptr_mode, base);
2004 }
2005
77e994c9
RS
2006 if (int_mode != ptr_mode)
2007 mem = gen_rtx_ZERO_EXTEND (int_mode, mem);
b4f50fd4 2008
f7df4a84 2009 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 2010
82614948
RR
2011 return;
2012
2013 case SYMBOL_SMALL_TLSGD:
2014 case SYMBOL_SMALL_TLSDESC:
79496620 2015 case SYMBOL_SMALL_TLSIE:
1b1e81f8 2016 case SYMBOL_SMALL_GOT_28K:
6642bdb4 2017 case SYMBOL_SMALL_GOT_4G:
82614948 2018 case SYMBOL_TINY_GOT:
5ae7caad 2019 case SYMBOL_TINY_TLSIE:
82614948
RR
2020 if (offset != const0_rtx)
2021 {
2022 gcc_assert(can_create_pseudo_p ());
77e994c9
RS
2023 base = aarch64_force_temporary (int_mode, dest, base);
2024 base = aarch64_add_offset (int_mode, NULL, base,
2025 INTVAL (offset));
82614948
RR
2026 aarch64_emit_move (dest, base);
2027 return;
2028 }
2029 /* FALLTHRU */
2030
82614948
RR
2031 case SYMBOL_SMALL_ABSOLUTE:
2032 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 2033 case SYMBOL_TLSLE12:
d18ba284 2034 case SYMBOL_TLSLE24:
cbf5629e
JW
2035 case SYMBOL_TLSLE32:
2036 case SYMBOL_TLSLE48:
82614948
RR
2037 aarch64_load_symref_appropriately (dest, imm, sty);
2038 return;
2039
2040 default:
2041 gcc_unreachable ();
2042 }
2043 }
2044
2045 if (!CONST_INT_P (imm))
2046 {
2047 if (GET_CODE (imm) == HIGH)
f7df4a84 2048 emit_insn (gen_rtx_SET (dest, imm));
82614948
RR
2049 else
2050 {
2051 rtx mem = force_const_mem (mode, imm);
2052 gcc_assert (mem);
f7df4a84 2053 emit_insn (gen_rtx_SET (dest, mem));
43e9d192 2054 }
82614948
RR
2055
2056 return;
43e9d192 2057 }
82614948 2058
77e994c9
RS
2059 aarch64_internal_mov_immediate (dest, imm, true,
2060 as_a <scalar_int_mode> (mode));
43e9d192
IB
2061}
2062
5be6b295
WD
2063/* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to hold a
2064 temporary value if necessary. FRAME_RELATED_P should be true if
2065 the RTX_FRAME_RELATED flag should be set and CFA adjustments added
2066 to the generated instructions. If SCRATCHREG is known to hold
2067 abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
2068 immediate again.
2069
2070 Since this function may be used to adjust the stack pointer, we must
2071 ensure that it cannot cause transient stack deallocation (for example
2072 by first incrementing SP and then decrementing when adjusting by a
2073 large immediate). */
c4ddc43a
JW
2074
2075static void
77e994c9
RS
2076aarch64_add_constant_internal (scalar_int_mode mode, int regnum,
2077 int scratchreg, HOST_WIDE_INT delta,
2078 bool frame_related_p, bool emit_move_imm)
c4ddc43a
JW
2079{
2080 HOST_WIDE_INT mdelta = abs_hwi (delta);
2081 rtx this_rtx = gen_rtx_REG (mode, regnum);
37d6a4b7 2082 rtx_insn *insn;
c4ddc43a 2083
c4ddc43a
JW
2084 if (!mdelta)
2085 return;
2086
5be6b295 2087 /* Single instruction adjustment. */
c4ddc43a
JW
2088 if (aarch64_uimm12_shift (mdelta))
2089 {
37d6a4b7
JW
2090 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta)));
2091 RTX_FRAME_RELATED_P (insn) = frame_related_p;
c4ddc43a
JW
2092 return;
2093 }
2094
5be6b295
WD
 2095 /* Emit two additions/subtractions if the adjustment is less than 24 bits.
 2096 Only do this if mdelta cannot be loaded with a single move immediate,
 2097 since adjusting via a move is better in that case. */
2098 if (mdelta < 0x1000000 && !aarch64_move_imm (mdelta, mode))
c4ddc43a
JW
2099 {
2100 HOST_WIDE_INT low_off = mdelta & 0xfff;
2101
2102 low_off = delta < 0 ? -low_off : low_off;
37d6a4b7
JW
2103 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (low_off)));
2104 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2105 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta - low_off)));
2106 RTX_FRAME_RELATED_P (insn) = frame_related_p;
c4ddc43a
JW
2107 return;
2108 }
2109
5be6b295 2110 /* Emit a move immediate if required and an addition/subtraction. */
c4ddc43a 2111 rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
5be6b295
WD
2112 if (emit_move_imm)
2113 aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, mode);
2114 insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx)
2115 : gen_add2_insn (this_rtx, scratch_rtx));
37d6a4b7
JW
2116 if (frame_related_p)
2117 {
2118 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2119 rtx adj = plus_constant (mode, this_rtx, delta);
2120 add_reg_note (insn , REG_CFA_ADJUST_CFA, gen_rtx_SET (this_rtx, adj));
2121 }
c4ddc43a
JW
2122}
2123
5be6b295 2124static inline void
77e994c9 2125aarch64_add_constant (scalar_int_mode mode, int regnum, int scratchreg,
5be6b295
WD
2126 HOST_WIDE_INT delta)
2127{
2128 aarch64_add_constant_internal (mode, regnum, scratchreg, delta, false, true);
2129}
2130
2131static inline void
2132aarch64_add_sp (int scratchreg, HOST_WIDE_INT delta, bool emit_move_imm)
2133{
2134 aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, delta,
2135 true, emit_move_imm);
2136}
2137
2138static inline void
2139aarch64_sub_sp (int scratchreg, HOST_WIDE_INT delta, bool frame_related_p)
2140{
2141 aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, -delta,
2142 frame_related_p, true);
2143}
2144
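For the sub-24-bit case above, a concrete example helps: the standalone snippet below (hypothetical values, not GCC code) splits an adjustment into its low 12 bits and a 12-bit-shifted remainder, each of which fits a single add/sub immediate.

#include <stdio.h>

int
main (void)
{
  long long delta = 0x123456;		/* hypothetical SP adjustment */
  long long low_off = delta & 0xfff;	/* 0x456: plain 12-bit immediate */
  long long high_off = delta - low_off;	/* 0x123000: 12-bit imm, lsl #12 */

  printf ("add sp, sp, #0x%llx\n", low_off);
  printf ("add sp, sp, #0x%llx\n", high_off);
  return 0;
}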
43e9d192 2145static bool
fee9ba42
JW
2146aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
2147 tree exp ATTRIBUTE_UNUSED)
43e9d192 2148{
fee9ba42 2149 /* Currently, always true. */
43e9d192
IB
2150 return true;
2151}
2152
2153/* Implement TARGET_PASS_BY_REFERENCE. */
2154
2155static bool
2156aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 2157 machine_mode mode,
43e9d192
IB
2158 const_tree type,
2159 bool named ATTRIBUTE_UNUSED)
2160{
2161 HOST_WIDE_INT size;
ef4bddc2 2162 machine_mode dummymode;
43e9d192
IB
2163 int nregs;
2164
2165 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
2166 size = (mode == BLKmode && type)
2167 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2168
aadc1c43
MHD
2169 /* Aggregates are passed by reference based on their size. */
2170 if (type && AGGREGATE_TYPE_P (type))
43e9d192 2171 {
aadc1c43 2172 size = int_size_in_bytes (type);
43e9d192
IB
2173 }
2174
2175 /* Variable sized arguments are always returned by reference. */
2176 if (size < 0)
2177 return true;
2178
2179 /* Can this be a candidate to be passed in fp/simd register(s)? */
2180 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
2181 &dummymode, &nregs,
2182 NULL))
2183 return false;
2184
2185 /* Arguments which are variable sized or larger than 2 registers are
 2186 passed by reference unless they are a homogeneous floating-point
2187 aggregate. */
2188 return size > 2 * UNITS_PER_WORD;
2189}
2190
2191/* Return TRUE if VALTYPE is padded to its least significant bits. */
2192static bool
2193aarch64_return_in_msb (const_tree valtype)
2194{
ef4bddc2 2195 machine_mode dummy_mode;
43e9d192
IB
2196 int dummy_int;
2197
2198 /* Never happens in little-endian mode. */
2199 if (!BYTES_BIG_ENDIAN)
2200 return false;
2201
2202 /* Only composite types smaller than or equal to 16 bytes can
2203 be potentially returned in registers. */
2204 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
2205 || int_size_in_bytes (valtype) <= 0
2206 || int_size_in_bytes (valtype) > 16)
2207 return false;
2208
2209 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
2210 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
2211 is always passed/returned in the least significant bits of fp/simd
2212 register(s). */
2213 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
2214 &dummy_mode, &dummy_int, NULL))
2215 return false;
2216
2217 return true;
2218}
2219
2220/* Implement TARGET_FUNCTION_VALUE.
2221 Define how to find the value returned by a function. */
2222
2223static rtx
2224aarch64_function_value (const_tree type, const_tree func,
2225 bool outgoing ATTRIBUTE_UNUSED)
2226{
ef4bddc2 2227 machine_mode mode;
43e9d192
IB
2228 int unsignedp;
2229 int count;
ef4bddc2 2230 machine_mode ag_mode;
43e9d192
IB
2231
2232 mode = TYPE_MODE (type);
2233 if (INTEGRAL_TYPE_P (type))
2234 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
2235
2236 if (aarch64_return_in_msb (type))
2237 {
2238 HOST_WIDE_INT size = int_size_in_bytes (type);
2239
2240 if (size % UNITS_PER_WORD != 0)
2241 {
2242 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
f4b31647 2243 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
43e9d192
IB
2244 }
2245 }
2246
2247 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
2248 &ag_mode, &count, NULL))
2249 {
2250 if (!aarch64_composite_type_p (type, mode))
2251 {
2252 gcc_assert (count == 1 && mode == ag_mode);
2253 return gen_rtx_REG (mode, V0_REGNUM);
2254 }
2255 else
2256 {
2257 int i;
2258 rtx par;
2259
2260 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
2261 for (i = 0; i < count; i++)
2262 {
2263 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
2264 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2265 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
2266 XVECEXP (par, 0, i) = tmp;
2267 }
2268 return par;
2269 }
2270 }
2271 else
2272 return gen_rtx_REG (mode, R0_REGNUM);
2273}
2274
2275/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
2276 Return true if REGNO is the number of a hard register in which the values
2277 of called function may come back. */
2278
2279static bool
2280aarch64_function_value_regno_p (const unsigned int regno)
2281{
2282 /* Maximum of 16 bytes can be returned in the general registers. Examples
2283 of 16-byte return values are: 128-bit integers and 16-byte small
2284 structures (excluding homogeneous floating-point aggregates). */
2285 if (regno == R0_REGNUM || regno == R1_REGNUM)
2286 return true;
2287
2288 /* Up to four fp/simd registers can return a function value, e.g. a
2289 homogeneous floating-point aggregate having four members. */
2290 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 2291 return TARGET_FLOAT;
43e9d192
IB
2292
2293 return false;
2294}
2295
2296/* Implement TARGET_RETURN_IN_MEMORY.
2297
2298 If the type T of the result of a function is such that
2299 void func (T arg)
2300 would require that arg be passed as a value in a register (or set of
2301 registers) according to the parameter passing rules, then the result
2302 is returned in the same registers as would be used for such an
2303 argument. */
2304
2305static bool
2306aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
2307{
2308 HOST_WIDE_INT size;
ef4bddc2 2309 machine_mode ag_mode;
43e9d192
IB
2310 int count;
2311
2312 if (!AGGREGATE_TYPE_P (type)
2313 && TREE_CODE (type) != COMPLEX_TYPE
2314 && TREE_CODE (type) != VECTOR_TYPE)
2315 /* Simple scalar types always returned in registers. */
2316 return false;
2317
2318 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
2319 type,
2320 &ag_mode,
2321 &count,
2322 NULL))
2323 return false;
2324
2325 /* Types larger than 2 registers returned in memory. */
2326 size = int_size_in_bytes (type);
2327 return (size < 0 || size > 2 * UNITS_PER_WORD);
2328}
2329
2330static bool
ef4bddc2 2331aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2332 const_tree type, int *nregs)
2333{
2334 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2335 return aarch64_vfp_is_call_or_return_candidate (mode,
2336 type,
2337 &pcum->aapcs_vfp_rmode,
2338 nregs,
2339 NULL);
2340}
2341
985b8393 2342/* Given MODE and TYPE of a function argument, return the alignment in
43e9d192
IB
2343 bits. The idea is to suppress any stronger alignment requested by
2344 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
2345 This is a helper function for local use only. */
2346
985b8393 2347static unsigned int
ef4bddc2 2348aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192 2349{
75d6cc81 2350 if (!type)
985b8393 2351 return GET_MODE_ALIGNMENT (mode);
2ec07fa6 2352
75d6cc81 2353 if (integer_zerop (TYPE_SIZE (type)))
985b8393 2354 return 0;
43e9d192 2355
75d6cc81
AL
2356 gcc_assert (TYPE_MODE (type) == mode);
2357
2358 if (!AGGREGATE_TYPE_P (type))
985b8393 2359 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
75d6cc81
AL
2360
2361 if (TREE_CODE (type) == ARRAY_TYPE)
985b8393 2362 return TYPE_ALIGN (TREE_TYPE (type));
75d6cc81 2363
985b8393 2364 unsigned int alignment = 0;
75d6cc81 2365 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
985b8393
JJ
2366 if (TREE_CODE (field) == FIELD_DECL)
2367 alignment = std::max (alignment, DECL_ALIGN (field));
43e9d192 2368
985b8393 2369 return alignment;
43e9d192
IB
2370}
2371
2372/* Layout a function argument according to the AAPCS64 rules. The rule
2373 numbers refer to the rule numbers in the AAPCS64. */
2374
2375static void
ef4bddc2 2376aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2377 const_tree type,
2378 bool named ATTRIBUTE_UNUSED)
2379{
2380 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2381 int ncrn, nvrn, nregs;
2382 bool allocate_ncrn, allocate_nvrn;
3abf17cf 2383 HOST_WIDE_INT size;
43e9d192
IB
2384
2385 /* We need to do this once per argument. */
2386 if (pcum->aapcs_arg_processed)
2387 return;
2388
2389 pcum->aapcs_arg_processed = true;
2390
3abf17cf
YZ
2391 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
2392 size
4f59f9f2
UB
2393 = ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
2394 UNITS_PER_WORD);
3abf17cf 2395
43e9d192
IB
2396 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
2397 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
2398 mode,
2399 type,
2400 &nregs);
2401
 2402 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
2403 The following code thus handles passing by SIMD/FP registers first. */
2404
2405 nvrn = pcum->aapcs_nvrn;
2406
 2407 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
 2408 and homogeneous short-vector aggregates (HVA). */
2409 if (allocate_nvrn)
2410 {
261fb553
AL
2411 if (!TARGET_FLOAT)
2412 aarch64_err_no_fpadvsimd (mode, "argument");
2413
43e9d192
IB
2414 if (nvrn + nregs <= NUM_FP_ARG_REGS)
2415 {
2416 pcum->aapcs_nextnvrn = nvrn + nregs;
2417 if (!aarch64_composite_type_p (type, mode))
2418 {
2419 gcc_assert (nregs == 1);
2420 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
2421 }
2422 else
2423 {
2424 rtx par;
2425 int i;
2426 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2427 for (i = 0; i < nregs; i++)
2428 {
2429 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
2430 V0_REGNUM + nvrn + i);
2431 tmp = gen_rtx_EXPR_LIST
2432 (VOIDmode, tmp,
2433 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
2434 XVECEXP (par, 0, i) = tmp;
2435 }
2436 pcum->aapcs_reg = par;
2437 }
2438 return;
2439 }
2440 else
2441 {
2442 /* C.3 NSRN is set to 8. */
2443 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
2444 goto on_stack;
2445 }
2446 }
2447
2448 ncrn = pcum->aapcs_ncrn;
3abf17cf 2449 nregs = size / UNITS_PER_WORD;
43e9d192
IB
2450
 2451 /* C6 - C9, though the sign and zero extension semantics are
 2452 handled elsewhere. This is the case where the argument fits
 2453 entirely in general registers. */
2454 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
2455 {
43e9d192
IB
2456
2457 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
2458
2459 /* C.8 if the argument has an alignment of 16 then the NGRN is
2460 rounded up to the next even number. */
985b8393
JJ
2461 if (nregs == 2
2462 && ncrn % 2
2ec07fa6 2463 /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT
985b8393 2464 comparison is there because for > 16 * BITS_PER_UNIT
2ec07fa6
RR
2465 alignment nregs should be > 2 and therefore it should be
2466 passed by reference rather than value. */
985b8393
JJ
2467 && aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
2468 {
2469 ++ncrn;
2470 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
43e9d192 2471 }
2ec07fa6 2472
43e9d192
IB
2473 /* NREGS can be 0 when e.g. an empty structure is to be passed.
2474 A reg is still generated for it, but the caller should be smart
2475 enough not to use it. */
2476 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2ec07fa6 2477 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
43e9d192
IB
2478 else
2479 {
2480 rtx par;
2481 int i;
2482
2483 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2484 for (i = 0; i < nregs; i++)
2485 {
2486 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
2487 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2488 GEN_INT (i * UNITS_PER_WORD));
2489 XVECEXP (par, 0, i) = tmp;
2490 }
2491 pcum->aapcs_reg = par;
2492 }
2493
2494 pcum->aapcs_nextncrn = ncrn + nregs;
2495 return;
2496 }
2497
2498 /* C.11 */
2499 pcum->aapcs_nextncrn = NUM_ARG_REGS;
2500
2501 /* The argument is passed on stack; record the needed number of words for
3abf17cf 2502 this argument and align the total size if necessary. */
43e9d192 2503on_stack:
3abf17cf 2504 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
2ec07fa6 2505
985b8393 2506 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
4f59f9f2
UB
2507 pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size,
2508 16 / UNITS_PER_WORD);
43e9d192
IB
2509 return;
2510}
2511
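Rule C.8 above is easiest to see with numbers: the standalone sketch below (not GCC code) shows that a two-register argument with 16-byte alignment skips an odd-numbered general register so that it starts on an even NGRN.

#include <stdio.h>

static int
first_gp_reg_for_arg (int ncrn, int nregs, int align_bits)
{
  /* Mirror the C.8 adjustment: round an odd NGRN up to even for a
     two-register argument with 16-byte (128-bit) alignment.  */
  if (nregs == 2 && (ncrn % 2) != 0 && align_bits == 128)
    ncrn++;
  return ncrn;
}

int
main (void)
{
  /* An __int128 following one int argument skips x1 and uses x2/x3.  */
  printf ("x%d\n", first_gp_reg_for_arg (1, 2, 128));
  /* A 16-byte struct with only 8-byte alignment stays at x1/x2.  */
  printf ("x%d\n", first_gp_reg_for_arg (1, 2, 64));
  return 0;
}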
2512/* Implement TARGET_FUNCTION_ARG. */
2513
2514static rtx
ef4bddc2 2515aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2516 const_tree type, bool named)
2517{
2518 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2519 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
2520
2521 if (mode == VOIDmode)
2522 return NULL_RTX;
2523
2524 aarch64_layout_arg (pcum_v, mode, type, named);
2525 return pcum->aapcs_reg;
2526}
2527
2528void
2529aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
2530 const_tree fntype ATTRIBUTE_UNUSED,
2531 rtx libname ATTRIBUTE_UNUSED,
2532 const_tree fndecl ATTRIBUTE_UNUSED,
2533 unsigned n_named ATTRIBUTE_UNUSED)
2534{
2535 pcum->aapcs_ncrn = 0;
2536 pcum->aapcs_nvrn = 0;
2537 pcum->aapcs_nextncrn = 0;
2538 pcum->aapcs_nextnvrn = 0;
2539 pcum->pcs_variant = ARM_PCS_AAPCS64;
2540 pcum->aapcs_reg = NULL_RTX;
2541 pcum->aapcs_arg_processed = false;
2542 pcum->aapcs_stack_words = 0;
2543 pcum->aapcs_stack_size = 0;
2544
261fb553
AL
2545 if (!TARGET_FLOAT
2546 && fndecl && TREE_PUBLIC (fndecl)
2547 && fntype && fntype != error_mark_node)
2548 {
2549 const_tree type = TREE_TYPE (fntype);
2550 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
2551 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
2552 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
2553 &mode, &nregs, NULL))
2554 aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type");
2555 }
43e9d192
IB
2556 return;
2557}
2558
2559static void
2560aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 2561 machine_mode mode,
43e9d192
IB
2562 const_tree type,
2563 bool named)
2564{
2565 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2566 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
2567 {
2568 aarch64_layout_arg (pcum_v, mode, type, named);
2569 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
2570 != (pcum->aapcs_stack_words != 0));
2571 pcum->aapcs_arg_processed = false;
2572 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
2573 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
2574 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
2575 pcum->aapcs_stack_words = 0;
2576 pcum->aapcs_reg = NULL_RTX;
2577 }
2578}
2579
2580bool
2581aarch64_function_arg_regno_p (unsigned regno)
2582{
2583 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
2584 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
2585}
2586
2587/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
2588 PARM_BOUNDARY bits of alignment, but will be given anything up
2589 to STACK_BOUNDARY bits if the type requires it. This makes sure
2590 that both before and after the layout of each argument, the Next
2591 Stacked Argument Address (NSAA) will have a minimum alignment of
2592 8 bytes. */
2593
2594static unsigned int
ef4bddc2 2595aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192 2596{
985b8393
JJ
2597 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2598 return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
43e9d192
IB
2599}
2600
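The clamp above keeps every stack argument between the minimum parameter alignment and the stack alignment. A minimal sketch, assuming AArch64's usual PARM_BOUNDARY of 64 bits and STACK_BOUNDARY of 128 bits (not GCC code):

#include <stdio.h>

/* Clamp a type's alignment into [64, 128] bits, matching the MIN/MAX
   above under the assumed boundary values.  */
static unsigned int
clamp_arg_boundary (unsigned int type_align_bits)
{
  unsigned int a = type_align_bits < 64 ? 64 : type_align_bits;
  return a > 128 ? 128 : a;
}

int
main (void)
{
  printf ("%u %u %u\n", clamp_arg_boundary (8),
	  clamp_arg_boundary (128), clamp_arg_boundary (256));
  return 0;
}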
76b0cbf8 2601/* Implement TARGET_FUNCTION_ARG_PADDING.
43e9d192
IB
2602
2603 Small aggregate types are placed in the lowest memory address.
2604
2605 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
2606
76b0cbf8
RS
2607static pad_direction
2608aarch64_function_arg_padding (machine_mode mode, const_tree type)
43e9d192
IB
2609{
2610 /* On little-endian targets, the least significant byte of every stack
2611 argument is passed at the lowest byte address of the stack slot. */
2612 if (!BYTES_BIG_ENDIAN)
76b0cbf8 2613 return PAD_UPWARD;
43e9d192 2614
00edcfbe 2615 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
2616 the least significant byte of a stack argument is passed at the highest
2617 byte address of the stack slot. */
2618 if (type
00edcfbe
YZ
2619 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
2620 || POINTER_TYPE_P (type))
43e9d192 2621 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
76b0cbf8 2622 return PAD_DOWNWARD;
43e9d192
IB
2623
2624 /* Everything else padded upward, i.e. data in first byte of stack slot. */
76b0cbf8 2625 return PAD_UPWARD;
43e9d192
IB
2626}
2627
2628/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
2629
2630 It specifies padding for the last (may also be the only)
2631 element of a block move between registers and memory. If
 2632 assuming the block is in memory, padding upward means that
 2633 the last element is padded after its most significant byte,
 2634 while with downward padding, the last element is padded at
 2635 its least significant byte side.
2636
2637 Small aggregates and small complex types are always padded
2638 upwards.
2639
2640 We don't need to worry about homogeneous floating-point or
2641 short-vector aggregates; their move is not affected by the
2642 padding direction determined here. Regardless of endianness,
2643 each element of such an aggregate is put in the least
2644 significant bits of a fp/simd register.
2645
2646 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2647 register has useful data, and return the opposite if the most
2648 significant byte does. */
2649
2650bool
ef4bddc2 2651aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
2652 bool first ATTRIBUTE_UNUSED)
2653{
2654
2655 /* Small composite types are always padded upward. */
2656 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2657 {
2658 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2659 : GET_MODE_SIZE (mode));
2660 if (size < 2 * UNITS_PER_WORD)
2661 return true;
2662 }
2663
2664 /* Otherwise, use the default padding. */
2665 return !BYTES_BIG_ENDIAN;
2666}
2667
095a2d76 2668static scalar_int_mode
43e9d192
IB
2669aarch64_libgcc_cmp_return_mode (void)
2670{
2671 return SImode;
2672}
2673
a3eb8a52
EB
2674#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
2675
2676/* We use the 12-bit shifted immediate arithmetic instructions so values
2677 must be multiple of (1 << 12), i.e. 4096. */
2678#define ARITH_FACTOR 4096
2679
2680#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
2681#error Cannot use simple address calculation for stack probing
2682#endif
2683
2684/* The pair of scratch registers used for stack probing. */
2685#define PROBE_STACK_FIRST_REG 9
2686#define PROBE_STACK_SECOND_REG 10
2687
2688/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
2689 inclusive. These are offsets from the current stack pointer. */
2690
2691static void
2692aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
2693{
5f5c5e0f 2694 rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
a3eb8a52
EB
2695
2696 /* See the same assertion on PROBE_INTERVAL above. */
2697 gcc_assert ((first % ARITH_FACTOR) == 0);
2698
2699 /* See if we have a constant small number of probes to generate. If so,
2700 that's the easy case. */
2701 if (size <= PROBE_INTERVAL)
2702 {
2703 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
2704
2705 emit_set_insn (reg1,
5f5c5e0f 2706 plus_constant (Pmode,
a3eb8a52 2707 stack_pointer_rtx, -(first + base)));
5f5c5e0f 2708 emit_stack_probe (plus_constant (Pmode, reg1, base - size));
a3eb8a52
EB
2709 }
2710
2711 /* The run-time loop is made up of 8 insns in the generic case while the
2712 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
2713 else if (size <= 4 * PROBE_INTERVAL)
2714 {
2715 HOST_WIDE_INT i, rem;
2716
2717 emit_set_insn (reg1,
5f5c5e0f 2718 plus_constant (Pmode,
a3eb8a52
EB
2719 stack_pointer_rtx,
2720 -(first + PROBE_INTERVAL)));
2721 emit_stack_probe (reg1);
2722
2723 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
2724 it exceeds SIZE. If only two probes are needed, this will not
2725 generate any code. Then probe at FIRST + SIZE. */
2726 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
2727 {
2728 emit_set_insn (reg1,
5f5c5e0f 2729 plus_constant (Pmode, reg1, -PROBE_INTERVAL));
a3eb8a52
EB
2730 emit_stack_probe (reg1);
2731 }
2732
2733 rem = size - (i - PROBE_INTERVAL);
2734 if (rem > 256)
2735 {
2736 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2737
5f5c5e0f
EB
2738 emit_set_insn (reg1, plus_constant (Pmode, reg1, -base));
2739 emit_stack_probe (plus_constant (Pmode, reg1, base - rem));
a3eb8a52
EB
2740 }
2741 else
5f5c5e0f 2742 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
a3eb8a52
EB
2743 }
2744
2745 /* Otherwise, do the same as above, but in a loop. Note that we must be
2746 extra careful with variables wrapping around because we might be at
2747 the very top (or the very bottom) of the address space and we have
2748 to be able to handle this case properly; in particular, we use an
2749 equality test for the loop condition. */
2750 else
2751 {
5f5c5e0f 2752 rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REG);
a3eb8a52
EB
2753
2754 /* Step 1: round SIZE to the previous multiple of the interval. */
2755
2756 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
2757
2758
2759 /* Step 2: compute initial and final value of the loop counter. */
2760
2761 /* TEST_ADDR = SP + FIRST. */
2762 emit_set_insn (reg1,
5f5c5e0f 2763 plus_constant (Pmode, stack_pointer_rtx, -first));
a3eb8a52
EB
2764
2765 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
13f752b2
JL
2766 HOST_WIDE_INT adjustment = - (first + rounded_size);
2767 if (! aarch64_uimm12_shift (adjustment))
2768 {
2769 aarch64_internal_mov_immediate (reg2, GEN_INT (adjustment),
2770 true, Pmode);
2771 emit_set_insn (reg2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg2));
2772 }
2773 else
2774 {
2775 emit_set_insn (reg2,
2776 plus_constant (Pmode, stack_pointer_rtx, adjustment));
2777 }
2778
a3eb8a52
EB
2779 /* Step 3: the loop
2780
2781 do
2782 {
2783 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
2784 probe at TEST_ADDR
2785 }
2786 while (TEST_ADDR != LAST_ADDR)
2787
2788 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
2789 until it is equal to ROUNDED_SIZE. */
2790
5f5c5e0f 2791 emit_insn (gen_probe_stack_range (reg1, reg1, reg2));
a3eb8a52
EB
2792
2793
2794 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
2795 that SIZE is equal to ROUNDED_SIZE. */
2796
2797 if (size != rounded_size)
2798 {
2799 HOST_WIDE_INT rem = size - rounded_size;
2800
2801 if (rem > 256)
2802 {
2803 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2804
5f5c5e0f
EB
2805 emit_set_insn (reg2, plus_constant (Pmode, reg2, -base));
2806 emit_stack_probe (plus_constant (Pmode, reg2, base - rem));
a3eb8a52
EB
2807 }
2808 else
5f5c5e0f 2809 emit_stack_probe (plus_constant (Pmode, reg2, -rem));
a3eb8a52
EB
2810 }
2811 }
2812
2813 /* Make sure nothing is scheduled before we are done. */
2814 emit_insn (gen_blockage ());
2815}
2816
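To make the probing pattern above concrete, the standalone sketch below (assuming a 4 KiB PROBE_INTERVAL and hypothetical FIRST/SIZE values, not GCC code) lists the offsets below the incoming stack pointer that end up being probed: every multiple of the interval inside the range, plus the final byte at FIRST + SIZE.

#include <stdio.h>

#define PROBE_INTERVAL 4096

int
main (void)
{
  long first = 0, size = 3 * PROBE_INTERVAL + 300, i;

  /* Probes land at FIRST + N * PROBE_INTERVAL while that stays inside
     SIZE, followed by one final probe at FIRST + SIZE.  */
  for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
    printf ("probe at sp - %ld\n", first + i);
  printf ("probe at sp - %ld\n", first + size);
  return 0;
}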
2817/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
2818 absolute addresses. */
2819
2820const char *
2821aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
2822{
2823 static int labelno = 0;
2824 char loop_lab[32];
2825 rtx xops[2];
2826
2827 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
2828
2829 /* Loop. */
2830 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
2831
2832 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
2833 xops[0] = reg1;
2834 xops[1] = GEN_INT (PROBE_INTERVAL);
2835 output_asm_insn ("sub\t%0, %0, %1", xops);
2836
2837 /* Probe at TEST_ADDR. */
2838 output_asm_insn ("str\txzr, [%0]", xops);
2839
2840 /* Test if TEST_ADDR == LAST_ADDR. */
2841 xops[1] = reg2;
2842 output_asm_insn ("cmp\t%0, %1", xops);
2843
2844 /* Branch. */
2845 fputs ("\tb.ne\t", asm_out_file);
2846 assemble_name_raw (asm_out_file, loop_lab);
2847 fputc ('\n', asm_out_file);
2848
2849 return "";
2850}
2851
43e9d192
IB
2852static bool
2853aarch64_frame_pointer_required (void)
2854{
0b7f8166
MS
2855 /* In aarch64_override_options_after_change
2856 flag_omit_leaf_frame_pointer turns off the frame pointer by
2857 default. Turn it back on now if we've not got a leaf
2858 function. */
2859 if (flag_omit_leaf_frame_pointer
2860 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2861 return true;
43e9d192 2862
8144a493
WD
2863 /* Force a frame pointer for EH returns so the return address is at FP+8. */
2864 if (crtl->calls_eh_return)
2865 return true;
2866
0b7f8166 2867 return false;
43e9d192
IB
2868}
2869
2870/* Mark the registers that need to be saved by the callee and calculate
2871 the size of the callee-saved registers area and frame record (both FP
2872 and LR may be omitted). */
2873static void
2874aarch64_layout_frame (void)
2875{
2876 HOST_WIDE_INT offset = 0;
4b0685d9 2877 int regno, last_fp_reg = INVALID_REGNUM;
43e9d192
IB
2878
2879 if (reload_completed && cfun->machine->frame.laid_out)
2880 return;
2881
97826595
MS
2882#define SLOT_NOT_REQUIRED (-2)
2883#define SLOT_REQUIRED (-1)
2884
71bfb77a
WD
2885 cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
2886 cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
363ffa50 2887
43e9d192
IB
2888 /* First mark all the registers that really need to be saved... */
2889 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2890 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2891
2892 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2893 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2894
2895 /* ... that includes the eh data registers (if needed)... */
2896 if (crtl->calls_eh_return)
2897 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
2898 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2899 = SLOT_REQUIRED;
43e9d192
IB
2900
2901 /* ... and any callee saved register that dataflow says is live. */
2902 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2903 if (df_regs_ever_live_p (regno)
1c923b60
JW
2904 && (regno == R30_REGNUM
2905 || !call_used_regs[regno]))
97826595 2906 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2907
2908 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2909 if (df_regs_ever_live_p (regno)
2910 && !call_used_regs[regno])
4b0685d9
WD
2911 {
2912 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2913 last_fp_reg = regno;
2914 }
43e9d192
IB
2915
2916 if (frame_pointer_needed)
2917 {
2e1cdae5 2918 /* FP and LR are placed in the linkage record. */
43e9d192 2919 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 2920 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 2921 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 2922 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
2e1cdae5 2923 offset += 2 * UNITS_PER_WORD;
43e9d192
IB
2924 }
2925
2926 /* Now assign stack slots for them. */
2e1cdae5 2927 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2928 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2929 {
2930 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 2931 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 2932 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 2933 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
363ffa50 2934 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2935 offset += UNITS_PER_WORD;
2936 }
2937
4b0685d9
WD
2938 HOST_WIDE_INT max_int_offset = offset;
2939 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2940 bool has_align_gap = offset != max_int_offset;
2941
43e9d192 2942 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2943 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192 2944 {
4b0685d9
WD
2945 /* If there is an alignment gap between integer and fp callee-saves,
2946 allocate the last fp register to it if possible. */
2947 if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0)
2948 {
2949 cfun->machine->frame.reg_offset[regno] = max_int_offset;
2950 break;
2951 }
2952
43e9d192 2953 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 2954 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 2955 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 2956 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
363ffa50
JW
2957 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2958 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2959 offset += UNITS_PER_WORD;
2960 }
2961
4f59f9f2 2962 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
2963
2964 cfun->machine->frame.saved_regs_size = offset;
1c960e02 2965
71bfb77a
WD
2966 HOST_WIDE_INT varargs_and_saved_regs_size
2967 = offset + cfun->machine->frame.saved_varargs_size;
2968
1c960e02 2969 cfun->machine->frame.hard_fp_offset
71bfb77a 2970 = ROUND_UP (varargs_and_saved_regs_size + get_frame_size (),
4f59f9f2 2971 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02
MS
2972
2973 cfun->machine->frame.frame_size
4f59f9f2
UB
2974 = ROUND_UP (cfun->machine->frame.hard_fp_offset
2975 + crtl->outgoing_args_size,
2976 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02 2977
71bfb77a
WD
2978 cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
2979
2980 cfun->machine->frame.initial_adjust = 0;
2981 cfun->machine->frame.final_adjust = 0;
2982 cfun->machine->frame.callee_adjust = 0;
2983 cfun->machine->frame.callee_offset = 0;
2984
2985 HOST_WIDE_INT max_push_offset = 0;
2986 if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM)
2987 max_push_offset = 512;
2988 else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
2989 max_push_offset = 256;
2990
2991 if (cfun->machine->frame.frame_size < max_push_offset
2992 && crtl->outgoing_args_size == 0)
2993 {
2994 /* Simple, small frame with no outgoing arguments:
2995 stp reg1, reg2, [sp, -frame_size]!
2996 stp reg3, reg4, [sp, 16] */
2997 cfun->machine->frame.callee_adjust = cfun->machine->frame.frame_size;
2998 }
2999 else if ((crtl->outgoing_args_size
3000 + cfun->machine->frame.saved_regs_size < 512)
3001 && !(cfun->calls_alloca
3002 && cfun->machine->frame.hard_fp_offset < max_push_offset))
3003 {
3004 /* Frame with small outgoing arguments:
3005 sub sp, sp, frame_size
3006 stp reg1, reg2, [sp, outgoing_args_size]
3007 stp reg3, reg4, [sp, outgoing_args_size + 16] */
3008 cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
3009 cfun->machine->frame.callee_offset
3010 = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
3011 }
3012 else if (cfun->machine->frame.hard_fp_offset < max_push_offset)
3013 {
3014 /* Frame with large outgoing arguments but a small local area:
3015 stp reg1, reg2, [sp, -hard_fp_offset]!
3016 stp reg3, reg4, [sp, 16]
3017 sub sp, sp, outgoing_args_size */
3018 cfun->machine->frame.callee_adjust = cfun->machine->frame.hard_fp_offset;
3019 cfun->machine->frame.final_adjust
3020 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
3021 }
3022 else if (!frame_pointer_needed
3023 && varargs_and_saved_regs_size < max_push_offset)
3024 {
3025 /* Frame with large local area and outgoing arguments (this pushes the
3026 callee-saves first, followed by the locals and outgoing area):
3027 stp reg1, reg2, [sp, -varargs_and_saved_regs_size]!
3028 stp reg3, reg4, [sp, 16]
3029 sub sp, sp, frame_size - varargs_and_saved_regs_size */
3030 cfun->machine->frame.callee_adjust = varargs_and_saved_regs_size;
3031 cfun->machine->frame.final_adjust
3032 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
3033 cfun->machine->frame.hard_fp_offset = cfun->machine->frame.callee_adjust;
3034 cfun->machine->frame.locals_offset = cfun->machine->frame.hard_fp_offset;
3035 }
3036 else
3037 {
3038 /* Frame with large local area and outgoing arguments using frame pointer:
3039 sub sp, sp, hard_fp_offset
3040 stp x29, x30, [sp, 0]
3041 add x29, sp, 0
3042 stp reg3, reg4, [sp, 16]
3043 sub sp, sp, outgoing_args_size */
3044 cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
3045 cfun->machine->frame.final_adjust
3046 = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
3047 }
3048
43e9d192
IB
3049 cfun->machine->frame.laid_out = true;
3050}
3051
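The chain of cases above picks one of several prologue shapes; the standalone sketch below (not GCC code, and ignoring the alloca check and the no-frame-pointer push variant) shows how hypothetical frame parameters select a layout.

#include <stdio.h>

static const char *
choose_layout (long frame_size, long outgoing_args, long saved_regs_size,
	       long hard_fp_offset, long max_push_offset)
{
  if (frame_size < max_push_offset && outgoing_args == 0)
    return "stp reg1, reg2, [sp, -frame_size]!  (single writeback push)";
  if (outgoing_args + saved_regs_size < 512)
    return "sub sp, sp, frame_size; plain stp stores above the outgoing area";
  if (hard_fp_offset < max_push_offset)
    return "stp with writeback by hard_fp_offset; final sub sp for outgoing args";
  return "sub sp by hard_fp_offset; save regs; final sub sp for outgoing args";
}

int
main (void)
{
  printf ("%s\n", choose_layout (96, 0, 32, 96, 512));
  printf ("%s\n", choose_layout (4352, 256, 96, 4096, 512));
  printf ("%s\n", choose_layout (66000, 65536, 128, 464, 512));
  return 0;
}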
04ddfe06
KT
3052/* Return true if the register REGNO is saved on entry to
3053 the current function. */
3054
43e9d192
IB
3055static bool
3056aarch64_register_saved_on_entry (int regno)
3057{
97826595 3058 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
3059}
3060
04ddfe06
KT
 3061/* Return the next register from REGNO up to LIMIT that the callee
 3062 needs to save. */
3063
64dedd72
JW
3064static unsigned
3065aarch64_next_callee_save (unsigned regno, unsigned limit)
3066{
3067 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
3068 regno ++;
3069 return regno;
3070}
43e9d192 3071
04ddfe06
KT
3072/* Push the register number REGNO of mode MODE to the stack with write-back
3073 adjusting the stack by ADJUSTMENT. */
3074
c5e1f66e 3075static void
ef4bddc2 3076aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
3077 HOST_WIDE_INT adjustment)
3078 {
3079 rtx base_rtx = stack_pointer_rtx;
3080 rtx insn, reg, mem;
3081
3082 reg = gen_rtx_REG (mode, regno);
3083 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
3084 plus_constant (Pmode, base_rtx, -adjustment));
30079dde 3085 mem = gen_frame_mem (mode, mem);
c5e1f66e
JW
3086
3087 insn = emit_move_insn (mem, reg);
3088 RTX_FRAME_RELATED_P (insn) = 1;
3089}
3090
04ddfe06
KT
3091/* Generate and return an instruction to store the pair of registers
3092 REG and REG2 of mode MODE to location BASE with write-back adjusting
3093 the stack location BASE by ADJUSTMENT. */
3094
80c11907 3095static rtx
ef4bddc2 3096aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
3097 HOST_WIDE_INT adjustment)
3098{
3099 switch (mode)
3100 {
4e10a5a7 3101 case E_DImode:
80c11907
JW
3102 return gen_storewb_pairdi_di (base, base, reg, reg2,
3103 GEN_INT (-adjustment),
3104 GEN_INT (UNITS_PER_WORD - adjustment));
4e10a5a7 3105 case E_DFmode:
80c11907
JW
3106 return gen_storewb_pairdf_di (base, base, reg, reg2,
3107 GEN_INT (-adjustment),
3108 GEN_INT (UNITS_PER_WORD - adjustment));
3109 default:
3110 gcc_unreachable ();
3111 }
3112}
3113
04ddfe06
KT
3114/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
3115 stack pointer by ADJUSTMENT. */
3116
80c11907 3117static void
89ac681e 3118aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
80c11907 3119{
5d8a22a5 3120 rtx_insn *insn;
0d4a1197 3121 machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
89ac681e 3122
71bfb77a 3123 if (regno2 == INVALID_REGNUM)
89ac681e
WD
3124 return aarch64_pushwb_single_reg (mode, regno1, adjustment);
3125
80c11907
JW
3126 rtx reg1 = gen_rtx_REG (mode, regno1);
3127 rtx reg2 = gen_rtx_REG (mode, regno2);
3128
3129 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
3130 reg2, adjustment));
3131 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
3132 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3133 RTX_FRAME_RELATED_P (insn) = 1;
3134}
3135
04ddfe06
KT
 3136/* Load the pair of registers REG, REG2 of mode MODE from stack location BASE,
3137 adjusting it by ADJUSTMENT afterwards. */
3138
159313d9 3139static rtx
ef4bddc2 3140aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
3141 HOST_WIDE_INT adjustment)
3142{
3143 switch (mode)
3144 {
4e10a5a7 3145 case E_DImode:
159313d9 3146 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 3147 GEN_INT (UNITS_PER_WORD));
4e10a5a7 3148 case E_DFmode:
159313d9 3149 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 3150 GEN_INT (UNITS_PER_WORD));
159313d9
JW
3151 default:
3152 gcc_unreachable ();
3153 }
3154}
3155
04ddfe06
KT
3156/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
3157 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
3158 into CFI_OPS. */
3159
89ac681e
WD
3160static void
3161aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
3162 rtx *cfi_ops)
3163{
0d4a1197 3164 machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
89ac681e
WD
3165 rtx reg1 = gen_rtx_REG (mode, regno1);
3166
3167 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
3168
71bfb77a 3169 if (regno2 == INVALID_REGNUM)
89ac681e
WD
3170 {
3171 rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
3172 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
30079dde 3173 emit_move_insn (reg1, gen_frame_mem (mode, mem));
89ac681e
WD
3174 }
3175 else
3176 {
3177 rtx reg2 = gen_rtx_REG (mode, regno2);
3178 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
3179 emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
3180 reg2, adjustment));
3181 }
3182}
3183
04ddfe06
KT
3184/* Generate and return a store pair instruction of mode MODE to store
3185 register REG1 to MEM1 and register REG2 to MEM2. */
3186
72df5c1f 3187static rtx
ef4bddc2 3188aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
3189 rtx reg2)
3190{
3191 switch (mode)
3192 {
4e10a5a7 3193 case E_DImode:
72df5c1f
JW
3194 return gen_store_pairdi (mem1, reg1, mem2, reg2);
3195
4e10a5a7 3196 case E_DFmode:
72df5c1f
JW
3197 return gen_store_pairdf (mem1, reg1, mem2, reg2);
3198
3199 default:
3200 gcc_unreachable ();
3201 }
3202}
3203
04ddfe06
KT
 3204/* Generate and return a load pair instruction of mode MODE to load register
3205 REG1 from MEM1 and register REG2 from MEM2. */
3206
72df5c1f 3207static rtx
ef4bddc2 3208aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
3209 rtx mem2)
3210{
3211 switch (mode)
3212 {
4e10a5a7 3213 case E_DImode:
72df5c1f
JW
3214 return gen_load_pairdi (reg1, mem1, reg2, mem2);
3215
4e10a5a7 3216 case E_DFmode:
72df5c1f
JW
3217 return gen_load_pairdf (reg1, mem1, reg2, mem2);
3218
3219 default:
3220 gcc_unreachable ();
3221 }
3222}
3223
db58fd89
JW
3224/* Return TRUE if return address signing should be enabled for the current
3225 function, otherwise return FALSE. */
3226
3227bool
3228aarch64_return_address_signing_enabled (void)
3229{
3230 /* This function should only be called after frame laid out. */
3231 gcc_assert (cfun->machine->frame.laid_out);
3232
 3233 /* If the signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function
 3234 if its LR is pushed onto the stack. */
3235 return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL
3236 || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF
3237 && cfun->machine->frame.reg_offset[LR_REGNUM] >= 0));
3238}
3239
04ddfe06
KT
3240/* Emit code to save the callee-saved registers from register number START
3241 to LIMIT to the stack at the location starting at offset START_OFFSET,
3242 skipping any write-back candidates if SKIP_WB is true. */
43e9d192 3243
43e9d192 3244static void
ef4bddc2 3245aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
ae13fce3 3246 unsigned start, unsigned limit, bool skip_wb)
43e9d192 3247{
5d8a22a5 3248 rtx_insn *insn;
43e9d192
IB
3249 unsigned regno;
3250 unsigned regno2;
3251
0ec74a1e 3252 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
3253 regno <= limit;
3254 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 3255 {
ae13fce3
JW
3256 rtx reg, mem;
3257 HOST_WIDE_INT offset;
64dedd72 3258
ae13fce3
JW
3259 if (skip_wb
3260 && (regno == cfun->machine->frame.wb_candidate1
3261 || regno == cfun->machine->frame.wb_candidate2))
3262 continue;
3263
827ab47a
KT
3264 if (cfun->machine->reg_is_wrapped_separately[regno])
3265 continue;
3266
ae13fce3
JW
3267 reg = gen_rtx_REG (mode, regno);
3268 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde
WD
3269 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
3270 offset));
64dedd72
JW
3271
3272 regno2 = aarch64_next_callee_save (regno + 1, limit);
3273
3274 if (regno2 <= limit
827ab47a 3275 && !cfun->machine->reg_is_wrapped_separately[regno2]
64dedd72
JW
3276 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
3277 == cfun->machine->frame.reg_offset[regno2]))
3278
43e9d192 3279 {
0ec74a1e 3280 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
3281 rtx mem2;
3282
3283 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde
WD
3284 mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
3285 offset));
8ed2fc62
JW
3286 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
3287 reg2));
0b4a9743 3288
64dedd72
JW
3289 /* The first part of a frame-related parallel insn is
3290 always assumed to be relevant to the frame
3291 calculations; subsequent parts, are only
3292 frame-related if explicitly marked. */
3293 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3294 regno = regno2;
3295 }
3296 else
8ed2fc62
JW
3297 insn = emit_move_insn (mem, reg);
3298
3299 RTX_FRAME_RELATED_P (insn) = 1;
3300 }
3301}
3302
04ddfe06
KT
3303/* Emit code to restore the callee registers of mode MODE from register
3304 number START up to and including LIMIT. Restore from the stack offset
3305 START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
3306 Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */
3307
8ed2fc62 3308static void
ef4bddc2 3309aarch64_restore_callee_saves (machine_mode mode,
8ed2fc62 3310 HOST_WIDE_INT start_offset, unsigned start,
dd991abb 3311 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 3312{
8ed2fc62 3313 rtx base_rtx = stack_pointer_rtx;
8ed2fc62
JW
3314 unsigned regno;
3315 unsigned regno2;
3316 HOST_WIDE_INT offset;
3317
3318 for (regno = aarch64_next_callee_save (start, limit);
3319 regno <= limit;
3320 regno = aarch64_next_callee_save (regno + 1, limit))
3321 {
827ab47a
KT
3322 if (cfun->machine->reg_is_wrapped_separately[regno])
3323 continue;
3324
ae13fce3 3325 rtx reg, mem;
8ed2fc62 3326
ae13fce3
JW
3327 if (skip_wb
3328 && (regno == cfun->machine->frame.wb_candidate1
3329 || regno == cfun->machine->frame.wb_candidate2))
3330 continue;
3331
3332 reg = gen_rtx_REG (mode, regno);
8ed2fc62 3333 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde 3334 mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
8ed2fc62
JW
3335
3336 regno2 = aarch64_next_callee_save (regno + 1, limit);
3337
3338 if (regno2 <= limit
827ab47a 3339 && !cfun->machine->reg_is_wrapped_separately[regno2]
8ed2fc62
JW
3340 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
3341 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 3342 {
8ed2fc62
JW
3343 rtx reg2 = gen_rtx_REG (mode, regno2);
3344 rtx mem2;
3345
3346 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde 3347 mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 3348 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 3349
dd991abb 3350 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 3351 regno = regno2;
43e9d192 3352 }
8ed2fc62 3353 else
dd991abb
RH
3354 emit_move_insn (reg, mem);
3355 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 3356 }
43e9d192
IB
3357}
3358
827ab47a
KT
3359static inline bool
3360offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
3361 HOST_WIDE_INT offset)
3362{
3363 return offset >= -256 && offset < 256;
3364}
3365
3366static inline bool
3367offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3368{
3369 return (offset >= 0
3370 && offset < 4096 * GET_MODE_SIZE (mode)
3371 && offset % GET_MODE_SIZE (mode) == 0);
3372}
3373
3374bool
3375aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3376{
3377 return (offset >= -64 * GET_MODE_SIZE (mode)
3378 && offset < 64 * GET_MODE_SIZE (mode)
3379 && offset % GET_MODE_SIZE (mode) == 0);
3380}
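/* Standalone sketch (not part of GCC) of the three offset ranges above,
   evaluated for 8-byte (DImode-sized) accesses: the signed unscaled 9-bit
   range (LDUR/STUR-style), the unsigned scaled 12-bit range (LDR/STR) and
   the signed scaled 7-bit range used by LDP/STP.  The sample offsets are
   arbitrary.  */
#include <stdbool.h>
#include <stdio.h>

#define SIZE 8			/* GET_MODE_SIZE (DImode) */

static bool off9  (long o) { return o >= -256 && o < 256; }
static bool off12 (long o) { return o >= 0 && o < 4096 * SIZE && o % SIZE == 0; }
static bool off7  (long o) { return o >= -64 * SIZE && o < 64 * SIZE && o % SIZE == 0; }

int
main (void)
{
  long offs[4] = { -8, 200, 1024, 40000 };
  for (int i = 0; i < 4; i++)
    printf ("%6ld: 9-bit %d  12-bit %d  7-bit %d\n",
	    offs[i], off9 (offs[i]), off12 (offs[i]), off7 (offs[i]));
  return 0;
}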
3381
3382/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
3383
3384static sbitmap
3385aarch64_get_separate_components (void)
3386{
3387 aarch64_layout_frame ();
3388
3389 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
3390 bitmap_clear (components);
3391
3392 /* The registers we need saved to the frame. */
3393 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3394 if (aarch64_register_saved_on_entry (regno))
3395 {
3396 HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
3397 if (!frame_pointer_needed)
3398 offset += cfun->machine->frame.frame_size
3399 - cfun->machine->frame.hard_fp_offset;
3400 /* Check that we can access the stack slot of the register with one
 3401 direct load, with no adjustment needed. */
3402 if (offset_12bit_unsigned_scaled_p (DImode, offset))
3403 bitmap_set_bit (components, regno);
3404 }
3405
3406 /* Don't mess with the hard frame pointer. */
3407 if (frame_pointer_needed)
3408 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
3409
3410 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3411 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3412 /* If aarch64_layout_frame has chosen registers to store/restore with
3413 writeback don't interfere with them to avoid having to output explicit
3414 stack adjustment instructions. */
3415 if (reg2 != INVALID_REGNUM)
3416 bitmap_clear_bit (components, reg2);
3417 if (reg1 != INVALID_REGNUM)
3418 bitmap_clear_bit (components, reg1);
3419
3420 bitmap_clear_bit (components, LR_REGNUM);
3421 bitmap_clear_bit (components, SP_REGNUM);
3422
3423 return components;
3424}
3425
3426/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
3427
3428static sbitmap
3429aarch64_components_for_bb (basic_block bb)
3430{
3431 bitmap in = DF_LIVE_IN (bb);
3432 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
3433 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
3434
3435 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
3436 bitmap_clear (components);
3437
3438 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
3439 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3440 if ((!call_used_regs[regno])
3441 && (bitmap_bit_p (in, regno)
3442 || bitmap_bit_p (gen, regno)
3443 || bitmap_bit_p (kill, regno)))
3444 bitmap_set_bit (components, regno);
3445
3446 return components;
3447}
3448
3449/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
3450 Nothing to do for aarch64. */
3451
3452static void
3453aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
3454{
3455}
3456
3457/* Return the next set bit in BMP from START onwards. Return the total number
3458 of bits in BMP if no set bit is found at or after START. */
3459
3460static unsigned int
3461aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
3462{
3463 unsigned int nbits = SBITMAP_SIZE (bmp);
3464 if (start == nbits)
3465 return start;
3466
3467 gcc_assert (start < nbits);
3468 for (unsigned int i = start; i < nbits; i++)
3469 if (bitmap_bit_p (bmp, i))
3470 return i;
3471
3472 return nbits;
3473}
3474
3475/* Do the work for aarch64_emit_prologue_components and
3476 aarch64_emit_epilogue_components. COMPONENTS is the bitmap of registers
3477 to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
3478 for these components or the epilogue sequence. That is, it determines
3479 whether we should emit stores or loads and what kind of CFA notes to attach
3480 to the insns. Otherwise the logic for the two sequences is very
3481 similar. */
3482
3483static void
3484aarch64_process_components (sbitmap components, bool prologue_p)
3485{
3486 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
3487 ? HARD_FRAME_POINTER_REGNUM
3488 : STACK_POINTER_REGNUM);
3489
3490 unsigned last_regno = SBITMAP_SIZE (components);
3491 unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
3492 rtx_insn *insn = NULL;
3493
3494 while (regno != last_regno)
3495 {
 3496 /* AAPCS64 section 5.1.2 requires only the bottom 64 bits of the vector
 3497 registers to be saved, so DFmode is enough. */
0d4a1197 3498 machine_mode mode = GP_REGNUM_P (regno) ? E_DImode : E_DFmode;
827ab47a
KT
3499 rtx reg = gen_rtx_REG (mode, regno);
3500 HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
3501 if (!frame_pointer_needed)
3502 offset += cfun->machine->frame.frame_size
3503 - cfun->machine->frame.hard_fp_offset;
3504 rtx addr = plus_constant (Pmode, ptr_reg, offset);
3505 rtx mem = gen_frame_mem (mode, addr);
3506
3507 rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
3508 unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
3509 /* No more registers to handle after REGNO.
3510 Emit a single save/restore and exit. */
3511 if (regno2 == last_regno)
3512 {
3513 insn = emit_insn (set);
3514 RTX_FRAME_RELATED_P (insn) = 1;
3515 if (prologue_p)
3516 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
3517 else
3518 add_reg_note (insn, REG_CFA_RESTORE, reg);
3519 break;
3520 }
3521
3522 HOST_WIDE_INT offset2 = cfun->machine->frame.reg_offset[regno2];
3523 /* The next register is not of the same class or its offset is not
3524 mergeable with the current one into a pair. */
3525 if (!satisfies_constraint_Ump (mem)
3526 || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
3527 || (offset2 - cfun->machine->frame.reg_offset[regno])
3528 != GET_MODE_SIZE (mode))
3529 {
3530 insn = emit_insn (set);
3531 RTX_FRAME_RELATED_P (insn) = 1;
3532 if (prologue_p)
3533 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
3534 else
3535 add_reg_note (insn, REG_CFA_RESTORE, reg);
3536
3537 regno = regno2;
3538 continue;
3539 }
3540
3541 /* REGNO2 can be saved/restored in a pair with REGNO. */
3542 rtx reg2 = gen_rtx_REG (mode, regno2);
3543 if (!frame_pointer_needed)
3544 offset2 += cfun->machine->frame.frame_size
3545 - cfun->machine->frame.hard_fp_offset;
3546 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
3547 rtx mem2 = gen_frame_mem (mode, addr2);
3548 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
3549 : gen_rtx_SET (reg2, mem2);
3550
3551 if (prologue_p)
3552 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
3553 else
3554 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
3555
3556 RTX_FRAME_RELATED_P (insn) = 1;
3557 if (prologue_p)
3558 {
3559 add_reg_note (insn, REG_CFA_OFFSET, set);
3560 add_reg_note (insn, REG_CFA_OFFSET, set2);
3561 }
3562 else
3563 {
3564 add_reg_note (insn, REG_CFA_RESTORE, reg);
3565 add_reg_note (insn, REG_CFA_RESTORE, reg2);
3566 }
3567
3568 regno = aarch64_get_next_set_bit (components, regno2 + 1);
3569 }
3570}
3571
3572/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
3573
3574static void
3575aarch64_emit_prologue_components (sbitmap components)
3576{
3577 aarch64_process_components (components, true);
3578}
3579
3580/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
3581
3582static void
3583aarch64_emit_epilogue_components (sbitmap components)
3584{
3585 aarch64_process_components (components, false);
3586}
3587
3588/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
3589
3590static void
3591aarch64_set_handled_components (sbitmap components)
3592{
3593 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3594 if (bitmap_bit_p (components, regno))
3595 cfun->machine->reg_is_wrapped_separately[regno] = true;
3596}
3597
43e9d192
IB
3598/* AArch64 stack frames generated by this compiler look like:
3599
3600 +-------------------------------+
3601 | |
3602 | incoming stack arguments |
3603 | |
34834420
MS
3604 +-------------------------------+
3605 | | <-- incoming stack pointer (aligned)
43e9d192
IB
3606 | callee-allocated save area |
3607 | for register varargs |
3608 | |
34834420
MS
3609 +-------------------------------+
3610 | local variables | <-- frame_pointer_rtx
43e9d192
IB
3611 | |
3612 +-------------------------------+
454fdba9
RL
3613 | padding0 | \
3614 +-------------------------------+ |
454fdba9 3615 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
3616 +-------------------------------+ |
3617 | LR' | |
3618 +-------------------------------+ |
34834420
MS
3619 | FP' | / <- hard_frame_pointer_rtx (aligned)
3620 +-------------------------------+
43e9d192
IB
3621 | dynamic allocation |
3622 +-------------------------------+
34834420
MS
3623 | padding |
3624 +-------------------------------+
3625 | outgoing stack arguments | <-- arg_pointer
3626 | |
3627 +-------------------------------+
3628 | | <-- stack_pointer_rtx (aligned)
43e9d192 3629
34834420
MS
3630 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
3631 but leave frame_pointer_rtx and hard_frame_pointer_rtx
3632 unchanged. */
43e9d192
IB
3633
3634/* Generate the prologue instructions for entry into a function.
3635 Establish the stack frame by decreasing the stack pointer with a
3636 properly calculated size and, if necessary, create a frame record
3637 filled with the values of LR and previous frame pointer. The
6991c977 3638 current FP is also set up if it is in use. */
43e9d192
IB
3639
3640void
3641aarch64_expand_prologue (void)
3642{
43e9d192 3643 aarch64_layout_frame ();
43e9d192 3644
71bfb77a
WD
3645 HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size;
3646 HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
3647 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
3648 HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
3649 HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
3650 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3651 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3652 rtx_insn *insn;
43e9d192 3653
db58fd89
JW
3654 /* Sign return address for functions. */
3655 if (aarch64_return_address_signing_enabled ())
27169e45
JW
3656 {
3657 insn = emit_insn (gen_pacisp ());
3658 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
3659 RTX_FRAME_RELATED_P (insn) = 1;
3660 }
db58fd89 3661
dd991abb
RH
3662 if (flag_stack_usage_info)
3663 current_function_static_stack_size = frame_size;
43e9d192 3664
a3eb8a52
EB
3665 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3666 {
3667 if (crtl->is_leaf && !cfun->calls_alloca)
3668 {
3669 if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT)
3670 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT,
3671 frame_size - STACK_CHECK_PROTECT);
3672 }
3673 else if (frame_size > 0)
3674 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
3675 }
3676
5be6b295 3677 aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);
43e9d192 3678
71bfb77a
WD
3679 if (callee_adjust != 0)
3680 aarch64_push_regs (reg1, reg2, callee_adjust);
43e9d192 3681
71bfb77a 3682 if (frame_pointer_needed)
43e9d192 3683 {
71bfb77a
WD
3684 if (callee_adjust == 0)
3685 aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM,
3686 R30_REGNUM, false);
3687 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
3688 stack_pointer_rtx,
3689 GEN_INT (callee_offset)));
3690 RTX_FRAME_RELATED_P (insn) = 1;
3691 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192 3692 }
71bfb77a
WD
3693
3694 aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
3695 callee_adjust != 0 || frame_pointer_needed);
3696 aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
3697 callee_adjust != 0 || frame_pointer_needed);
5be6b295 3698 aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
43e9d192
IB
3699}
3700
4f942779
RL
3701/* Return TRUE if we can use a simple_return insn.
3702
 3703 This function checks whether the callee-saved stack is empty, which
 3704 means no restore actions are needed. The pro_and_epilogue pass will use
 3705 this to check whether the shrink-wrapping optimization is feasible. */
3706
3707bool
3708aarch64_use_return_insn_p (void)
3709{
3710 if (!reload_completed)
3711 return false;
3712
3713 if (crtl->profile)
3714 return false;
3715
3716 aarch64_layout_frame ();
3717
3718 return cfun->machine->frame.frame_size == 0;
3719}
3720
71bfb77a
WD
3721/* Generate the epilogue instructions for returning from a function.
 3722 This is almost exactly the reverse of the prologue sequence, except
3723 that we need to insert barriers to avoid scheduling loads that read
3724 from a deallocated stack, and we optimize the unwind records by
3725 emitting them all together if possible. */
43e9d192
IB
3726void
3727aarch64_expand_epilogue (bool for_sibcall)
3728{
43e9d192 3729 aarch64_layout_frame ();
43e9d192 3730
71bfb77a
WD
3731 HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
3732 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
3733 HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
3734 HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
3735 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3736 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3737 rtx cfi_ops = NULL;
3738 rtx_insn *insn;
44c0e7b9 3739
71bfb77a
WD
 3740 /* We need to add a memory barrier to prevent reads from the deallocated stack. */
3741 bool need_barrier_p = (get_frame_size ()
3742 + cfun->machine->frame.saved_varargs_size) != 0;
43e9d192 3743
71bfb77a 3744 /* Emit a barrier to prevent loads from a deallocated stack. */
8144a493
WD
3745 if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
3746 || crtl->calls_eh_return)
43e9d192 3747 {
71bfb77a
WD
3748 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3749 need_barrier_p = false;
3750 }
7e8c2bd5 3751
71bfb77a
WD
3752 /* Restore the stack pointer from the frame pointer if it may not
3753 be the same as the stack pointer. */
3754 if (frame_pointer_needed && (final_adjust || cfun->calls_alloca))
3755 {
43e9d192
IB
3756 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
3757 hard_frame_pointer_rtx,
71bfb77a
WD
3758 GEN_INT (-callee_offset)));
3759 /* If writeback is used when restoring callee-saves, the CFA
3760 is restored on the instruction doing the writeback. */
3761 RTX_FRAME_RELATED_P (insn) = callee_adjust == 0;
43e9d192 3762 }
71bfb77a 3763 else
5be6b295 3764 aarch64_add_sp (IP1_REGNUM, final_adjust, df_regs_ever_live_p (IP1_REGNUM));
43e9d192 3765
71bfb77a
WD
3766 aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
3767 callee_adjust != 0, &cfi_ops);
3768 aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
3769 callee_adjust != 0, &cfi_ops);
43e9d192 3770
71bfb77a
WD
3771 if (need_barrier_p)
3772 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3773
3774 if (callee_adjust != 0)
3775 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
3776
3777 if (callee_adjust != 0 || initial_adjust > 65536)
3778 {
3779 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
89ac681e 3780 insn = get_last_insn ();
71bfb77a
WD
3781 rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
3782 REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
43e9d192 3783 RTX_FRAME_RELATED_P (insn) = 1;
71bfb77a 3784 cfi_ops = NULL;
43e9d192
IB
3785 }
3786
5be6b295 3787 aarch64_add_sp (IP0_REGNUM, initial_adjust, df_regs_ever_live_p (IP0_REGNUM));
7e8c2bd5 3788
71bfb77a
WD
3789 if (cfi_ops)
3790 {
3791 /* Emit delayed restores and reset the CFA to be SP. */
3792 insn = get_last_insn ();
3793 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
3794 REG_NOTES (insn) = cfi_ops;
3795 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb
RH
3796 }
3797
db58fd89
JW
3798 /* We prefer to emit the combined return/authenticate instruction RETAA,
3799 however there are three cases in which we must instead emit an explicit
3800 authentication instruction.
3801
3802 1) Sibcalls don't return in a normal way, so if we're about to call one
3803 we must authenticate.
3804
3805 2) The RETAA instruction is not available before ARMv8.3-A, so if we are
3806 generating code for !TARGET_ARMV8_3 we can't use it and must
3807 explicitly authenticate.
3808
3809 3) On an eh_return path we make extra stack adjustments to update the
3810 canonical frame address to be the exception handler's CFA. We want
3811 to authenticate using the CFA of the function which calls eh_return.
3812 */
3813 if (aarch64_return_address_signing_enabled ()
3814 && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return))
27169e45
JW
3815 {
3816 insn = emit_insn (gen_autisp ());
3817 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
3818 RTX_FRAME_RELATED_P (insn) = 1;
3819 }
db58fd89 3820
dd991abb
RH
3821 /* Stack adjustment for exception handler. */
3822 if (crtl->calls_eh_return)
3823 {
3824 /* We need to unwind the stack by the offset computed by
3825 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
3826 to be SP; letting the CFA move during this adjustment
3827 is just as correct as retaining the CFA from the body
3828 of the function. Therefore, do nothing special. */
3829 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
3830 }
3831
3832 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
3833 if (!for_sibcall)
3834 emit_jump_insn (ret_rtx);
3835}
3836
8144a493
WD
3837/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
3838 normally or return to a previous frame after unwinding.
1c960e02 3839
8144a493
WD
3840 An EH return uses a single shared return sequence. The epilogue is
3841 exactly like a normal epilogue except that it has an extra input
3842 register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
3843 that must be applied after the frame has been destroyed. An extra label
3844 is inserted before the epilogue which initializes this register to zero,
3845 and this is the entry point for a normal return.
43e9d192 3846
8144a493
WD
3847 An actual EH return updates the return address, initializes the stack
3848 adjustment and jumps directly into the epilogue (bypassing the zeroing
3849 of the adjustment). Since the return address is typically saved on the
3850 stack when a function makes a call, the saved LR must be updated outside
3851 the epilogue.
43e9d192 3852
8144a493
WD
3853 This poses problems as the store is generated well before the epilogue,
3854 so the offset of LR is not known yet. Also optimizations will remove the
3855 store as it appears dead, even after the epilogue is generated (as the
3856 base or offset for loading LR is different in many cases).
43e9d192 3857
8144a493
WD
3858 To avoid these problems this implementation forces the frame pointer
3859 in eh_return functions so that the location of LR is fixed and known early.
3860 It also marks the store volatile, so no optimization is permitted to
3861 remove the store. */
3862rtx
3863aarch64_eh_return_handler_rtx (void)
3864{
3865 rtx tmp = gen_frame_mem (Pmode,
3866 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
43e9d192 3867
8144a493
WD
3868 /* Mark the store volatile, so no optimization is permitted to remove it. */
3869 MEM_VOLATILE_P (tmp) = true;
3870 return tmp;
43e9d192
IB
3871}
3872
43e9d192
IB
3873/* Output code to add DELTA to the first argument, and then jump
3874 to FUNCTION. Used for C++ multiple inheritance. */
3875static void
3876aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
3877 HOST_WIDE_INT delta,
3878 HOST_WIDE_INT vcall_offset,
3879 tree function)
3880{
3881 /* The this pointer is always in x0. Note that this differs from
 3882 Arm, where the this pointer may be bumped to r1 if r0 is required
3883 to return a pointer to an aggregate. On AArch64 a result value
3884 pointer will be in x8. */
3885 int this_regno = R0_REGNUM;
5d8a22a5
DM
3886 rtx this_rtx, temp0, temp1, addr, funexp;
3887 rtx_insn *insn;
43e9d192 3888
75f1d6fc
SN
3889 reload_completed = 1;
3890 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
3891
3892 if (vcall_offset == 0)
5be6b295 3893 aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
43e9d192
IB
3894 else
3895 {
28514dda 3896 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 3897
75f1d6fc
SN
3898 this_rtx = gen_rtx_REG (Pmode, this_regno);
3899 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
3900 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 3901
75f1d6fc
SN
3902 addr = this_rtx;
3903 if (delta != 0)
3904 {
3905 if (delta >= -256 && delta < 256)
3906 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
3907 plus_constant (Pmode, this_rtx, delta));
3908 else
5be6b295 3909 aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
43e9d192
IB
3910 }
3911
28514dda
YZ
3912 if (Pmode == ptr_mode)
3913 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
3914 else
3915 aarch64_emit_move (temp0,
3916 gen_rtx_ZERO_EXTEND (Pmode,
3917 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 3918
28514dda 3919 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 3920 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
3921 else
3922 {
f43657b4
JW
3923 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
3924 Pmode);
75f1d6fc 3925 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
3926 }
3927
28514dda
YZ
3928 if (Pmode == ptr_mode)
 3929 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
3930 else
3931 aarch64_emit_move (temp1,
3932 gen_rtx_SIGN_EXTEND (Pmode,
3933 gen_rtx_MEM (ptr_mode, addr)));
3934
75f1d6fc 3935 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
3936 }
3937
75f1d6fc
SN
3938 /* Generate a tail call to the target function. */
3939 if (!TREE_USED (function))
3940 {
3941 assemble_external (function);
3942 TREE_USED (function) = 1;
3943 }
3944 funexp = XEXP (DECL_RTL (function), 0);
3945 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
3946 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
3947 SIBLING_CALL_P (insn) = 1;
3948
3949 insn = get_insns ();
3950 shorten_branches (insn);
3951 final_start_function (insn, file, 1);
3952 final (insn, file, 1);
43e9d192 3953 final_end_function ();
75f1d6fc
SN
3954
3955 /* Stop pretending to be a post-reload pass. */
3956 reload_completed = 0;
43e9d192
IB
3957}
3958
43e9d192
IB
3959static bool
3960aarch64_tls_referenced_p (rtx x)
3961{
3962 if (!TARGET_HAVE_TLS)
3963 return false;
e7de8563
RS
3964 subrtx_iterator::array_type array;
3965 FOR_EACH_SUBRTX (iter, array, x, ALL)
3966 {
3967 const_rtx x = *iter;
3968 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
3969 return true;
3970 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3971 TLS offsets, not real symbol references. */
3972 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3973 iter.skip_subrtxes ();
3974 }
3975 return false;
43e9d192
IB
3976}
3977
3978
43e9d192
IB
3979/* Return true if val can be encoded as a 12-bit unsigned immediate with
3980 a left shift of 0 or 12 bits. */
3981bool
3982aarch64_uimm12_shift (HOST_WIDE_INT val)
3983{
3984 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3985 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3986 );
3987}
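/* Standalone sketch (not part of GCC) of the test above with a few worked
   values: an ADD/SUB immediate is a 12-bit value optionally shifted left by
   12 bits, so a value like 0xabc123 needs more than one instruction.  */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool
uimm12_shift (uint64_t val)
{
  return (val & 0xfffull) == val || (val & (0xfffull << 12)) == val;
}

int
main (void)
{
  printf ("%d\n", uimm12_shift (0xabc));	/* 1: fits in the low 12 bits */
  printf ("%d\n", uimm12_shift (0xabc000));	/* 1: 12-bit value shifted by 12 */
  printf ("%d\n", uimm12_shift (0xabc123));	/* 0: needs both halves */
  return 0;
}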
3988
3989
3990/* Return true if val is an immediate that can be loaded into a
3991 register by a MOVZ instruction. */
3992static bool
77e994c9 3993aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
43e9d192
IB
3994{
3995 if (GET_MODE_SIZE (mode) > 4)
3996 {
3997 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3998 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3999 return 1;
4000 }
4001 else
4002 {
4003 /* Ignore sign extension. */
4004 val &= (HOST_WIDE_INT) 0xffffffff;
4005 }
4006 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
4007 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
4008}
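/* Standalone sketch (not part of GCC), 64-bit only: a value is MOVZ-loadable
   when exactly one aligned 16-bit chunk is nonzero.  The caller below
   (aarch64_move_imm) also tries the complement, which corresponds to MOVN.  */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool
movw_imm64 (uint64_t v)
{
  return (v & 0xffffull) == v
	 || (v & (0xffffull << 16)) == v
	 || (v & (0xffffull << 32)) == v
	 || (v & (0xffffull << 48)) == v;
}

int
main (void)
{
  printf ("%d\n", movw_imm64 (0x12340000ull));	/* 1: movz x0, #0x1234, lsl #16 */
  printf ("%d\n", movw_imm64 (0x12345678ull));	/* 0: needs movz + movk */
  printf ("%d\n", movw_imm64 (~0xffffull));	/* 0 here, but movn x0, #0xffff works */
  return 0;
}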
4009
a64c73a2
WD
4010/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
4011
4012static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
4013 {
4014 0x0000000100000001ull,
4015 0x0001000100010001ull,
4016 0x0101010101010101ull,
4017 0x1111111111111111ull,
4018 0x5555555555555555ull,
4019 };
4020
43e9d192
IB
4021
4022/* Return true if val is a valid bitmask immediate. */
a64c73a2 4023
43e9d192 4024bool
a64c73a2 4025aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
43e9d192 4026{
a64c73a2
WD
4027 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
4028 int bits;
4029
4030 /* Check for a single sequence of one bits and return quickly if so.
4031 The special cases of all ones and all zeroes returns false. */
4032 val = (unsigned HOST_WIDE_INT) val_in;
4033 tmp = val + (val & -val);
4034
4035 if (tmp == (tmp & -tmp))
4036 return (val + 1) > 1;
4037
4038 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
4039 if (mode == SImode)
4040 val = (val << 32) | (val & 0xffffffff);
4041
4042 /* Invert if the immediate doesn't start with a zero bit - this means we
4043 only need to search for sequences of one bits. */
4044 if (val & 1)
4045 val = ~val;
4046
4047 /* Find the first set bit and set tmp to val with the first sequence of one
4048 bits removed. Return success if there is a single sequence of ones. */
4049 first_one = val & -val;
4050 tmp = val & (val + first_one);
4051
4052 if (tmp == 0)
4053 return true;
4054
4055 /* Find the next set bit and compute the difference in bit position. */
4056 next_one = tmp & -tmp;
4057 bits = clz_hwi (first_one) - clz_hwi (next_one);
4058 mask = val ^ tmp;
4059
4060 /* Check the bit position difference is a power of 2, and that the first
4061 sequence of one bits fits within 'bits' bits. */
4062 if ((mask >> bits) != 0 || bits != (bits & -bits))
4063 return false;
4064
4065 /* Check the sequence of one bits is repeated 64/bits times. */
4066 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
43e9d192
IB
4067}
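/* Standalone sketch (not part of GCC) of the bitmask-immediate test above,
   specialised to 64-bit values: strip the first run of ones, check that the
   next run starts a power-of-two distance away and that the run fits inside
   that distance, then verify the run repeats across the whole value.  */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static const uint64_t repl_mul[5] = {
  0x0000000100000001ull,	/* element width 32 */
  0x0001000100010001ull,	/* element width 16 */
  0x0101010101010101ull,	/* element width 8 */
  0x1111111111111111ull,	/* element width 4 */
  0x5555555555555555ull,	/* element width 2 */
};

static bool
bitmask_imm64 (uint64_t val)
{
  /* A single run of ones (but not all-zeros or all-ones) is always valid.  */
  uint64_t tmp = val + (val & -val);
  if ((tmp & (tmp - 1)) == 0)
    return val + 1 > 1;

  /* Make the value start with a zero bit so we only search runs of ones.  */
  if (val & 1)
    val = ~val;

  uint64_t first_one = val & -val;
  tmp = val & (val + first_one);	/* val with its first run of ones removed */
  if (tmp == 0)
    return true;

  uint64_t next_one = tmp & -tmp;
  int bits = __builtin_clzll (first_one) - __builtin_clzll (next_one);
  uint64_t mask = val ^ tmp;		/* the first run of ones in isolation */

  /* The run spacing must be a power of two and the run must fit inside it.  */
  if ((mask >> bits) != 0 || (bits & (bits - 1)) != 0)
    return false;

  /* The run must be replicated 64/bits times.  */
  return val == mask * repl_mul[__builtin_clz ((unsigned) bits) - 26];
}

int
main (void)
{
  printf ("%d\n", bitmask_imm64 (0x00ff00ff00ff00ffull));	/* 1: 8-bit runs, 16-bit elements */
  printf ("%d\n", bitmask_imm64 (0x0000000000001234ull));	/* 0: not a repeated run */
  return 0;
}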
4068
43fd192f
MC
 4069/* Create a mask of ones covering the range from the lowest to the highest
 4070 set bit of VAL_IN. Assumed precondition: VAL_IN is not zero. */
4071
4072unsigned HOST_WIDE_INT
4073aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
4074{
4075 int lowest_bit_set = ctz_hwi (val_in);
4076 int highest_bit_set = floor_log2 (val_in);
4077 gcc_assert (val_in != 0);
4078
4079 return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
4080 (HOST_WIDE_INT_1U << lowest_bit_set));
4081}
4082
 4083/* Create a constant where all bits outside the range from the lowest set
 4084 bit to the highest set bit are set to 1. */
4085
4086unsigned HOST_WIDE_INT
4087aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
4088{
4089 return val_in | ~aarch64_and_split_imm1 (val_in);
4090}
4091
4092/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
4093
4094bool
4095aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
4096{
77e994c9
RS
4097 scalar_int_mode int_mode;
4098 if (!is_a <scalar_int_mode> (mode, &int_mode))
4099 return false;
4100
4101 if (aarch64_bitmask_imm (val_in, int_mode))
43fd192f
MC
4102 return false;
4103
77e994c9 4104 if (aarch64_move_imm (val_in, int_mode))
43fd192f
MC
4105 return false;
4106
4107 unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
4108
77e994c9 4109 return aarch64_bitmask_imm (imm2, int_mode);
43fd192f 4110}
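/* Standalone sketch (not part of GCC) of the split above, assuming a nonzero
   value as the helpers do: imm1 is a mask of ones covering the lowest to the
   highest set bit, imm2 sets every bit outside that range, and the original
   value is imm1 & imm2, so an AND with an unencodable constant can become two
   ANDs when both parts are valid bitmask immediates.  The sample value is
   arbitrary.  */
#include <stdint.h>
#include <stdio.h>

static uint64_t
split_imm1 (uint64_t v)
{
  int lo = __builtin_ctzll (v);		/* lowest set bit */
  int hi = 63 - __builtin_clzll (v);	/* highest set bit */
  return ((uint64_t) 2 << hi) - ((uint64_t) 1 << lo);
}

static uint64_t
split_imm2 (uint64_t v)
{
  return v | ~split_imm1 (v);
}

int
main (void)
{
  uint64_t v = 0x00fff000000fff00ull;	/* not itself a bitmask immediate */
  uint64_t imm1 = split_imm1 (v);	/* 0x00ffffffffffff00: one run of ones */
  uint64_t imm2 = split_imm2 (v);	/* 0xfffff000000fffff: a wrapped run */
  printf ("imm1 = %#llx\nimm2 = %#llx\nimm1 & imm2 == v: %d\n",
	  (unsigned long long) imm1, (unsigned long long) imm2,
	  (imm1 & imm2) == v);
  return 0;
}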
43e9d192
IB
4111
4112/* Return true if val is an immediate that can be loaded into a
4113 register in a single instruction. */
4114bool
ef4bddc2 4115aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192 4116{
77e994c9
RS
4117 scalar_int_mode int_mode;
4118 if (!is_a <scalar_int_mode> (mode, &int_mode))
4119 return false;
4120
4121 if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
43e9d192 4122 return 1;
77e994c9 4123 return aarch64_bitmask_imm (val, int_mode);
43e9d192
IB
4124}
4125
4126static bool
ef4bddc2 4127aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
4128{
4129 rtx base, offset;
7eda14e1 4130
43e9d192
IB
4131 if (GET_CODE (x) == HIGH)
4132 return true;
4133
4134 split_const (x, &base, &offset);
4135 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 4136 {
a6e0bfa7 4137 if (aarch64_classify_symbol (base, offset)
28514dda
YZ
4138 != SYMBOL_FORCE_TO_MEM)
4139 return true;
4140 else
4141 /* Avoid generating a 64-bit relocation in ILP32; leave
4142 to aarch64_expand_mov_immediate to handle it properly. */
4143 return mode != ptr_mode;
4144 }
43e9d192
IB
4145
4146 return aarch64_tls_referenced_p (x);
4147}
4148
e79136e4
WD
4149/* Implement TARGET_CASE_VALUES_THRESHOLD.
 4150 The expansion of a table switch is quite expensive due to the number
 4151 of instructions, the table lookup and the hard-to-predict indirect jump.
 4152 When optimizing for speed with -O3 or higher, use the per-core tuning if
 4153 set; otherwise use tables for more than 16 cases as a tradeoff between
 4154 size and performance. When optimizing for size, use the default setting. */
50487d79
EM
4155
4156static unsigned int
4157aarch64_case_values_threshold (void)
4158{
4159 /* Use the specified limit for the number of cases before using jump
4160 tables at higher optimization levels. */
4161 if (optimize > 2
4162 && selected_cpu->tune->max_case_values != 0)
4163 return selected_cpu->tune->max_case_values;
4164 else
e79136e4 4165 return optimize_size ? default_case_values_threshold () : 17;
50487d79
EM
4166}
4167
43e9d192
IB
4168/* Return true if register REGNO is a valid index register.
4169 STRICT_P is true if REG_OK_STRICT is in effect. */
4170
4171bool
4172aarch64_regno_ok_for_index_p (int regno, bool strict_p)
4173{
4174 if (!HARD_REGISTER_NUM_P (regno))
4175 {
4176 if (!strict_p)
4177 return true;
4178
4179 if (!reg_renumber)
4180 return false;
4181
4182 regno = reg_renumber[regno];
4183 }
4184 return GP_REGNUM_P (regno);
4185}
4186
4187/* Return true if register REGNO is a valid base register for mode MODE.
4188 STRICT_P is true if REG_OK_STRICT is in effect. */
4189
4190bool
4191aarch64_regno_ok_for_base_p (int regno, bool strict_p)
4192{
4193 if (!HARD_REGISTER_NUM_P (regno))
4194 {
4195 if (!strict_p)
4196 return true;
4197
4198 if (!reg_renumber)
4199 return false;
4200
4201 regno = reg_renumber[regno];
4202 }
4203
4204 /* The fake registers will be eliminated to either the stack or
4205 hard frame pointer, both of which are usually valid base registers.
4206 Reload deals with the cases where the eliminated form isn't valid. */
4207 return (GP_REGNUM_P (regno)
4208 || regno == SP_REGNUM
4209 || regno == FRAME_POINTER_REGNUM
4210 || regno == ARG_POINTER_REGNUM);
4211}
4212
4213/* Return true if X is a valid base register for mode MODE.
4214 STRICT_P is true if REG_OK_STRICT is in effect. */
4215
4216static bool
4217aarch64_base_register_rtx_p (rtx x, bool strict_p)
4218{
76160199
RS
4219 if (!strict_p
4220 && GET_CODE (x) == SUBREG
4221 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (x))])
43e9d192
IB
4222 x = SUBREG_REG (x);
4223
4224 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
4225}
4226
4227/* Return true if address offset is a valid index. If it is, fill in INFO
4228 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
4229
4230static bool
4231aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 4232 machine_mode mode, bool strict_p)
43e9d192
IB
4233{
4234 enum aarch64_address_type type;
4235 rtx index;
4236 int shift;
4237
4238 /* (reg:P) */
4239 if ((REG_P (x) || GET_CODE (x) == SUBREG)
4240 && GET_MODE (x) == Pmode)
4241 {
4242 type = ADDRESS_REG_REG;
4243 index = x;
4244 shift = 0;
4245 }
4246 /* (sign_extend:DI (reg:SI)) */
4247 else if ((GET_CODE (x) == SIGN_EXTEND
4248 || GET_CODE (x) == ZERO_EXTEND)
4249 && GET_MODE (x) == DImode
4250 && GET_MODE (XEXP (x, 0)) == SImode)
4251 {
4252 type = (GET_CODE (x) == SIGN_EXTEND)
4253 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4254 index = XEXP (x, 0);
4255 shift = 0;
4256 }
4257 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
4258 else if (GET_CODE (x) == MULT
4259 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
4260 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
4261 && GET_MODE (XEXP (x, 0)) == DImode
4262 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
4263 && CONST_INT_P (XEXP (x, 1)))
4264 {
4265 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4266 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4267 index = XEXP (XEXP (x, 0), 0);
4268 shift = exact_log2 (INTVAL (XEXP (x, 1)));
4269 }
4270 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
4271 else if (GET_CODE (x) == ASHIFT
4272 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
4273 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
4274 && GET_MODE (XEXP (x, 0)) == DImode
4275 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
4276 && CONST_INT_P (XEXP (x, 1)))
4277 {
4278 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4279 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4280 index = XEXP (XEXP (x, 0), 0);
4281 shift = INTVAL (XEXP (x, 1));
4282 }
4283 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
4284 else if ((GET_CODE (x) == SIGN_EXTRACT
4285 || GET_CODE (x) == ZERO_EXTRACT)
4286 && GET_MODE (x) == DImode
4287 && GET_CODE (XEXP (x, 0)) == MULT
4288 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4289 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
4290 {
4291 type = (GET_CODE (x) == SIGN_EXTRACT)
4292 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4293 index = XEXP (XEXP (x, 0), 0);
4294 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
4295 if (INTVAL (XEXP (x, 1)) != 32 + shift
4296 || INTVAL (XEXP (x, 2)) != 0)
4297 shift = -1;
4298 }
4299 /* (and:DI (mult:DI (reg:DI) (const_int scale))
4300 (const_int 0xffffffff<<shift)) */
4301 else if (GET_CODE (x) == AND
4302 && GET_MODE (x) == DImode
4303 && GET_CODE (XEXP (x, 0)) == MULT
4304 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4305 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4306 && CONST_INT_P (XEXP (x, 1)))
4307 {
4308 type = ADDRESS_REG_UXTW;
4309 index = XEXP (XEXP (x, 0), 0);
4310 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
4311 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
4312 shift = -1;
4313 }
4314 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
4315 else if ((GET_CODE (x) == SIGN_EXTRACT
4316 || GET_CODE (x) == ZERO_EXTRACT)
4317 && GET_MODE (x) == DImode
4318 && GET_CODE (XEXP (x, 0)) == ASHIFT
4319 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4320 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
4321 {
4322 type = (GET_CODE (x) == SIGN_EXTRACT)
4323 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4324 index = XEXP (XEXP (x, 0), 0);
4325 shift = INTVAL (XEXP (XEXP (x, 0), 1));
4326 if (INTVAL (XEXP (x, 1)) != 32 + shift
4327 || INTVAL (XEXP (x, 2)) != 0)
4328 shift = -1;
4329 }
4330 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
4331 (const_int 0xffffffff<<shift)) */
4332 else if (GET_CODE (x) == AND
4333 && GET_MODE (x) == DImode
4334 && GET_CODE (XEXP (x, 0)) == ASHIFT
4335 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4336 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4337 && CONST_INT_P (XEXP (x, 1)))
4338 {
4339 type = ADDRESS_REG_UXTW;
4340 index = XEXP (XEXP (x, 0), 0);
4341 shift = INTVAL (XEXP (XEXP (x, 0), 1));
4342 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
4343 shift = -1;
4344 }
4345 /* (mult:P (reg:P) (const_int scale)) */
4346 else if (GET_CODE (x) == MULT
4347 && GET_MODE (x) == Pmode
4348 && GET_MODE (XEXP (x, 0)) == Pmode
4349 && CONST_INT_P (XEXP (x, 1)))
4350 {
4351 type = ADDRESS_REG_REG;
4352 index = XEXP (x, 0);
4353 shift = exact_log2 (INTVAL (XEXP (x, 1)));
4354 }
4355 /* (ashift:P (reg:P) (const_int shift)) */
4356 else if (GET_CODE (x) == ASHIFT
4357 && GET_MODE (x) == Pmode
4358 && GET_MODE (XEXP (x, 0)) == Pmode
4359 && CONST_INT_P (XEXP (x, 1)))
4360 {
4361 type = ADDRESS_REG_REG;
4362 index = XEXP (x, 0);
4363 shift = INTVAL (XEXP (x, 1));
4364 }
4365 else
4366 return false;
4367
76160199
RS
4368 if (!strict_p
4369 && GET_CODE (index) == SUBREG
4370 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
43e9d192
IB
4371 index = SUBREG_REG (index);
4372
4373 if ((shift == 0 ||
4374 (shift > 0 && shift <= 3
4375 && (1 << shift) == GET_MODE_SIZE (mode)))
4376 && REG_P (index)
4377 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
4378 {
4379 info->type = type;
4380 info->offset = index;
4381 info->shift = shift;
4382 return true;
4383 }
4384
4385 return false;
4386}
4387
abc52318
KT
4388/* Return true if MODE is one of the modes for which we
4389 support LDP/STP operations. */
4390
4391static bool
4392aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
4393{
4394 return mode == SImode || mode == DImode
4395 || mode == SFmode || mode == DFmode
4396 || (aarch64_vector_mode_supported_p (mode)
4397 && GET_MODE_SIZE (mode) == 8);
4398}
4399
9e0218fc
RH
4400/* Return true if REGNO is a virtual pointer register, or an eliminable
4401 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
4402 include stack_pointer or hard_frame_pointer. */
4403static bool
4404virt_or_elim_regno_p (unsigned regno)
4405{
4406 return ((regno >= FIRST_VIRTUAL_REGISTER
4407 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
4408 || regno == FRAME_POINTER_REGNUM
4409 || regno == ARG_POINTER_REGNUM);
4410}
4411
43e9d192
IB
4412/* Return true if X is a valid address for machine mode MODE. If it is,
4413 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
4414 effect. OUTER_CODE is PARALLEL for a load/store pair. */
4415
4416static bool
4417aarch64_classify_address (struct aarch64_address_info *info,
ef4bddc2 4418 rtx x, machine_mode mode,
43e9d192
IB
4419 RTX_CODE outer_code, bool strict_p)
4420{
4421 enum rtx_code code = GET_CODE (x);
4422 rtx op0, op1;
2d8c6dc1 4423
80d43579
WD
4424 /* On BE, we use load/store pair for all large int mode load/stores.
4425 TI/TFmode may also use a load/store pair. */
2d8c6dc1 4426 bool load_store_pair_p = (outer_code == PARALLEL
80d43579
WD
4427 || mode == TImode
4428 || mode == TFmode
2d8c6dc1
AH
4429 || (BYTES_BIG_ENDIAN
4430 && aarch64_vect_struct_mode_p (mode)));
4431
43e9d192 4432 bool allow_reg_index_p =
2d8c6dc1
AH
4433 !load_store_pair_p
4434 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
4435 && !aarch64_vect_struct_mode_p (mode);
4436
4437 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
4438 REG addressing. */
4439 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
43e9d192
IB
4440 && (code != POST_INC && code != REG))
4441 return false;
4442
4443 switch (code)
4444 {
4445 case REG:
4446 case SUBREG:
4447 info->type = ADDRESS_REG_IMM;
4448 info->base = x;
4449 info->offset = const0_rtx;
4450 return aarch64_base_register_rtx_p (x, strict_p);
4451
4452 case PLUS:
4453 op0 = XEXP (x, 0);
4454 op1 = XEXP (x, 1);
15c0c5c9
JW
4455
4456 if (! strict_p
4aa81c2e 4457 && REG_P (op0)
9e0218fc 4458 && virt_or_elim_regno_p (REGNO (op0))
4aa81c2e 4459 && CONST_INT_P (op1))
15c0c5c9
JW
4460 {
4461 info->type = ADDRESS_REG_IMM;
4462 info->base = op0;
4463 info->offset = op1;
4464
4465 return true;
4466 }
4467
43e9d192
IB
4468 if (GET_MODE_SIZE (mode) != 0
4469 && CONST_INT_P (op1)
4470 && aarch64_base_register_rtx_p (op0, strict_p))
4471 {
4472 HOST_WIDE_INT offset = INTVAL (op1);
4473
4474 info->type = ADDRESS_REG_IMM;
4475 info->base = op0;
4476 info->offset = op1;
4477
4478 /* TImode and TFmode values are allowed in both pairs of X
4479 registers and individual Q registers. The available
4480 address modes are:
4481 X,X: 7-bit signed scaled offset
4482 Q: 9-bit signed offset
4483 We conservatively require an offset representable in either mode.
8ed49fab
KT
4484 When performing the check for pairs of X registers i.e. LDP/STP
4485 pass down DImode since that is the natural size of the LDP/STP
4486 instruction memory accesses. */
43e9d192 4487 if (mode == TImode || mode == TFmode)
8ed49fab 4488 return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
8734dfac
WD
4489 && (offset_9bit_signed_unscaled_p (mode, offset)
4490 || offset_12bit_unsigned_scaled_p (mode, offset)));
43e9d192 4491
2d8c6dc1
AH
4492 /* A 7bit offset check because OImode will emit a ldp/stp
4493 instruction (only big endian will get here).
4494 For ldp/stp instructions, the offset is scaled for the size of a
4495 single element of the pair. */
4496 if (mode == OImode)
4497 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
4498
4499 /* Three 9/12 bit offsets checks because CImode will emit three
4500 ldr/str instructions (only big endian will get here). */
4501 if (mode == CImode)
4502 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
4503 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
4504 || offset_12bit_unsigned_scaled_p (V16QImode,
4505 offset + 32)));
4506
4507 /* Two 7bit offsets checks because XImode will emit two ldp/stp
4508 instructions (only big endian will get here). */
4509 if (mode == XImode)
4510 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
4511 && aarch64_offset_7bit_signed_scaled_p (TImode,
4512 offset + 32));
4513
4514 if (load_store_pair_p)
43e9d192 4515 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 4516 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
4517 else
4518 return (offset_9bit_signed_unscaled_p (mode, offset)
4519 || offset_12bit_unsigned_scaled_p (mode, offset));
4520 }
4521
4522 if (allow_reg_index_p)
4523 {
4524 /* Look for base + (scaled/extended) index register. */
4525 if (aarch64_base_register_rtx_p (op0, strict_p)
4526 && aarch64_classify_index (info, op1, mode, strict_p))
4527 {
4528 info->base = op0;
4529 return true;
4530 }
4531 if (aarch64_base_register_rtx_p (op1, strict_p)
4532 && aarch64_classify_index (info, op0, mode, strict_p))
4533 {
4534 info->base = op1;
4535 return true;
4536 }
4537 }
4538
4539 return false;
4540
4541 case POST_INC:
4542 case POST_DEC:
4543 case PRE_INC:
4544 case PRE_DEC:
4545 info->type = ADDRESS_REG_WB;
4546 info->base = XEXP (x, 0);
4547 info->offset = NULL_RTX;
4548 return aarch64_base_register_rtx_p (info->base, strict_p);
4549
4550 case POST_MODIFY:
4551 case PRE_MODIFY:
4552 info->type = ADDRESS_REG_WB;
4553 info->base = XEXP (x, 0);
4554 if (GET_CODE (XEXP (x, 1)) == PLUS
4555 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
4556 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
4557 && aarch64_base_register_rtx_p (info->base, strict_p))
4558 {
4559 HOST_WIDE_INT offset;
4560 info->offset = XEXP (XEXP (x, 1), 1);
4561 offset = INTVAL (info->offset);
4562
4563 /* TImode and TFmode values are allowed in both pairs of X
4564 registers and individual Q registers. The available
4565 address modes are:
4566 X,X: 7-bit signed scaled offset
4567 Q: 9-bit signed offset
4568 We conservatively require an offset representable in either mode.
4569 */
4570 if (mode == TImode || mode == TFmode)
44707478 4571 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
4572 && offset_9bit_signed_unscaled_p (mode, offset));
4573
2d8c6dc1 4574 if (load_store_pair_p)
43e9d192 4575 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 4576 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
4577 else
4578 return offset_9bit_signed_unscaled_p (mode, offset);
4579 }
4580 return false;
4581
4582 case CONST:
4583 case SYMBOL_REF:
4584 case LABEL_REF:
79517551
SN
4585 /* load literal: pc-relative constant pool entry. Only supported
4586 for SI mode or larger. */
43e9d192 4587 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1
AH
4588
4589 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
4590 {
4591 rtx sym, addend;
4592
4593 split_const (x, &sym, &addend);
b4f50fd4
RR
4594 return ((GET_CODE (sym) == LABEL_REF
4595 || (GET_CODE (sym) == SYMBOL_REF
4596 && CONSTANT_POOL_ADDRESS_P (sym)
9ee6540a 4597 && aarch64_pcrelative_literal_loads)));
43e9d192
IB
4598 }
4599 return false;
4600
4601 case LO_SUM:
4602 info->type = ADDRESS_LO_SUM;
4603 info->base = XEXP (x, 0);
4604 info->offset = XEXP (x, 1);
4605 if (allow_reg_index_p
4606 && aarch64_base_register_rtx_p (info->base, strict_p))
4607 {
4608 rtx sym, offs;
4609 split_const (info->offset, &sym, &offs);
4610 if (GET_CODE (sym) == SYMBOL_REF
a6e0bfa7 4611 && (aarch64_classify_symbol (sym, offs) == SYMBOL_SMALL_ABSOLUTE))
43e9d192
IB
4612 {
4613 /* The symbol and offset must be aligned to the access size. */
4614 unsigned int align;
4615 unsigned int ref_size;
4616
4617 if (CONSTANT_POOL_ADDRESS_P (sym))
4618 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
4619 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
4620 {
4621 tree exp = SYMBOL_REF_DECL (sym);
4622 align = TYPE_ALIGN (TREE_TYPE (exp));
4623 align = CONSTANT_ALIGNMENT (exp, align);
4624 }
4625 else if (SYMBOL_REF_DECL (sym))
4626 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
4627 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
4628 && SYMBOL_REF_BLOCK (sym) != NULL)
4629 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
4630 else
4631 align = BITS_PER_UNIT;
4632
4633 ref_size = GET_MODE_SIZE (mode);
4634 if (ref_size == 0)
4635 ref_size = GET_MODE_SIZE (DImode);
4636
4637 return ((INTVAL (offs) & (ref_size - 1)) == 0
4638 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
4639 }
4640 }
4641 return false;
4642
4643 default:
4644 return false;
4645 }
4646}
4647
9bf2f779
KT
4648/* Return true if the address X is valid for a PRFM instruction.
4649 STRICT_P is true if we should do strict checking with
4650 aarch64_classify_address. */
4651
4652bool
4653aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
4654{
4655 struct aarch64_address_info addr;
4656
4657 /* PRFM accepts the same addresses as DImode... */
4658 bool res = aarch64_classify_address (&addr, x, DImode, MEM, strict_p);
4659 if (!res)
4660 return false;
4661
4662 /* ... except writeback forms. */
4663 return addr.type != ADDRESS_REG_WB;
4664}
4665
43e9d192
IB
4666bool
4667aarch64_symbolic_address_p (rtx x)
4668{
4669 rtx offset;
4670
4671 split_const (x, &x, &offset);
4672 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
4673}
4674
a6e0bfa7 4675/* Classify the base of symbolic expression X. */
da4f13a4
MS
4676
4677enum aarch64_symbol_type
a6e0bfa7 4678aarch64_classify_symbolic_expression (rtx x)
43e9d192
IB
4679{
4680 rtx offset;
da4f13a4 4681
43e9d192 4682 split_const (x, &x, &offset);
a6e0bfa7 4683 return aarch64_classify_symbol (x, offset);
43e9d192
IB
4684}
4685
4686
4687/* Return TRUE if X is a legitimate address for accessing memory in
4688 mode MODE. */
4689static bool
ef4bddc2 4690aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
4691{
4692 struct aarch64_address_info addr;
4693
4694 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
4695}
4696
4697/* Return TRUE if X is a legitimate address for accessing memory in
4698 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
4699 pair operation. */
4700bool
ef4bddc2 4701aarch64_legitimate_address_p (machine_mode mode, rtx x,
aef66c94 4702 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
4703{
4704 struct aarch64_address_info addr;
4705
4706 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
4707}
4708
491ec060
WD
4709/* Split an out-of-range address displacement into a base and offset.
 4710 Use a 4KB range for 1- and 2-byte accesses and a 16KB range otherwise,
 4711 to increase opportunities for sharing the base address between accesses of different sizes.
8734dfac 4712 For unaligned accesses and TI/TF mode use the signed 9-bit range. */
491ec060
WD
4713static bool
4714aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
4715{
8734dfac
WD
4716 HOST_WIDE_INT offset = INTVAL (*disp);
4717 HOST_WIDE_INT base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
491ec060 4718
8734dfac
WD
4719 if (mode == TImode || mode == TFmode
4720 || (offset & (GET_MODE_SIZE (mode) - 1)) != 0)
4721 base = (offset + 0x100) & ~0x1ff;
491ec060 4722
8734dfac
WD
4723 *off = GEN_INT (base);
4724 *disp = GEN_INT (offset - base);
491ec060
WD
4725 return true;
4726}
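/* Standalone sketch (not part of GCC) of the displacement split above for an
   aligned 4-byte access: the anchor clears offset bits 2..13, so the
   remainder stays inside the scaled unsigned 12-bit range (0..16380 for
   4-byte accesses) and nearby out-of-range offsets share one anchor.
   Unaligned offsets fall back to the signed 9-bit range.  The sample offsets
   are arbitrary.  */
#include <stdio.h>

static void
split_disp (long offset, long size, long *anchor, long *rest)
{
  long base = offset & ~(size < 4 ? 0xfffl : 0x3ffcl);
  if (offset & (size - 1))		/* unaligned: use the signed 9-bit range */
    base = (offset + 0x100) & ~0x1ffl;
  *anchor = base;
  *rest = offset - base;
}

int
main (void)
{
  long a, r;
  split_disp (70000, 4, &a, &r);
  printf ("70000 -> anchor %ld + offset %ld\n", a, r);
  split_disp (70004, 4, &a, &r);	/* shares the anchor computed above */
  printf ("70004 -> anchor %ld + offset %ld\n", a, r);
  return 0;
}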
4727
a2170965
TC
4728/* Return the binary representation of floating point constant VALUE in INTVAL.
4729 If the value cannot be converted, return false without setting INTVAL.
4730 The conversion is done in the given MODE. */
4731bool
4732aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
4733{
4734
4735 /* We make a general exception for 0. */
4736 if (aarch64_float_const_zero_rtx_p (value))
4737 {
4738 *intval = 0;
4739 return true;
4740 }
4741
4742 machine_mode mode = GET_MODE (value);
4743 if (GET_CODE (value) != CONST_DOUBLE
4744 || !SCALAR_FLOAT_MODE_P (mode)
4745 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
4746 /* Only support up to DF mode. */
4747 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
4748 return false;
4749
4750 unsigned HOST_WIDE_INT ival = 0;
4751
4752 long res[2];
4753 real_to_target (res,
4754 CONST_DOUBLE_REAL_VALUE (value),
4755 REAL_MODE_FORMAT (mode));
4756
5c22bb48
TC
4757 if (mode == DFmode)
4758 {
4759 int order = BYTES_BIG_ENDIAN ? 1 : 0;
4760 ival = zext_hwi (res[order], 32);
4761 ival |= (zext_hwi (res[1 - order], 32) << 32);
4762 }
4763 else
4764 ival = zext_hwi (res[0], 32);
a2170965
TC
4765
4766 *intval = ival;
4767 return true;
4768}
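/* Standalone sketch (not part of GCC): viewing a double's bit pattern as an
   integer, which is what the function above does via real_to_target.  1.0
   becomes 0x3ff0000000000000, buildable with a single MOVZ (then FMOV),
   whereas 0.1 has no short mov/movk form and stays a literal-pool load.  */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t
double_bits (double d)
{
  uint64_t u;
  memcpy (&u, &d, sizeof u);	/* bit-for-bit reinterpretation */
  return u;
}

int
main (void)
{
  printf ("1.0 -> %#llx\n", (unsigned long long) double_bits (1.0));
  printf ("0.1 -> %#llx\n", (unsigned long long) double_bits (0.1));
  return 0;
}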
4769
4770/* Return TRUE if rtx X is an immediate constant that can be moved using a
4771 single MOV(+MOVK) followed by an FMOV. */
4772bool
4773aarch64_float_const_rtx_p (rtx x)
4774{
4775 machine_mode mode = GET_MODE (x);
4776 if (mode == VOIDmode)
4777 return false;
4778
4779 /* Determine whether it's cheaper to write float constants as
 4780 mov/movk pairs rather than as adrp/ldr literal loads. */
4781 unsigned HOST_WIDE_INT ival;
4782
4783 if (GET_CODE (x) == CONST_DOUBLE
4784 && SCALAR_FLOAT_MODE_P (mode)
4785 && aarch64_reinterpret_float_as_int (x, &ival))
4786 {
77e994c9
RS
4787 scalar_int_mode imode = (mode == HFmode
4788 ? SImode
4789 : int_mode_for_mode (mode).require ());
a2170965
TC
4790 int num_instr = aarch64_internal_mov_immediate
4791 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
4792 return num_instr < 3;
4793 }
4794
4795 return false;
4796}
4797
43e9d192
IB
 4798/* Return TRUE if rtx X is the immediate constant 0.0. */
4799bool
3520f7cc 4800aarch64_float_const_zero_rtx_p (rtx x)
43e9d192 4801{
43e9d192
IB
4802 if (GET_MODE (x) == VOIDmode)
4803 return false;
4804
34a72c33 4805 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
43e9d192 4806 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
34a72c33 4807 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
43e9d192
IB
4808}
4809
a2170965
TC
 4810/* Return TRUE if rtx X is an immediate constant that fits in a single
4811 MOVI immediate operation. */
4812bool
4813aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
4814{
4815 if (!TARGET_SIMD)
4816 return false;
4817
77e994c9
RS
4818 machine_mode vmode;
4819 scalar_int_mode imode;
a2170965
TC
4820 unsigned HOST_WIDE_INT ival;
4821
4822 if (GET_CODE (x) == CONST_DOUBLE
4823 && SCALAR_FLOAT_MODE_P (mode))
4824 {
4825 if (!aarch64_reinterpret_float_as_int (x, &ival))
4826 return false;
4827
35c38fa6
TC
4828 /* We make a general exception for 0. */
4829 if (aarch64_float_const_zero_rtx_p (x))
4830 return true;
4831
304b9962 4832 imode = int_mode_for_mode (mode).require ();
a2170965
TC
4833 }
4834 else if (GET_CODE (x) == CONST_INT
77e994c9
RS
4835 && is_a <scalar_int_mode> (mode, &imode))
4836 ival = INTVAL (x);
a2170965
TC
4837 else
4838 return false;
4839
 4840 /* Use a 64-bit vector mode for everything except DImode/DFmode, where we
 4841 use a 128-bit vector mode. */
77e994c9 4842 int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64;
a2170965
TC
4843
4844 vmode = aarch64_simd_container_mode (imode, width);
4845 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
4846
4847 return aarch64_simd_valid_immediate (v_op, vmode, false, NULL);
4848}
4849
4850
70f09188
AP
4851/* Return the fixed registers used for condition codes. */
4852
4853static bool
4854aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
4855{
4856 *p1 = CC_REGNUM;
4857 *p2 = INVALID_REGNUM;
4858 return true;
4859}
4860
47210a04
RL
4861/* This function is used by the call expanders of the machine description.
4862 RESULT is the register in which the result is returned. It's NULL for
4863 "call" and "sibcall".
4864 MEM is the location of the function call.
 4865 SIBCALL indicates whether this function call is a normal call or a sibling
 4866 call, and a different pattern is generated accordingly. */
4867
4868void
4869aarch64_expand_call (rtx result, rtx mem, bool sibcall)
4870{
4871 rtx call, callee, tmp;
4872 rtvec vec;
4873 machine_mode mode;
4874
4875 gcc_assert (MEM_P (mem));
4876 callee = XEXP (mem, 0);
4877 mode = GET_MODE (callee);
4878 gcc_assert (mode == Pmode);
4879
4880 /* Decide if we should generate indirect calls by loading the
4881 address of the callee into a register before performing
4882 the branch-and-link. */
4883 if (SYMBOL_REF_P (callee)
4884 ? (aarch64_is_long_call_p (callee)
4885 || aarch64_is_noplt_call_p (callee))
4886 : !REG_P (callee))
4887 XEXP (mem, 0) = force_reg (mode, callee);
4888
4889 call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
4890
4891 if (result != NULL_RTX)
4892 call = gen_rtx_SET (result, call);
4893
4894 if (sibcall)
4895 tmp = ret_rtx;
4896 else
4897 tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM));
4898
4899 vec = gen_rtvec (2, call, tmp);
4900 call = gen_rtx_PARALLEL (VOIDmode, vec);
4901
4902 aarch64_emit_call_insn (call);
4903}
4904
78607708
TV
4905/* Emit call insn with PAT and do aarch64-specific handling. */
4906
d07a3fed 4907void
78607708
TV
4908aarch64_emit_call_insn (rtx pat)
4909{
4910 rtx insn = emit_call_insn (pat);
4911
4912 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
4913 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
4914 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
4915}
4916
ef4bddc2 4917machine_mode
43e9d192
IB
4918aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
4919{
4920 /* All floating point compares return CCFP if it is an equality
4921 comparison, and CCFPE otherwise. */
4922 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
4923 {
4924 switch (code)
4925 {
4926 case EQ:
4927 case NE:
4928 case UNORDERED:
4929 case ORDERED:
4930 case UNLT:
4931 case UNLE:
4932 case UNGT:
4933 case UNGE:
4934 case UNEQ:
4935 case LTGT:
4936 return CCFPmode;
4937
4938 case LT:
4939 case LE:
4940 case GT:
4941 case GE:
4942 return CCFPEmode;
4943
4944 default:
4945 gcc_unreachable ();
4946 }
4947 }
4948
2b8568fe
KT
4949 /* Equality comparisons of short modes against zero can be performed
4950 using the TST instruction with the appropriate bitmask. */
4951 if (y == const0_rtx && REG_P (x)
4952 && (code == EQ || code == NE)
4953 && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
4954 return CC_NZmode;
4955
b06335f9
KT
4956 /* Similarly, comparisons of zero_extends from shorter modes can
4957 be performed using an ANDS with an immediate mask. */
4958 if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND
4959 && (GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4960 && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
4961 && (code == EQ || code == NE))
4962 return CC_NZmode;
4963
43e9d192
IB
4964 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4965 && y == const0_rtx
4966 && (code == EQ || code == NE || code == LT || code == GE)
b056c910 4967 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
7325d85a
KT
4968 || GET_CODE (x) == NEG
4969 || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
4970 && CONST_INT_P (XEXP (x, 2)))))
43e9d192
IB
4971 return CC_NZmode;
4972
1c992d1e 4973 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
4974 the comparison will have to be swapped when we emit the assembly
4975 code. */
4976 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
ffa8a921 4977 && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
43e9d192
IB
4978 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
4979 || GET_CODE (x) == LSHIFTRT
1c992d1e 4980 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
4981 return CC_SWPmode;
4982
1c992d1e
RE
4983 /* Similarly for a negated operand, but we can only do this for
4984 equalities. */
4985 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 4986 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
4987 && (code == EQ || code == NE)
4988 && GET_CODE (x) == NEG)
4989 return CC_Zmode;
4990
ef22810a
RH
4991 /* A test for unsigned overflow. */
4992 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
4993 && code == NE
4994 && GET_CODE (x) == PLUS
4995 && GET_CODE (y) == ZERO_EXTEND)
4996 return CC_Cmode;
4997
43e9d192
IB
4998 /* For everything else, return CCmode. */
4999 return CCmode;
5000}
5001
3dfa7055 5002static int
b8506a8a 5003aarch64_get_condition_code_1 (machine_mode, enum rtx_code);
3dfa7055 5004
cd5660ab 5005int
43e9d192
IB
5006aarch64_get_condition_code (rtx x)
5007{
ef4bddc2 5008 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
5009 enum rtx_code comp_code = GET_CODE (x);
5010
5011 if (GET_MODE_CLASS (mode) != MODE_CC)
5012 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
5013 return aarch64_get_condition_code_1 (mode, comp_code);
5014}
43e9d192 5015
3dfa7055 5016static int
b8506a8a 5017aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
3dfa7055 5018{
43e9d192
IB
5019 switch (mode)
5020 {
4e10a5a7
RS
5021 case E_CCFPmode:
5022 case E_CCFPEmode:
43e9d192
IB
5023 switch (comp_code)
5024 {
5025 case GE: return AARCH64_GE;
5026 case GT: return AARCH64_GT;
5027 case LE: return AARCH64_LS;
5028 case LT: return AARCH64_MI;
5029 case NE: return AARCH64_NE;
5030 case EQ: return AARCH64_EQ;
5031 case ORDERED: return AARCH64_VC;
5032 case UNORDERED: return AARCH64_VS;
5033 case UNLT: return AARCH64_LT;
5034 case UNLE: return AARCH64_LE;
5035 case UNGT: return AARCH64_HI;
5036 case UNGE: return AARCH64_PL;
cd5660ab 5037 default: return -1;
43e9d192
IB
5038 }
5039 break;
5040
4e10a5a7 5041 case E_CCmode:
43e9d192
IB
5042 switch (comp_code)
5043 {
5044 case NE: return AARCH64_NE;
5045 case EQ: return AARCH64_EQ;
5046 case GE: return AARCH64_GE;
5047 case GT: return AARCH64_GT;
5048 case LE: return AARCH64_LE;
5049 case LT: return AARCH64_LT;
5050 case GEU: return AARCH64_CS;
5051 case GTU: return AARCH64_HI;
5052 case LEU: return AARCH64_LS;
5053 case LTU: return AARCH64_CC;
cd5660ab 5054 default: return -1;
43e9d192
IB
5055 }
5056 break;
5057
4e10a5a7 5058 case E_CC_SWPmode:
43e9d192
IB
5059 switch (comp_code)
5060 {
5061 case NE: return AARCH64_NE;
5062 case EQ: return AARCH64_EQ;
5063 case GE: return AARCH64_LE;
5064 case GT: return AARCH64_LT;
5065 case LE: return AARCH64_GE;
5066 case LT: return AARCH64_GT;
5067 case GEU: return AARCH64_LS;
5068 case GTU: return AARCH64_CC;
5069 case LEU: return AARCH64_CS;
5070 case LTU: return AARCH64_HI;
cd5660ab 5071 default: return -1;
43e9d192
IB
5072 }
5073 break;
5074
4e10a5a7 5075 case E_CC_NZmode:
43e9d192
IB
5076 switch (comp_code)
5077 {
5078 case NE: return AARCH64_NE;
5079 case EQ: return AARCH64_EQ;
5080 case GE: return AARCH64_PL;
5081 case LT: return AARCH64_MI;
cd5660ab 5082 default: return -1;
43e9d192
IB
5083 }
5084 break;
5085
4e10a5a7 5086 case E_CC_Zmode:
1c992d1e
RE
5087 switch (comp_code)
5088 {
5089 case NE: return AARCH64_NE;
5090 case EQ: return AARCH64_EQ;
cd5660ab 5091 default: return -1;
1c992d1e
RE
5092 }
5093 break;
5094
4e10a5a7 5095 case E_CC_Cmode:
ef22810a
RH
5096 switch (comp_code)
5097 {
5098 case NE: return AARCH64_CS;
5099 case EQ: return AARCH64_CC;
5100 default: return -1;
5101 }
5102 break;
5103
43e9d192 5104 default:
cd5660ab 5105 return -1;
43e9d192 5106 }
3dfa7055 5107
3dfa7055 5108 return -1;
43e9d192
IB
5109}
5110
ddeabd3e
AL
5111bool
5112aarch64_const_vec_all_same_in_range_p (rtx x,
5113 HOST_WIDE_INT minval,
5114 HOST_WIDE_INT maxval)
5115{
5116 HOST_WIDE_INT firstval;
5117 int count, i;
5118
5119 if (GET_CODE (x) != CONST_VECTOR
5120 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
5121 return false;
5122
5123 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
5124 if (firstval < minval || firstval > maxval)
5125 return false;
5126
5127 count = CONST_VECTOR_NUNITS (x);
5128 for (i = 1; i < count; i++)
5129 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
5130 return false;
5131
5132 return true;
5133}
5134
5135bool
5136aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
5137{
5138 return aarch64_const_vec_all_same_in_range_p (x, val, val);
5139}
5140
43e9d192 5141
cf670503
ZC
5142/* N Z C V. */
5143#define AARCH64_CC_V 1
5144#define AARCH64_CC_C (1 << 1)
5145#define AARCH64_CC_Z (1 << 2)
5146#define AARCH64_CC_N (1 << 3)
5147
c8012fbc
WD
5148/* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
5149static const int aarch64_nzcv_codes[] =
5150{
5151 0, /* EQ, Z == 1. */
5152 AARCH64_CC_Z, /* NE, Z == 0. */
5153 0, /* CS, C == 1. */
5154 AARCH64_CC_C, /* CC, C == 0. */
5155 0, /* MI, N == 1. */
5156 AARCH64_CC_N, /* PL, N == 0. */
5157 0, /* VS, V == 1. */
5158 AARCH64_CC_V, /* VC, V == 0. */
 5159 0, /* HI, C == 1 && Z == 0. */
5160 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
5161 AARCH64_CC_V, /* GE, N == V. */
5162 0, /* LT, N != V. */
5163 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
5164 0, /* LE, !(Z == 0 && N == V). */
5165 0, /* AL, Any. */
5166 0 /* NV, Any. */
cf670503
ZC
5167};
5168
bcf19844
JW
5169/* Print operand X to file F in a target specific manner according to CODE.
5170 The acceptable formatting commands given by CODE are:
5171 'c': An integer or symbol address without a preceding #
5172 sign.
5173 'e': Print the sign/zero-extend size as a character 8->b,
5174 16->h, 32->w.
5175 'p': Prints N such that 2^N == X (X must be power of 2 and
5176 const int).
5177 'P': Print the number of non-zero bits in X (a const_int).
5178 'H': Print the higher numbered register of a pair (TImode)
5179 of regs.
5180 'm': Print a condition (eq, ne, etc).
5181 'M': Same as 'm', but invert condition.
5182 'b/h/s/d/q': Print a scalar FP/SIMD register name.
5183 'S/T/U/V': Print a FP/SIMD register name for a register list.
5184 The register printed is the FP/SIMD register name
5185 of X + 0/1/2/3 for S/T/U/V.
5186 'R': Print a scalar FP/SIMD register name + 1.
5187 'X': Print bottom 16 bits of integer constant in hex.
5188 'w/x': Print a general register name or the zero register
5189 (32-bit or 64-bit).
 5190 '0': Print a normal operand; if it's a general register,
 5191 then we assume DImode.
5192 'k': Print NZCV for conditional compare instructions.
5193 'A': Output address constant representing the first
5194 argument of X, specifying a relocation offset
5195 if appropriate.
5196 'L': Output constant address specified by X
5197 with a relocation offset if appropriate.
5198 'G': Prints address of X, specifying a PC relative
5199 relocation mode if appropriate. */
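 A minimal illustration (an assumed example, not a pattern from this
 file): in an output template such as "add\t%w0, %w1, %w2" the 'w'
 modifier prints the 32-bit register names (w0, w1, ...), "%x0" prints
 the 64-bit name, and "%H0" prints the higher register of a TImode
 pair. */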
5200
cc8ca59e
JB
5201static void
5202aarch64_print_operand (FILE *f, rtx x, int code)
43e9d192
IB
5203{
5204 switch (code)
5205 {
f541a481
KT
5206 case 'c':
5207 switch (GET_CODE (x))
5208 {
5209 case CONST_INT:
5210 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5211 break;
5212
5213 case SYMBOL_REF:
5214 output_addr_const (f, x);
5215 break;
5216
5217 case CONST:
5218 if (GET_CODE (XEXP (x, 0)) == PLUS
5219 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5220 {
5221 output_addr_const (f, x);
5222 break;
5223 }
5224 /* Fall through. */
5225
5226 default:
5227 output_operand_lossage ("Unsupported operand for code '%c'", code);
5228 }
5229 break;
5230
43e9d192 5231 case 'e':
43e9d192
IB
5232 {
5233 int n;
5234
4aa81c2e 5235 if (!CONST_INT_P (x)
43e9d192
IB
5236 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
5237 {
5238 output_operand_lossage ("invalid operand for '%%%c'", code);
5239 return;
5240 }
5241
5242 switch (n)
5243 {
5244 case 3:
5245 fputc ('b', f);
5246 break;
5247 case 4:
5248 fputc ('h', f);
5249 break;
5250 case 5:
5251 fputc ('w', f);
5252 break;
5253 default:
5254 output_operand_lossage ("invalid operand for '%%%c'", code);
5255 return;
5256 }
5257 }
5258 break;
5259
5260 case 'p':
5261 {
5262 int n;
5263
4aa81c2e 5264 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
5265 {
5266 output_operand_lossage ("invalid operand for '%%%c'", code);
5267 return;
5268 }
5269
5270 asm_fprintf (f, "%d", n);
5271 }
5272 break;
5273
5274 case 'P':
4aa81c2e 5275 if (!CONST_INT_P (x))
43e9d192
IB
5276 {
5277 output_operand_lossage ("invalid operand for '%%%c'", code);
5278 return;
5279 }
5280
8d55c61b 5281 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
5282 break;
5283
5284 case 'H':
4aa81c2e 5285 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
5286 {
5287 output_operand_lossage ("invalid operand for '%%%c'", code);
5288 return;
5289 }
5290
01a3a324 5291 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
5292 break;
5293
43e9d192 5294 case 'M':
c8012fbc 5295 case 'm':
cd5660ab
KT
5296 {
5297 int cond_code;
c8012fbc
WD
5298 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
5299 if (x == const_true_rtx)
cd5660ab 5300 {
c8012fbc
WD
5301 if (code == 'M')
5302 fputs ("nv", f);
cd5660ab
KT
5303 return;
5304 }
43e9d192 5305
cd5660ab
KT
5306 if (!COMPARISON_P (x))
5307 {
5308 output_operand_lossage ("invalid operand for '%%%c'", code);
5309 return;
5310 }
c8012fbc 5311
cd5660ab
KT
5312 cond_code = aarch64_get_condition_code (x);
5313 gcc_assert (cond_code >= 0);
c8012fbc
WD
5314 if (code == 'M')
5315 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
5316 fputs (aarch64_condition_codes[cond_code], f);
cd5660ab 5317 }
43e9d192
IB
5318 break;
5319
5320 case 'b':
5321 case 'h':
5322 case 's':
5323 case 'd':
5324 case 'q':
43e9d192
IB
5325 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
5326 {
5327 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
5328 return;
5329 }
50ce6f88 5330 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
5331 break;
5332
5333 case 'S':
5334 case 'T':
5335 case 'U':
5336 case 'V':
43e9d192
IB
5337 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
5338 {
5339 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
5340 return;
5341 }
50ce6f88 5342 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
5343 break;
5344
2d8c6dc1 5345 case 'R':
2d8c6dc1
AH
5346 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
5347 {
5348 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
5349 return;
5350 }
5351 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
5352 break;
5353
a05c0ddf 5354 case 'X':
4aa81c2e 5355 if (!CONST_INT_P (x))
a05c0ddf
IB
5356 {
5357 output_operand_lossage ("invalid operand for '%%%c'", code);
5358 return;
5359 }
50d38551 5360 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
5361 break;
5362
43e9d192
IB
5363 case 'w':
5364 case 'x':
3520f7cc
JG
5365 if (x == const0_rtx
5366 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 5367 {
50ce6f88 5368 asm_fprintf (f, "%czr", code);
43e9d192
IB
5369 break;
5370 }
5371
5372 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
5373 {
50ce6f88 5374 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
5375 break;
5376 }
5377
5378 if (REG_P (x) && REGNO (x) == SP_REGNUM)
5379 {
50ce6f88 5380 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
5381 break;
5382 }
5383
5384 /* Fall through */
5385
5386 case 0:
43e9d192
IB
5387 if (x == NULL)
5388 {
5389 output_operand_lossage ("missing operand");
5390 return;
5391 }
5392
5393 switch (GET_CODE (x))
5394 {
5395 case REG:
01a3a324 5396 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
5397 break;
5398
5399 case MEM:
cc8ca59e 5400 output_address (GET_MODE (x), XEXP (x, 0));
00eee3fa
WD
5401 /* Check all memory references are Pmode - even with ILP32. */
5402 gcc_assert (GET_MODE (XEXP (x, 0)) == Pmode);
43e9d192
IB
5403 break;
5404
2af16a7c 5405 case CONST:
43e9d192
IB
5406 case LABEL_REF:
5407 case SYMBOL_REF:
5408 output_addr_const (asm_out_file, x);
5409 break;
5410
5411 case CONST_INT:
5412 asm_fprintf (f, "%wd", INTVAL (x));
5413 break;
5414
5415 case CONST_VECTOR:
3520f7cc
JG
5416 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
5417 {
ddeabd3e
AL
5418 gcc_assert (
5419 aarch64_const_vec_all_same_in_range_p (x,
5420 HOST_WIDE_INT_MIN,
5421 HOST_WIDE_INT_MAX));
3520f7cc
JG
5422 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
5423 }
5424 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
5425 {
5426 fputc ('0', f);
5427 }
5428 else
5429 gcc_unreachable ();
43e9d192
IB
5430 break;
5431
3520f7cc 5432 case CONST_DOUBLE:
2ca5b430
KT
5433 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
5434 be getting CONST_DOUBLEs holding integers. */
5435 gcc_assert (GET_MODE (x) != VOIDmode);
5436 if (aarch64_float_const_zero_rtx_p (x))
3520f7cc
JG
5437 {
5438 fputc ('0', f);
5439 break;
5440 }
5441 else if (aarch64_float_const_representable_p (x))
5442 {
5443#define buf_size 20
5444 char float_buf[buf_size] = {'\0'};
34a72c33
RS
5445 real_to_decimal_for_mode (float_buf,
5446 CONST_DOUBLE_REAL_VALUE (x),
3520f7cc
JG
5447 buf_size, buf_size,
5448 1, GET_MODE (x));
5449 asm_fprintf (asm_out_file, "%s", float_buf);
5450 break;
5451#undef buf_size
5452 }
5453 output_operand_lossage ("invalid constant");
5454 return;
43e9d192
IB
5455 default:
5456 output_operand_lossage ("invalid operand");
5457 return;
5458 }
5459 break;
5460
5461 case 'A':
5462 if (GET_CODE (x) == HIGH)
5463 x = XEXP (x, 0);
5464
a6e0bfa7 5465 switch (aarch64_classify_symbolic_expression (x))
43e9d192 5466 {
6642bdb4 5467 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
5468 asm_fprintf (asm_out_file, ":got:");
5469 break;
5470
5471 case SYMBOL_SMALL_TLSGD:
5472 asm_fprintf (asm_out_file, ":tlsgd:");
5473 break;
5474
5475 case SYMBOL_SMALL_TLSDESC:
5476 asm_fprintf (asm_out_file, ":tlsdesc:");
5477 break;
5478
79496620 5479 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
5480 asm_fprintf (asm_out_file, ":gottprel:");
5481 break;
5482
d18ba284 5483 case SYMBOL_TLSLE24:
43e9d192
IB
5484 asm_fprintf (asm_out_file, ":tprel:");
5485 break;
5486
87dd8ab0
MS
5487 case SYMBOL_TINY_GOT:
5488 gcc_unreachable ();
5489 break;
5490
43e9d192
IB
5491 default:
5492 break;
5493 }
5494 output_addr_const (asm_out_file, x);
5495 break;
5496
5497 case 'L':
a6e0bfa7 5498 switch (aarch64_classify_symbolic_expression (x))
43e9d192 5499 {
6642bdb4 5500 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
5501 asm_fprintf (asm_out_file, ":lo12:");
5502 break;
5503
5504 case SYMBOL_SMALL_TLSGD:
5505 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
5506 break;
5507
5508 case SYMBOL_SMALL_TLSDESC:
5509 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
5510 break;
5511
79496620 5512 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
5513 asm_fprintf (asm_out_file, ":gottprel_lo12:");
5514 break;
5515
cbf5629e
JW
5516 case SYMBOL_TLSLE12:
5517 asm_fprintf (asm_out_file, ":tprel_lo12:");
5518 break;
5519
d18ba284 5520 case SYMBOL_TLSLE24:
43e9d192
IB
5521 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
5522 break;
5523
87dd8ab0
MS
5524 case SYMBOL_TINY_GOT:
5525 asm_fprintf (asm_out_file, ":got:");
5526 break;
5527
5ae7caad
JW
5528 case SYMBOL_TINY_TLSIE:
5529 asm_fprintf (asm_out_file, ":gottprel:");
5530 break;
5531
43e9d192
IB
5532 default:
5533 break;
5534 }
5535 output_addr_const (asm_out_file, x);
5536 break;
5537
5538 case 'G':
a6e0bfa7 5539 switch (aarch64_classify_symbolic_expression (x))
43e9d192 5540 {
d18ba284 5541 case SYMBOL_TLSLE24:
43e9d192
IB
5542 asm_fprintf (asm_out_file, ":tprel_hi12:");
5543 break;
5544 default:
5545 break;
5546 }
5547 output_addr_const (asm_out_file, x);
5548 break;
5549
cf670503
ZC
5550 case 'k':
5551 {
c8012fbc 5552 HOST_WIDE_INT cond_code;
cf670503 5553
c8012fbc 5554 if (!CONST_INT_P (x))
cf670503
ZC
5555 {
5556 output_operand_lossage ("invalid operand for '%%%c'", code);
5557 return;
5558 }
5559
c8012fbc
WD
5560 cond_code = INTVAL (x);
5561 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
5562 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
cf670503
ZC
5563 }
5564 break;
5565
43e9d192
IB
5566 default:
5567 output_operand_lossage ("invalid operand prefix '%%%c'", code);
5568 return;
5569 }
5570}
5571
cc8ca59e
JB
5572static void
5573aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
43e9d192
IB
5574{
5575 struct aarch64_address_info addr;
5576
cc8ca59e 5577 if (aarch64_classify_address (&addr, x, mode, MEM, true))
43e9d192
IB
5578 switch (addr.type)
5579 {
5580 case ADDRESS_REG_IMM:
5581 if (addr.offset == const0_rtx)
01a3a324 5582 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 5583 else
16a3246f 5584 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
5585 INTVAL (addr.offset));
5586 return;
5587
5588 case ADDRESS_REG_REG:
5589 if (addr.shift == 0)
16a3246f 5590 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 5591 reg_names [REGNO (addr.offset)]);
43e9d192 5592 else
16a3246f 5593 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 5594 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
5595 return;
5596
5597 case ADDRESS_REG_UXTW:
5598 if (addr.shift == 0)
16a3246f 5599 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
5600 REGNO (addr.offset) - R0_REGNUM);
5601 else
16a3246f 5602 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
5603 REGNO (addr.offset) - R0_REGNUM, addr.shift);
5604 return;
5605
5606 case ADDRESS_REG_SXTW:
5607 if (addr.shift == 0)
16a3246f 5608 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
5609 REGNO (addr.offset) - R0_REGNUM);
5610 else
16a3246f 5611 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
5612 REGNO (addr.offset) - R0_REGNUM, addr.shift);
5613 return;
5614
5615 case ADDRESS_REG_WB:
5616 switch (GET_CODE (x))
5617 {
5618 case PRE_INC:
16a3246f 5619 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
cc8ca59e 5620 GET_MODE_SIZE (mode));
43e9d192
IB
5621 return;
5622 case POST_INC:
16a3246f 5623 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
cc8ca59e 5624 GET_MODE_SIZE (mode));
43e9d192
IB
5625 return;
5626 case PRE_DEC:
16a3246f 5627 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
cc8ca59e 5628 GET_MODE_SIZE (mode));
43e9d192
IB
5629 return;
5630 case POST_DEC:
16a3246f 5631 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
cc8ca59e 5632 GET_MODE_SIZE (mode));
43e9d192
IB
5633 return;
5634 case PRE_MODIFY:
16a3246f 5635 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
5636 INTVAL (addr.offset));
5637 return;
5638 case POST_MODIFY:
16a3246f 5639 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
5640 INTVAL (addr.offset));
5641 return;
5642 default:
5643 break;
5644 }
5645 break;
5646
5647 case ADDRESS_LO_SUM:
16a3246f 5648 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
5649 output_addr_const (f, addr.offset);
5650 asm_fprintf (f, "]");
5651 return;
5652
5653 case ADDRESS_SYMBOLIC:
5654 break;
5655 }
5656
5657 output_addr_const (f, x);
5658}
5659
43e9d192
IB
5660bool
5661aarch64_label_mentioned_p (rtx x)
5662{
5663 const char *fmt;
5664 int i;
5665
5666 if (GET_CODE (x) == LABEL_REF)
5667 return true;
5668
5669 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
5670 referencing instruction, but they are constant offsets, not
5671 symbols. */
5672 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5673 return false;
5674
5675 fmt = GET_RTX_FORMAT (GET_CODE (x));
5676 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5677 {
5678 if (fmt[i] == 'E')
5679 {
5680 int j;
5681
5682 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5683 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
5684 return 1;
5685 }
5686 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
5687 return 1;
5688 }
5689
5690 return 0;
5691}
5692
5693/* Implement REGNO_REG_CLASS. */
5694
5695enum reg_class
5696aarch64_regno_regclass (unsigned regno)
5697{
5698 if (GP_REGNUM_P (regno))
a4a182c6 5699 return GENERAL_REGS;
43e9d192
IB
5700
5701 if (regno == SP_REGNUM)
5702 return STACK_REG;
5703
5704 if (regno == FRAME_POINTER_REGNUM
5705 || regno == ARG_POINTER_REGNUM)
f24bb080 5706 return POINTER_REGS;
43e9d192
IB
5707
5708 if (FP_REGNUM_P (regno))
5709 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
5710
5711 return NO_REGS;
5712}
5713
0c4ec427 5714static rtx
ef4bddc2 5715aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
5716{
5717 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
5718 where mask is selected by alignment and size of the offset.
5719 We try to pick as large a range for the offset as possible to
5720 maximize the chance of a CSE. However, for aligned addresses
5721 we limit the range to 4k so that structures with different sized
e8426e0a
BC
5722 elements are likely to use the same base. We need to be careful
5723 not to split a CONST for some forms of address expression, otherwise
5724 it will generate sub-optimal code. */
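 /* Worked example (illustrative values): for an SImode access at
 (plus (reg X) (const_int 0x13464)) the code below picks
 base_offset = 0x13464 & ~0x3fff = 0x10000, forces X + 0x10000 into a
 register and leaves an offset of 0x3464, which fits the scaled
 unsigned 12-bit immediate of a 32-bit LDR/STR. */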
0c4ec427
RE
5725
5726 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
5727 {
9e0218fc 5728 rtx base = XEXP (x, 0);
17d7bdd8 5729 rtx offset_rtx = XEXP (x, 1);
9e0218fc 5730 HOST_WIDE_INT offset = INTVAL (offset_rtx);
0c4ec427 5731
9e0218fc 5732 if (GET_CODE (base) == PLUS)
e8426e0a 5733 {
9e0218fc
RH
5734 rtx op0 = XEXP (base, 0);
5735 rtx op1 = XEXP (base, 1);
5736
5737 /* Force any scaling into a temp for CSE. */
5738 op0 = force_reg (Pmode, op0);
5739 op1 = force_reg (Pmode, op1);
5740
5741 /* Let the pointer register be in op0. */
5742 if (REG_POINTER (op1))
5743 std::swap (op0, op1);
5744
5745 /* If the pointer is virtual or frame related, then we know that
5746 virtual register instantiation or register elimination is going
5747 to apply a second constant. We want the two constants folded
5748 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
5749 if (virt_or_elim_regno_p (REGNO (op0)))
e8426e0a 5750 {
9e0218fc
RH
5751 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
5752 NULL_RTX, true, OPTAB_DIRECT);
5753 return gen_rtx_PLUS (Pmode, base, op1);
e8426e0a 5754 }
e8426e0a 5755
9e0218fc
RH
5756 /* Otherwise, in order to encourage CSE (and thence loop strength
5757 reduce) scaled addresses, emit as (OP0 + OP1) + CONST. */
5758 base = expand_binop (Pmode, add_optab, op0, op1,
5759 NULL_RTX, true, OPTAB_DIRECT);
5760 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
e8426e0a
BC
5761 }
5762
8734dfac 5763 /* Does it look like we'll need a 16-byte load/store-pair operation? */
9e0218fc 5764 HOST_WIDE_INT base_offset;
8734dfac
WD
5765 if (GET_MODE_SIZE (mode) > 16)
5766 base_offset = (offset + 0x400) & ~0x7f0;
0c4ec427
RE
 5767 /* For offsets that aren't a multiple of the access size, the limit is
 5768 -256...255. */
5769 else if (offset & (GET_MODE_SIZE (mode) - 1))
ff0f3f1c
WD
5770 {
5771 base_offset = (offset + 0x100) & ~0x1ff;
5772
5773 /* BLKmode typically uses LDP of X-registers. */
5774 if (mode == BLKmode)
5775 base_offset = (offset + 512) & ~0x3ff;
5776 }
5777 /* Small negative offsets are supported. */
5778 else if (IN_RANGE (offset, -256, 0))
5779 base_offset = 0;
8734dfac
WD
5780 else if (mode == TImode || mode == TFmode)
5781 base_offset = (offset + 0x100) & ~0x1ff;
 ff0f3f1c 5782 /* Use the unsigned 12-bit offset scaled by the access size. */
0c4ec427 5783 else
ff0f3f1c 5784 base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
0c4ec427 5785
9e0218fc
RH
5786 if (base_offset != 0)
5787 {
5788 base = plus_constant (Pmode, base, base_offset);
5789 base = force_operand (base, NULL_RTX);
5790 return plus_constant (Pmode, base, offset - base_offset);
5791 }
0c4ec427
RE
5792 }
5793
5794 return x;
5795}
5796
b4f50fd4
RR
5797/* Return the reload icode required for a constant pool in mode. */
5798static enum insn_code
5799aarch64_constant_pool_reload_icode (machine_mode mode)
5800{
5801 switch (mode)
5802 {
4e10a5a7 5803 case E_SFmode:
b4f50fd4
RR
5804 return CODE_FOR_aarch64_reload_movcpsfdi;
5805
4e10a5a7 5806 case E_DFmode:
b4f50fd4
RR
5807 return CODE_FOR_aarch64_reload_movcpdfdi;
5808
4e10a5a7 5809 case E_TFmode:
b4f50fd4
RR
5810 return CODE_FOR_aarch64_reload_movcptfdi;
5811
4e10a5a7 5812 case E_V8QImode:
b4f50fd4
RR
5813 return CODE_FOR_aarch64_reload_movcpv8qidi;
5814
4e10a5a7 5815 case E_V16QImode:
b4f50fd4
RR
5816 return CODE_FOR_aarch64_reload_movcpv16qidi;
5817
4e10a5a7 5818 case E_V4HImode:
b4f50fd4
RR
5819 return CODE_FOR_aarch64_reload_movcpv4hidi;
5820
4e10a5a7 5821 case E_V8HImode:
b4f50fd4
RR
5822 return CODE_FOR_aarch64_reload_movcpv8hidi;
5823
4e10a5a7 5824 case E_V2SImode:
b4f50fd4
RR
5825 return CODE_FOR_aarch64_reload_movcpv2sidi;
5826
4e10a5a7 5827 case E_V4SImode:
b4f50fd4
RR
5828 return CODE_FOR_aarch64_reload_movcpv4sidi;
5829
4e10a5a7 5830 case E_V2DImode:
b4f50fd4
RR
5831 return CODE_FOR_aarch64_reload_movcpv2didi;
5832
4e10a5a7 5833 case E_V2DFmode:
b4f50fd4
RR
5834 return CODE_FOR_aarch64_reload_movcpv2dfdi;
5835
5836 default:
5837 gcc_unreachable ();
5838 }
5839
5840 gcc_unreachable ();
5841}
43e9d192
IB
5842static reg_class_t
5843aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
5844 reg_class_t rclass,
ef4bddc2 5845 machine_mode mode,
43e9d192
IB
5846 secondary_reload_info *sri)
5847{
b4f50fd4
RR
5848
5849 /* If we have to disable direct literal pool loads and stores because the
5850 function is too big, then we need a scratch register. */
5851 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
5852 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
5853 || targetm.vector_mode_supported_p (GET_MODE (x)))
9ee6540a 5854 && !aarch64_pcrelative_literal_loads)
b4f50fd4
RR
5855 {
5856 sri->icode = aarch64_constant_pool_reload_icode (mode);
5857 return NO_REGS;
5858 }
5859
43e9d192
IB
5860 /* Without the TARGET_SIMD instructions we cannot move a Q register
5861 to a Q register directly. We need a scratch. */
5862 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
5863 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
5864 && reg_class_subset_p (rclass, FP_REGS))
5865 {
5866 if (mode == TFmode)
5867 sri->icode = CODE_FOR_aarch64_reload_movtf;
5868 else if (mode == TImode)
5869 sri->icode = CODE_FOR_aarch64_reload_movti;
5870 return NO_REGS;
5871 }
5872
 5873 /* A TFmode or TImode memory access should be handled via an FP_REG
5874 because AArch64 has richer addressing modes for LDR/STR instructions
5875 than LDP/STP instructions. */
d5726973 5876 if (TARGET_FLOAT && rclass == GENERAL_REGS
43e9d192
IB
5877 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
5878 return FP_REGS;
5879
5880 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 5881 return GENERAL_REGS;
43e9d192
IB
5882
5883 return NO_REGS;
5884}
5885
5886static bool
5887aarch64_can_eliminate (const int from, const int to)
5888{
5889 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
5890 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
5891
5892 if (frame_pointer_needed)
5893 {
5894 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5895 return true;
5896 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5897 return false;
5898 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
5899 && !cfun->calls_alloca)
5900 return true;
5901 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5902 return true;
0b7f8166
MS
5903
5904 return false;
43e9d192 5905 }
1c923b60
JW
5906 else
5907 {
5908 /* If we decided that we didn't need a leaf frame pointer but then used
5909 LR in the function, then we'll want a frame pointer after all, so
5910 prevent this elimination to ensure a frame pointer is used. */
5911 if (to == STACK_POINTER_REGNUM
5912 && flag_omit_leaf_frame_pointer
5913 && df_regs_ever_live_p (LR_REGNUM))
5914 return false;
5915 }
777e6976 5916
43e9d192
IB
5917 return true;
5918}
5919
5920HOST_WIDE_INT
5921aarch64_initial_elimination_offset (unsigned from, unsigned to)
5922{
43e9d192 5923 aarch64_layout_frame ();
78c29983
MS
5924
5925 if (to == HARD_FRAME_POINTER_REGNUM)
5926 {
5927 if (from == ARG_POINTER_REGNUM)
71bfb77a 5928 return cfun->machine->frame.hard_fp_offset;
78c29983
MS
5929
5930 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
5931 return cfun->machine->frame.hard_fp_offset
5932 - cfun->machine->frame.locals_offset;
78c29983
MS
5933 }
5934
5935 if (to == STACK_POINTER_REGNUM)
5936 {
5937 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
5938 return cfun->machine->frame.frame_size
5939 - cfun->machine->frame.locals_offset;
78c29983
MS
5940 }
5941
1c960e02 5942 return cfun->machine->frame.frame_size;
43e9d192
IB
5943}
5944
43e9d192
IB
5945/* Implement RETURN_ADDR_RTX. We do not support moving back to a
5946 previous frame. */
5947
5948rtx
5949aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
5950{
5951 if (count != 0)
5952 return const0_rtx;
5953 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
5954}
5955
5956
5957static void
5958aarch64_asm_trampoline_template (FILE *f)
5959{
28514dda
YZ
5960 if (TARGET_ILP32)
5961 {
5962 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
5963 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
5964 }
5965 else
5966 {
5967 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
5968 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
5969 }
01a3a324 5970 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 5971 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
5972 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
5973 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
5974}
5975
5976static void
5977aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5978{
5979 rtx fnaddr, mem, a_tramp;
28514dda 5980 const int tramp_code_sz = 16;
43e9d192
IB
5981
 5982 /* Don't need to copy the trailing D-words; we fill those in below. */
5983 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
5984 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
5985 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 5986 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
5987 if (GET_MODE (fnaddr) != ptr_mode)
5988 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
5989 emit_move_insn (mem, fnaddr);
5990
28514dda 5991 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
5992 emit_move_insn (mem, chain_value);
5993
5994 /* XXX We should really define a "clear_cache" pattern and use
5995 gen_clear_cache(). */
5996 a_tramp = XEXP (m_tramp, 0);
5997 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
db69559b 5998 LCT_NORMAL, VOIDmode, a_tramp, ptr_mode,
28514dda
YZ
5999 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
6000 ptr_mode);
43e9d192
IB
6001}
6002
6003static unsigned char
ef4bddc2 6004aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192
IB
6005{
6006 switch (regclass)
6007 {
fee9ba42 6008 case CALLER_SAVE_REGS:
43e9d192
IB
6009 case POINTER_REGS:
6010 case GENERAL_REGS:
6011 case ALL_REGS:
6012 case FP_REGS:
6013 case FP_LO_REGS:
6014 return
7bd11911
KT
6015 aarch64_vector_mode_p (mode)
6016 ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
6017 : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
43e9d192
IB
6018 case STACK_REG:
6019 return 1;
6020
6021 case NO_REGS:
6022 return 0;
6023
6024 default:
6025 break;
6026 }
6027 gcc_unreachable ();
6028}
6029
6030static reg_class_t
78d8b9f0 6031aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 6032{
51bb310d 6033 if (regclass == POINTER_REGS)
78d8b9f0
IB
6034 return GENERAL_REGS;
6035
51bb310d
MS
6036 if (regclass == STACK_REG)
6037 {
6038 if (REG_P(x)
6039 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
6040 return regclass;
6041
6042 return NO_REGS;
6043 }
6044
27bd251b
IB
 6045 /* Register elimination can result in a request for
 6046 SP+constant->FP_REGS. We cannot support such operations, which
 6047 use SP as the source and an FP_REG as the destination, so reject
 6048 them right away. */
6049 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
6050 {
6051 rtx lhs = XEXP (x, 0);
6052
6053 /* Look through a possible SUBREG introduced by ILP32. */
6054 if (GET_CODE (lhs) == SUBREG)
6055 lhs = SUBREG_REG (lhs);
6056
6057 gcc_assert (REG_P (lhs));
6058 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
6059 POINTER_REGS));
6060 return NO_REGS;
6061 }
6062
78d8b9f0 6063 return regclass;
43e9d192
IB
6064}
6065
6066void
6067aarch64_asm_output_labelref (FILE* f, const char *name)
6068{
6069 asm_fprintf (f, "%U%s", name);
6070}
6071
6072static void
6073aarch64_elf_asm_constructor (rtx symbol, int priority)
6074{
6075 if (priority == DEFAULT_INIT_PRIORITY)
6076 default_ctor_section_asm_out_constructor (symbol, priority);
6077 else
6078 {
6079 section *s;
53d190c1
AT
 6080 /* While priority is known to be in the range [0, 65535], so 18 bytes
 6081 would be enough, the compiler might not know that. To avoid a
 6082 -Wformat-truncation false positive, use a larger size. */
6083 char buf[23];
43e9d192
IB
6084 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
6085 s = get_section (buf, SECTION_WRITE, NULL);
6086 switch_to_section (s);
6087 assemble_align (POINTER_SIZE);
28514dda 6088 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
6089 }
6090}
6091
6092static void
6093aarch64_elf_asm_destructor (rtx symbol, int priority)
6094{
6095 if (priority == DEFAULT_INIT_PRIORITY)
6096 default_dtor_section_asm_out_destructor (symbol, priority);
6097 else
6098 {
6099 section *s;
53d190c1
AT
 6100 /* While priority is known to be in the range [0, 65535], so 18 bytes
 6101 would be enough, the compiler might not know that. To avoid a
 6102 -Wformat-truncation false positive, use a larger size. */
6103 char buf[23];
43e9d192
IB
6104 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
6105 s = get_section (buf, SECTION_WRITE, NULL);
6106 switch_to_section (s);
6107 assemble_align (POINTER_SIZE);
28514dda 6108 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
6109 }
6110}
6111
6112const char*
6113aarch64_output_casesi (rtx *operands)
6114{
6115 char buf[100];
6116 char label[100];
b32d5189 6117 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
6118 int index;
6119 static const char *const patterns[4][2] =
6120 {
6121 {
6122 "ldrb\t%w3, [%0,%w1,uxtw]",
6123 "add\t%3, %4, %w3, sxtb #2"
6124 },
6125 {
6126 "ldrh\t%w3, [%0,%w1,uxtw #1]",
6127 "add\t%3, %4, %w3, sxth #2"
6128 },
6129 {
6130 "ldr\t%w3, [%0,%w1,uxtw #2]",
6131 "add\t%3, %4, %w3, sxtw #2"
6132 },
6133 /* We assume that DImode is only generated when not optimizing and
6134 that we don't really need 64-bit address offsets. That would
6135 imply an object file with 8GB of code in a single function! */
6136 {
6137 "ldr\t%w3, [%0,%w1,uxtw #2]",
6138 "add\t%3, %4, %w3, sxtw #2"
6139 }
6140 };
6141
6142 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
6143
77e994c9
RS
6144 scalar_int_mode mode = as_a <scalar_int_mode> (GET_MODE (diff_vec));
6145 index = exact_log2 (GET_MODE_SIZE (mode));
43e9d192
IB
6146
6147 gcc_assert (index >= 0 && index <= 3);
6148
 6149 /* Need to implement table size reduction, by changing the code below. */
6150 output_asm_insn (patterns[index][0], operands);
6151 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
6152 snprintf (buf, sizeof (buf),
6153 "adr\t%%4, %s", targetm.strip_name_encoding (label));
6154 output_asm_insn (buf, operands);
6155 output_asm_insn (patterns[index][1], operands);
6156 output_asm_insn ("br\t%3", operands);
6157 assemble_label (asm_out_file, label);
6158 return "";
6159}
6160
6161
6162/* Return size in bits of an arithmetic operand which is shifted/scaled and
6163 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
6164 operator. */
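/* For instance (illustrative values): aarch64_uxt_size (1, 0x1fe) returns 8,
 since 0x1fe == 0xff << 1 (a byte operand shifted left by one, i.e. a
 UXTB-style extend), and aarch64_uxt_size (2, 0x3fffc) returns 16 (UXTH). */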
6165
6166int
6167aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
6168{
6169 if (shift >= 0 && shift <= 3)
6170 {
6171 int size;
6172 for (size = 8; size <= 32; size *= 2)
6173 {
6174 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
6175 if (mask == bits << shift)
6176 return size;
6177 }
6178 }
6179 return 0;
6180}
6181
e78d485e
RR
 6182/* Constant pools are per-function only when PC-relative
 6183 literal loads are enabled or we are using the large memory
 6184 model. */
6185
6186static inline bool
6187aarch64_can_use_per_function_literal_pools_p (void)
6188{
9ee6540a 6189 return (aarch64_pcrelative_literal_loads
e78d485e
RR
6190 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
6191}
6192
43e9d192 6193static bool
e78d485e 6194aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
43e9d192 6195{
3eece53d
RR
 6196 /* FIXME: In an ideal world this would work similarly
 6197 to the logic in aarch64_select_rtx_section, but this
 6198 breaks bootstrap in the GCC Go front end. For now we work
 6199 around this by returning false here. */
6200 return false;
43e9d192
IB
6201}
6202
e78d485e
RR
6203/* Select appropriate section for constants depending
6204 on where we place literal pools. */
6205
43e9d192 6206static section *
e78d485e
RR
6207aarch64_select_rtx_section (machine_mode mode,
6208 rtx x,
6209 unsigned HOST_WIDE_INT align)
43e9d192 6210{
e78d485e
RR
6211 if (aarch64_can_use_per_function_literal_pools_p ())
6212 return function_section (current_function_decl);
43e9d192 6213
e78d485e
RR
6214 return default_elf_select_rtx_section (mode, x, align);
6215}
43e9d192 6216
5fca7b66
RH
6217/* Implement ASM_OUTPUT_POOL_EPILOGUE. */
6218void
6219aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
6220 HOST_WIDE_INT offset)
6221{
6222 /* When using per-function literal pools, we must ensure that any code
6223 section is aligned to the minimal instruction length, lest we get
6224 errors from the assembler re "unaligned instructions". */
6225 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
6226 ASM_OUTPUT_ALIGN (f, 2);
6227}
6228
43e9d192
IB
6229/* Costs. */
6230
6231/* Helper function for rtx cost calculation. Strip a shift expression
6232 from X. Returns the inner operand if successful, or the original
6233 expression on failure. */
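/* For example (illustrative): both (ashift (reg A) (const_int 3)) and
 (mult (reg A) (const_int 8)) are stripped to (reg A), since a multiply by
 a power of two is treated as a shift for costing purposes. */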
6234static rtx
6235aarch64_strip_shift (rtx x)
6236{
6237 rtx op = x;
6238
57b77d46
RE
6239 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
6240 we can convert both to ROR during final output. */
43e9d192
IB
6241 if ((GET_CODE (op) == ASHIFT
6242 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
6243 || GET_CODE (op) == LSHIFTRT
6244 || GET_CODE (op) == ROTATERT
6245 || GET_CODE (op) == ROTATE)
43e9d192
IB
6246 && CONST_INT_P (XEXP (op, 1)))
6247 return XEXP (op, 0);
6248
6249 if (GET_CODE (op) == MULT
6250 && CONST_INT_P (XEXP (op, 1))
6251 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
6252 return XEXP (op, 0);
6253
6254 return x;
6255}
6256
4745e701 6257/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
6258 expression from X. Returns the inner operand if successful, or the
6259 original expression on failure. We deal with a number of possible
b10f1009
AP
6260 canonicalization variations here. If STRIP_SHIFT is true, then
6261 we can strip off a shift also. */
43e9d192 6262static rtx
b10f1009 6263aarch64_strip_extend (rtx x, bool strip_shift)
43e9d192 6264{
77e994c9 6265 scalar_int_mode mode;
43e9d192
IB
6266 rtx op = x;
6267
77e994c9
RS
6268 if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
6269 return op;
6270
43e9d192
IB
6271 /* Zero and sign extraction of a widened value. */
6272 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
6273 && XEXP (op, 2) == const0_rtx
4745e701 6274 && GET_CODE (XEXP (op, 0)) == MULT
77e994c9 6275 && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
43e9d192
IB
6276 XEXP (op, 1)))
6277 return XEXP (XEXP (op, 0), 0);
6278
6279 /* It can also be represented (for zero-extend) as an AND with an
6280 immediate. */
6281 if (GET_CODE (op) == AND
6282 && GET_CODE (XEXP (op, 0)) == MULT
6283 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
6284 && CONST_INT_P (XEXP (op, 1))
6285 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
6286 INTVAL (XEXP (op, 1))) != 0)
6287 return XEXP (XEXP (op, 0), 0);
6288
6289 /* Now handle extended register, as this may also have an optional
6290 left shift by 1..4. */
b10f1009
AP
6291 if (strip_shift
6292 && GET_CODE (op) == ASHIFT
43e9d192
IB
6293 && CONST_INT_P (XEXP (op, 1))
6294 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
6295 op = XEXP (op, 0);
6296
6297 if (GET_CODE (op) == ZERO_EXTEND
6298 || GET_CODE (op) == SIGN_EXTEND)
6299 op = XEXP (op, 0);
6300
6301 if (op != x)
6302 return op;
6303
4745e701
JG
6304 return x;
6305}
6306
0a78ebe4
KT
6307/* Return true iff CODE is a shift supported in combination
6308 with arithmetic instructions. */
4d1919ed 6309
0a78ebe4
KT
6310static bool
6311aarch64_shift_p (enum rtx_code code)
6312{
6313 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
6314}
6315
b10f1009
AP
6316
6317/* Return true iff X is a cheap shift without a sign extend. */
6318
6319static bool
6320aarch64_cheap_mult_shift_p (rtx x)
6321{
6322 rtx op0, op1;
6323
6324 op0 = XEXP (x, 0);
6325 op1 = XEXP (x, 1);
6326
6327 if (!(aarch64_tune_params.extra_tuning_flags
6328 & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
6329 return false;
6330
6331 if (GET_CODE (op0) == SIGN_EXTEND)
6332 return false;
6333
6334 if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
6335 && UINTVAL (op1) <= 4)
6336 return true;
6337
6338 if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
6339 return false;
6340
6341 HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
6342
6343 if (l2 > 0 && l2 <= 4)
6344 return true;
6345
6346 return false;
6347}
6348
4745e701 6349/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
6350 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
6351 Return the calculated cost of the expression, recursing manually in to
4745e701
JG
6352 operands where needed. */
6353
6354static int
e548c9df 6355aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
6356{
6357 rtx op0, op1;
6358 const struct cpu_cost_table *extra_cost
b175b679 6359 = aarch64_tune_params.insn_extra_cost;
4745e701 6360 int cost = 0;
0a78ebe4 6361 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 6362 machine_mode mode = GET_MODE (x);
4745e701
JG
6363
6364 gcc_checking_assert (code == MULT);
6365
6366 op0 = XEXP (x, 0);
6367 op1 = XEXP (x, 1);
6368
6369 if (VECTOR_MODE_P (mode))
6370 mode = GET_MODE_INNER (mode);
6371
6372 /* Integer multiply/fma. */
6373 if (GET_MODE_CLASS (mode) == MODE_INT)
6374 {
6375 /* The multiply will be canonicalized as a shift, cost it as such. */
0a78ebe4
KT
6376 if (aarch64_shift_p (GET_CODE (x))
6377 || (CONST_INT_P (op1)
6378 && exact_log2 (INTVAL (op1)) > 0))
4745e701 6379 {
0a78ebe4
KT
6380 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
6381 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
6382 if (speed)
6383 {
0a78ebe4
KT
6384 if (compound_p)
6385 {
b10f1009
AP
6386 /* If the shift is considered cheap,
6387 then don't add any cost. */
6388 if (aarch64_cheap_mult_shift_p (x))
6389 ;
6390 else if (REG_P (op1))
0a78ebe4
KT
6391 /* ARITH + shift-by-register. */
6392 cost += extra_cost->alu.arith_shift_reg;
6393 else if (is_extend)
6394 /* ARITH + extended register. We don't have a cost field
6395 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
6396 cost += extra_cost->alu.extend_arith;
6397 else
6398 /* ARITH + shift-by-immediate. */
6399 cost += extra_cost->alu.arith_shift;
6400 }
4745e701
JG
6401 else
6402 /* LSL (immediate). */
0a78ebe4
KT
6403 cost += extra_cost->alu.shift;
6404
4745e701 6405 }
0a78ebe4
KT
6406 /* Strip extends as we will have costed them in the case above. */
6407 if (is_extend)
b10f1009 6408 op0 = aarch64_strip_extend (op0, true);
4745e701 6409
e548c9df 6410 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
6411
6412 return cost;
6413 }
6414
d2ac256b
KT
6415 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
6416 compound and let the below cases handle it. After all, MNEG is a
6417 special-case alias of MSUB. */
6418 if (GET_CODE (op0) == NEG)
6419 {
6420 op0 = XEXP (op0, 0);
6421 compound_p = true;
6422 }
6423
4745e701
JG
6424 /* Integer multiplies or FMAs have zero/sign extending variants. */
6425 if ((GET_CODE (op0) == ZERO_EXTEND
6426 && GET_CODE (op1) == ZERO_EXTEND)
6427 || (GET_CODE (op0) == SIGN_EXTEND
6428 && GET_CODE (op1) == SIGN_EXTEND))
6429 {
e548c9df
AM
6430 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
6431 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
6432
6433 if (speed)
6434 {
0a78ebe4 6435 if (compound_p)
d2ac256b 6436 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
6437 cost += extra_cost->mult[0].extend_add;
6438 else
6439 /* MUL/SMULL/UMULL. */
6440 cost += extra_cost->mult[0].extend;
6441 }
6442
6443 return cost;
6444 }
6445
d2ac256b 6446 /* This is either an integer multiply or a MADD. In both cases
4745e701 6447 we want to recurse and cost the operands. */
e548c9df
AM
6448 cost += rtx_cost (op0, mode, MULT, 0, speed);
6449 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
6450
6451 if (speed)
6452 {
0a78ebe4 6453 if (compound_p)
d2ac256b 6454 /* MADD/MSUB. */
4745e701
JG
6455 cost += extra_cost->mult[mode == DImode].add;
6456 else
6457 /* MUL. */
6458 cost += extra_cost->mult[mode == DImode].simple;
6459 }
6460
6461 return cost;
6462 }
6463 else
6464 {
6465 if (speed)
6466 {
3d840f7d 6467 /* Floating-point FMA/FMUL can also support negations of the
d318517d
SN
 6468 operands, unless the rounding mode is upward or downward, in
 6469 which case FNMUL is different from FMUL with operand negation. */
6470 bool neg0 = GET_CODE (op0) == NEG;
6471 bool neg1 = GET_CODE (op1) == NEG;
6472 if (compound_p || !flag_rounding_math || (neg0 && neg1))
6473 {
6474 if (neg0)
6475 op0 = XEXP (op0, 0);
6476 if (neg1)
6477 op1 = XEXP (op1, 0);
6478 }
4745e701 6479
0a78ebe4 6480 if (compound_p)
4745e701
JG
6481 /* FMADD/FNMADD/FNMSUB/FMSUB. */
6482 cost += extra_cost->fp[mode == DFmode].fma;
6483 else
3d840f7d 6484 /* FMUL/FNMUL. */
4745e701
JG
6485 cost += extra_cost->fp[mode == DFmode].mult;
6486 }
6487
e548c9df
AM
6488 cost += rtx_cost (op0, mode, MULT, 0, speed);
6489 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
6490 return cost;
6491 }
43e9d192
IB
6492}
6493
67747367
JG
6494static int
6495aarch64_address_cost (rtx x,
ef4bddc2 6496 machine_mode mode,
67747367
JG
6497 addr_space_t as ATTRIBUTE_UNUSED,
6498 bool speed)
6499{
6500 enum rtx_code c = GET_CODE (x);
b175b679 6501 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
6502 struct aarch64_address_info info;
6503 int cost = 0;
6504 info.shift = 0;
6505
6506 if (!aarch64_classify_address (&info, x, mode, c, false))
6507 {
6508 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
6509 {
6510 /* This is a CONST or SYMBOL ref which will be split
6511 in a different way depending on the code model in use.
6512 Cost it through the generic infrastructure. */
e548c9df 6513 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
6514 /* Divide through by the cost of one instruction to
6515 bring it to the same units as the address costs. */
6516 cost_symbol_ref /= COSTS_N_INSNS (1);
6517 /* The cost is then the cost of preparing the address,
6518 followed by an immediate (possibly 0) offset. */
6519 return cost_symbol_ref + addr_cost->imm_offset;
6520 }
6521 else
6522 {
6523 /* This is most likely a jump table from a case
6524 statement. */
6525 return addr_cost->register_offset;
6526 }
6527 }
6528
6529 switch (info.type)
6530 {
6531 case ADDRESS_LO_SUM:
6532 case ADDRESS_SYMBOLIC:
6533 case ADDRESS_REG_IMM:
6534 cost += addr_cost->imm_offset;
6535 break;
6536
6537 case ADDRESS_REG_WB:
6538 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
6539 cost += addr_cost->pre_modify;
6540 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
6541 cost += addr_cost->post_modify;
6542 else
6543 gcc_unreachable ();
6544
6545 break;
6546
6547 case ADDRESS_REG_REG:
6548 cost += addr_cost->register_offset;
6549 break;
6550
67747367 6551 case ADDRESS_REG_SXTW:
783879e6
EM
6552 cost += addr_cost->register_sextend;
6553 break;
6554
6555 case ADDRESS_REG_UXTW:
6556 cost += addr_cost->register_zextend;
67747367
JG
6557 break;
6558
6559 default:
6560 gcc_unreachable ();
6561 }
6562
6563
6564 if (info.shift > 0)
6565 {
6566 /* For the sake of calculating the cost of the shifted register
6567 component, we can treat same sized modes in the same way. */
6568 switch (GET_MODE_BITSIZE (mode))
6569 {
6570 case 16:
6571 cost += addr_cost->addr_scale_costs.hi;
6572 break;
6573
6574 case 32:
6575 cost += addr_cost->addr_scale_costs.si;
6576 break;
6577
6578 case 64:
6579 cost += addr_cost->addr_scale_costs.di;
6580 break;
6581
6582 /* We can't tell, or this is a 128-bit vector. */
6583 default:
6584 cost += addr_cost->addr_scale_costs.ti;
6585 break;
6586 }
6587 }
6588
6589 return cost;
6590}
6591
b9066f5a
MW
6592/* Return the cost of a branch. If SPEED_P is true then the compiler is
6593 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
6594 to be taken. */
6595
6596int
6597aarch64_branch_cost (bool speed_p, bool predictable_p)
6598{
6599 /* When optimizing for speed, use the cost of unpredictable branches. */
6600 const struct cpu_branch_cost *branch_costs =
b175b679 6601 aarch64_tune_params.branch_costs;
b9066f5a
MW
6602
6603 if (!speed_p || predictable_p)
6604 return branch_costs->predictable;
6605 else
6606 return branch_costs->unpredictable;
6607}
6608
7cc2145f
JG
6609/* Return true if the RTX X in mode MODE is a zero or sign extract
6610 usable in an ADD or SUB (extended register) instruction. */
6611static bool
77e994c9 6612aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
7cc2145f
JG
6613{
6614 /* Catch add with a sign extract.
6615 This is add_<optab><mode>_multp2. */
6616 if (GET_CODE (x) == SIGN_EXTRACT
6617 || GET_CODE (x) == ZERO_EXTRACT)
6618 {
6619 rtx op0 = XEXP (x, 0);
6620 rtx op1 = XEXP (x, 1);
6621 rtx op2 = XEXP (x, 2);
6622
6623 if (GET_CODE (op0) == MULT
6624 && CONST_INT_P (op1)
6625 && op2 == const0_rtx
6626 && CONST_INT_P (XEXP (op0, 1))
6627 && aarch64_is_extend_from_extract (mode,
6628 XEXP (op0, 1),
6629 op1))
6630 {
6631 return true;
6632 }
6633 }
e47c4031
KT
6634 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
6635 No shift. */
6636 else if (GET_CODE (x) == SIGN_EXTEND
6637 || GET_CODE (x) == ZERO_EXTEND)
6638 return REG_P (XEXP (x, 0));
7cc2145f
JG
6639
6640 return false;
6641}
6642
61263118
KT
6643static bool
6644aarch64_frint_unspec_p (unsigned int u)
6645{
6646 switch (u)
6647 {
6648 case UNSPEC_FRINTZ:
6649 case UNSPEC_FRINTP:
6650 case UNSPEC_FRINTM:
6651 case UNSPEC_FRINTA:
6652 case UNSPEC_FRINTN:
6653 case UNSPEC_FRINTX:
6654 case UNSPEC_FRINTI:
6655 return true;
6656
6657 default:
6658 return false;
6659 }
6660}
6661
fb0cb7fa
KT
6662/* Return true iff X is an rtx that will match an extr instruction
6663 i.e. as described in the *extr<mode>5_insn family of patterns.
6664 OP0 and OP1 will be set to the operands of the shifts involved
6665 on success and will be NULL_RTX otherwise. */
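/* For instance (an illustrative SImode case): (ior (ashift (reg A) (const_int 16))
 (lshiftrt (reg B) (const_int 16))) matches, because the two shift amounts
 sum to the mode bitsize; it corresponds to an "extr w0, wA, wB, 16" style
 instruction. */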
6666
6667static bool
6668aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
6669{
6670 rtx op0, op1;
77e994c9
RS
6671 scalar_int_mode mode;
6672 if (!is_a <scalar_int_mode> (GET_MODE (x), &mode))
6673 return false;
fb0cb7fa
KT
6674
6675 *res_op0 = NULL_RTX;
6676 *res_op1 = NULL_RTX;
6677
6678 if (GET_CODE (x) != IOR)
6679 return false;
6680
6681 op0 = XEXP (x, 0);
6682 op1 = XEXP (x, 1);
6683
6684 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
6685 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
6686 {
6687 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
6688 if (GET_CODE (op1) == ASHIFT)
6689 std::swap (op0, op1);
6690
6691 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
6692 return false;
6693
6694 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
6695 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
6696
6697 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
6698 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
6699 {
6700 *res_op0 = XEXP (op0, 0);
6701 *res_op1 = XEXP (op1, 0);
6702 return true;
6703 }
6704 }
6705
6706 return false;
6707}
6708
2d5ffe46
AP
6709/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
6710 storing it in *COST. Result is true if the total cost of the operation
6711 has now been calculated. */
6712static bool
6713aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
6714{
b9e3afe9
AP
6715 rtx inner;
6716 rtx comparator;
6717 enum rtx_code cmpcode;
6718
6719 if (COMPARISON_P (op0))
6720 {
6721 inner = XEXP (op0, 0);
6722 comparator = XEXP (op0, 1);
6723 cmpcode = GET_CODE (op0);
6724 }
6725 else
6726 {
6727 inner = op0;
6728 comparator = const0_rtx;
6729 cmpcode = NE;
6730 }
6731
2d5ffe46
AP
6732 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
6733 {
6734 /* Conditional branch. */
b9e3afe9 6735 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
6736 return true;
6737 else
6738 {
b9e3afe9 6739 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 6740 {
2d5ffe46
AP
6741 if (comparator == const0_rtx)
6742 {
6743 /* TBZ/TBNZ/CBZ/CBNZ. */
6744 if (GET_CODE (inner) == ZERO_EXTRACT)
6745 /* TBZ/TBNZ. */
e548c9df
AM
6746 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
6747 ZERO_EXTRACT, 0, speed);
6748 else
6749 /* CBZ/CBNZ. */
6750 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
6751
6752 return true;
6753 }
6754 }
b9e3afe9 6755 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 6756 {
2d5ffe46
AP
6757 /* TBZ/TBNZ. */
6758 if (comparator == const0_rtx)
6759 return true;
6760 }
6761 }
6762 }
b9e3afe9 6763 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46 6764 {
786298dc 6765 /* CCMP. */
6dfeb7ce 6766 if (GET_CODE (op1) == COMPARE)
786298dc
WD
6767 {
6768 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
6769 if (XEXP (op1, 1) == const0_rtx)
6770 *cost += 1;
6771 if (speed)
6772 {
6773 machine_mode mode = GET_MODE (XEXP (op1, 0));
6774 const struct cpu_cost_table *extra_cost
6775 = aarch64_tune_params.insn_extra_cost;
6776
6777 if (GET_MODE_CLASS (mode) == MODE_INT)
6778 *cost += extra_cost->alu.arith;
6779 else
6780 *cost += extra_cost->fp[mode == DFmode].compare;
6781 }
6782 return true;
6783 }
6784
2d5ffe46
AP
6785 /* It's a conditional operation based on the status flags,
6786 so it must be some flavor of CSEL. */
6787
6788 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
6789 if (GET_CODE (op1) == NEG
6790 || GET_CODE (op1) == NOT
6791 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
6792 op1 = XEXP (op1, 0);
bad00732
KT
6793 else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
6794 {
6795 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
6796 op1 = XEXP (op1, 0);
6797 op2 = XEXP (op2, 0);
6798 }
2d5ffe46 6799
e548c9df
AM
6800 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
6801 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
6802 return true;
6803 }
6804
6805 /* We don't know what this is, cost all operands. */
6806 return false;
6807}
6808
283b6c85
KT
6809/* Check whether X is a bitfield operation of the form shift + extend that
6810 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
6811 operand to which the bitfield operation is applied. Otherwise return
6812 NULL_RTX. */
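/* For example (illustrative): (zero_extend:SI (lshiftrt:HI (reg R) (const_int 3)))
 is such a bitfield operation -- it can be implemented with a UBFX-style
 extract -- and (reg R) is the operand returned. */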
6813
6814static rtx
6815aarch64_extend_bitfield_pattern_p (rtx x)
6816{
6817 rtx_code outer_code = GET_CODE (x);
6818 machine_mode outer_mode = GET_MODE (x);
6819
6820 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
6821 && outer_mode != SImode && outer_mode != DImode)
6822 return NULL_RTX;
6823
6824 rtx inner = XEXP (x, 0);
6825 rtx_code inner_code = GET_CODE (inner);
6826 machine_mode inner_mode = GET_MODE (inner);
6827 rtx op = NULL_RTX;
6828
6829 switch (inner_code)
6830 {
6831 case ASHIFT:
6832 if (CONST_INT_P (XEXP (inner, 1))
6833 && (inner_mode == QImode || inner_mode == HImode))
6834 op = XEXP (inner, 0);
6835 break;
6836 case LSHIFTRT:
6837 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
6838 && (inner_mode == QImode || inner_mode == HImode))
6839 op = XEXP (inner, 0);
6840 break;
6841 case ASHIFTRT:
6842 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
6843 && (inner_mode == QImode || inner_mode == HImode))
6844 op = XEXP (inner, 0);
6845 break;
6846 default:
6847 break;
6848 }
6849
6850 return op;
6851}
6852
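/* For example (one accepted shape, shown here only as a sketch):

     (zero_extend:SI (lshiftrt:HI (reg:HI r) (const_int 3)))

   is matched by the LSHIFTRT arm above and (reg:HI r) is returned; the
   shift-plus-extend pair corresponds to a single UBFX instruction.  */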
8c83f71d
KT
6853/* Return true if the mask and a shift amount from an RTX of the form
6854 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
6855 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
6856
6857bool
77e994c9
RS
6858aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
6859 rtx shft_amnt)
8c83f71d
KT
6860{
6861 return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
6862 && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
6863 && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
6864 && (INTVAL (mask) & ((1 << INTVAL (shft_amnt)) - 1)) == 0;
6865}
6866
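/* A worked example (illustrative only): in SImode, mask = 0x1f8 with
   shft_amnt = 3 is accepted, since (0x1f8 >> 3) + 1 == 0x40 == 2^6 and
   the low three bits of the mask are clear; the pattern then corresponds
   to a UBFIZ with lsb 3 and width 6.  */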
43e9d192
IB
6867/* Calculate the cost of calculating X, storing it in *COST. Result
6868 is true if the total cost of the operation has now been calculated. */
6869static bool
e548c9df 6870aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
6871 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
6872{
a8eecd00 6873 rtx op0, op1, op2;
73250c4c 6874 const struct cpu_cost_table *extra_cost
b175b679 6875 = aarch64_tune_params.insn_extra_cost;
e548c9df 6876 int code = GET_CODE (x);
b4206259 6877 scalar_int_mode int_mode;
43e9d192 6878
7fc5ef02
JG
6879 /* By default, assume that everything has equivalent cost to the
6880 cheapest instruction. Any additional costs are applied as a delta
6881 above this default. */
6882 *cost = COSTS_N_INSNS (1);
6883
43e9d192
IB
6884 switch (code)
6885 {
6886 case SET:
ba123b0d
JG
6887 /* The cost depends entirely on the operands to SET. */
6888 *cost = 0;
43e9d192
IB
6889 op0 = SET_DEST (x);
6890 op1 = SET_SRC (x);
6891
6892 switch (GET_CODE (op0))
6893 {
6894 case MEM:
6895 if (speed)
2961177e
JG
6896 {
6897 rtx address = XEXP (op0, 0);
b6875aac
KV
6898 if (VECTOR_MODE_P (mode))
6899 *cost += extra_cost->ldst.storev;
6900 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
6901 *cost += extra_cost->ldst.store;
6902 else if (mode == SFmode)
6903 *cost += extra_cost->ldst.storef;
6904 else if (mode == DFmode)
6905 *cost += extra_cost->ldst.stored;
6906
6907 *cost +=
6908 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6909 0, speed));
6910 }
43e9d192 6911
e548c9df 6912 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
6913 return true;
6914
6915 case SUBREG:
6916 if (! REG_P (SUBREG_REG (op0)))
e548c9df 6917 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 6918
43e9d192
IB
6919 /* Fall through. */
6920 case REG:
b6875aac
KV
6921 /* The cost is one per vector-register copied. */
6922 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
6923 {
6924 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
6925 / GET_MODE_SIZE (V4SImode);
6926 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6927 }
ba123b0d
JG
6928 /* const0_rtx is in general free, but we will use an
6929 instruction to set a register to 0. */
b6875aac
KV
6930 else if (REG_P (op1) || op1 == const0_rtx)
6931 {
6932 /* The cost is 1 per register copied. */
6933 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
ba123b0d 6934 / UNITS_PER_WORD;
b6875aac
KV
6935 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6936 }
ba123b0d
JG
6937 else
6938 /* Cost is just the cost of the RHS of the set. */
e548c9df 6939 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
6940 return true;
6941
ba123b0d 6942 case ZERO_EXTRACT:
43e9d192 6943 case SIGN_EXTRACT:
ba123b0d
JG
6944 /* Bit-field insertion. Strip any redundant widening of
6945 the RHS to meet the width of the target. */
43e9d192
IB
6946 if (GET_CODE (op1) == SUBREG)
6947 op1 = SUBREG_REG (op1);
6948 if ((GET_CODE (op1) == ZERO_EXTEND
6949 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 6950 && CONST_INT_P (XEXP (op0, 1))
77e994c9
RS
6951 && is_a <scalar_int_mode> (GET_MODE (XEXP (op1, 0)), &int_mode)
6952 && GET_MODE_BITSIZE (int_mode) >= INTVAL (XEXP (op0, 1)))
43e9d192 6953 op1 = XEXP (op1, 0);
ba123b0d
JG
6954
6955 if (CONST_INT_P (op1))
6956 {
6957 /* MOV immediate is assumed to always be cheap. */
6958 *cost = COSTS_N_INSNS (1);
6959 }
6960 else
6961 {
6962 /* BFM. */
6963 if (speed)
6964 *cost += extra_cost->alu.bfi;
e548c9df 6965 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
6966 }
6967
43e9d192
IB
6968 return true;
6969
6970 default:
ba123b0d
JG
6971 /* We can't make sense of this, assume default cost. */
6972 *cost = COSTS_N_INSNS (1);
61263118 6973 return false;
43e9d192
IB
6974 }
6975 return false;
6976
9dfc162c
JG
6977 case CONST_INT:
6978 /* If an instruction can incorporate a constant within the
6979 instruction, the instruction's expression avoids calling
6980 rtx_cost() on the constant. If rtx_cost() is called on a
6981 constant, then it is usually because the constant must be
6982 moved into a register by one or more instructions.
6983
6984 The exception is constant 0, which can be expressed
6985 as XZR/WZR and is therefore free. The exception to this is
6986 if we have (set (reg) (const0_rtx)) in which case we must cost
6987 the move. However, we can catch that when we cost the SET, so
6988 we don't need to consider that here. */
6989 if (x == const0_rtx)
6990 *cost = 0;
6991 else
6992 {
6993 /* To an approximation, building any other constant is
6994 proportionally expensive to the number of instructions
6995 required to build that constant. This is true whether we
6996 are compiling for SPEED or otherwise. */
77e994c9
RS
6997 if (!is_a <scalar_int_mode> (mode, &int_mode))
6998 int_mode = word_mode;
82614948 6999 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
77e994c9 7000 (NULL_RTX, x, false, int_mode));
9dfc162c
JG
7001 }
7002 return true;
7003
7004 case CONST_DOUBLE:
a2170965
TC
7005
7006 /* First determine number of instructions to do the move
7007 as an integer constant. */
7008 if (!aarch64_float_const_representable_p (x)
7009 && !aarch64_can_const_movi_rtx_p (x, mode)
7010 && aarch64_float_const_rtx_p (x))
7011 {
7012 unsigned HOST_WIDE_INT ival;
7013 bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
7014 gcc_assert (succeed);
7015
77e994c9
RS
7016 scalar_int_mode imode = (mode == HFmode
7017 ? SImode
7018 : int_mode_for_mode (mode).require ());
a2170965
TC
7019 int ncost = aarch64_internal_mov_immediate
7020 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
7021 *cost += COSTS_N_INSNS (ncost);
7022 return true;
7023 }
7024
9dfc162c
JG
7025 if (speed)
7026 {
7027 /* mov[df,sf]_aarch64. */
7028 if (aarch64_float_const_representable_p (x))
7029 /* FMOV (scalar immediate). */
7030 *cost += extra_cost->fp[mode == DFmode].fpconst;
7031 else if (!aarch64_float_const_zero_rtx_p (x))
7032 {
7033 /* This will be a load from memory. */
7034 if (mode == DFmode)
7035 *cost += extra_cost->ldst.loadd;
7036 else
7037 *cost += extra_cost->ldst.loadf;
7038 }
7039 else
7040 /* Otherwise this is +0.0. We get this using MOVI d0, #0
7041 or MOV v0.s[0], wzr - neither of which are modeled by the
7042 cost tables. Just use the default cost. */
7043 {
7044 }
7045 }
7046
7047 return true;
7048
43e9d192
IB
7049 case MEM:
7050 if (speed)
2961177e
JG
7051 {
7052 /* For loads we want the base cost of a load, plus an
7053 approximation for the additional cost of the addressing
7054 mode. */
7055 rtx address = XEXP (x, 0);
b6875aac
KV
7056 if (VECTOR_MODE_P (mode))
7057 *cost += extra_cost->ldst.loadv;
7058 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
7059 *cost += extra_cost->ldst.load;
7060 else if (mode == SFmode)
7061 *cost += extra_cost->ldst.loadf;
7062 else if (mode == DFmode)
7063 *cost += extra_cost->ldst.loadd;
7064
7065 *cost +=
7066 COSTS_N_INSNS (aarch64_address_cost (address, mode,
7067 0, speed));
7068 }
43e9d192
IB
7069
7070 return true;
7071
7072 case NEG:
4745e701
JG
7073 op0 = XEXP (x, 0);
7074
b6875aac
KV
7075 if (VECTOR_MODE_P (mode))
7076 {
7077 if (speed)
7078 {
7079 /* FNEG. */
7080 *cost += extra_cost->vect.alu;
7081 }
7082 return false;
7083 }
7084
e548c9df
AM
7085 if (GET_MODE_CLASS (mode) == MODE_INT)
7086 {
4745e701
JG
7087 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
7088 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
7089 {
7090 /* CSETM. */
e548c9df 7091 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
7092 return true;
7093 }
7094
7095 /* Cost this as SUB wzr, X. */
e548c9df 7096 op0 = CONST0_RTX (mode);
4745e701
JG
7097 op1 = XEXP (x, 0);
7098 goto cost_minus;
7099 }
7100
e548c9df 7101 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
7102 {
7103 /* Support (neg(fma...)) as a single instruction only if
7104 sign of zeros is unimportant. This matches the decision
7105 making in aarch64.md. */
7106 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
7107 {
7108 /* FNMADD. */
e548c9df 7109 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
7110 return true;
7111 }
d318517d
SN
7112 if (GET_CODE (op0) == MULT)
7113 {
7114 /* FNMUL. */
7115 *cost = rtx_cost (op0, mode, NEG, 0, speed);
7116 return true;
7117 }
4745e701
JG
7118 if (speed)
7119 /* FNEG. */
7120 *cost += extra_cost->fp[mode == DFmode].neg;
7121 return false;
7122 }
7123
7124 return false;
43e9d192 7125
781aeb73
KT
7126 case CLRSB:
7127 case CLZ:
7128 if (speed)
b6875aac
KV
7129 {
7130 if (VECTOR_MODE_P (mode))
7131 *cost += extra_cost->vect.alu;
7132 else
7133 *cost += extra_cost->alu.clz;
7134 }
781aeb73
KT
7135
7136 return false;
7137
43e9d192
IB
7138 case COMPARE:
7139 op0 = XEXP (x, 0);
7140 op1 = XEXP (x, 1);
7141
7142 if (op1 == const0_rtx
7143 && GET_CODE (op0) == AND)
7144 {
7145 x = op0;
e548c9df 7146 mode = GET_MODE (op0);
43e9d192
IB
7147 goto cost_logic;
7148 }
7149
a8eecd00
JG
7150 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
7151 {
7152 /* TODO: A write to the CC flags possibly costs extra, this
7153 needs encoding in the cost tables. */
7154
e548c9df 7155 mode = GET_MODE (op0);
a8eecd00
JG
7156 /* ANDS. */
7157 if (GET_CODE (op0) == AND)
7158 {
7159 x = op0;
7160 goto cost_logic;
7161 }
7162
7163 if (GET_CODE (op0) == PLUS)
7164 {
7165 /* ADDS (and CMN alias). */
7166 x = op0;
7167 goto cost_plus;
7168 }
7169
7170 if (GET_CODE (op0) == MINUS)
7171 {
7172 /* SUBS. */
7173 x = op0;
7174 goto cost_minus;
7175 }
7176
345854d8
KT
7177 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
7178 && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
7179 && CONST_INT_P (XEXP (op0, 2)))
7180 {
7181 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
7182 Handle it here directly rather than going to cost_logic
7183 since we know the immediate generated for the TST is valid
7184 so we can avoid creating an intermediate rtx for it only
7185 for costing purposes. */
7186 if (speed)
7187 *cost += extra_cost->alu.logical;
7188
7189 *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
7190 ZERO_EXTRACT, 0, speed);
7191 return true;
7192 }
7193
a8eecd00
JG
7194 if (GET_CODE (op1) == NEG)
7195 {
7196 /* CMN. */
7197 if (speed)
7198 *cost += extra_cost->alu.arith;
7199
e548c9df
AM
7200 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
7201 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
7202 return true;
7203 }
7204
7205 /* CMP.
7206
7207 Compare can freely swap the order of operands, and
7208 canonicalization puts the more complex operation first.
7209 But the integer MINUS logic expects the shift/extend
7210 operation in op1. */
7211 if (! (REG_P (op0)
7212 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
7213 {
7214 op0 = XEXP (x, 1);
7215 op1 = XEXP (x, 0);
7216 }
7217 goto cost_minus;
7218 }
7219
7220 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7221 {
7222 /* FCMP. */
7223 if (speed)
7224 *cost += extra_cost->fp[mode == DFmode].compare;
7225
7226 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
7227 {
e548c9df 7228 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
7229 /* FCMP supports constant 0.0 for no extra cost. */
7230 return true;
7231 }
7232 return false;
7233 }
7234
b6875aac
KV
7235 if (VECTOR_MODE_P (mode))
7236 {
7237 /* Vector compare. */
7238 if (speed)
7239 *cost += extra_cost->vect.alu;
7240
7241 if (aarch64_float_const_zero_rtx_p (op1))
7242 {
7243 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
7244 cost. */
7245 return true;
7246 }
7247 return false;
7248 }
a8eecd00 7249 return false;
43e9d192
IB
7250
7251 case MINUS:
4745e701
JG
7252 {
7253 op0 = XEXP (x, 0);
7254 op1 = XEXP (x, 1);
7255
7256cost_minus:
e548c9df 7257 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 7258
4745e701
JG
7259 /* Detect valid immediates. */
7260 if ((GET_MODE_CLASS (mode) == MODE_INT
7261 || (GET_MODE_CLASS (mode) == MODE_CC
7262 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
7263 && CONST_INT_P (op1)
7264 && aarch64_uimm12_shift (INTVAL (op1)))
7265 {
4745e701
JG
7266 if (speed)
7267 /* SUB(S) (immediate). */
7268 *cost += extra_cost->alu.arith;
7269 return true;
4745e701
JG
7270 }
7271
7cc2145f 7272 /* Look for SUB (extended register). */
77e994c9
RS
7273 if (is_a <scalar_int_mode> (mode, &int_mode)
7274 && aarch64_rtx_arith_op_extract_p (op1, int_mode))
7cc2145f
JG
7275 {
7276 if (speed)
2533c820 7277 *cost += extra_cost->alu.extend_arith;
7cc2145f 7278
b10f1009 7279 op1 = aarch64_strip_extend (op1, true);
e47c4031 7280 *cost += rtx_cost (op1, VOIDmode,
e548c9df 7281 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
7282 return true;
7283 }
7284
b10f1009 7285 rtx new_op1 = aarch64_strip_extend (op1, false);
4745e701
JG
7286
7287 /* Cost this as an FMA-alike operation. */
7288 if ((GET_CODE (new_op1) == MULT
0a78ebe4 7289 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
7290 && code != COMPARE)
7291 {
7292 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
7293 (enum rtx_code) code,
7294 speed);
4745e701
JG
7295 return true;
7296 }
43e9d192 7297
e548c9df 7298 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 7299
4745e701
JG
7300 if (speed)
7301 {
b6875aac
KV
7302 if (VECTOR_MODE_P (mode))
7303 {
7304 /* Vector SUB. */
7305 *cost += extra_cost->vect.alu;
7306 }
7307 else if (GET_MODE_CLASS (mode) == MODE_INT)
7308 {
7309 /* SUB(S). */
7310 *cost += extra_cost->alu.arith;
7311 }
4745e701 7312 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
7313 {
7314 /* FSUB. */
7315 *cost += extra_cost->fp[mode == DFmode].addsub;
7316 }
4745e701
JG
7317 }
7318 return true;
7319 }
43e9d192
IB
7320
7321 case PLUS:
4745e701
JG
7322 {
7323 rtx new_op0;
43e9d192 7324
4745e701
JG
7325 op0 = XEXP (x, 0);
7326 op1 = XEXP (x, 1);
43e9d192 7327
a8eecd00 7328cost_plus:
4745e701
JG
7329 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
7330 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
7331 {
7332 /* CSINC. */
e548c9df
AM
7333 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
7334 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
7335 return true;
7336 }
43e9d192 7337
4745e701
JG
7338 if (GET_MODE_CLASS (mode) == MODE_INT
7339 && CONST_INT_P (op1)
7340 && aarch64_uimm12_shift (INTVAL (op1)))
7341 {
e548c9df 7342 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 7343
4745e701
JG
7344 if (speed)
7345 /* ADD (immediate). */
7346 *cost += extra_cost->alu.arith;
7347 return true;
7348 }
7349
e548c9df 7350 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 7351
7cc2145f 7352 /* Look for ADD (extended register). */
77e994c9
RS
7353 if (is_a <scalar_int_mode> (mode, &int_mode)
7354 && aarch64_rtx_arith_op_extract_p (op0, int_mode))
7cc2145f
JG
7355 {
7356 if (speed)
2533c820 7357 *cost += extra_cost->alu.extend_arith;
7cc2145f 7358
b10f1009 7359 op0 = aarch64_strip_extend (op0, true);
e47c4031 7360 *cost += rtx_cost (op0, VOIDmode,
e548c9df 7361 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
7362 return true;
7363 }
7364
4745e701
JG
7365 /* Strip any extend, leave shifts behind as we will
7366 cost them through mult_cost. */
b10f1009 7367 new_op0 = aarch64_strip_extend (op0, false);
4745e701
JG
7368
7369 if (GET_CODE (new_op0) == MULT
0a78ebe4 7370 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
7371 {
7372 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
7373 speed);
4745e701
JG
7374 return true;
7375 }
7376
e548c9df 7377 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
7378
7379 if (speed)
7380 {
b6875aac
KV
7381 if (VECTOR_MODE_P (mode))
7382 {
7383 /* Vector ADD. */
7384 *cost += extra_cost->vect.alu;
7385 }
7386 else if (GET_MODE_CLASS (mode) == MODE_INT)
7387 {
7388 /* ADD. */
7389 *cost += extra_cost->alu.arith;
7390 }
4745e701 7391 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
7392 {
7393 /* FADD. */
7394 *cost += extra_cost->fp[mode == DFmode].addsub;
7395 }
4745e701
JG
7396 }
7397 return true;
7398 }
43e9d192 7399
18b42b2a
KT
7400 case BSWAP:
7401 *cost = COSTS_N_INSNS (1);
7402
7403 if (speed)
b6875aac
KV
7404 {
7405 if (VECTOR_MODE_P (mode))
7406 *cost += extra_cost->vect.alu;
7407 else
7408 *cost += extra_cost->alu.rev;
7409 }
18b42b2a
KT
7410 return false;
7411
43e9d192 7412 case IOR:
f7d5cf8d
KT
7413 if (aarch_rev16_p (x))
7414 {
7415 *cost = COSTS_N_INSNS (1);
7416
b6875aac
KV
7417 if (speed)
7418 {
7419 if (VECTOR_MODE_P (mode))
7420 *cost += extra_cost->vect.alu;
7421 else
7422 *cost += extra_cost->alu.rev;
7423 }
7424 return true;
f7d5cf8d 7425 }
fb0cb7fa
KT
7426
7427 if (aarch64_extr_rtx_p (x, &op0, &op1))
7428 {
e548c9df
AM
7429 *cost += rtx_cost (op0, mode, IOR, 0, speed);
7430 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
7431 if (speed)
7432 *cost += extra_cost->alu.shift;
7433
7434 return true;
7435 }
f7d5cf8d 7436 /* Fall through. */
43e9d192
IB
7437 case XOR:
7438 case AND:
7439 cost_logic:
7440 op0 = XEXP (x, 0);
7441 op1 = XEXP (x, 1);
7442
b6875aac
KV
7443 if (VECTOR_MODE_P (mode))
7444 {
7445 if (speed)
7446 *cost += extra_cost->vect.alu;
7447 return true;
7448 }
7449
268c3b47
JG
7450 if (code == AND
7451 && GET_CODE (op0) == MULT
7452 && CONST_INT_P (XEXP (op0, 1))
7453 && CONST_INT_P (op1)
7454 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
7455 INTVAL (op1)) != 0)
7456 {
7457 /* This is a UBFM/SBFM. */
e548c9df 7458 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
7459 if (speed)
7460 *cost += extra_cost->alu.bfx;
7461 return true;
7462 }
7463
b4206259 7464 if (is_int_mode (mode, &int_mode))
43e9d192 7465 {
8c83f71d 7466 if (CONST_INT_P (op1))
43e9d192 7467 {
8c83f71d
KT
7468 /* We have a mask + shift version of a UBFIZ
7469 i.e. the *andim_ashift<mode>_bfiz pattern. */
7470 if (GET_CODE (op0) == ASHIFT
b4206259
RS
7471 && aarch64_mask_and_shift_for_ubfiz_p (int_mode, op1,
7472 XEXP (op0, 1)))
8c83f71d 7473 {
b4206259 7474 *cost += rtx_cost (XEXP (op0, 0), int_mode,
8c83f71d
KT
7475 (enum rtx_code) code, 0, speed);
7476 if (speed)
7477 *cost += extra_cost->alu.bfx;
268c3b47 7478
8c83f71d
KT
7479 return true;
7480 }
b4206259 7481 else if (aarch64_bitmask_imm (INTVAL (op1), int_mode))
8c83f71d
KT
7482 {
 7483 /* We possibly get the immediate for free; this is not
7484 modelled. */
b4206259
RS
7485 *cost += rtx_cost (op0, int_mode,
7486 (enum rtx_code) code, 0, speed);
8c83f71d
KT
7487 if (speed)
7488 *cost += extra_cost->alu.logical;
268c3b47 7489
8c83f71d
KT
7490 return true;
7491 }
43e9d192
IB
7492 }
7493 else
7494 {
268c3b47
JG
7495 rtx new_op0 = op0;
7496
7497 /* Handle ORN, EON, or BIC. */
43e9d192
IB
7498 if (GET_CODE (op0) == NOT)
7499 op0 = XEXP (op0, 0);
268c3b47
JG
7500
7501 new_op0 = aarch64_strip_shift (op0);
7502
7503 /* If we had a shift on op0 then this is a logical-shift-
7504 by-register/immediate operation. Otherwise, this is just
7505 a logical operation. */
7506 if (speed)
7507 {
7508 if (new_op0 != op0)
7509 {
7510 /* Shift by immediate. */
7511 if (CONST_INT_P (XEXP (op0, 1)))
7512 *cost += extra_cost->alu.log_shift;
7513 else
7514 *cost += extra_cost->alu.log_shift_reg;
7515 }
7516 else
7517 *cost += extra_cost->alu.logical;
7518 }
7519
7520 /* In both cases we want to cost both operands. */
b4206259
RS
7521 *cost += rtx_cost (new_op0, int_mode, (enum rtx_code) code,
7522 0, speed);
7523 *cost += rtx_cost (op1, int_mode, (enum rtx_code) code,
7524 1, speed);
268c3b47
JG
7525
7526 return true;
43e9d192 7527 }
43e9d192
IB
7528 }
7529 return false;
7530
268c3b47 7531 case NOT:
6365da9e
KT
7532 x = XEXP (x, 0);
7533 op0 = aarch64_strip_shift (x);
7534
b6875aac
KV
7535 if (VECTOR_MODE_P (mode))
7536 {
7537 /* Vector NOT. */
7538 *cost += extra_cost->vect.alu;
7539 return false;
7540 }
7541
6365da9e
KT
7542 /* MVN-shifted-reg. */
7543 if (op0 != x)
7544 {
e548c9df 7545 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
7546
7547 if (speed)
7548 *cost += extra_cost->alu.log_shift;
7549
7550 return true;
7551 }
7552 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
7553 Handle the second form here taking care that 'a' in the above can
7554 be a shift. */
7555 else if (GET_CODE (op0) == XOR)
7556 {
7557 rtx newop0 = XEXP (op0, 0);
7558 rtx newop1 = XEXP (op0, 1);
7559 rtx op0_stripped = aarch64_strip_shift (newop0);
7560
e548c9df
AM
7561 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
7562 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
7563
7564 if (speed)
7565 {
7566 if (op0_stripped != newop0)
7567 *cost += extra_cost->alu.log_shift;
7568 else
7569 *cost += extra_cost->alu.logical;
7570 }
7571
7572 return true;
7573 }
268c3b47
JG
7574 /* MVN. */
7575 if (speed)
7576 *cost += extra_cost->alu.logical;
7577
268c3b47
JG
7578 return false;
7579
43e9d192 7580 case ZERO_EXTEND:
b1685e62
JG
7581
7582 op0 = XEXP (x, 0);
7583 /* If a value is written in SI mode, then zero extended to DI
7584 mode, the operation will in general be free as a write to
7585 a 'w' register implicitly zeroes the upper bits of an 'x'
7586 register. However, if this is
7587
7588 (set (reg) (zero_extend (reg)))
7589
7590 we must cost the explicit register move. */
7591 if (mode == DImode
7592 && GET_MODE (op0) == SImode
7593 && outer == SET)
7594 {
e548c9df 7595 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62 7596
dde23f43
KM
7597 /* If OP_COST is non-zero, then the cost of the zero extend
7598 is effectively the cost of the inner operation. Otherwise
7599 we have a MOV instruction and we take the cost from the MOV
7600 itself. This is true independently of whether we are
7601 optimizing for space or time. */
7602 if (op_cost)
b1685e62
JG
7603 *cost = op_cost;
7604
7605 return true;
7606 }
e548c9df 7607 else if (MEM_P (op0))
43e9d192 7608 {
b1685e62 7609 /* All loads can zero extend to any size for free. */
e548c9df 7610 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
7611 return true;
7612 }
b1685e62 7613
283b6c85
KT
7614 op0 = aarch64_extend_bitfield_pattern_p (x);
7615 if (op0)
7616 {
7617 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
7618 if (speed)
7619 *cost += extra_cost->alu.bfx;
7620 return true;
7621 }
7622
b1685e62 7623 if (speed)
b6875aac
KV
7624 {
7625 if (VECTOR_MODE_P (mode))
7626 {
7627 /* UMOV. */
7628 *cost += extra_cost->vect.alu;
7629 }
7630 else
7631 {
63715e5e
WD
7632 /* We generate an AND instead of UXTB/UXTH. */
7633 *cost += extra_cost->alu.logical;
b6875aac
KV
7634 }
7635 }
43e9d192
IB
7636 return false;
7637
7638 case SIGN_EXTEND:
b1685e62 7639 if (MEM_P (XEXP (x, 0)))
43e9d192 7640 {
b1685e62
JG
7641 /* LDRSH. */
7642 if (speed)
7643 {
7644 rtx address = XEXP (XEXP (x, 0), 0);
7645 *cost += extra_cost->ldst.load_sign_extend;
7646
7647 *cost +=
7648 COSTS_N_INSNS (aarch64_address_cost (address, mode,
7649 0, speed));
7650 }
43e9d192
IB
7651 return true;
7652 }
b1685e62 7653
283b6c85
KT
7654 op0 = aarch64_extend_bitfield_pattern_p (x);
7655 if (op0)
7656 {
7657 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
7658 if (speed)
7659 *cost += extra_cost->alu.bfx;
7660 return true;
7661 }
7662
b1685e62 7663 if (speed)
b6875aac
KV
7664 {
7665 if (VECTOR_MODE_P (mode))
7666 *cost += extra_cost->vect.alu;
7667 else
7668 *cost += extra_cost->alu.extend;
7669 }
43e9d192
IB
7670 return false;
7671
ba0cfa17
JG
7672 case ASHIFT:
7673 op0 = XEXP (x, 0);
7674 op1 = XEXP (x, 1);
7675
7676 if (CONST_INT_P (op1))
7677 {
ba0cfa17 7678 if (speed)
b6875aac
KV
7679 {
7680 if (VECTOR_MODE_P (mode))
7681 {
7682 /* Vector shift (immediate). */
7683 *cost += extra_cost->vect.alu;
7684 }
7685 else
7686 {
 7687 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
7688 aliases. */
7689 *cost += extra_cost->alu.shift;
7690 }
7691 }
ba0cfa17
JG
7692
7693 /* We can incorporate zero/sign extend for free. */
7694 if (GET_CODE (op0) == ZERO_EXTEND
7695 || GET_CODE (op0) == SIGN_EXTEND)
7696 op0 = XEXP (op0, 0);
7697
e548c9df 7698 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
7699 return true;
7700 }
7701 else
7702 {
7813b280 7703 if (VECTOR_MODE_P (mode))
b6875aac 7704 {
7813b280
KT
7705 if (speed)
7706 /* Vector shift (register). */
7707 *cost += extra_cost->vect.alu;
7708 }
7709 else
7710 {
7711 if (speed)
7712 /* LSLV. */
7713 *cost += extra_cost->alu.shift_reg;
7714
7715 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
7716 && CONST_INT_P (XEXP (op1, 1))
7717 && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
b6875aac 7718 {
7813b280
KT
7719 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
7720 /* We already demanded XEXP (op1, 0) to be REG_P, so
7721 don't recurse into it. */
7722 return true;
b6875aac
KV
7723 }
7724 }
ba0cfa17
JG
7725 return false; /* All arguments need to be in registers. */
7726 }
7727
43e9d192 7728 case ROTATE:
43e9d192
IB
7729 case ROTATERT:
7730 case LSHIFTRT:
43e9d192 7731 case ASHIFTRT:
ba0cfa17
JG
7732 op0 = XEXP (x, 0);
7733 op1 = XEXP (x, 1);
43e9d192 7734
ba0cfa17
JG
7735 if (CONST_INT_P (op1))
7736 {
7737 /* ASR (immediate) and friends. */
7738 if (speed)
b6875aac
KV
7739 {
7740 if (VECTOR_MODE_P (mode))
7741 *cost += extra_cost->vect.alu;
7742 else
7743 *cost += extra_cost->alu.shift;
7744 }
43e9d192 7745
e548c9df 7746 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
7747 return true;
7748 }
7749 else
7750 {
7813b280 7751 if (VECTOR_MODE_P (mode))
b6875aac 7752 {
7813b280
KT
7753 if (speed)
7754 /* Vector shift (register). */
b6875aac 7755 *cost += extra_cost->vect.alu;
7813b280
KT
7756 }
7757 else
7758 {
7759 if (speed)
7760 /* ASR (register) and friends. */
b6875aac 7761 *cost += extra_cost->alu.shift_reg;
7813b280
KT
7762
7763 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
7764 && CONST_INT_P (XEXP (op1, 1))
7765 && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
7766 {
7767 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
7768 /* We already demanded XEXP (op1, 0) to be REG_P, so
7769 don't recurse into it. */
7770 return true;
7771 }
b6875aac 7772 }
ba0cfa17
JG
7773 return false; /* All arguments need to be in registers. */
7774 }
43e9d192 7775
909734be
JG
7776 case SYMBOL_REF:
7777
1b1e81f8
JW
7778 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
7779 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
7780 {
7781 /* LDR. */
7782 if (speed)
7783 *cost += extra_cost->ldst.load;
7784 }
7785 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
7786 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
7787 {
7788 /* ADRP, followed by ADD. */
7789 *cost += COSTS_N_INSNS (1);
7790 if (speed)
7791 *cost += 2 * extra_cost->alu.arith;
7792 }
7793 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
7794 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
7795 {
7796 /* ADR. */
7797 if (speed)
7798 *cost += extra_cost->alu.arith;
7799 }
7800
7801 if (flag_pic)
7802 {
7803 /* One extra load instruction, after accessing the GOT. */
7804 *cost += COSTS_N_INSNS (1);
7805 if (speed)
7806 *cost += extra_cost->ldst.load;
7807 }
43e9d192
IB
7808 return true;
7809
909734be 7810 case HIGH:
43e9d192 7811 case LO_SUM:
909734be
JG
7812 /* ADRP/ADD (immediate). */
7813 if (speed)
7814 *cost += extra_cost->alu.arith;
43e9d192
IB
7815 return true;
7816
7817 case ZERO_EXTRACT:
7818 case SIGN_EXTRACT:
7cc2145f
JG
7819 /* UBFX/SBFX. */
7820 if (speed)
b6875aac
KV
7821 {
7822 if (VECTOR_MODE_P (mode))
7823 *cost += extra_cost->vect.alu;
7824 else
7825 *cost += extra_cost->alu.bfx;
7826 }
7cc2145f
JG
7827
7828 /* We can trust that the immediates used will be correct (there
7829 are no by-register forms), so we need only cost op0. */
e548c9df 7830 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
7831 return true;
7832
7833 case MULT:
4745e701
JG
7834 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
7835 /* aarch64_rtx_mult_cost always handles recursion to its
7836 operands. */
7837 return true;
43e9d192
IB
7838
7839 case MOD:
4f58fe36
KT
7840 /* We can expand signed mod by power of 2 using a NEGS, two parallel
7841 ANDs and a CSNEG. Assume here that CSNEG is the same as the cost of
7842 an unconditional negate. This case should only ever be reached through
7843 the set_smod_pow2_cheap check in expmed.c. */
7844 if (CONST_INT_P (XEXP (x, 1))
7845 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
7846 && (mode == SImode || mode == DImode))
7847 {
7848 /* We expand to 4 instructions. Reset the baseline. */
7849 *cost = COSTS_N_INSNS (4);
7850
7851 if (speed)
7852 *cost += 2 * extra_cost->alu.logical
7853 + 2 * extra_cost->alu.arith;
7854
7855 return true;
7856 }
7857
7858 /* Fall-through. */
43e9d192 7859 case UMOD:
43e9d192
IB
7860 if (speed)
7861 {
cb9ac430 7862 /* Slightly prefer UMOD over SMOD. */
b6875aac
KV
7863 if (VECTOR_MODE_P (mode))
7864 *cost += extra_cost->vect.alu;
e548c9df
AM
7865 else if (GET_MODE_CLASS (mode) == MODE_INT)
7866 *cost += (extra_cost->mult[mode == DImode].add
cb9ac430
TC
7867 + extra_cost->mult[mode == DImode].idiv
7868 + (code == MOD ? 1 : 0));
43e9d192
IB
7869 }
7870 return false; /* All arguments need to be in registers. */
7871
7872 case DIV:
7873 case UDIV:
4105fe38 7874 case SQRT:
43e9d192
IB
7875 if (speed)
7876 {
b6875aac
KV
7877 if (VECTOR_MODE_P (mode))
7878 *cost += extra_cost->vect.alu;
7879 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
7880 /* There is no integer SQRT, so only DIV and UDIV can get
7881 here. */
cb9ac430
TC
7882 *cost += (extra_cost->mult[mode == DImode].idiv
 7883 /* Slightly prefer UDIV over SDIV. */
7884 + (code == DIV ? 1 : 0));
4105fe38
JG
7885 else
7886 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
7887 }
7888 return false; /* All arguments need to be in registers. */
7889
a8eecd00 7890 case IF_THEN_ELSE:
2d5ffe46
AP
7891 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
7892 XEXP (x, 2), cost, speed);
a8eecd00
JG
7893
7894 case EQ:
7895 case NE:
7896 case GT:
7897 case GTU:
7898 case LT:
7899 case LTU:
7900 case GE:
7901 case GEU:
7902 case LE:
7903 case LEU:
7904
7905 return false; /* All arguments must be in registers. */
7906
b292109f
JG
7907 case FMA:
7908 op0 = XEXP (x, 0);
7909 op1 = XEXP (x, 1);
7910 op2 = XEXP (x, 2);
7911
7912 if (speed)
b6875aac
KV
7913 {
7914 if (VECTOR_MODE_P (mode))
7915 *cost += extra_cost->vect.alu;
7916 else
7917 *cost += extra_cost->fp[mode == DFmode].fma;
7918 }
b292109f
JG
7919
7920 /* FMSUB, FNMADD, and FNMSUB are free. */
7921 if (GET_CODE (op0) == NEG)
7922 op0 = XEXP (op0, 0);
7923
7924 if (GET_CODE (op2) == NEG)
7925 op2 = XEXP (op2, 0);
7926
7927 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
7928 and the by-element operand as operand 0. */
7929 if (GET_CODE (op1) == NEG)
7930 op1 = XEXP (op1, 0);
7931
7932 /* Catch vector-by-element operations. The by-element operand can
7933 either be (vec_duplicate (vec_select (x))) or just
7934 (vec_select (x)), depending on whether we are multiplying by
7935 a vector or a scalar.
7936
 7937 Canonicalization is not very good in these cases: FMA4 will put the
 7938 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
7939 if (GET_CODE (op0) == VEC_DUPLICATE)
7940 op0 = XEXP (op0, 0);
7941 else if (GET_CODE (op1) == VEC_DUPLICATE)
7942 op1 = XEXP (op1, 0);
7943
7944 if (GET_CODE (op0) == VEC_SELECT)
7945 op0 = XEXP (op0, 0);
7946 else if (GET_CODE (op1) == VEC_SELECT)
7947 op1 = XEXP (op1, 0);
7948
7949 /* If the remaining parameters are not registers,
7950 get the cost to put them into registers. */
e548c9df
AM
7951 *cost += rtx_cost (op0, mode, FMA, 0, speed);
7952 *cost += rtx_cost (op1, mode, FMA, 1, speed);
7953 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
7954 return true;
7955
5e2a765b
KT
7956 case FLOAT:
7957 case UNSIGNED_FLOAT:
7958 if (speed)
7959 *cost += extra_cost->fp[mode == DFmode].fromint;
7960 return false;
7961
b292109f
JG
7962 case FLOAT_EXTEND:
7963 if (speed)
b6875aac
KV
7964 {
7965 if (VECTOR_MODE_P (mode))
7966 {
 7967 /* Vector widening conversion. */
7968 *cost += extra_cost->vect.alu;
7969 }
7970 else
7971 *cost += extra_cost->fp[mode == DFmode].widen;
7972 }
b292109f
JG
7973 return false;
7974
7975 case FLOAT_TRUNCATE:
7976 if (speed)
b6875aac
KV
7977 {
7978 if (VECTOR_MODE_P (mode))
7979 {
 7980 /* Vector narrowing conversion. */
7981 *cost += extra_cost->vect.alu;
7982 }
7983 else
7984 *cost += extra_cost->fp[mode == DFmode].narrow;
7985 }
b292109f
JG
7986 return false;
7987
61263118
KT
7988 case FIX:
7989 case UNSIGNED_FIX:
7990 x = XEXP (x, 0);
7991 /* Strip the rounding part. They will all be implemented
7992 by the fcvt* family of instructions anyway. */
7993 if (GET_CODE (x) == UNSPEC)
7994 {
7995 unsigned int uns_code = XINT (x, 1);
7996
7997 if (uns_code == UNSPEC_FRINTA
7998 || uns_code == UNSPEC_FRINTM
7999 || uns_code == UNSPEC_FRINTN
8000 || uns_code == UNSPEC_FRINTP
8001 || uns_code == UNSPEC_FRINTZ)
8002 x = XVECEXP (x, 0, 0);
8003 }
8004
8005 if (speed)
b6875aac
KV
8006 {
8007 if (VECTOR_MODE_P (mode))
8008 *cost += extra_cost->vect.alu;
8009 else
8010 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
8011 }
39252973
KT
8012
8013 /* We can combine fmul by a power of 2 followed by a fcvt into a single
8014 fixed-point fcvt. */
8015 if (GET_CODE (x) == MULT
8016 && ((VECTOR_MODE_P (mode)
8017 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
8018 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
8019 {
8020 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
8021 0, speed);
8022 return true;
8023 }
8024
e548c9df 8025 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
8026 return true;
8027
b292109f 8028 case ABS:
b6875aac
KV
8029 if (VECTOR_MODE_P (mode))
8030 {
8031 /* ABS (vector). */
8032 if (speed)
8033 *cost += extra_cost->vect.alu;
8034 }
8035 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 8036 {
19261b99
KT
8037 op0 = XEXP (x, 0);
8038
8039 /* FABD, which is analogous to FADD. */
8040 if (GET_CODE (op0) == MINUS)
8041 {
e548c9df
AM
8042 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
8043 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
8044 if (speed)
8045 *cost += extra_cost->fp[mode == DFmode].addsub;
8046
8047 return true;
8048 }
8049 /* Simple FABS is analogous to FNEG. */
b292109f
JG
8050 if (speed)
8051 *cost += extra_cost->fp[mode == DFmode].neg;
8052 }
8053 else
8054 {
8055 /* Integer ABS will either be split to
8056 two arithmetic instructions, or will be an ABS
8057 (scalar), which we don't model. */
8058 *cost = COSTS_N_INSNS (2);
8059 if (speed)
8060 *cost += 2 * extra_cost->alu.arith;
8061 }
8062 return false;
8063
8064 case SMAX:
8065 case SMIN:
8066 if (speed)
8067 {
b6875aac
KV
8068 if (VECTOR_MODE_P (mode))
8069 *cost += extra_cost->vect.alu;
8070 else
8071 {
8072 /* FMAXNM/FMINNM/FMAX/FMIN.
8073 TODO: This may not be accurate for all implementations, but
8074 we do not model this in the cost tables. */
8075 *cost += extra_cost->fp[mode == DFmode].addsub;
8076 }
b292109f
JG
8077 }
8078 return false;
8079
61263118
KT
8080 case UNSPEC:
8081 /* The floating point round to integer frint* instructions. */
8082 if (aarch64_frint_unspec_p (XINT (x, 1)))
8083 {
8084 if (speed)
8085 *cost += extra_cost->fp[mode == DFmode].roundint;
8086
8087 return false;
8088 }
781aeb73
KT
8089
8090 if (XINT (x, 1) == UNSPEC_RBIT)
8091 {
8092 if (speed)
8093 *cost += extra_cost->alu.rev;
8094
8095 return false;
8096 }
61263118
KT
8097 break;
8098
fb620c4a
JG
8099 case TRUNCATE:
8100
8101 /* Decompose <su>muldi3_highpart. */
8102 if (/* (truncate:DI */
8103 mode == DImode
8104 /* (lshiftrt:TI */
8105 && GET_MODE (XEXP (x, 0)) == TImode
8106 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8107 /* (mult:TI */
8108 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8109 /* (ANY_EXTEND:TI (reg:DI))
8110 (ANY_EXTEND:TI (reg:DI))) */
8111 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8112 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
8113 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
8114 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
8115 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
8116 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
8117 /* (const_int 64) */
8118 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8119 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
8120 {
8121 /* UMULH/SMULH. */
8122 if (speed)
8123 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
8124 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
8125 mode, MULT, 0, speed);
8126 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
8127 mode, MULT, 1, speed);
fb620c4a
JG
8128 return true;
8129 }
8130
8131 /* Fall through. */
43e9d192 8132 default:
61263118 8133 break;
43e9d192 8134 }
61263118 8135
c10e3d7f
AP
8136 if (dump_file
8137 && flag_aarch64_verbose_cost)
61263118
KT
8138 fprintf (dump_file,
8139 "\nFailed to cost RTX. Assuming default cost.\n");
8140
8141 return true;
43e9d192
IB
8142}
8143
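/* A sketch of how the pieces above combine, assuming nothing about the
   exact numbers in the tuned cost tables: for

     (plus:SI (reg:SI x) (const_int 16))

   with SPEED set, the baseline of COSTS_N_INSNS (1) is kept, the PLUS
   case adds extra_cost->alu.arith for the ADD (immediate) form, and the
   recursive rtx_cost call on the plain register operand is expected to
   add nothing further, since the generic rtx_cost code charges 0 for a
   bare REG.  */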
0ee859b5
JG
8144/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
8145 calculated for X. This cost is stored in *COST. Returns true
8146 if the total cost of X was calculated. */
8147static bool
e548c9df 8148aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
8149 int param, int *cost, bool speed)
8150{
e548c9df 8151 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5 8152
c10e3d7f
AP
8153 if (dump_file
8154 && flag_aarch64_verbose_cost)
0ee859b5
JG
8155 {
8156 print_rtl_single (dump_file, x);
8157 fprintf (dump_file, "\n%s cost: %d (%s)\n",
8158 speed ? "Hot" : "Cold",
8159 *cost, result ? "final" : "partial");
8160 }
8161
8162 return result;
8163}
8164
43e9d192 8165static int
ef4bddc2 8166aarch64_register_move_cost (machine_mode mode,
8a3a7e67 8167 reg_class_t from_i, reg_class_t to_i)
43e9d192 8168{
8a3a7e67
RH
8169 enum reg_class from = (enum reg_class) from_i;
8170 enum reg_class to = (enum reg_class) to_i;
43e9d192 8171 const struct cpu_regmove_cost *regmove_cost
b175b679 8172 = aarch64_tune_params.regmove_cost;
43e9d192 8173
3be07662 8174 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
2876a13f 8175 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
3be07662
WD
8176 to = GENERAL_REGS;
8177
2876a13f 8178 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
3be07662
WD
8179 from = GENERAL_REGS;
8180
6ee70f81
AP
8181 /* Moving between GPR and stack cost is the same as GP2GP. */
8182 if ((from == GENERAL_REGS && to == STACK_REG)
8183 || (to == GENERAL_REGS && from == STACK_REG))
8184 return regmove_cost->GP2GP;
8185
8186 /* To/From the stack register, we move via the gprs. */
8187 if (to == STACK_REG || from == STACK_REG)
8188 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
8189 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
8190
8919453c
WD
8191 if (GET_MODE_SIZE (mode) == 16)
8192 {
8193 /* 128-bit operations on general registers require 2 instructions. */
8194 if (from == GENERAL_REGS && to == GENERAL_REGS)
8195 return regmove_cost->GP2GP * 2;
8196 else if (from == GENERAL_REGS)
8197 return regmove_cost->GP2FP * 2;
8198 else if (to == GENERAL_REGS)
8199 return regmove_cost->FP2GP * 2;
8200
8201 /* When AdvSIMD instructions are disabled it is not possible to move
8202 a 128-bit value directly between Q registers. This is handled in
8203 secondary reload. A general register is used as a scratch to move
8204 the upper DI value and the lower DI value is moved directly,
8205 hence the cost is the sum of three moves. */
8206 if (! TARGET_SIMD)
8207 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
8208
8209 return regmove_cost->FP2FP;
8210 }
8211
43e9d192
IB
8212 if (from == GENERAL_REGS && to == GENERAL_REGS)
8213 return regmove_cost->GP2GP;
8214 else if (from == GENERAL_REGS)
8215 return regmove_cost->GP2FP;
8216 else if (to == GENERAL_REGS)
8217 return regmove_cost->FP2GP;
8218
43e9d192
IB
8219 return regmove_cost->FP2FP;
8220}
8221
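/* Illustrative consequences of the 16-byte special case above: a 128-bit
   move from GENERAL_REGS to FP_REGS is costed as GP2FP * 2 (two 64-bit
   transfers), an FP_REGS to FP_REGS move as FP2FP, and the same FP-to-FP
   move without AdvSIMD as GP2FP + FP2GP + FP2FP, since the upper half
   must travel via a general-purpose scratch register.  */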
8222static int
ef4bddc2 8223aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
8224 reg_class_t rclass ATTRIBUTE_UNUSED,
8225 bool in ATTRIBUTE_UNUSED)
8226{
b175b679 8227 return aarch64_tune_params.memmov_cost;
43e9d192
IB
8228}
8229
0c30e0f3
EM
8230/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
8231 to optimize 1.0/sqrt. */
ee62a5a6
RS
8232
8233static bool
9acc9cbe 8234use_rsqrt_p (machine_mode mode)
ee62a5a6
RS
8235{
8236 return (!flag_trapping_math
8237 && flag_unsafe_math_optimizations
9acc9cbe
EM
8238 && ((aarch64_tune_params.approx_modes->recip_sqrt
8239 & AARCH64_APPROX_MODE (mode))
1a33079e 8240 || flag_mrecip_low_precision_sqrt));
ee62a5a6
RS
8241}
8242
0c30e0f3
EM
8243/* Function to decide when to use the approximate reciprocal square root
8244 builtin. */
a6fc00da
BH
8245
8246static tree
ee62a5a6 8247aarch64_builtin_reciprocal (tree fndecl)
a6fc00da 8248{
9acc9cbe
EM
8249 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
8250
8251 if (!use_rsqrt_p (mode))
a6fc00da 8252 return NULL_TREE;
ee62a5a6 8253 return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
a6fc00da
BH
8254}
8255
8256typedef rtx (*rsqrte_type) (rtx, rtx);
8257
98daafa0
EM
8258/* Select reciprocal square root initial estimate insn depending on machine
8259 mode. */
a6fc00da 8260
98daafa0 8261static rsqrte_type
a6fc00da
BH
8262get_rsqrte_type (machine_mode mode)
8263{
8264 switch (mode)
8265 {
4e10a5a7
RS
8266 case E_DFmode: return gen_aarch64_rsqrtedf;
8267 case E_SFmode: return gen_aarch64_rsqrtesf;
8268 case E_V2DFmode: return gen_aarch64_rsqrtev2df;
8269 case E_V2SFmode: return gen_aarch64_rsqrtev2sf;
8270 case E_V4SFmode: return gen_aarch64_rsqrtev4sf;
a6fc00da
BH
8271 default: gcc_unreachable ();
8272 }
8273}
8274
8275typedef rtx (*rsqrts_type) (rtx, rtx, rtx);
8276
98daafa0 8277/* Select reciprocal square root series step insn depending on machine mode. */
a6fc00da 8278
98daafa0 8279static rsqrts_type
a6fc00da
BH
8280get_rsqrts_type (machine_mode mode)
8281{
8282 switch (mode)
8283 {
4e10a5a7
RS
8284 case E_DFmode: return gen_aarch64_rsqrtsdf;
8285 case E_SFmode: return gen_aarch64_rsqrtssf;
8286 case E_V2DFmode: return gen_aarch64_rsqrtsv2df;
8287 case E_V2SFmode: return gen_aarch64_rsqrtsv2sf;
8288 case E_V4SFmode: return gen_aarch64_rsqrtsv4sf;
a6fc00da
BH
8289 default: gcc_unreachable ();
8290 }
8291}
8292
98daafa0
EM
8293/* Emit instruction sequence to compute either the approximate square root
8294 or its approximate reciprocal, depending on the flag RECP, and return
8295 whether the sequence was emitted or not. */
a6fc00da 8296
98daafa0
EM
8297bool
8298aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
a6fc00da 8299{
98daafa0 8300 machine_mode mode = GET_MODE (dst);
daef0a8c
JW
8301
8302 if (GET_MODE_INNER (mode) == HFmode)
2e19adc8
RE
8303 {
8304 gcc_assert (!recp);
8305 return false;
8306 }
8307
2e19adc8
RE
8308 if (!recp)
8309 {
8310 if (!(flag_mlow_precision_sqrt
8311 || (aarch64_tune_params.approx_modes->sqrt
8312 & AARCH64_APPROX_MODE (mode))))
8313 return false;
8314
8315 if (flag_finite_math_only
8316 || flag_trapping_math
8317 || !flag_unsafe_math_optimizations
8318 || optimize_function_for_size_p (cfun))
8319 return false;
8320 }
8321 else
8322 /* Caller assumes we cannot fail. */
8323 gcc_assert (use_rsqrt_p (mode));
daef0a8c 8324
ddc203a7 8325 machine_mode mmsk = mode_for_int_vector (mode).require ();
98daafa0
EM
8326 rtx xmsk = gen_reg_rtx (mmsk);
8327 if (!recp)
2e19adc8
RE
8328 /* When calculating the approximate square root, compare the
8329 argument with 0.0 and create a mask. */
8330 emit_insn (gen_rtx_SET (xmsk,
8331 gen_rtx_NEG (mmsk,
8332 gen_rtx_EQ (mmsk, src,
8333 CONST0_RTX (mode)))));
a6fc00da 8334
98daafa0
EM
8335 /* Estimate the approximate reciprocal square root. */
8336 rtx xdst = gen_reg_rtx (mode);
8337 emit_insn ((*get_rsqrte_type (mode)) (xdst, src));
a6fc00da 8338
98daafa0
EM
8339 /* Iterate over the series twice for SF and thrice for DF. */
8340 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
a6fc00da 8341
98daafa0
EM
8342 /* Optionally iterate over the series once less for faster performance
8343 while sacrificing the accuracy. */
8344 if ((recp && flag_mrecip_low_precision_sqrt)
8345 || (!recp && flag_mlow_precision_sqrt))
a6fc00da
BH
8346 iterations--;
8347
98daafa0
EM
8348 /* Iterate over the series to calculate the approximate reciprocal square
8349 root. */
8350 rtx x1 = gen_reg_rtx (mode);
8351 while (iterations--)
a6fc00da 8352 {
a6fc00da 8353 rtx x2 = gen_reg_rtx (mode);
98daafa0
EM
8354 emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
8355
8356 emit_insn ((*get_rsqrts_type (mode)) (x1, src, x2));
a6fc00da 8357
98daafa0
EM
8358 if (iterations > 0)
8359 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
8360 }
8361
8362 if (!recp)
8363 {
8364 /* Qualify the approximate reciprocal square root when the argument is
8365 0.0 by squashing the intermediary result to 0.0. */
8366 rtx xtmp = gen_reg_rtx (mmsk);
8367 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
8368 gen_rtx_SUBREG (mmsk, xdst, 0)));
8369 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
a6fc00da 8370
98daafa0
EM
8371 /* Calculate the approximate square root. */
8372 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
a6fc00da
BH
8373 }
8374
98daafa0
EM
8375 /* Finalize the approximation. */
8376 emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
8377
8378 return true;
a6fc00da
BH
8379}
8380
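/* For reference, the refinement used above is the standard Newton-Raphson
   step for 1/sqrt(d) (a sketch of the maths, not additional code):

     x_{n+1} = x_n * (3 - d * x_n^2) / 2

   FRSQRTE provides the initial estimate x_0 and each FRSQRTS step computes
   (3 - d * x_n^2) / 2, which is multiplied back into the running estimate;
   the square root itself is then obtained as d * (1/sqrt(d)).  */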
79a2bc2d
EM
8381typedef rtx (*recpe_type) (rtx, rtx);
8382
8383/* Select reciprocal initial estimate insn depending on machine mode. */
8384
8385static recpe_type
8386get_recpe_type (machine_mode mode)
8387{
8388 switch (mode)
8389 {
4e10a5a7
RS
8390 case E_SFmode: return (gen_aarch64_frecpesf);
8391 case E_V2SFmode: return (gen_aarch64_frecpev2sf);
8392 case E_V4SFmode: return (gen_aarch64_frecpev4sf);
8393 case E_DFmode: return (gen_aarch64_frecpedf);
8394 case E_V2DFmode: return (gen_aarch64_frecpev2df);
8395 default: gcc_unreachable ();
79a2bc2d
EM
8396 }
8397}
8398
8399typedef rtx (*recps_type) (rtx, rtx, rtx);
8400
8401/* Select reciprocal series step insn depending on machine mode. */
8402
8403static recps_type
8404get_recps_type (machine_mode mode)
8405{
8406 switch (mode)
8407 {
4e10a5a7
RS
8408 case E_SFmode: return (gen_aarch64_frecpssf);
8409 case E_V2SFmode: return (gen_aarch64_frecpsv2sf);
8410 case E_V4SFmode: return (gen_aarch64_frecpsv4sf);
8411 case E_DFmode: return (gen_aarch64_frecpsdf);
8412 case E_V2DFmode: return (gen_aarch64_frecpsv2df);
8413 default: gcc_unreachable ();
79a2bc2d
EM
8414 }
8415}
8416
8417/* Emit the instruction sequence to compute the approximation for the division
8418 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
8419
8420bool
8421aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
8422{
8423 machine_mode mode = GET_MODE (quo);
33d72b63
JW
8424
8425 if (GET_MODE_INNER (mode) == HFmode)
8426 return false;
8427
79a2bc2d
EM
8428 bool use_approx_division_p = (flag_mlow_precision_div
8429 || (aarch64_tune_params.approx_modes->division
8430 & AARCH64_APPROX_MODE (mode)));
8431
8432 if (!flag_finite_math_only
8433 || flag_trapping_math
8434 || !flag_unsafe_math_optimizations
8435 || optimize_function_for_size_p (cfun)
8436 || !use_approx_division_p)
8437 return false;
8438
8439 /* Estimate the approximate reciprocal. */
8440 rtx xrcp = gen_reg_rtx (mode);
8441 emit_insn ((*get_recpe_type (mode)) (xrcp, den));
8442
8443 /* Iterate over the series twice for SF and thrice for DF. */
8444 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
8445
8446 /* Optionally iterate over the series once less for faster performance,
8447 while sacrificing the accuracy. */
8448 if (flag_mlow_precision_div)
8449 iterations--;
8450
8451 /* Iterate over the series to calculate the approximate reciprocal. */
8452 rtx xtmp = gen_reg_rtx (mode);
8453 while (iterations--)
8454 {
8455 emit_insn ((*get_recps_type (mode)) (xtmp, xrcp, den));
8456
8457 if (iterations > 0)
8458 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
8459 }
8460
8461 if (num != CONST1_RTX (mode))
8462 {
8463 /* As the approximate reciprocal of DEN is already calculated, only
8464 calculate the approximate division when NUM is not 1.0. */
8465 rtx xnum = force_reg (mode, num);
8466 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
8467 }
8468
8469 /* Finalize the approximation. */
8470 emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
8471 return true;
8472}
8473
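/* Likewise for the division sequence: the refinement is the standard
   Newton-Raphson step for the reciprocal 1/d (sketched for reference):

     x_{n+1} = x_n * (2 - d * x_n)

   FRECPE provides the initial estimate and each FRECPS step computes
   (2 - d * x_n); the quotient is then formed as NUM times the refined
   reciprocal.  */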
d126a4ae
AP
8474/* Return the number of instructions that can be issued per cycle. */
8475static int
8476aarch64_sched_issue_rate (void)
8477{
b175b679 8478 return aarch64_tune_params.issue_rate;
d126a4ae
AP
8479}
8480
d03f7e44
MK
8481static int
8482aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
8483{
8484 int issue_rate = aarch64_sched_issue_rate ();
8485
8486 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
8487}
8488
2d6bc7fa
KT
8489
8490/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
8491 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
8492 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
8493
8494static int
8495aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
8496 int ready_index)
8497{
8498 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
8499}
8500
8501
8990e73a
TB
8502/* Vectorizer cost model target hooks. */
8503
8504/* Implement targetm.vectorize.builtin_vectorization_cost. */
8505static int
8506aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
8507 tree vectype,
8508 int misalign ATTRIBUTE_UNUSED)
8509{
8510 unsigned elements;
cd8ae5ed
AP
8511 const cpu_vector_cost *costs = aarch64_tune_params.vec_costs;
8512 bool fp = false;
8513
8514 if (vectype != NULL)
8515 fp = FLOAT_TYPE_P (vectype);
8990e73a
TB
8516
8517 switch (type_of_cost)
8518 {
8519 case scalar_stmt:
cd8ae5ed 8520 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
8990e73a
TB
8521
8522 case scalar_load:
cd8ae5ed 8523 return costs->scalar_load_cost;
8990e73a
TB
8524
8525 case scalar_store:
cd8ae5ed 8526 return costs->scalar_store_cost;
8990e73a
TB
8527
8528 case vector_stmt:
cd8ae5ed 8529 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
8530
8531 case vector_load:
cd8ae5ed 8532 return costs->vec_align_load_cost;
8990e73a
TB
8533
8534 case vector_store:
cd8ae5ed 8535 return costs->vec_store_cost;
8990e73a
TB
8536
8537 case vec_to_scalar:
cd8ae5ed 8538 return costs->vec_to_scalar_cost;
8990e73a
TB
8539
8540 case scalar_to_vec:
cd8ae5ed 8541 return costs->scalar_to_vec_cost;
8990e73a
TB
8542
8543 case unaligned_load:
cd8ae5ed 8544 return costs->vec_unalign_load_cost;
8990e73a
TB
8545
8546 case unaligned_store:
cd8ae5ed 8547 return costs->vec_unalign_store_cost;
8990e73a
TB
8548
8549 case cond_branch_taken:
cd8ae5ed 8550 return costs->cond_taken_branch_cost;
8990e73a
TB
8551
8552 case cond_branch_not_taken:
cd8ae5ed 8553 return costs->cond_not_taken_branch_cost;
8990e73a
TB
8554
8555 case vec_perm:
cd8ae5ed 8556 return costs->vec_permute_cost;
c428f91c 8557
8990e73a 8558 case vec_promote_demote:
cd8ae5ed 8559 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
8560
8561 case vec_construct:
8562 elements = TYPE_VECTOR_SUBPARTS (vectype);
8563 return elements / 2 + 1;
8564
8565 default:
8566 gcc_unreachable ();
8567 }
8568}
8569
8570/* Implement targetm.vectorize.add_stmt_cost. */
8571static unsigned
8572aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
8573 struct _stmt_vec_info *stmt_info, int misalign,
8574 enum vect_cost_model_location where)
8575{
8576 unsigned *cost = (unsigned *) data;
8577 unsigned retval = 0;
8578
8579 if (flag_vect_cost_model)
8580 {
8581 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
8582 int stmt_cost =
8583 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
8584
8585 /* Statements in an inner loop relative to the loop being
8586 vectorized are weighted more heavily. The value here is
058e4c71 8587 arbitrary and could potentially be improved with analysis. */
8990e73a 8588 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
058e4c71 8589 count *= 50; /* FIXME */
8990e73a
TB
8590
8591 retval = (unsigned) (count * stmt_cost);
8592 cost[where] += retval;
8593 }
8594
8595 return retval;
8596}
8597
0cfff2a1 8598static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 8599
0cfff2a1
KT
8600/* Parse the TO_PARSE string and put the architecture struct that it
8601 selects into RES and the architectural features into ISA_FLAGS.
8602 Return an aarch64_parse_opt_result describing the parse result.
8603 If there is an error parsing, RES and ISA_FLAGS are left unchanged. */
43e9d192 8604
0cfff2a1
KT
8605static enum aarch64_parse_opt_result
8606aarch64_parse_arch (const char *to_parse, const struct processor **res,
8607 unsigned long *isa_flags)
43e9d192
IB
8608{
8609 char *ext;
8610 const struct processor *arch;
0cfff2a1 8611 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
8612 size_t len;
8613
0cfff2a1 8614 strcpy (str, to_parse);
43e9d192
IB
8615
8616 ext = strchr (str, '+');
8617
8618 if (ext != NULL)
8619 len = ext - str;
8620 else
8621 len = strlen (str);
8622
8623 if (len == 0)
0cfff2a1
KT
8624 return AARCH64_PARSE_MISSING_ARG;
8625
43e9d192 8626
0cfff2a1 8627 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
8628 for (arch = all_architectures; arch->name != NULL; arch++)
8629 {
8630 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
8631 {
0cfff2a1 8632 unsigned long isa_temp = arch->flags;
43e9d192
IB
8633
8634 if (ext != NULL)
8635 {
0cfff2a1
KT
8636 /* TO_PARSE string contains at least one extension. */
8637 enum aarch64_parse_opt_result ext_res
8638 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 8639
0cfff2a1
KT
8640 if (ext_res != AARCH64_PARSE_OK)
8641 return ext_res;
ffee7aa9 8642 }
0cfff2a1
KT
8643 /* Extension parsing was successful. Confirm the result
8644 arch and ISA flags. */
8645 *res = arch;
8646 *isa_flags = isa_temp;
8647 return AARCH64_PARSE_OK;
43e9d192
IB
8648 }
8649 }
8650
8651 /* ARCH name not found in list. */
0cfff2a1 8652 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
8653}
8654
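/* An example of the expected input (illustrative): for
   TO_PARSE == "armv8-a+crc" the text before the '+' selects the armv8-a
   entry in all_architectures and the "+crc" suffix is passed on to
   aarch64_parse_extension to update the ISA flag set.  */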
0cfff2a1
KT
8655/* Parse the TO_PARSE string and put the selected cpu in RES and the
8656 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
8657 describing the parse result. If there is an error parsing, RES and
8658 ISA_FLAGS are left unchanged. */
43e9d192 8659
0cfff2a1
KT
8660static enum aarch64_parse_opt_result
8661aarch64_parse_cpu (const char *to_parse, const struct processor **res,
8662 unsigned long *isa_flags)
43e9d192
IB
8663{
8664 char *ext;
8665 const struct processor *cpu;
0cfff2a1 8666 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
8667 size_t len;
8668
0cfff2a1 8669 strcpy (str, to_parse);
43e9d192
IB
8670
8671 ext = strchr (str, '+');
8672
8673 if (ext != NULL)
8674 len = ext - str;
8675 else
8676 len = strlen (str);
8677
8678 if (len == 0)
0cfff2a1
KT
8679 return AARCH64_PARSE_MISSING_ARG;
8680
43e9d192
IB
8681
8682 /* Loop through the list of supported CPUs to find a match. */
8683 for (cpu = all_cores; cpu->name != NULL; cpu++)
8684 {
8685 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
8686 {
0cfff2a1
KT
8687 unsigned long isa_temp = cpu->flags;
8688
43e9d192
IB
8689
8690 if (ext != NULL)
8691 {
0cfff2a1
KT
8692 /* TO_PARSE string contains at least one extension. */
8693 enum aarch64_parse_opt_result ext_res
8694 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 8695
0cfff2a1
KT
8696 if (ext_res != AARCH64_PARSE_OK)
8697 return ext_res;
8698 }
 8699 /* Extension parsing was successful. Confirm the result
8700 cpu and ISA flags. */
8701 *res = cpu;
8702 *isa_flags = isa_temp;
8703 return AARCH64_PARSE_OK;
43e9d192
IB
8704 }
8705 }
8706
8707 /* CPU name not found in list. */
0cfff2a1 8708 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
8709}
8710
0cfff2a1
KT
8711/* Parse the TO_PARSE string and put the cpu it selects into RES.
8712 Return an aarch64_parse_opt_result describing the parse result.
8713 If the parsing fails the RES does not change. */
43e9d192 8714
0cfff2a1
KT
8715static enum aarch64_parse_opt_result
8716aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
8717{
8718 const struct processor *cpu;
0cfff2a1
KT
8719 char *str = (char *) alloca (strlen (to_parse) + 1);
8720
8721 strcpy (str, to_parse);
43e9d192
IB
8722
8723 /* Loop through the list of supported CPUs to find a match. */
8724 for (cpu = all_cores; cpu->name != NULL; cpu++)
8725 {
8726 if (strcmp (cpu->name, str) == 0)
8727 {
0cfff2a1
KT
8728 *res = cpu;
8729 return AARCH64_PARSE_OK;
43e9d192
IB
8730 }
8731 }
8732
8733 /* CPU name not found in list. */
0cfff2a1 8734 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
8735}
8736
8dec06f2
JG
 8737/* Parse TOKEN, which has length LENGTH, to see if it is an option
8738 described in FLAG. If it is, return the index bit for that fusion type.
8739 If not, error (printing OPTION_NAME) and return zero. */
8740
8741static unsigned int
8742aarch64_parse_one_option_token (const char *token,
8743 size_t length,
8744 const struct aarch64_flag_desc *flag,
8745 const char *option_name)
8746{
8747 for (; flag->name != NULL; flag++)
8748 {
8749 if (length == strlen (flag->name)
8750 && !strncmp (flag->name, token, length))
8751 return flag->flag;
8752 }
8753
8754 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
8755 return 0;
8756}
8757
8758/* Parse OPTION which is a comma-separated list of flags to enable.
8759 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
8760 default state we inherit from the CPU tuning structures. OPTION_NAME
8761 gives the top-level option we are parsing in the -moverride string,
8762 for use in error messages. */
8763
8764static unsigned int
8765aarch64_parse_boolean_options (const char *option,
8766 const struct aarch64_flag_desc *flags,
8767 unsigned int initial_state,
8768 const char *option_name)
8769{
8770 const char separator = '.';
8771 const char* specs = option;
8772 const char* ntoken = option;
8773 unsigned int found_flags = initial_state;
8774
8775 while ((ntoken = strchr (specs, separator)))
8776 {
8777 size_t token_length = ntoken - specs;
8778 unsigned token_ops = aarch64_parse_one_option_token (specs,
8779 token_length,
8780 flags,
8781 option_name);
8782 /* If we find "none" (or, for simplicity's sake, an error) anywhere
8783 in the token stream, reset the supported operations. So:
8784
8785 adrp+add.cmp+branch.none.adrp+add
8786
8787 would have the result of turning on only adrp+add fusion. */
8788 if (!token_ops)
8789 found_flags = 0;
8790
8791 found_flags |= token_ops;
8792 specs = ++ntoken;
8793 }
8794
 8795  /* We ended with a trailing separator; the string is ill-formed, so report an error.  */
8796 if (!(*specs))
8797 {
8798 error ("%s string ill-formed\n", option_name);
8799 return 0;
8800 }
8801
8802 /* We still have one more token to parse. */
8803 size_t token_length = strlen (specs);
8804 unsigned token_ops = aarch64_parse_one_option_token (specs,
8805 token_length,
8806 flags,
8807 option_name);
8808 if (!token_ops)
8809 found_flags = 0;
8810
8811 found_flags |= token_ops;
8812 return found_flags;
8813}
8814
8815/* Support for overriding instruction fusion. */
8816
8817static void
8818aarch64_parse_fuse_string (const char *fuse_string,
8819 struct tune_params *tune)
8820{
8821 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
8822 aarch64_fusible_pairs,
8823 tune->fusible_ops,
8824 "fuse=");
8825}
8826
8827/* Support for overriding other tuning flags. */
8828
8829static void
8830aarch64_parse_tune_string (const char *tune_string,
8831 struct tune_params *tune)
8832{
8833 tune->extra_tuning_flags
8834 = aarch64_parse_boolean_options (tune_string,
8835 aarch64_tuning_flags,
8836 tune->extra_tuning_flags,
8837 "tune=");
8838}
8839
 8840/* Parse TOKEN, which has length LENGTH, to see if it is a tuning option
 8841   we understand.  If it is, extract the option string and hand off to
8842 the appropriate function. */
8843
8844void
8845aarch64_parse_one_override_token (const char* token,
8846 size_t length,
8847 struct tune_params *tune)
8848{
8849 const struct aarch64_tuning_override_function *fn
8850 = aarch64_tuning_override_functions;
8851
8852 const char *option_part = strchr (token, '=');
8853 if (!option_part)
8854 {
8855 error ("tuning string missing in option (%s)", token);
8856 return;
8857 }
8858
8859 /* Get the length of the option name. */
8860 length = option_part - token;
8861 /* Skip the '=' to get to the option string. */
8862 option_part++;
8863
8864 for (; fn->name != NULL; fn++)
8865 {
8866 if (!strncmp (fn->name, token, length))
8867 {
8868 fn->parse_override (option_part, tune);
8869 return;
8870 }
8871 }
8872
8873 error ("unknown tuning option (%s)",token);
8874 return;
8875}
8876
5eee3c34
JW
 8877/* Set the default -mtls-size if none was given and clamp it to the limits
        of the code model in OPTS.  */
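/* For example (behavior taken from the switch below): with -mcmodel=tiny an
   explicit -mtls-size=32 is clamped down to 24, under -mcmodel=small it is
   left at 32, and an unspecified size defaults to 24.  */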
8878
8879static void
8880initialize_aarch64_tls_size (struct gcc_options *opts)
8881{
8882 if (aarch64_tls_size == 0)
8883 aarch64_tls_size = 24;
8884
8885 switch (opts->x_aarch64_cmodel_var)
8886 {
8887 case AARCH64_CMODEL_TINY:
 8888      /* Both the default and maximum TLS size allowed under tiny are 1M,
 8889	  which needs two instructions to address, so we clamp the size to 24.  */
8890 if (aarch64_tls_size > 24)
8891 aarch64_tls_size = 24;
8892 break;
8893 case AARCH64_CMODEL_SMALL:
8894 /* The maximum TLS size allowed under small is 4G. */
8895 if (aarch64_tls_size > 32)
8896 aarch64_tls_size = 32;
8897 break;
8898 case AARCH64_CMODEL_LARGE:
8899 /* The maximum TLS size allowed under large is 16E.
8900 FIXME: 16E should be 64bit, we only support 48bit offset now. */
8901 if (aarch64_tls_size > 48)
8902 aarch64_tls_size = 48;
8903 break;
8904 default:
8905 gcc_unreachable ();
8906 }
8907
8908 return;
8909}
8910
8dec06f2
JG
8911/* Parse STRING looking for options in the format:
8912 string :: option:string
8913 option :: name=substring
8914 name :: {a-z}
8915 substring :: defined by option. */
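/* For illustration only: an override string such as

     -moverride=fuse=adrp+add.cmp+branch:tune=<flag>

   is split on ':' into the options "fuse=adrp+add.cmp+branch" and
   "tune=<flag>", each of which is handed to aarch64_parse_one_override_token
   (defined above), which in turn dispatches on the name before the '='.
   The fusion pair names are the ones used in the example in
   aarch64_parse_boolean_options; <flag> stands for any entry in
   aarch64_tuning_flags.  */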
8916
8917static void
8918aarch64_parse_override_string (const char* input_string,
8919 struct tune_params* tune)
8920{
8921 const char separator = ':';
8922 size_t string_length = strlen (input_string) + 1;
8923 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
8924 char *string = string_root;
8925 strncpy (string, input_string, string_length);
8926 string[string_length - 1] = '\0';
8927
8928 char* ntoken = string;
8929
8930 while ((ntoken = strchr (string, separator)))
8931 {
8932 size_t token_length = ntoken - string;
8933 /* Make this substring look like a string. */
8934 *ntoken = '\0';
8935 aarch64_parse_one_override_token (string, token_length, tune);
8936 string = ++ntoken;
8937 }
8938
8939 /* One last option to parse. */
8940 aarch64_parse_one_override_token (string, strlen (string), tune);
8941 free (string_root);
8942}
43e9d192 8943
43e9d192
IB
8944
8945static void
0cfff2a1 8946aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 8947{
a3dc8760
NC
8948 /* The logic here is that if we are disabling all frame pointer generation
8949 then we do not need to disable leaf frame pointer generation as a
8950 separate operation. But if we are *only* disabling leaf frame pointer
8951 generation then we set flag_omit_frame_pointer to true, but in
8952 aarch64_frame_pointer_required we return false only for leaf functions.
8953
8954 PR 70044: We have to be careful about being called multiple times for the
8955 same function. Once we have decided to set flag_omit_frame_pointer just
8956 so that we can omit leaf frame pointers, we must then not interpret a
8957 second call as meaning that all frame pointer generation should be
8958 omitted. We do this by setting flag_omit_frame_pointer to a special,
8959 non-zero value. */
8960 if (opts->x_flag_omit_frame_pointer == 2)
8961 opts->x_flag_omit_frame_pointer = 0;
8962
0cfff2a1
KT
8963 if (opts->x_flag_omit_frame_pointer)
8964 opts->x_flag_omit_leaf_frame_pointer = false;
8965 else if (opts->x_flag_omit_leaf_frame_pointer)
a3dc8760 8966 opts->x_flag_omit_frame_pointer = 2;
43e9d192 8967
1be34295 8968 /* If not optimizing for size, set the default
0cfff2a1
KT
8969 alignment to what the target wants. */
8970 if (!opts->x_optimize_size)
43e9d192 8971 {
0cfff2a1
KT
8972 if (opts->x_align_loops <= 0)
8973 opts->x_align_loops = aarch64_tune_params.loop_align;
8974 if (opts->x_align_jumps <= 0)
8975 opts->x_align_jumps = aarch64_tune_params.jump_align;
8976 if (opts->x_align_functions <= 0)
8977 opts->x_align_functions = aarch64_tune_params.function_align;
43e9d192 8978 }
b4f50fd4 8979
9ee6540a
WD
8980 /* We default to no pc-relative literal loads. */
8981
8982 aarch64_pcrelative_literal_loads = false;
8983
8984 /* If -mpc-relative-literal-loads is set on the command line, this
b4f50fd4 8985 implies that the user asked for PC relative literal loads. */
9ee6540a
WD
8986 if (opts->x_pcrelative_literal_loads == 1)
8987 aarch64_pcrelative_literal_loads = true;
b4f50fd4 8988
48bb1a55
CL
8989 /* This is PR70113. When building the Linux kernel with
8990 CONFIG_ARM64_ERRATUM_843419, support for relocations
8991 R_AARCH64_ADR_PREL_PG_HI21 and R_AARCH64_ADR_PREL_PG_HI21_NC is
8992 removed from the kernel to avoid loading objects with possibly
9ee6540a 8993 offending sequences. Without -mpc-relative-literal-loads we would
48bb1a55
CL
8994 generate such relocations, preventing the kernel build from
8995 succeeding. */
9ee6540a
WD
8996 if (opts->x_pcrelative_literal_loads == 2
8997 && TARGET_FIX_ERR_A53_843419)
8998 aarch64_pcrelative_literal_loads = true;
8999
9000 /* In the tiny memory model it makes no sense to disallow PC relative
9001 literal pool loads. */
9002 if (aarch64_cmodel == AARCH64_CMODEL_TINY
9003 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
9004 aarch64_pcrelative_literal_loads = true;
98daafa0
EM
9005
9006 /* When enabling the lower precision Newton series for the square root, also
9007 enable it for the reciprocal square root, since the latter is an
9008 intermediary step for the former. */
9009 if (flag_mlow_precision_sqrt)
9010 flag_mrecip_low_precision_sqrt = true;
0cfff2a1 9011}
43e9d192 9012
0cfff2a1
KT
 9013/* 'Unpack' the internal tuning structs and update the options
9014 in OPTS. The caller must have set up selected_tune and selected_arch
9015 as all the other target-specific codegen decisions are
9016 derived from them. */
9017
e4ea20c8 9018void
0cfff2a1
KT
9019aarch64_override_options_internal (struct gcc_options *opts)
9020{
9021 aarch64_tune_flags = selected_tune->flags;
9022 aarch64_tune = selected_tune->sched_core;
9023 /* Make a copy of the tuning parameters attached to the core, which
9024 we may later overwrite. */
9025 aarch64_tune_params = *(selected_tune->tune);
9026 aarch64_architecture_version = selected_arch->architecture_version;
9027
9028 if (opts->x_aarch64_override_tune_string)
9029 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
9030 &aarch64_tune_params);
9031
9032 /* This target defaults to strict volatile bitfields. */
9033 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
9034 opts->x_flag_strict_volatile_bitfields = 1;
9035
0cfff2a1 9036 initialize_aarch64_code_model (opts);
5eee3c34 9037 initialize_aarch64_tls_size (opts);
63892fa2 9038
2d6bc7fa
KT
9039 int queue_depth = 0;
9040 switch (aarch64_tune_params.autoprefetcher_model)
9041 {
9042 case tune_params::AUTOPREFETCHER_OFF:
9043 queue_depth = -1;
9044 break;
9045 case tune_params::AUTOPREFETCHER_WEAK:
9046 queue_depth = 0;
9047 break;
9048 case tune_params::AUTOPREFETCHER_STRONG:
9049 queue_depth = max_insn_queue_index + 1;
9050 break;
9051 default:
9052 gcc_unreachable ();
9053 }
9054
9055 /* We don't mind passing in global_options_set here as we don't use
9056 the *options_set structs anyway. */
9057 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
9058 queue_depth,
9059 opts->x_param_values,
9060 global_options_set.x_param_values);
9061
9d2c6e2e
MK
9062 /* Set up parameters to be used in prefetching algorithm. Do not
9063 override the defaults unless we are tuning for a core we have
9064 researched values for. */
9065 if (aarch64_tune_params.prefetch->num_slots > 0)
9066 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
9067 aarch64_tune_params.prefetch->num_slots,
9068 opts->x_param_values,
9069 global_options_set.x_param_values);
9070 if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
9071 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
9072 aarch64_tune_params.prefetch->l1_cache_size,
9073 opts->x_param_values,
9074 global_options_set.x_param_values);
9075 if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
50487d79 9076 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
9d2c6e2e
MK
9077 aarch64_tune_params.prefetch->l1_cache_line_size,
9078 opts->x_param_values,
9079 global_options_set.x_param_values);
9080 if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
9081 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
9082 aarch64_tune_params.prefetch->l2_cache_size,
50487d79
EM
9083 opts->x_param_values,
9084 global_options_set.x_param_values);
9085
16b2cafd
MK
 9086  /* Enable software prefetching at the specified optimization level for
 9087     CPUs that have prefetch.  Lower the optimization level threshold by 1
9088 when profiling is enabled. */
9089 if (opts->x_flag_prefetch_loop_arrays < 0
9090 && !opts->x_optimize_size
9091 && aarch64_tune_params.prefetch->default_opt_level >= 0
9092 && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
9093 opts->x_flag_prefetch_loop_arrays = 1;
9094
0cfff2a1
KT
9095 aarch64_override_options_after_change_1 (opts);
9096}
43e9d192 9097
01f44038
KT
9098/* Print a hint with a suggestion for a core or architecture name that
9099 most closely resembles what the user passed in STR. ARCH is true if
9100 the user is asking for an architecture name. ARCH is false if the user
9101 is asking for a core name. */
9102
9103static void
9104aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
9105{
9106 auto_vec<const char *> candidates;
9107 const struct processor *entry = arch ? all_architectures : all_cores;
9108 for (; entry->name != NULL; entry++)
9109 candidates.safe_push (entry->name);
9110 char *s;
9111 const char *hint = candidates_list_and_hint (str, s, candidates);
9112 if (hint)
9113 inform (input_location, "valid arguments are: %s;"
9114 " did you mean %qs?", s, hint);
9115 XDELETEVEC (s);
9116}
9117
9118/* Print a hint with a suggestion for a core name that most closely resembles
9119 what the user passed in STR. */
9120
9121inline static void
9122aarch64_print_hint_for_core (const char *str)
9123{
9124 aarch64_print_hint_for_core_or_arch (str, false);
9125}
9126
9127/* Print a hint with a suggestion for an architecture name that most closely
9128 resembles what the user passed in STR. */
9129
9130inline static void
9131aarch64_print_hint_for_arch (const char *str)
9132{
9133 aarch64_print_hint_for_core_or_arch (str, true);
9134}
9135
0cfff2a1
KT
9136/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
 9137   specified in STR and throw errors if appropriate.  Put the results, if
361fb3ee
KT
 9138   they are valid, in RES and ISA_FLAGS.  Return whether the option is
9139 valid. */
43e9d192 9140
361fb3ee 9141static bool
0cfff2a1
KT
9142aarch64_validate_mcpu (const char *str, const struct processor **res,
9143 unsigned long *isa_flags)
9144{
9145 enum aarch64_parse_opt_result parse_res
9146 = aarch64_parse_cpu (str, res, isa_flags);
9147
9148 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 9149 return true;
0cfff2a1
KT
9150
9151 switch (parse_res)
9152 {
9153 case AARCH64_PARSE_MISSING_ARG:
fb241da2 9154 error ("missing cpu name in %<-mcpu=%s%>", str);
0cfff2a1
KT
9155 break;
9156 case AARCH64_PARSE_INVALID_ARG:
9157 error ("unknown value %qs for -mcpu", str);
01f44038 9158 aarch64_print_hint_for_core (str);
0cfff2a1
KT
9159 break;
9160 case AARCH64_PARSE_INVALID_FEATURE:
fb241da2 9161 error ("invalid feature modifier in %<-mcpu=%s%>", str);
0cfff2a1
KT
9162 break;
9163 default:
9164 gcc_unreachable ();
9165 }
361fb3ee
KT
9166
9167 return false;
0cfff2a1
KT
9168}
9169
9170/* Validate a command-line -march option. Parse the arch and extensions
9171 (if any) specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
9172 results, if they are valid, in RES and ISA_FLAGS. Return whether the
9173 option is valid. */
0cfff2a1 9174
361fb3ee 9175static bool
0cfff2a1 9176aarch64_validate_march (const char *str, const struct processor **res,
01f44038 9177 unsigned long *isa_flags)
0cfff2a1
KT
9178{
9179 enum aarch64_parse_opt_result parse_res
9180 = aarch64_parse_arch (str, res, isa_flags);
9181
9182 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 9183 return true;
0cfff2a1
KT
9184
9185 switch (parse_res)
9186 {
9187 case AARCH64_PARSE_MISSING_ARG:
fb241da2 9188 error ("missing arch name in %<-march=%s%>", str);
0cfff2a1
KT
9189 break;
9190 case AARCH64_PARSE_INVALID_ARG:
9191 error ("unknown value %qs for -march", str);
01f44038 9192 aarch64_print_hint_for_arch (str);
0cfff2a1
KT
9193 break;
9194 case AARCH64_PARSE_INVALID_FEATURE:
fb241da2 9195 error ("invalid feature modifier in %<-march=%s%>", str);
0cfff2a1
KT
9196 break;
9197 default:
9198 gcc_unreachable ();
9199 }
361fb3ee
KT
9200
9201 return false;
0cfff2a1
KT
9202}
9203
9204/* Validate a command-line -mtune option. Parse the cpu
9205 specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
9206 result, if it is valid, in RES. Return whether the option is
9207 valid. */
0cfff2a1 9208
361fb3ee 9209static bool
0cfff2a1
KT
9210aarch64_validate_mtune (const char *str, const struct processor **res)
9211{
9212 enum aarch64_parse_opt_result parse_res
9213 = aarch64_parse_tune (str, res);
9214
9215 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 9216 return true;
0cfff2a1
KT
9217
9218 switch (parse_res)
9219 {
9220 case AARCH64_PARSE_MISSING_ARG:
fb241da2 9221 error ("missing cpu name in %<-mtune=%s%>", str);
0cfff2a1
KT
9222 break;
9223 case AARCH64_PARSE_INVALID_ARG:
9224 error ("unknown value %qs for -mtune", str);
01f44038 9225 aarch64_print_hint_for_core (str);
0cfff2a1
KT
9226 break;
9227 default:
9228 gcc_unreachable ();
9229 }
361fb3ee
KT
9230 return false;
9231}
9232
9233/* Return the CPU corresponding to the enum CPU.
9234 If it doesn't specify a cpu, return the default. */
9235
9236static const struct processor *
9237aarch64_get_tune_cpu (enum aarch64_processor cpu)
9238{
9239 if (cpu != aarch64_none)
9240 return &all_cores[cpu];
9241
9242 /* The & 0x3f is to extract the bottom 6 bits that encode the
9243 default cpu as selected by the --with-cpu GCC configure option
9244 in config.gcc.
9245 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
9246 flags mechanism should be reworked to make it more sane. */
9247 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
9248}
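/* Note on the encoding assumed above: TARGET_CPU_DEFAULT packs the
   configure-time default CPU into its low 6 bits and the default ISA flags
   into the remaining upper bits, which is why aarch64_override_options
   recovers the default flags with TARGET_CPU_DEFAULT >> 6.  */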
9249
9250/* Return the architecture corresponding to the enum ARCH.
9251 If it doesn't specify a valid architecture, return the default. */
9252
9253static const struct processor *
9254aarch64_get_arch (enum aarch64_arch arch)
9255{
9256 if (arch != aarch64_no_arch)
9257 return &all_architectures[arch];
9258
9259 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
9260
9261 return &all_architectures[cpu->arch];
0cfff2a1
KT
9262}
9263
9264/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
9265 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
9266 tuning structs. In particular it must set selected_tune and
9267 aarch64_isa_flags that define the available ISA features and tuning
9268 decisions. It must also set selected_arch as this will be used to
9269 output the .arch asm tags for each function. */
9270
9271static void
9272aarch64_override_options (void)
9273{
9274 unsigned long cpu_isa = 0;
9275 unsigned long arch_isa = 0;
9276 aarch64_isa_flags = 0;
9277
361fb3ee
KT
9278 bool valid_cpu = true;
9279 bool valid_tune = true;
9280 bool valid_arch = true;
9281
0cfff2a1
KT
9282 selected_cpu = NULL;
9283 selected_arch = NULL;
9284 selected_tune = NULL;
9285
9286 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
9287 If either of -march or -mtune is given, they override their
9288 respective component of -mcpu. */
9289 if (aarch64_cpu_string)
361fb3ee
KT
9290 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
9291 &cpu_isa);
0cfff2a1
KT
9292
9293 if (aarch64_arch_string)
361fb3ee
KT
9294 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
9295 &arch_isa);
0cfff2a1
KT
9296
9297 if (aarch64_tune_string)
361fb3ee 9298 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192
IB
9299
9300 /* If the user did not specify a processor, choose the default
9301 one for them. This will be the CPU set during configuration using
a3cd0246 9302 --with-cpu, otherwise it is "generic". */
43e9d192
IB
9303 if (!selected_cpu)
9304 {
0cfff2a1
KT
9305 if (selected_arch)
9306 {
9307 selected_cpu = &all_cores[selected_arch->ident];
9308 aarch64_isa_flags = arch_isa;
361fb3ee 9309 explicit_arch = selected_arch->arch;
0cfff2a1
KT
9310 }
9311 else
9312 {
361fb3ee
KT
9313 /* Get default configure-time CPU. */
9314 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
0cfff2a1
KT
9315 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
9316 }
361fb3ee
KT
9317
9318 if (selected_tune)
9319 explicit_tune_core = selected_tune->ident;
0cfff2a1
KT
9320 }
 9321  /* If both -mcpu and -march are specified, check that they are architecturally
9322 compatible, warn if they're not and prefer the -march ISA flags. */
9323 else if (selected_arch)
9324 {
9325 if (selected_arch->arch != selected_cpu->arch)
9326 {
9327 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
9328 all_architectures[selected_cpu->arch].name,
9329 selected_arch->name);
9330 }
9331 aarch64_isa_flags = arch_isa;
361fb3ee
KT
9332 explicit_arch = selected_arch->arch;
9333 explicit_tune_core = selected_tune ? selected_tune->ident
9334 : selected_cpu->ident;
0cfff2a1
KT
9335 }
9336 else
9337 {
9338 /* -mcpu but no -march. */
9339 aarch64_isa_flags = cpu_isa;
361fb3ee
KT
9340 explicit_tune_core = selected_tune ? selected_tune->ident
9341 : selected_cpu->ident;
9342 gcc_assert (selected_cpu);
9343 selected_arch = &all_architectures[selected_cpu->arch];
9344 explicit_arch = selected_arch->arch;
43e9d192
IB
9345 }
9346
0cfff2a1
KT
 9347  /* Set the arch as well, as we will need it when outputting
9348 the .arch directive in assembly. */
9349 if (!selected_arch)
9350 {
9351 gcc_assert (selected_cpu);
9352 selected_arch = &all_architectures[selected_cpu->arch];
9353 }
43e9d192 9354
43e9d192 9355 if (!selected_tune)
3edaf26d 9356 selected_tune = selected_cpu;
43e9d192 9357
0cfff2a1
KT
9358#ifndef HAVE_AS_MABI_OPTION
9359 /* The compiler may have been configured with 2.23.* binutils, which does
9360 not have support for ILP32. */
9361 if (TARGET_ILP32)
9362 error ("Assembler does not support -mabi=ilp32");
9363#endif
43e9d192 9364
db58fd89
JW
9365 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
9366 sorry ("Return address signing is only supported for -mabi=lp64");
9367
361fb3ee
KT
9368 /* Make sure we properly set up the explicit options. */
9369 if ((aarch64_cpu_string && valid_cpu)
9370 || (aarch64_tune_string && valid_tune))
9371 gcc_assert (explicit_tune_core != aarch64_none);
9372
9373 if ((aarch64_cpu_string && valid_cpu)
9374 || (aarch64_arch_string && valid_arch))
9375 gcc_assert (explicit_arch != aarch64_no_arch);
9376
0cfff2a1
KT
9377 aarch64_override_options_internal (&global_options);
9378
9379 /* Save these options as the default ones in case we push and pop them later
9380 while processing functions with potential target attributes. */
9381 target_option_default_node = target_option_current_node
9382 = build_target_option_node (&global_options);
43e9d192
IB
9383}
9384
9385/* Implement targetm.override_options_after_change. */
9386
9387static void
9388aarch64_override_options_after_change (void)
9389{
0cfff2a1 9390 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
9391}
9392
9393static struct machine_function *
9394aarch64_init_machine_status (void)
9395{
9396 struct machine_function *machine;
766090c2 9397 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
9398 return machine;
9399}
9400
9401void
9402aarch64_init_expanders (void)
9403{
9404 init_machine_status = aarch64_init_machine_status;
9405}
9406
 9407/* Select the code model (aarch64_cmodel) based on OPTS, taking the
        -fpic/-fPIC settings into account.  */
9408static void
0cfff2a1 9409initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 9410{
0cfff2a1 9411 if (opts->x_flag_pic)
43e9d192 9412 {
0cfff2a1 9413 switch (opts->x_aarch64_cmodel_var)
43e9d192
IB
9414 {
9415 case AARCH64_CMODEL_TINY:
9416 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
9417 break;
9418 case AARCH64_CMODEL_SMALL:
34ecdb0f 9419#ifdef HAVE_AS_SMALL_PIC_RELOCS
1b1e81f8
JW
9420 aarch64_cmodel = (flag_pic == 2
9421 ? AARCH64_CMODEL_SMALL_PIC
9422 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f
JW
9423#else
9424 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
9425#endif
43e9d192
IB
9426 break;
9427 case AARCH64_CMODEL_LARGE:
9428 sorry ("code model %qs with -f%s", "large",
0cfff2a1 9429 opts->x_flag_pic > 1 ? "PIC" : "pic");
1c652781 9430 break;
43e9d192
IB
9431 default:
9432 gcc_unreachable ();
9433 }
9434 }
9435 else
0cfff2a1 9436 aarch64_cmodel = opts->x_aarch64_cmodel_var;
43e9d192
IB
9437}
9438
361fb3ee
KT
9439/* Implement TARGET_OPTION_SAVE. */
9440
9441static void
9442aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
9443{
9444 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
9445}
9446
9447/* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
9448 using the information saved in PTR. */
9449
9450static void
9451aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
9452{
9453 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
9454 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
9455 opts->x_explicit_arch = ptr->x_explicit_arch;
9456 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
9457 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
9458
9459 aarch64_override_options_internal (opts);
9460}
9461
9462/* Implement TARGET_OPTION_PRINT. */
9463
9464static void
9465aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
9466{
9467 const struct processor *cpu
9468 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
9469 unsigned long isa_flags = ptr->x_aarch64_isa_flags;
9470 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
054b4005 9471 std::string extension
04a99ebe 9472 = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
361fb3ee
KT
9473
9474 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
054b4005
JG
9475 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
9476 arch->name, extension.c_str ());
361fb3ee
KT
9477}
9478
d78006d9
KT
9479static GTY(()) tree aarch64_previous_fndecl;
9480
e4ea20c8
KT
9481void
9482aarch64_reset_previous_fndecl (void)
9483{
9484 aarch64_previous_fndecl = NULL;
9485}
9486
acfc1ac1
KT
9487/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
9488 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
9489 make sure optab availability predicates are recomputed when necessary. */
9490
9491void
9492aarch64_save_restore_target_globals (tree new_tree)
9493{
9494 if (TREE_TARGET_GLOBALS (new_tree))
9495 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
9496 else if (new_tree == target_option_default_node)
9497 restore_target_globals (&default_target_globals);
9498 else
9499 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
9500}
9501
d78006d9
KT
9502/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
9503 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
9504 of the function, if such exists. This function may be called multiple
9505 times on a single function so use aarch64_previous_fndecl to avoid
9506 setting up identical state. */
9507
9508static void
9509aarch64_set_current_function (tree fndecl)
9510{
acfc1ac1
KT
9511 if (!fndecl || fndecl == aarch64_previous_fndecl)
9512 return;
9513
d78006d9
KT
9514 tree old_tree = (aarch64_previous_fndecl
9515 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
9516 : NULL_TREE);
9517
acfc1ac1 9518 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
d78006d9 9519
acfc1ac1
KT
9520 /* If current function has no attributes but the previous one did,
9521 use the default node. */
9522 if (!new_tree && old_tree)
9523 new_tree = target_option_default_node;
d78006d9 9524
acfc1ac1
KT
9525 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
9526 the default have been handled by aarch64_save_restore_target_globals from
9527 aarch64_pragma_target_parse. */
9528 if (old_tree == new_tree)
9529 return;
d78006d9 9530
acfc1ac1 9531 aarch64_previous_fndecl = fndecl;
6e17a23b 9532
acfc1ac1
KT
9533 /* First set the target options. */
9534 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6e17a23b 9535
acfc1ac1 9536 aarch64_save_restore_target_globals (new_tree);
d78006d9 9537}
361fb3ee 9538
5a2c8331
KT
9539/* Enum describing the various ways we can handle attributes.
9540 In many cases we can reuse the generic option handling machinery. */
9541
9542enum aarch64_attr_opt_type
9543{
9544 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
9545 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
9546 aarch64_attr_enum, /* Attribute sets an enum variable. */
9547 aarch64_attr_custom /* Attribute requires a custom handling function. */
9548};
9549
9550/* All the information needed to handle a target attribute.
9551 NAME is the name of the attribute.
9c582551 9552 ATTR_TYPE specifies the type of behavior of the attribute as described
5a2c8331
KT
9553 in the definition of enum aarch64_attr_opt_type.
9554 ALLOW_NEG is true if the attribute supports a "no-" form.
9555 HANDLER is the function that takes the attribute string and whether
9556 it is a pragma or attribute and handles the option. It is needed only
9557 when the ATTR_TYPE is aarch64_attr_custom.
9558 OPT_NUM is the enum specifying the option that the attribute modifies.
9c582551 9559 This is needed for attributes that mirror the behavior of a command-line
5a2c8331
KT
9560 option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
9561 aarch64_attr_enum. */
9562
9563struct aarch64_attribute_info
9564{
9565 const char *name;
9566 enum aarch64_attr_opt_type attr_type;
9567 bool allow_neg;
9568 bool (*handler) (const char *, const char *);
9569 enum opt_code opt_num;
9570};
9571
9572/* Handle the ARCH_STR argument to the arch= target attribute.
9573 PRAGMA_OR_ATTR is used in potential error messages. */
9574
9575static bool
9576aarch64_handle_attr_arch (const char *str, const char *pragma_or_attr)
9577{
9578 const struct processor *tmp_arch = NULL;
9579 enum aarch64_parse_opt_result parse_res
9580 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags);
9581
9582 if (parse_res == AARCH64_PARSE_OK)
9583 {
9584 gcc_assert (tmp_arch);
9585 selected_arch = tmp_arch;
9586 explicit_arch = selected_arch->arch;
9587 return true;
9588 }
9589
9590 switch (parse_res)
9591 {
9592 case AARCH64_PARSE_MISSING_ARG:
9593 error ("missing architecture name in 'arch' target %s", pragma_or_attr);
9594 break;
9595 case AARCH64_PARSE_INVALID_ARG:
9596 error ("unknown value %qs for 'arch' target %s", str, pragma_or_attr);
01f44038 9597 aarch64_print_hint_for_arch (str);
5a2c8331
KT
9598 break;
9599 case AARCH64_PARSE_INVALID_FEATURE:
9600 error ("invalid feature modifier %qs for 'arch' target %s",
9601 str, pragma_or_attr);
9602 break;
9603 default:
9604 gcc_unreachable ();
9605 }
9606
9607 return false;
9608}
9609
9610/* Handle the argument CPU_STR to the cpu= target attribute.
9611 PRAGMA_OR_ATTR is used in potential error messages. */
9612
9613static bool
9614aarch64_handle_attr_cpu (const char *str, const char *pragma_or_attr)
9615{
9616 const struct processor *tmp_cpu = NULL;
9617 enum aarch64_parse_opt_result parse_res
9618 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags);
9619
9620 if (parse_res == AARCH64_PARSE_OK)
9621 {
9622 gcc_assert (tmp_cpu);
9623 selected_tune = tmp_cpu;
9624 explicit_tune_core = selected_tune->ident;
9625
9626 selected_arch = &all_architectures[tmp_cpu->arch];
9627 explicit_arch = selected_arch->arch;
9628 return true;
9629 }
9630
9631 switch (parse_res)
9632 {
9633 case AARCH64_PARSE_MISSING_ARG:
9634 error ("missing cpu name in 'cpu' target %s", pragma_or_attr);
9635 break;
9636 case AARCH64_PARSE_INVALID_ARG:
9637 error ("unknown value %qs for 'cpu' target %s", str, pragma_or_attr);
01f44038 9638 aarch64_print_hint_for_core (str);
5a2c8331
KT
9639 break;
9640 case AARCH64_PARSE_INVALID_FEATURE:
9641 error ("invalid feature modifier %qs for 'cpu' target %s",
9642 str, pragma_or_attr);
9643 break;
9644 default:
9645 gcc_unreachable ();
9646 }
9647
9648 return false;
9649}
9650
9651/* Handle the argument STR to the tune= target attribute.
9652 PRAGMA_OR_ATTR is used in potential error messages. */
9653
9654static bool
9655aarch64_handle_attr_tune (const char *str, const char *pragma_or_attr)
9656{
9657 const struct processor *tmp_tune = NULL;
9658 enum aarch64_parse_opt_result parse_res
9659 = aarch64_parse_tune (str, &tmp_tune);
9660
9661 if (parse_res == AARCH64_PARSE_OK)
9662 {
9663 gcc_assert (tmp_tune);
9664 selected_tune = tmp_tune;
9665 explicit_tune_core = selected_tune->ident;
9666 return true;
9667 }
9668
9669 switch (parse_res)
9670 {
9671 case AARCH64_PARSE_INVALID_ARG:
9672 error ("unknown value %qs for 'tune' target %s", str, pragma_or_attr);
01f44038 9673 aarch64_print_hint_for_core (str);
5a2c8331
KT
9674 break;
9675 default:
9676 gcc_unreachable ();
9677 }
9678
9679 return false;
9680}
9681
9682/* Parse an architecture extensions target attribute string specified in STR.
9683 For example "+fp+nosimd". Show any errors if needed. Return TRUE
9684 if successful. Update aarch64_isa_flags to reflect the ISA features
9685 modified.
9686 PRAGMA_OR_ATTR is used in potential error messages. */
9687
9688static bool
9689aarch64_handle_attr_isa_flags (char *str, const char *pragma_or_attr)
9690{
9691 enum aarch64_parse_opt_result parse_res;
9692 unsigned long isa_flags = aarch64_isa_flags;
9693
e4ea20c8
KT
9694 /* We allow "+nothing" in the beginning to clear out all architectural
9695 features if the user wants to handpick specific features. */
9696 if (strncmp ("+nothing", str, 8) == 0)
9697 {
9698 isa_flags = 0;
9699 str += 8;
9700 }
9701
5a2c8331
KT
9702 parse_res = aarch64_parse_extension (str, &isa_flags);
9703
9704 if (parse_res == AARCH64_PARSE_OK)
9705 {
9706 aarch64_isa_flags = isa_flags;
9707 return true;
9708 }
9709
9710 switch (parse_res)
9711 {
9712 case AARCH64_PARSE_MISSING_ARG:
9713 error ("missing feature modifier in target %s %qs",
9714 pragma_or_attr, str);
9715 break;
9716
9717 case AARCH64_PARSE_INVALID_FEATURE:
9718 error ("invalid feature modifier in target %s %qs",
9719 pragma_or_attr, str);
9720 break;
9721
9722 default:
9723 gcc_unreachable ();
9724 }
9725
9726 return false;
9727}
9728
9729/* The target attributes that we support. On top of these we also support just
9730 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
9731 handled explicitly in aarch64_process_one_target_attr. */
9732
9733static const struct aarch64_attribute_info aarch64_attributes[] =
9734{
9735 { "general-regs-only", aarch64_attr_mask, false, NULL,
9736 OPT_mgeneral_regs_only },
9737 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
9738 OPT_mfix_cortex_a53_835769 },
48bb1a55
CL
9739 { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
9740 OPT_mfix_cortex_a53_843419 },
5a2c8331
KT
9741 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
9742 { "strict-align", aarch64_attr_mask, false, NULL, OPT_mstrict_align },
9743 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
9744 OPT_momit_leaf_frame_pointer },
9745 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
9746 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
9747 OPT_march_ },
9748 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
9749 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
9750 OPT_mtune_ },
db58fd89
JW
9751 { "sign-return-address", aarch64_attr_enum, false, NULL,
9752 OPT_msign_return_address_ },
5a2c8331
KT
9753 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
9754};
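/* Illustrative uses of the attributes in the table above (names taken from
   the table; <core> stands for any entry in all_cores):

     __attribute__ ((target ("general-regs-only")))           aarch64_attr_mask
     __attribute__ ((target ("no-omit-leaf-frame-pointer")))  negated aarch64_attr_bool
     __attribute__ ((target ("cpu=<core>")))                  aarch64_attr_custom

   The last of these is dispatched to aarch64_handle_attr_cpu.  */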
9755
9756/* Parse ARG_STR which contains the definition of one target attribute.
9757 Show appropriate errors if any or return true if the attribute is valid.
9758 PRAGMA_OR_ATTR holds the string to use in error messages about whether
9759 we're processing a target attribute or pragma. */
9760
9761static bool
9762aarch64_process_one_target_attr (char *arg_str, const char* pragma_or_attr)
9763{
9764 bool invert = false;
9765
9766 size_t len = strlen (arg_str);
9767
9768 if (len == 0)
9769 {
9770 error ("malformed target %s", pragma_or_attr);
9771 return false;
9772 }
9773
9774 char *str_to_check = (char *) alloca (len + 1);
9775 strcpy (str_to_check, arg_str);
9776
9777 /* Skip leading whitespace. */
9778 while (*str_to_check == ' ' || *str_to_check == '\t')
9779 str_to_check++;
9780
9781 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
9782 It is easier to detect and handle it explicitly here rather than going
9783 through the machinery for the rest of the target attributes in this
9784 function. */
9785 if (*str_to_check == '+')
9786 return aarch64_handle_attr_isa_flags (str_to_check, pragma_or_attr);
9787
9788 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
9789 {
9790 invert = true;
9791 str_to_check += 3;
9792 }
9793 char *arg = strchr (str_to_check, '=');
9794
9795 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
9796 and point ARG to "foo". */
9797 if (arg)
9798 {
9799 *arg = '\0';
9800 arg++;
9801 }
9802 const struct aarch64_attribute_info *p_attr;
16d12992 9803 bool found = false;
5a2c8331
KT
9804 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
9805 {
9806 /* If the names don't match up, or the user has given an argument
9807 to an attribute that doesn't accept one, or didn't give an argument
9808 to an attribute that expects one, fail to match. */
9809 if (strcmp (str_to_check, p_attr->name) != 0)
9810 continue;
9811
16d12992 9812 found = true;
5a2c8331
KT
9813 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
9814 || p_attr->attr_type == aarch64_attr_enum;
9815
9816 if (attr_need_arg_p ^ (arg != NULL))
9817 {
9818 error ("target %s %qs does not accept an argument",
9819 pragma_or_attr, str_to_check);
9820 return false;
9821 }
9822
9823 /* If the name matches but the attribute does not allow "no-" versions
9824 then we can't match. */
9825 if (invert && !p_attr->allow_neg)
9826 {
9827 error ("target %s %qs does not allow a negated form",
9828 pragma_or_attr, str_to_check);
9829 return false;
9830 }
9831
9832 switch (p_attr->attr_type)
9833 {
9834 /* Has a custom handler registered.
9835 For example, cpu=, arch=, tune=. */
9836 case aarch64_attr_custom:
9837 gcc_assert (p_attr->handler);
9838 if (!p_attr->handler (arg, pragma_or_attr))
9839 return false;
9840 break;
9841
9842 /* Either set or unset a boolean option. */
9843 case aarch64_attr_bool:
9844 {
9845 struct cl_decoded_option decoded;
9846
9847 generate_option (p_attr->opt_num, NULL, !invert,
9848 CL_TARGET, &decoded);
9849 aarch64_handle_option (&global_options, &global_options_set,
9850 &decoded, input_location);
9851 break;
9852 }
9853 /* Set or unset a bit in the target_flags. aarch64_handle_option
9854 should know what mask to apply given the option number. */
9855 case aarch64_attr_mask:
9856 {
9857 struct cl_decoded_option decoded;
9858 /* We only need to specify the option number.
9859 aarch64_handle_option will know which mask to apply. */
9860 decoded.opt_index = p_attr->opt_num;
9861 decoded.value = !invert;
9862 aarch64_handle_option (&global_options, &global_options_set,
9863 &decoded, input_location);
9864 break;
9865 }
9866 /* Use the option setting machinery to set an option to an enum. */
9867 case aarch64_attr_enum:
9868 {
9869 gcc_assert (arg);
9870 bool valid;
9871 int value;
9872 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
9873 &value, CL_TARGET);
9874 if (valid)
9875 {
9876 set_option (&global_options, NULL, p_attr->opt_num, value,
9877 NULL, DK_UNSPECIFIED, input_location,
9878 global_dc);
9879 }
9880 else
9881 {
9882 error ("target %s %s=%s is not valid",
9883 pragma_or_attr, str_to_check, arg);
9884 }
9885 break;
9886 }
9887 default:
9888 gcc_unreachable ();
9889 }
9890 }
9891
16d12992
KT
9892 /* If we reached here we either have found an attribute and validated
9893 it or didn't match any. If we matched an attribute but its arguments
9894 were malformed we will have returned false already. */
9895 return found;
5a2c8331
KT
9896}
9897
9898/* Count how many times the character C appears in
9899 NULL-terminated string STR. */
9900
9901static unsigned int
9902num_occurences_in_str (char c, char *str)
9903{
9904 unsigned int res = 0;
9905 while (*str != '\0')
9906 {
9907 if (*str == c)
9908 res++;
9909
9910 str++;
9911 }
9912
9913 return res;
9914}
9915
9916/* Parse the tree in ARGS that contains the target attribute information
9917 and update the global target options space. PRAGMA_OR_ATTR is a string
9918 to be used in error messages, specifying whether this is processing
9919 a target attribute or a target pragma. */
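/* For example, a declaration such as

     __attribute__ ((target ("arch=armv8-a+crc,strict-align")))

   arrives here with ARGS holding the STRING_CST "arch=armv8-a+crc,strict-align".
   The string is split on ',' and each token is passed to
   aarch64_process_one_target_attr.  The attribute names are illustrative:
   "arch" and "strict-align" appear in aarch64_attributes above, and "+crc"
   is the example extension mentioned earlier in this file.  */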
9920
9921bool
9922aarch64_process_target_attr (tree args, const char* pragma_or_attr)
9923{
9924 if (TREE_CODE (args) == TREE_LIST)
9925 {
9926 do
9927 {
9928 tree head = TREE_VALUE (args);
9929 if (head)
9930 {
9931 if (!aarch64_process_target_attr (head, pragma_or_attr))
9932 return false;
9933 }
9934 args = TREE_CHAIN (args);
9935 } while (args);
9936
9937 return true;
9938 }
3b6cb9e3
ML
9939
9940 if (TREE_CODE (args) != STRING_CST)
9941 {
9942 error ("attribute %<target%> argument not a string");
9943 return false;
9944 }
5a2c8331
KT
9945
9946 size_t len = strlen (TREE_STRING_POINTER (args));
9947 char *str_to_check = (char *) alloca (len + 1);
9948 strcpy (str_to_check, TREE_STRING_POINTER (args));
9949
9950 if (len == 0)
9951 {
9952 error ("malformed target %s value", pragma_or_attr);
9953 return false;
9954 }
9955
 9956  /* Used to catch empty tokens between commas, i.e.
9957 attribute ((target ("attr1,,attr2"))). */
9958 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
9959
9960 /* Handle multiple target attributes separated by ','. */
9961 char *token = strtok (str_to_check, ",");
9962
9963 unsigned int num_attrs = 0;
9964 while (token)
9965 {
9966 num_attrs++;
9967 if (!aarch64_process_one_target_attr (token, pragma_or_attr))
9968 {
9969 error ("target %s %qs is invalid", pragma_or_attr, token);
9970 return false;
9971 }
9972
9973 token = strtok (NULL, ",");
9974 }
9975
9976 if (num_attrs != num_commas + 1)
9977 {
9978 error ("malformed target %s list %qs",
9979 pragma_or_attr, TREE_STRING_POINTER (args));
9980 return false;
9981 }
9982
9983 return true;
9984}
9985
9986/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
9987 process attribute ((target ("..."))). */
9988
9989static bool
9990aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
9991{
9992 struct cl_target_option cur_target;
9993 bool ret;
9994 tree old_optimize;
9995 tree new_target, new_optimize;
9996 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
91d0e8de
KT
9997
9998 /* If what we're processing is the current pragma string then the
9999 target option node is already stored in target_option_current_node
10000 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
10001 having to re-parse the string. This is especially useful to keep
10002 arm_neon.h compile times down since that header contains a lot
10003 of intrinsics enclosed in pragmas. */
10004 if (!existing_target && args == current_target_pragma)
10005 {
10006 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
10007 return true;
10008 }
5a2c8331
KT
10009 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
10010
10011 old_optimize = build_optimization_node (&global_options);
10012 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
10013
10014 /* If the function changed the optimization levels as well as setting
10015 target options, start with the optimizations specified. */
10016 if (func_optimize && func_optimize != old_optimize)
10017 cl_optimization_restore (&global_options,
10018 TREE_OPTIMIZATION (func_optimize));
10019
10020 /* Save the current target options to restore at the end. */
10021 cl_target_option_save (&cur_target, &global_options);
10022
10023 /* If fndecl already has some target attributes applied to it, unpack
10024 them so that we add this attribute on top of them, rather than
10025 overwriting them. */
10026 if (existing_target)
10027 {
10028 struct cl_target_option *existing_options
10029 = TREE_TARGET_OPTION (existing_target);
10030
10031 if (existing_options)
10032 cl_target_option_restore (&global_options, existing_options);
10033 }
10034 else
10035 cl_target_option_restore (&global_options,
10036 TREE_TARGET_OPTION (target_option_current_node));
10037
10038
10039 ret = aarch64_process_target_attr (args, "attribute");
10040
10041 /* Set up any additional state. */
10042 if (ret)
10043 {
10044 aarch64_override_options_internal (&global_options);
e95a988a
KT
10045 /* Initialize SIMD builtins if we haven't already.
10046 Set current_target_pragma to NULL for the duration so that
10047 the builtin initialization code doesn't try to tag the functions
10048 being built with the attributes specified by any current pragma, thus
10049 going into an infinite recursion. */
10050 if (TARGET_SIMD)
10051 {
10052 tree saved_current_target_pragma = current_target_pragma;
10053 current_target_pragma = NULL;
10054 aarch64_init_simd_builtins ();
10055 current_target_pragma = saved_current_target_pragma;
10056 }
5a2c8331
KT
10057 new_target = build_target_option_node (&global_options);
10058 }
10059 else
10060 new_target = NULL;
10061
10062 new_optimize = build_optimization_node (&global_options);
10063
10064 if (fndecl && ret)
10065 {
10066 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
10067
10068 if (old_optimize != new_optimize)
10069 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
10070 }
10071
10072 cl_target_option_restore (&global_options, &cur_target);
10073
10074 if (old_optimize != new_optimize)
10075 cl_optimization_restore (&global_options,
10076 TREE_OPTIMIZATION (old_optimize));
10077 return ret;
10078}
10079
1fd8d40c
KT
10080/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
10081 tri-bool options (yes, no, don't care) and the default value is
10082 DEF, determine whether to reject inlining. */
10083
10084static bool
10085aarch64_tribools_ok_for_inlining_p (int caller, int callee,
10086 int dont_care, int def)
10087{
10088 /* If the callee doesn't care, always allow inlining. */
10089 if (callee == dont_care)
10090 return true;
10091
10092 /* If the caller doesn't care, always allow inlining. */
10093 if (caller == dont_care)
10094 return true;
10095
10096 /* Otherwise, allow inlining if either the callee and caller values
10097 agree, or if the callee is using the default value. */
10098 return (callee == caller || callee == def);
10099}
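/* For instance, the omit-leaf-frame-pointer check below calls this with
   DONT_CARE == 2 and DEF == 1: a callee that did not specify
   -momit-leaf-frame-pointer (value 2) can be inlined anywhere, while a
   callee with an explicit setting is only inlined when the caller agrees
   or the callee uses the default.  */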
10100
10101/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
10102 to inline CALLEE into CALLER based on target-specific info.
10103 Make sure that the caller and callee have compatible architectural
10104 features. Then go through the other possible target attributes
10105 and see if they can block inlining. Try not to reject always_inline
10106 callees unless they are incompatible architecturally. */
10107
10108static bool
10109aarch64_can_inline_p (tree caller, tree callee)
10110{
10111 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
10112 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
10113
10114 /* If callee has no option attributes, then it is ok to inline. */
10115 if (!callee_tree)
10116 return true;
10117
10118 struct cl_target_option *caller_opts
10119 = TREE_TARGET_OPTION (caller_tree ? caller_tree
10120 : target_option_default_node);
10121
10122 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
10123
10124
10125 /* Callee's ISA flags should be a subset of the caller's. */
10126 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
10127 != callee_opts->x_aarch64_isa_flags)
10128 return false;
10129
10130 /* Allow non-strict aligned functions inlining into strict
10131 aligned ones. */
10132 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
10133 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
10134 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
10135 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
10136 return false;
10137
10138 bool always_inline = lookup_attribute ("always_inline",
10139 DECL_ATTRIBUTES (callee));
10140
10141 /* If the architectural features match up and the callee is always_inline
10142 then the other attributes don't matter. */
10143 if (always_inline)
10144 return true;
10145
10146 if (caller_opts->x_aarch64_cmodel_var
10147 != callee_opts->x_aarch64_cmodel_var)
10148 return false;
10149
10150 if (caller_opts->x_aarch64_tls_dialect
10151 != callee_opts->x_aarch64_tls_dialect)
10152 return false;
10153
10154 /* Honour explicit requests to workaround errata. */
10155 if (!aarch64_tribools_ok_for_inlining_p (
10156 caller_opts->x_aarch64_fix_a53_err835769,
10157 callee_opts->x_aarch64_fix_a53_err835769,
10158 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
10159 return false;
10160
48bb1a55
CL
10161 if (!aarch64_tribools_ok_for_inlining_p (
10162 caller_opts->x_aarch64_fix_a53_err843419,
10163 callee_opts->x_aarch64_fix_a53_err843419,
10164 2, TARGET_FIX_ERR_A53_843419))
10165 return false;
10166
1fd8d40c
KT
10167 /* If the user explicitly specified -momit-leaf-frame-pointer for the
 10168     caller and callee and they don't match up, reject inlining.  */
10169 if (!aarch64_tribools_ok_for_inlining_p (
10170 caller_opts->x_flag_omit_leaf_frame_pointer,
10171 callee_opts->x_flag_omit_leaf_frame_pointer,
10172 2, 1))
10173 return false;
10174
10175 /* If the callee has specific tuning overrides, respect them. */
10176 if (callee_opts->x_aarch64_override_tune_string != NULL
10177 && caller_opts->x_aarch64_override_tune_string == NULL)
10178 return false;
10179
10180 /* If the user specified tuning override strings for the
10181 caller and callee and they don't match up, reject inlining.
10182 We just do a string compare here, we don't analyze the meaning
10183 of the string, as it would be too costly for little gain. */
10184 if (callee_opts->x_aarch64_override_tune_string
10185 && caller_opts->x_aarch64_override_tune_string
10186 && (strcmp (callee_opts->x_aarch64_override_tune_string,
10187 caller_opts->x_aarch64_override_tune_string) != 0))
10188 return false;
10189
10190 return true;
10191}
10192
43e9d192
IB
10193/* Return true if SYMBOL_REF X binds locally. */
10194
10195static bool
10196aarch64_symbol_binds_local_p (const_rtx x)
10197{
10198 return (SYMBOL_REF_DECL (x)
10199 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
10200 : SYMBOL_REF_LOCAL_P (x));
10201}
10202
 10203/* Return true if SYMBOL_REF X is thread local.  */
10204static bool
10205aarch64_tls_symbol_p (rtx x)
10206{
10207 if (! TARGET_HAVE_TLS)
10208 return false;
10209
10210 if (GET_CODE (x) != SYMBOL_REF)
10211 return false;
10212
10213 return SYMBOL_REF_TLS_MODEL (x) != 0;
10214}
10215
10216/* Classify a TLS symbol into one of the TLS kinds. */
10217enum aarch64_symbol_type
10218aarch64_classify_tls_symbol (rtx x)
10219{
10220 enum tls_model tls_kind = tls_symbolic_operand_type (x);
10221
10222 switch (tls_kind)
10223 {
10224 case TLS_MODEL_GLOBAL_DYNAMIC:
10225 case TLS_MODEL_LOCAL_DYNAMIC:
10226 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
10227
10228 case TLS_MODEL_INITIAL_EXEC:
5ae7caad
JW
10229 switch (aarch64_cmodel)
10230 {
10231 case AARCH64_CMODEL_TINY:
10232 case AARCH64_CMODEL_TINY_PIC:
10233 return SYMBOL_TINY_TLSIE;
10234 default:
79496620 10235 return SYMBOL_SMALL_TLSIE;
5ae7caad 10236 }
43e9d192
IB
10237
10238 case TLS_MODEL_LOCAL_EXEC:
cbf5629e
JW
10239 if (aarch64_tls_size == 12)
10240 return SYMBOL_TLSLE12;
10241 else if (aarch64_tls_size == 24)
10242 return SYMBOL_TLSLE24;
10243 else if (aarch64_tls_size == 32)
10244 return SYMBOL_TLSLE32;
10245 else if (aarch64_tls_size == 48)
10246 return SYMBOL_TLSLE48;
10247 else
10248 gcc_unreachable ();
43e9d192
IB
10249
10250 case TLS_MODEL_EMULATED:
10251 case TLS_MODEL_NONE:
10252 return SYMBOL_FORCE_TO_MEM;
10253
10254 default:
10255 gcc_unreachable ();
10256 }
10257}
10258
10259/* Return the method that should be used to access SYMBOL_REF or
a6e0bfa7 10260 LABEL_REF X. */
17f4d4bf 10261
43e9d192 10262enum aarch64_symbol_type
a6e0bfa7 10263aarch64_classify_symbol (rtx x, rtx offset)
43e9d192
IB
10264{
10265 if (GET_CODE (x) == LABEL_REF)
10266 {
10267 switch (aarch64_cmodel)
10268 {
10269 case AARCH64_CMODEL_LARGE:
10270 return SYMBOL_FORCE_TO_MEM;
10271
10272 case AARCH64_CMODEL_TINY_PIC:
10273 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
10274 return SYMBOL_TINY_ABSOLUTE;
10275
1b1e81f8 10276 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
10277 case AARCH64_CMODEL_SMALL_PIC:
10278 case AARCH64_CMODEL_SMALL:
10279 return SYMBOL_SMALL_ABSOLUTE;
10280
10281 default:
10282 gcc_unreachable ();
10283 }
10284 }
10285
17f4d4bf 10286 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 10287 {
43e9d192
IB
10288 if (aarch64_tls_symbol_p (x))
10289 return aarch64_classify_tls_symbol (x);
10290
17f4d4bf
CSS
10291 switch (aarch64_cmodel)
10292 {
10293 case AARCH64_CMODEL_TINY:
15f6e0da 10294 /* When we retrieve symbol + offset address, we have to make sure
f8b756b7
TB
10295 the offset does not cause overflow of the final address. But
10296 we have no way of knowing the address of symbol at compile time
10297 so we can't accurately say if the distance between the PC and
 10298	     symbol + offset is outside the addressable range of +/-1M in the
10299 TINY code model. So we rely on images not being greater than
 10300	     1M and cap the offset at 1M; anything beyond 1M will have to
15f6e0da
RR
10301 be loaded using an alternative mechanism. Furthermore if the
10302 symbol is a weak reference to something that isn't known to
10303 resolve to a symbol in this module, then force to memory. */
10304 if ((SYMBOL_REF_WEAK (x)
10305 && !aarch64_symbol_binds_local_p (x))
f8b756b7 10306 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
a5350ddc
CSS
10307 return SYMBOL_FORCE_TO_MEM;
10308 return SYMBOL_TINY_ABSOLUTE;
10309
17f4d4bf 10310 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
10311 /* Same reasoning as the tiny code model, but the offset cap here is
10312 4G. */
15f6e0da
RR
10313 if ((SYMBOL_REF_WEAK (x)
10314 && !aarch64_symbol_binds_local_p (x))
3ff5d1f0
TB
10315 || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
10316 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
10317 return SYMBOL_FORCE_TO_MEM;
10318 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 10319
17f4d4bf 10320 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 10321 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 10322 return SYMBOL_TINY_GOT;
38e6c9a6
MS
10323 return SYMBOL_TINY_ABSOLUTE;
10324
1b1e81f8 10325 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
10326 case AARCH64_CMODEL_SMALL_PIC:
10327 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
10328 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
10329 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 10330 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 10331
9ee6540a
WD
10332 case AARCH64_CMODEL_LARGE:
10333 /* This is alright even in PIC code as the constant
10334 pool reference is always PC relative and within
10335 the same translation unit. */
d47d34bb 10336 if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x))
9ee6540a
WD
10337 return SYMBOL_SMALL_ABSOLUTE;
10338 else
10339 return SYMBOL_FORCE_TO_MEM;
10340
17f4d4bf
CSS
10341 default:
10342 gcc_unreachable ();
10343 }
43e9d192 10344 }
17f4d4bf 10345
43e9d192
IB
10346 /* By default push everything into the constant pool. */
10347 return SYMBOL_FORCE_TO_MEM;
10348}
10349
43e9d192
IB
10350bool
10351aarch64_constant_address_p (rtx x)
10352{
10353 return (CONSTANT_P (x) && memory_address_p (DImode, x));
10354}
10355
10356bool
10357aarch64_legitimate_pic_operand_p (rtx x)
10358{
10359 if (GET_CODE (x) == SYMBOL_REF
10360 || (GET_CODE (x) == CONST
10361 && GET_CODE (XEXP (x, 0)) == PLUS
10362 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
10363 return false;
10364
10365 return true;
10366}
10367
3520f7cc
JG
 10368/* Return true if X holds either a quarter-precision floating-point
 10369 constant or the floating-point constant +0.0. */
10370static bool
a2170965 10371aarch64_valid_floating_const (rtx x)
3520f7cc
JG
10372{
10373 if (!CONST_DOUBLE_P (x))
10374 return false;
10375
a2170965
TC
10376 /* This call determines which constants can be used in mov<mode>
10377 as integer moves instead of constant loads. */
10378 if (aarch64_float_const_rtx_p (x))
6a0f8c01
JW
10379 return true;
10380
3520f7cc
JG
10381 return aarch64_float_const_representable_p (x);
10382}
10383
43e9d192 10384static bool
ef4bddc2 10385aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192
IB
10386{
10387 /* Do not allow vector struct mode constants. We could support
10388 0 and -1 easily, but they need support in aarch64-simd.md. */
10389 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
10390 return false;
10391
a2170965
TC
10392 /* For these cases we never want to use a literal load.
10393 As such we have to prevent the compiler from forcing these
10394 to memory. */
43e9d192 10395 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 10396 && aarch64_simd_valid_immediate (x, mode, false, NULL))
a2170965
TC
10397 || CONST_INT_P (x)
10398 || aarch64_valid_floating_const (x)
10399 || aarch64_can_const_movi_rtx_p (x, mode)
10400 || aarch64_float_const_rtx_p (x))
3520f7cc 10401 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
10402
10403 if (GET_CODE (x) == HIGH
10404 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
10405 return true;
10406
f28e54bd
WD
10407 /* Treat symbols as constants. Avoid TLS symbols as they are complex,
10408 so spilling them is better than rematerialization. */
10409 if (SYMBOL_REF_P (x) && !SYMBOL_REF_TLS_MODEL (x))
10410 return true;
10411
43e9d192
IB
10412 return aarch64_constant_address_p (x);
10413}
10414
a5bc806c 10415rtx
43e9d192
IB
10416aarch64_load_tp (rtx target)
10417{
10418 if (!target
10419 || GET_MODE (target) != Pmode
10420 || !register_operand (target, Pmode))
10421 target = gen_reg_rtx (Pmode);
10422
10423 /* Can return in any reg. */
10424 emit_insn (gen_aarch64_load_tp_hard (target));
10425 return target;
10426}
10427
43e9d192
IB
10428/* On AAPCS systems, this is the "struct __va_list". */
10429static GTY(()) tree va_list_type;
10430
10431/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
10432 Return the type to use as __builtin_va_list.
10433
10434 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
10435
10436 struct __va_list
10437 {
10438 void *__stack;
10439 void *__gr_top;
10440 void *__vr_top;
10441 int __gr_offs;
10442 int __vr_offs;
10443 }; */
10444
10445static tree
10446aarch64_build_builtin_va_list (void)
10447{
10448 tree va_list_name;
10449 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
10450
10451 /* Create the type. */
10452 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
10453 /* Give it the required name. */
10454 va_list_name = build_decl (BUILTINS_LOCATION,
10455 TYPE_DECL,
10456 get_identifier ("__va_list"),
10457 va_list_type);
10458 DECL_ARTIFICIAL (va_list_name) = 1;
10459 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 10460 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
10461
10462 /* Create the fields. */
10463 f_stack = build_decl (BUILTINS_LOCATION,
10464 FIELD_DECL, get_identifier ("__stack"),
10465 ptr_type_node);
10466 f_grtop = build_decl (BUILTINS_LOCATION,
10467 FIELD_DECL, get_identifier ("__gr_top"),
10468 ptr_type_node);
10469 f_vrtop = build_decl (BUILTINS_LOCATION,
10470 FIELD_DECL, get_identifier ("__vr_top"),
10471 ptr_type_node);
10472 f_groff = build_decl (BUILTINS_LOCATION,
10473 FIELD_DECL, get_identifier ("__gr_offs"),
10474 integer_type_node);
10475 f_vroff = build_decl (BUILTINS_LOCATION,
10476 FIELD_DECL, get_identifier ("__vr_offs"),
10477 integer_type_node);
10478
88e3bdd1 10479 /* Tell tree-stdarg pass about our internal offset fields.
3fd6b9cc
JW
 10480	 NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
 10481	 purposes, to identify whether the code is updating va_list internal
 10482	 offset fields in an irregular way. */
10483 va_list_gpr_counter_field = f_groff;
10484 va_list_fpr_counter_field = f_vroff;
10485
43e9d192
IB
10486 DECL_ARTIFICIAL (f_stack) = 1;
10487 DECL_ARTIFICIAL (f_grtop) = 1;
10488 DECL_ARTIFICIAL (f_vrtop) = 1;
10489 DECL_ARTIFICIAL (f_groff) = 1;
10490 DECL_ARTIFICIAL (f_vroff) = 1;
10491
10492 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
10493 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
10494 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
10495 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
10496 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
10497
10498 TYPE_FIELDS (va_list_type) = f_stack;
10499 DECL_CHAIN (f_stack) = f_grtop;
10500 DECL_CHAIN (f_grtop) = f_vrtop;
10501 DECL_CHAIN (f_vrtop) = f_groff;
10502 DECL_CHAIN (f_groff) = f_vroff;
10503
10504 /* Compute its layout. */
10505 layout_type (va_list_type);
10506
10507 return va_list_type;
10508}
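
The record built above is what AAPCS64-conforming user code observes through va_list. A rough user-level sketch of the layout and of how the fields are interpreted (paraphrased from AAPCS64 \S 7.1.4, not generated output):

    typedef struct __va_list {
      void *__stack;    /* next stacked (memory) argument */
      void *__gr_top;   /* end of the general-register save area */
      void *__vr_top;   /* end of the FP/SIMD-register save area */
      int __gr_offs;    /* negative offset from __gr_top to the next GR arg */
      int __vr_offs;    /* negative offset from __vr_top to the next VR arg */
    } va_list;

__gr_offs and __vr_offs start out negative and move towards zero as register-passed arguments are consumed; once an offset is non-negative, the remaining arguments of that class are read from __stack.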
10509
10510/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
10511static void
10512aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
10513{
10514 const CUMULATIVE_ARGS *cum;
10515 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
10516 tree stack, grtop, vrtop, groff, vroff;
10517 tree t;
88e3bdd1
JW
10518 int gr_save_area_size = cfun->va_list_gpr_size;
10519 int vr_save_area_size = cfun->va_list_fpr_size;
43e9d192
IB
10520 int vr_offset;
10521
10522 cum = &crtl->args.info;
88e3bdd1
JW
10523 if (cfun->va_list_gpr_size)
10524 gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
10525 cfun->va_list_gpr_size);
10526 if (cfun->va_list_fpr_size)
10527 vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
10528 * UNITS_PER_VREG, cfun->va_list_fpr_size);
43e9d192 10529
d5726973 10530 if (!TARGET_FLOAT)
43e9d192 10531 {
261fb553 10532 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
10533 vr_save_area_size = 0;
10534 }
10535
10536 f_stack = TYPE_FIELDS (va_list_type_node);
10537 f_grtop = DECL_CHAIN (f_stack);
10538 f_vrtop = DECL_CHAIN (f_grtop);
10539 f_groff = DECL_CHAIN (f_vrtop);
10540 f_vroff = DECL_CHAIN (f_groff);
10541
10542 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
10543 NULL_TREE);
10544 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
10545 NULL_TREE);
10546 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
10547 NULL_TREE);
10548 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
10549 NULL_TREE);
10550 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
10551 NULL_TREE);
10552
10553 /* Emit code to initialize STACK, which points to the next varargs stack
10554 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
10555 by named arguments. STACK is 8-byte aligned. */
10556 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
10557 if (cum->aapcs_stack_size > 0)
10558 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
10559 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
10560 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10561
10562 /* Emit code to initialize GRTOP, the top of the GR save area.
10563 virtual_incoming_args_rtx should have been 16 byte aligned. */
10564 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
10565 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
10566 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10567
10568 /* Emit code to initialize VRTOP, the top of the VR save area.
10569 This address is gr_save_area_bytes below GRTOP, rounded
10570 down to the next 16-byte boundary. */
10571 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
4f59f9f2
UB
10572 vr_offset = ROUND_UP (gr_save_area_size,
10573 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
10574
10575 if (vr_offset)
10576 t = fold_build_pointer_plus_hwi (t, -vr_offset);
10577 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
10578 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10579
10580 /* Emit code to initialize GROFF, the offset from GRTOP of the
10581 next GPR argument. */
10582 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
10583 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
10584 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10585
10586 /* Likewise emit code to initialize VROFF, the offset from FTOP
10587 of the next VR argument. */
10588 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
10589 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
10590 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10591}
10592
10593/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
10594
10595static tree
10596aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
10597 gimple_seq *post_p ATTRIBUTE_UNUSED)
10598{
10599 tree addr;
10600 bool indirect_p;
10601 bool is_ha; /* is HFA or HVA. */
10602 bool dw_align; /* double-word align. */
ef4bddc2 10603 machine_mode ag_mode = VOIDmode;
43e9d192 10604 int nregs;
ef4bddc2 10605 machine_mode mode;
43e9d192
IB
10606
10607 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
10608 tree stack, f_top, f_off, off, arg, roundup, on_stack;
10609 HOST_WIDE_INT size, rsize, adjust, align;
10610 tree t, u, cond1, cond2;
10611
10612 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
10613 if (indirect_p)
10614 type = build_pointer_type (type);
10615
10616 mode = TYPE_MODE (type);
10617
10618 f_stack = TYPE_FIELDS (va_list_type_node);
10619 f_grtop = DECL_CHAIN (f_stack);
10620 f_vrtop = DECL_CHAIN (f_grtop);
10621 f_groff = DECL_CHAIN (f_vrtop);
10622 f_vroff = DECL_CHAIN (f_groff);
10623
10624 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
10625 f_stack, NULL_TREE);
10626 size = int_size_in_bytes (type);
985b8393 10627 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
43e9d192
IB
10628
10629 dw_align = false;
10630 adjust = 0;
10631 if (aarch64_vfp_is_call_or_return_candidate (mode,
10632 type,
10633 &ag_mode,
10634 &nregs,
10635 &is_ha))
10636 {
10637 /* TYPE passed in fp/simd registers. */
d5726973 10638 if (!TARGET_FLOAT)
261fb553 10639 aarch64_err_no_fpadvsimd (mode, "varargs");
43e9d192
IB
10640
10641 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
10642 unshare_expr (valist), f_vrtop, NULL_TREE);
10643 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
10644 unshare_expr (valist), f_vroff, NULL_TREE);
10645
10646 rsize = nregs * UNITS_PER_VREG;
10647
10648 if (is_ha)
10649 {
10650 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
10651 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
10652 }
76b0cbf8 10653 else if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
10654 && size < UNITS_PER_VREG)
10655 {
10656 adjust = UNITS_PER_VREG - size;
10657 }
10658 }
10659 else
10660 {
10661 /* TYPE passed in general registers. */
10662 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
10663 unshare_expr (valist), f_grtop, NULL_TREE);
10664 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
10665 unshare_expr (valist), f_groff, NULL_TREE);
4f59f9f2 10666 rsize = ROUND_UP (size, UNITS_PER_WORD);
43e9d192
IB
10667 nregs = rsize / UNITS_PER_WORD;
10668
10669 if (align > 8)
10670 dw_align = true;
10671
76b0cbf8 10672 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
10673 && size < UNITS_PER_WORD)
10674 {
10675 adjust = UNITS_PER_WORD - size;
10676 }
10677 }
10678
10679 /* Get a local temporary for the field value. */
10680 off = get_initialized_tmp_var (f_off, pre_p, NULL);
10681
10682 /* Emit code to branch if off >= 0. */
10683 t = build2 (GE_EXPR, boolean_type_node, off,
10684 build_int_cst (TREE_TYPE (off), 0));
10685 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
10686
10687 if (dw_align)
10688 {
10689 /* Emit: offs = (offs + 15) & -16. */
10690 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
10691 build_int_cst (TREE_TYPE (off), 15));
10692 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
10693 build_int_cst (TREE_TYPE (off), -16));
10694 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
10695 }
10696 else
10697 roundup = NULL;
10698
10699 /* Update ap.__[g|v]r_offs */
10700 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
10701 build_int_cst (TREE_TYPE (off), rsize));
10702 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
10703
10704 /* String up. */
10705 if (roundup)
10706 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
10707
10708 /* [cond2] if (ap.__[g|v]r_offs > 0) */
10709 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
10710 build_int_cst (TREE_TYPE (f_off), 0));
10711 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
10712
10713 /* String up: make sure the assignment happens before the use. */
10714 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
10715 COND_EXPR_ELSE (cond1) = t;
10716
10717 /* Prepare the trees handling the argument that is passed on the stack;
10718 the top level node will store in ON_STACK. */
10719 arg = get_initialized_tmp_var (stack, pre_p, NULL);
10720 if (align > 8)
10721 {
10722 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
10723 t = fold_convert (intDI_type_node, arg);
10724 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
10725 build_int_cst (TREE_TYPE (t), 15));
10726 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10727 build_int_cst (TREE_TYPE (t), -16));
10728 t = fold_convert (TREE_TYPE (arg), t);
10729 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
10730 }
10731 else
10732 roundup = NULL;
10733 /* Advance ap.__stack */
10734 t = fold_convert (intDI_type_node, arg);
10735 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
10736 build_int_cst (TREE_TYPE (t), size + 7));
10737 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10738 build_int_cst (TREE_TYPE (t), -8));
10739 t = fold_convert (TREE_TYPE (arg), t);
10740 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
10741 /* String up roundup and advance. */
10742 if (roundup)
10743 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
10744 /* String up with arg */
10745 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
10746 /* Big-endianness related address adjustment. */
76b0cbf8 10747 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
10748 && size < UNITS_PER_WORD)
10749 {
10750 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
10751 size_int (UNITS_PER_WORD - size));
10752 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
10753 }
10754
10755 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
10756 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
10757
10758 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
10759 t = off;
10760 if (adjust)
10761 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
10762 build_int_cst (TREE_TYPE (off), adjust));
10763
10764 t = fold_convert (sizetype, t);
10765 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
10766
10767 if (is_ha)
10768 {
10769 /* type ha; // treat as "struct {ftype field[n];}"
10770 ... [computing offs]
 10771	 for (i = 0; i < nregs; ++i, offs += 16)
10772 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
10773 return ha; */
10774 int i;
10775 tree tmp_ha, field_t, field_ptr_t;
10776
10777 /* Declare a local variable. */
10778 tmp_ha = create_tmp_var_raw (type, "ha");
10779 gimple_add_tmp_var (tmp_ha);
10780
10781 /* Establish the base type. */
10782 switch (ag_mode)
10783 {
4e10a5a7 10784 case E_SFmode:
43e9d192
IB
10785 field_t = float_type_node;
10786 field_ptr_t = float_ptr_type_node;
10787 break;
4e10a5a7 10788 case E_DFmode:
43e9d192
IB
10789 field_t = double_type_node;
10790 field_ptr_t = double_ptr_type_node;
10791 break;
4e10a5a7 10792 case E_TFmode:
43e9d192
IB
10793 field_t = long_double_type_node;
10794 field_ptr_t = long_double_ptr_type_node;
10795 break;
4e10a5a7 10796 case E_HFmode:
1b62ed4f
JG
10797 field_t = aarch64_fp16_type_node;
10798 field_ptr_t = aarch64_fp16_ptr_type_node;
43e9d192 10799 break;
4e10a5a7
RS
10800 case E_V2SImode:
10801 case E_V4SImode:
43e9d192
IB
10802 {
10803 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
10804 field_t = build_vector_type_for_mode (innertype, ag_mode);
10805 field_ptr_t = build_pointer_type (field_t);
10806 }
10807 break;
10808 default:
10809 gcc_assert (0);
10810 }
10811
 10812	 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
10813 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
10814 addr = t;
10815 t = fold_convert (field_ptr_t, addr);
10816 t = build2 (MODIFY_EXPR, field_t,
10817 build1 (INDIRECT_REF, field_t, tmp_ha),
10818 build1 (INDIRECT_REF, field_t, t));
10819
10820 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
10821 for (i = 1; i < nregs; ++i)
10822 {
10823 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
10824 u = fold_convert (field_ptr_t, addr);
10825 u = build2 (MODIFY_EXPR, field_t,
10826 build2 (MEM_REF, field_t, tmp_ha,
10827 build_int_cst (field_ptr_t,
10828 (i *
10829 int_size_in_bytes (field_t)))),
10830 build1 (INDIRECT_REF, field_t, u));
10831 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
10832 }
10833
10834 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
10835 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
10836 }
10837
10838 COND_EXPR_ELSE (cond2) = t;
10839 addr = fold_convert (build_pointer_type (type), cond1);
10840 addr = build_va_arg_indirect_ref (addr);
10841
10842 if (indirect_p)
10843 addr = build_va_arg_indirect_ref (addr);
10844
10845 return addr;
10846}
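
The trees built above are easier to follow as pseudo-C. A simplified sketch for a scalar double argument (one V register, little-endian, no 16-byte alignment), using illustrative variable names rather than the temporaries GCC actually creates:

    int off = ap.__vr_offs;
    if (off >= 0)                     /* register save area already used up */
      goto on_stack;
    ap.__vr_offs = off + 16;          /* claim one 16-byte VR slot */
    if (ap.__vr_offs > 0)             /* this argument did not fit either */
      goto on_stack;
    addr = (char *) ap.__vr_top + off;
    goto done;
  on_stack:
    addr = ap.__stack;
    ap.__stack = (void *) (((uintptr_t) addr + sizeof (double) + 7) & -8);
  done:
    result = *(double *) addr;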
10847
10848/* Implement TARGET_SETUP_INCOMING_VARARGS. */
10849
10850static void
ef4bddc2 10851aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
10852 tree type, int *pretend_size ATTRIBUTE_UNUSED,
10853 int no_rtl)
10854{
10855 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10856 CUMULATIVE_ARGS local_cum;
88e3bdd1
JW
10857 int gr_saved = cfun->va_list_gpr_size;
10858 int vr_saved = cfun->va_list_fpr_size;
43e9d192
IB
10859
10860 /* The caller has advanced CUM up to, but not beyond, the last named
10861 argument. Advance a local copy of CUM past the last "real" named
10862 argument, to find out how many registers are left over. */
10863 local_cum = *cum;
10864 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
10865
88e3bdd1
JW
 10866	 /* Find out how many registers we need to save.
 10867	 Honor tree-stdarg analysis results. */
10868 if (cfun->va_list_gpr_size)
10869 gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
10870 cfun->va_list_gpr_size / UNITS_PER_WORD);
10871 if (cfun->va_list_fpr_size)
10872 vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
10873 cfun->va_list_fpr_size / UNITS_PER_VREG);
43e9d192 10874
d5726973 10875 if (!TARGET_FLOAT)
43e9d192 10876 {
261fb553 10877 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
10878 vr_saved = 0;
10879 }
10880
10881 if (!no_rtl)
10882 {
10883 if (gr_saved > 0)
10884 {
10885 rtx ptr, mem;
10886
10887 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
10888 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
10889 - gr_saved * UNITS_PER_WORD);
10890 mem = gen_frame_mem (BLKmode, ptr);
10891 set_mem_alias_set (mem, get_varargs_alias_set ());
10892
10893 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
10894 mem, gr_saved);
10895 }
10896 if (vr_saved > 0)
10897 {
10898 /* We can't use move_block_from_reg, because it will use
10899 the wrong mode, storing D regs only. */
ef4bddc2 10900 machine_mode mode = TImode;
88e3bdd1 10901 int off, i, vr_start;
43e9d192
IB
10902
10903 /* Set OFF to the offset from virtual_incoming_args_rtx of
10904 the first vector register. The VR save area lies below
10905 the GR one, and is aligned to 16 bytes. */
4f59f9f2
UB
10906 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
10907 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
10908 off -= vr_saved * UNITS_PER_VREG;
10909
88e3bdd1
JW
10910 vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
10911 for (i = 0; i < vr_saved; ++i)
43e9d192
IB
10912 {
10913 rtx ptr, mem;
10914
10915 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
10916 mem = gen_frame_mem (mode, ptr);
10917 set_mem_alias_set (mem, get_varargs_alias_set ());
88e3bdd1 10918 aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
43e9d192
IB
10919 off += UNITS_PER_VREG;
10920 }
10921 }
10922 }
10923
10924 /* We don't save the size into *PRETEND_SIZE because we want to avoid
10925 any complication of having crtl->args.pretend_args_size changed. */
8799637a 10926 cfun->machine->frame.saved_varargs_size
4f59f9f2
UB
10927 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
10928 STACK_BOUNDARY / BITS_PER_UNIT)
43e9d192
IB
10929 + vr_saved * UNITS_PER_VREG);
10930}
10931
10932static void
10933aarch64_conditional_register_usage (void)
10934{
10935 int i;
10936 if (!TARGET_FLOAT)
10937 {
10938 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
10939 {
10940 fixed_regs[i] = 1;
10941 call_used_regs[i] = 1;
10942 }
10943 }
10944}
10945
10946/* Walk down the type tree of TYPE counting consecutive base elements.
10947 If *MODEP is VOIDmode, then set it to the first valid floating point
10948 type. If a non-floating point type is found, or if a floating point
10949 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
10950 otherwise return the count in the sub-tree. */
10951static int
ef4bddc2 10952aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 10953{
ef4bddc2 10954 machine_mode mode;
43e9d192
IB
10955 HOST_WIDE_INT size;
10956
10957 switch (TREE_CODE (type))
10958 {
10959 case REAL_TYPE:
10960 mode = TYPE_MODE (type);
1b62ed4f
JG
10961 if (mode != DFmode && mode != SFmode
10962 && mode != TFmode && mode != HFmode)
43e9d192
IB
10963 return -1;
10964
10965 if (*modep == VOIDmode)
10966 *modep = mode;
10967
10968 if (*modep == mode)
10969 return 1;
10970
10971 break;
10972
10973 case COMPLEX_TYPE:
10974 mode = TYPE_MODE (TREE_TYPE (type));
1b62ed4f
JG
10975 if (mode != DFmode && mode != SFmode
10976 && mode != TFmode && mode != HFmode)
43e9d192
IB
10977 return -1;
10978
10979 if (*modep == VOIDmode)
10980 *modep = mode;
10981
10982 if (*modep == mode)
10983 return 2;
10984
10985 break;
10986
10987 case VECTOR_TYPE:
10988 /* Use V2SImode and V4SImode as representatives of all 64-bit
10989 and 128-bit vector types. */
10990 size = int_size_in_bytes (type);
10991 switch (size)
10992 {
10993 case 8:
10994 mode = V2SImode;
10995 break;
10996 case 16:
10997 mode = V4SImode;
10998 break;
10999 default:
11000 return -1;
11001 }
11002
11003 if (*modep == VOIDmode)
11004 *modep = mode;
11005
11006 /* Vector modes are considered to be opaque: two vectors are
11007 equivalent for the purposes of being homogeneous aggregates
11008 if they are the same size. */
11009 if (*modep == mode)
11010 return 1;
11011
11012 break;
11013
11014 case ARRAY_TYPE:
11015 {
11016 int count;
11017 tree index = TYPE_DOMAIN (type);
11018
807e902e
KZ
11019 /* Can't handle incomplete types nor sizes that are not
11020 fixed. */
11021 if (!COMPLETE_TYPE_P (type)
11022 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
11023 return -1;
11024
11025 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
11026 if (count == -1
11027 || !index
11028 || !TYPE_MAX_VALUE (index)
cc269bb6 11029 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 11030 || !TYPE_MIN_VALUE (index)
cc269bb6 11031 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
11032 || count < 0)
11033 return -1;
11034
ae7e9ddd
RS
11035 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
11036 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
11037
11038 /* There must be no padding. */
807e902e 11039 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
11040 return -1;
11041
11042 return count;
11043 }
11044
11045 case RECORD_TYPE:
11046 {
11047 int count = 0;
11048 int sub_count;
11049 tree field;
11050
807e902e
KZ
11051 /* Can't handle incomplete types nor sizes that are not
11052 fixed. */
11053 if (!COMPLETE_TYPE_P (type)
11054 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
11055 return -1;
11056
11057 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11058 {
11059 if (TREE_CODE (field) != FIELD_DECL)
11060 continue;
11061
11062 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
11063 if (sub_count < 0)
11064 return -1;
11065 count += sub_count;
11066 }
11067
11068 /* There must be no padding. */
807e902e 11069 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
11070 return -1;
11071
11072 return count;
11073 }
11074
11075 case UNION_TYPE:
11076 case QUAL_UNION_TYPE:
11077 {
11078 /* These aren't very interesting except in a degenerate case. */
11079 int count = 0;
11080 int sub_count;
11081 tree field;
11082
807e902e
KZ
11083 /* Can't handle incomplete types nor sizes that are not
11084 fixed. */
11085 if (!COMPLETE_TYPE_P (type)
11086 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
11087 return -1;
11088
11089 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11090 {
11091 if (TREE_CODE (field) != FIELD_DECL)
11092 continue;
11093
11094 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
11095 if (sub_count < 0)
11096 return -1;
11097 count = count > sub_count ? count : sub_count;
11098 }
11099
11100 /* There must be no padding. */
807e902e 11101 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
11102 return -1;
11103
11104 return count;
11105 }
11106
11107 default:
11108 break;
11109 }
11110
11111 return -1;
11112}
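
Two illustrative classifications (hypothetical types, shown only to make the walk above concrete):

    struct hfa { float x, y, z; };          /* 3 consecutive SFmode elements */
    struct mixed { double d; float f; };    /* element modes disagree */

For the first type the function returns 3 with *MODEP set to SFmode, so the callers below treat it as a homogeneous floating-point aggregate (it is within HA_MAX_NUM_FLDS) and, when FP argument registers are available, pass it in s0-s2. For the second type the float field does not match the DFmode already recorded in *MODEP, so the function returns -1 and the type is not an HFA.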
11113
b6ec6215
KT
11114/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
11115 type as described in AAPCS64 \S 4.1.2.
11116
11117 See the comment above aarch64_composite_type_p for the notes on MODE. */
11118
11119static bool
11120aarch64_short_vector_p (const_tree type,
11121 machine_mode mode)
11122{
11123 HOST_WIDE_INT size = -1;
11124
11125 if (type && TREE_CODE (type) == VECTOR_TYPE)
11126 size = int_size_in_bytes (type);
11127 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
11128 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11129 size = GET_MODE_SIZE (mode);
11130
11131 return (size == 8 || size == 16);
11132}
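
Concretely, the 64-bit and 128-bit Advanced SIMD types such as int32x2_t (8 bytes) and float32x4_t (16 bytes) satisfy this predicate, while a generic GNU vector declared with __attribute__ ((vector_size (32))) is too large and does not.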
11133
43e9d192
IB
11134/* Return TRUE if the type, as described by TYPE and MODE, is a composite
11135 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
11136 array types. The C99 floating-point complex types are also considered
11137 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
11138 types, which are GCC extensions and out of the scope of AAPCS64, are
11139 treated as composite types here as well.
11140
11141 Note that MODE itself is not sufficient in determining whether a type
11142 is such a composite type or not. This is because
11143 stor-layout.c:compute_record_mode may have already changed the MODE
11144 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
11145 structure with only one field may have its MODE set to the mode of the
11146 field. Also an integer mode whose size matches the size of the
11147 RECORD_TYPE type may be used to substitute the original mode
11148 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
11149 solely relied on. */
11150
11151static bool
11152aarch64_composite_type_p (const_tree type,
ef4bddc2 11153 machine_mode mode)
43e9d192 11154{
b6ec6215
KT
11155 if (aarch64_short_vector_p (type, mode))
11156 return false;
11157
43e9d192
IB
11158 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
11159 return true;
11160
11161 if (mode == BLKmode
11162 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
11163 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
11164 return true;
11165
11166 return false;
11167}
11168
43e9d192
IB
11169/* Return TRUE if an argument, whose type is described by TYPE and MODE,
11170 shall be passed or returned in simd/fp register(s) (providing these
11171 parameter passing registers are available).
11172
11173 Upon successful return, *COUNT returns the number of needed registers,
 11174	 *BASE_MODE returns the mode of the individual register and when IS_HA
11175 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
11176 floating-point aggregate or a homogeneous short-vector aggregate. */
11177
11178static bool
ef4bddc2 11179aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 11180 const_tree type,
ef4bddc2 11181 machine_mode *base_mode,
43e9d192
IB
11182 int *count,
11183 bool *is_ha)
11184{
ef4bddc2 11185 machine_mode new_mode = VOIDmode;
43e9d192
IB
11186 bool composite_p = aarch64_composite_type_p (type, mode);
11187
11188 if (is_ha != NULL) *is_ha = false;
11189
11190 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
11191 || aarch64_short_vector_p (type, mode))
11192 {
11193 *count = 1;
11194 new_mode = mode;
11195 }
11196 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
11197 {
11198 if (is_ha != NULL) *is_ha = true;
11199 *count = 2;
11200 new_mode = GET_MODE_INNER (mode);
11201 }
11202 else if (type && composite_p)
11203 {
11204 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
11205
11206 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
11207 {
11208 if (is_ha != NULL) *is_ha = true;
11209 *count = ag_count;
11210 }
11211 else
11212 return false;
11213 }
11214 else
11215 return false;
11216
11217 *base_mode = new_mode;
11218 return true;
11219}
11220
11221/* Implement TARGET_STRUCT_VALUE_RTX. */
11222
11223static rtx
11224aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
11225 int incoming ATTRIBUTE_UNUSED)
11226{
11227 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
11228}
11229
11230/* Implements target hook vector_mode_supported_p. */
11231static bool
ef4bddc2 11232aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192
IB
11233{
11234 if (TARGET_SIMD
11235 && (mode == V4SImode || mode == V8HImode
11236 || mode == V16QImode || mode == V2DImode
11237 || mode == V2SImode || mode == V4HImode
11238 || mode == V8QImode || mode == V2SFmode
ad7d90cc 11239 || mode == V4SFmode || mode == V2DFmode
71a11456 11240 || mode == V4HFmode || mode == V8HFmode
ad7d90cc 11241 || mode == V1DFmode))
43e9d192
IB
11242 return true;
11243
11244 return false;
11245}
11246
b7342d25
IB
11247/* Return appropriate SIMD container
11248 for MODE within a vector of WIDTH bits. */
ef4bddc2 11249static machine_mode
146c2e3a 11250aarch64_simd_container_mode (scalar_mode mode, unsigned width)
43e9d192 11251{
b7342d25 11252 gcc_assert (width == 64 || width == 128);
43e9d192 11253 if (TARGET_SIMD)
b7342d25
IB
11254 {
11255 if (width == 128)
11256 switch (mode)
11257 {
4e10a5a7 11258 case E_DFmode:
b7342d25 11259 return V2DFmode;
4e10a5a7 11260 case E_SFmode:
b7342d25 11261 return V4SFmode;
4e10a5a7 11262 case E_HFmode:
b719f884 11263 return V8HFmode;
4e10a5a7 11264 case E_SImode:
b7342d25 11265 return V4SImode;
4e10a5a7 11266 case E_HImode:
b7342d25 11267 return V8HImode;
4e10a5a7 11268 case E_QImode:
b7342d25 11269 return V16QImode;
4e10a5a7 11270 case E_DImode:
b7342d25
IB
11271 return V2DImode;
11272 default:
11273 break;
11274 }
11275 else
11276 switch (mode)
11277 {
4e10a5a7 11278 case E_SFmode:
b7342d25 11279 return V2SFmode;
4e10a5a7 11280 case E_HFmode:
b719f884 11281 return V4HFmode;
4e10a5a7 11282 case E_SImode:
b7342d25 11283 return V2SImode;
4e10a5a7 11284 case E_HImode:
b7342d25 11285 return V4HImode;
4e10a5a7 11286 case E_QImode:
b7342d25
IB
11287 return V8QImode;
11288 default:
11289 break;
11290 }
11291 }
43e9d192
IB
11292 return word_mode;
11293}
11294
b7342d25 11295/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2 11296static machine_mode
005ba29c 11297aarch64_preferred_simd_mode (scalar_mode mode)
b7342d25
IB
11298{
11299 return aarch64_simd_container_mode (mode, 128);
11300}
11301
3b357264
JG
11302/* Return the bitmask of possible vector sizes for the vectorizer
11303 to iterate over. */
11304static unsigned int
11305aarch64_autovectorize_vector_sizes (void)
11306{
11307 return (16 | 8);
11308}
11309
ac2b960f
YZ
11310/* Implement TARGET_MANGLE_TYPE. */
11311
6f549691 11312static const char *
ac2b960f
YZ
11313aarch64_mangle_type (const_tree type)
11314{
11315 /* The AArch64 ABI documents say that "__va_list" has to be
 11316	 mangled as if it were in the "std" namespace. */
11317 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
11318 return "St9__va_list";
11319
c2ec330c
AL
11320 /* Half-precision float. */
11321 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
11322 return "Dh";
11323
f9d53c27
TB
11324 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
11325 builtin types. */
11326 if (TYPE_NAME (type) != NULL)
11327 return aarch64_mangle_builtin_type (type);
c6fc9e43 11328
ac2b960f
YZ
11329 /* Use the default mangling. */
11330 return NULL;
11331}
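
To illustrate the two special cases above with hypothetical declarations: a C++ function void f (__fp16) mangles as _Z1fDh because of the "Dh" code returned here, and a function taking the AAPCS64 va_list has that parameter mangled as St9__va_list, exactly as if the type were declared in namespace std.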
11332
75cf1494
KT
11333/* Find the first rtx_insn before insn that will generate an assembly
11334 instruction. */
11335
11336static rtx_insn *
11337aarch64_prev_real_insn (rtx_insn *insn)
11338{
11339 if (!insn)
11340 return NULL;
11341
11342 do
11343 {
11344 insn = prev_real_insn (insn);
11345 }
11346 while (insn && recog_memoized (insn) < 0);
11347
11348 return insn;
11349}
11350
11351static bool
11352is_madd_op (enum attr_type t1)
11353{
11354 unsigned int i;
11355 /* A number of these may be AArch32 only. */
11356 enum attr_type mlatypes[] = {
11357 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
11358 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
11359 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
11360 };
11361
11362 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
11363 {
11364 if (t1 == mlatypes[i])
11365 return true;
11366 }
11367
11368 return false;
11369}
11370
11371/* Check if there is a register dependency between a load and the insn
11372 for which we hold recog_data. */
11373
11374static bool
11375dep_between_memop_and_curr (rtx memop)
11376{
11377 rtx load_reg;
11378 int opno;
11379
8baff86e 11380 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
11381
11382 if (!REG_P (SET_DEST (memop)))
11383 return false;
11384
11385 load_reg = SET_DEST (memop);
8baff86e 11386 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
11387 {
11388 rtx operand = recog_data.operand[opno];
11389 if (REG_P (operand)
11390 && reg_overlap_mentioned_p (load_reg, operand))
11391 return true;
11392
11393 }
11394 return false;
11395}
11396
8baff86e
KT
11397
11398/* When working around the Cortex-A53 erratum 835769,
11399 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
11400 instruction and has a preceding memory instruction such that a NOP
11401 should be inserted between them. */
11402
75cf1494
KT
11403bool
11404aarch64_madd_needs_nop (rtx_insn* insn)
11405{
11406 enum attr_type attr_type;
11407 rtx_insn *prev;
11408 rtx body;
11409
b32c1043 11410 if (!TARGET_FIX_ERR_A53_835769)
75cf1494
KT
11411 return false;
11412
e322d6e3 11413 if (!INSN_P (insn) || recog_memoized (insn) < 0)
75cf1494
KT
11414 return false;
11415
11416 attr_type = get_attr_type (insn);
11417 if (!is_madd_op (attr_type))
11418 return false;
11419
11420 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
11421 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
11422 Restore recog state to INSN to avoid state corruption. */
11423 extract_constrain_insn_cached (insn);
11424
550e2205 11425 if (!prev || !contains_mem_rtx_p (PATTERN (prev)))
75cf1494
KT
11426 return false;
11427
11428 body = single_set (prev);
11429
11430 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
11431 it and the DImode madd, emit a NOP between them. If body is NULL then we
11432 have a complex memory operation, probably a load/store pair.
11433 Be conservative for now and emit a NOP. */
11434 if (GET_MODE (recog_data.operand[0]) == DImode
11435 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
11436 return true;
11437
11438 return false;
11439
11440}
11441
8baff86e
KT
11442
11443/* Implement FINAL_PRESCAN_INSN. */
11444
75cf1494
KT
11445void
11446aarch64_final_prescan_insn (rtx_insn *insn)
11447{
11448 if (aarch64_madd_needs_nop (insn))
11449 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
11450}
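
The effect on emitted assembly is easiest to see on a sequence that triggers the workaround. A hypothetical sketch of what -mfix-cortex-a53-835769 produces (register choices are arbitrary):

    ldr x1, [x2]
    nop // between mem op and mult-accumulate
    madd x0, x3, x4, x5

Without the option the nop is absent; it is inserted only when the previous real instruction is a memory operation and the multiply-accumulate operates on DImode, as checked in aarch64_madd_needs_nop above.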
11451
11452
43e9d192 11453/* Return the equivalent letter for size. */
81c2dfb9 11454static char
43e9d192
IB
11455sizetochar (int size)
11456{
11457 switch (size)
11458 {
11459 case 64: return 'd';
11460 case 32: return 's';
11461 case 16: return 'h';
11462 case 8 : return 'b';
11463 default: gcc_unreachable ();
11464 }
11465}
11466
3520f7cc
JG
 11467/* Return true iff X is a uniform vector of floating-point
 11468 constants, and each constant can be represented in
 11469 quarter-precision form. Note that, as aarch64_float_const_representable_p
 11470 rejects both +0.0 and -0.0, we reject them here as well. */
11471static bool
11472aarch64_vect_float_const_representable_p (rtx x)
11473{
92695fbb
RS
11474 rtx elt;
11475 return (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
11476 && const_vec_duplicate_p (x, &elt)
11477 && aarch64_float_const_representable_p (elt));
3520f7cc
JG
11478}
11479
d8edd899 11480/* Return true for valid and false for invalid. */
3ea63f60 11481bool
ef4bddc2 11482aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
48063b9d 11483 struct simd_immediate_info *info)
43e9d192
IB
11484{
11485#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
11486 matches = 1; \
11487 for (i = 0; i < idx; i += (STRIDE)) \
11488 if (!(TEST)) \
11489 matches = 0; \
11490 if (matches) \
11491 { \
11492 immtype = (CLASS); \
11493 elsize = (ELSIZE); \
43e9d192
IB
11494 eshift = (SHIFT); \
11495 emvn = (NEG); \
11496 break; \
11497 }
11498
11499 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
cb5ca315 11500 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
43e9d192 11501 unsigned char bytes[16];
43e9d192
IB
11502 int immtype = -1, matches;
11503 unsigned int invmask = inverse ? 0xff : 0;
11504 int eshift, emvn;
11505
43e9d192 11506 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 11507 {
81c2dfb9
IB
11508 if (! (aarch64_simd_imm_zero_p (op, mode)
11509 || aarch64_vect_float_const_representable_p (op)))
d8edd899 11510 return false;
3520f7cc 11511
48063b9d
IB
11512 if (info)
11513 {
e386a52f
RS
11514 rtx elt = CONST_VECTOR_ELT (op, 0);
11515 scalar_float_mode elt_mode
11516 = as_a <scalar_float_mode> (GET_MODE (elt));
11517
11518 info->value = elt;
11519 info->element_width = GET_MODE_BITSIZE (elt_mode);
48063b9d
IB
11520 info->mvn = false;
11521 info->shift = 0;
11522 }
3520f7cc 11523
d8edd899 11524 return true;
3520f7cc 11525 }
43e9d192
IB
11526
11527 /* Splat vector constant out into a byte vector. */
11528 for (i = 0; i < n_elts; i++)
11529 {
4b1e108c
AL
11530 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
11531 it must be laid out in the vector register in reverse order. */
11532 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192 11533 unsigned HOST_WIDE_INT elpart;
43e9d192 11534
ee78df47
KT
11535 gcc_assert (CONST_INT_P (el));
11536 elpart = INTVAL (el);
11537
11538 for (unsigned int byte = 0; byte < innersize; byte++)
11539 {
11540 bytes[idx++] = (elpart & 0xff) ^ invmask;
11541 elpart >>= BITS_PER_UNIT;
11542 }
43e9d192 11543
43e9d192
IB
11544 }
11545
11546 /* Sanity check. */
11547 gcc_assert (idx == GET_MODE_SIZE (mode));
11548
11549 do
11550 {
11551 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11552 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
11553
11554 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11555 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
11556
11557 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11558 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
11559
11560 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11561 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
11562
11563 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
11564
11565 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
11566
11567 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11568 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
11569
11570 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11571 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
11572
11573 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11574 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
11575
11576 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11577 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
11578
11579 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
11580
11581 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
11582
11583 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 11584 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
11585
11586 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 11587 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
11588
11589 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 11590 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
11591
11592 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 11593 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
11594
11595 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
11596
11597 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11598 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
11599 }
11600 while (0);
11601
e4f0f84d 11602 if (immtype == -1)
d8edd899 11603 return false;
43e9d192 11604
48063b9d 11605 if (info)
43e9d192 11606 {
48063b9d 11607 info->element_width = elsize;
48063b9d
IB
11608 info->mvn = emvn != 0;
11609 info->shift = eshift;
11610
43e9d192
IB
11611 unsigned HOST_WIDE_INT imm = 0;
11612
e4f0f84d
TB
11613 if (immtype >= 12 && immtype <= 15)
11614 info->msl = true;
11615
43e9d192
IB
11616 /* Un-invert bytes of recognized vector, if necessary. */
11617 if (invmask != 0)
11618 for (i = 0; i < idx; i++)
11619 bytes[i] ^= invmask;
11620
11621 if (immtype == 17)
11622 {
11623 /* FIXME: Broken on 32-bit H_W_I hosts. */
11624 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11625
11626 for (i = 0; i < 8; i++)
11627 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11628 << (i * BITS_PER_UNIT);
11629
43e9d192 11630
48063b9d
IB
11631 info->value = GEN_INT (imm);
11632 }
11633 else
11634 {
11635 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11636 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
11637
11638 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
11639 generic constants. */
11640 if (info->mvn)
43e9d192 11641 imm = ~imm;
48063b9d
IB
11642 imm = (imm >> info->shift) & 0xff;
11643 info->value = GEN_INT (imm);
11644 }
43e9d192
IB
11645 }
11646
48063b9d 11647 return true;
43e9d192
IB
11648#undef CHECK
11649}
11650
43e9d192
IB
 11651/* Check whether immediate shift constants are within range. */
11652bool
ef4bddc2 11653aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
11654{
11655 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
11656 if (left)
ddeabd3e 11657 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 11658 else
ddeabd3e 11659 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
11660}
11661
3520f7cc
JG
11662/* Return true if X is a uniform vector where all elements
11663 are either the floating-point constant 0.0 or the
11664 integer constant 0. */
43e9d192 11665bool
ef4bddc2 11666aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
43e9d192 11667{
3520f7cc 11668 return x == CONST0_RTX (mode);
43e9d192
IB
11669}
11670
7325d85a
KT
11671
11672/* Return the bitmask CONST_INT to select the bits required by a zero extract
11673 operation of width WIDTH at bit position POS. */
11674
11675rtx
11676aarch64_mask_from_zextract_ops (rtx width, rtx pos)
11677{
11678 gcc_assert (CONST_INT_P (width));
11679 gcc_assert (CONST_INT_P (pos));
11680
11681 unsigned HOST_WIDE_INT mask
11682 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
11683 return GEN_INT (mask << UINTVAL (pos));
11684}
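
A quick worked example, with values chosen purely for illustration: for WIDTH = 8 and POS = 16 the result is ((1 << 8) - 1) << 16 = 0xff << 16 = 0x00ff0000, i.e. the CONST_INT selecting exactly the byte that a zero_extract of width 8 at bit position 16 reads.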
11685
83f8c414 11686bool
a6e0bfa7 11687aarch64_mov_operand_p (rtx x, machine_mode mode)
83f8c414 11688{
83f8c414
CSS
11689 if (GET_CODE (x) == HIGH
11690 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
11691 return true;
11692
82614948 11693 if (CONST_INT_P (x))
83f8c414
CSS
11694 return true;
11695
11696 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
11697 return true;
11698
a6e0bfa7 11699 return aarch64_classify_symbolic_expression (x)
a5350ddc 11700 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
11701}
11702
43e9d192
IB
11703/* Return a const_int vector of VAL. */
11704rtx
ab014eb3 11705aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
43e9d192
IB
11706{
11707 int nunits = GET_MODE_NUNITS (mode);
11708 rtvec v = rtvec_alloc (nunits);
11709 int i;
11710
ab014eb3
TC
11711 rtx cache = GEN_INT (val);
11712
43e9d192 11713 for (i=0; i < nunits; i++)
ab014eb3 11714 RTVEC_ELT (v, i) = cache;
43e9d192
IB
11715
11716 return gen_rtx_CONST_VECTOR (mode, v);
11717}
11718
051d0e2f
SN
11719/* Check OP is a legal scalar immediate for the MOVI instruction. */
11720
11721bool
77e994c9 11722aarch64_simd_scalar_immediate_valid_for_move (rtx op, scalar_int_mode mode)
051d0e2f 11723{
ef4bddc2 11724 machine_mode vmode;
051d0e2f 11725
77e994c9 11726 vmode = aarch64_preferred_simd_mode (mode);
051d0e2f 11727 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 11728 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
11729}
11730
988fa693
JG
11731/* Construct and return a PARALLEL RTX vector with elements numbering the
11732 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
11733 the vector - from the perspective of the architecture. This does not
11734 line up with GCC's perspective on lane numbers, so we end up with
11735 different masks depending on our target endian-ness. The diagram
11736 below may help. We must draw the distinction when building masks
11737 which select one half of the vector. An instruction selecting
 11738 architectural low-lanes for a big-endian target must be described using
11739 a mask selecting GCC high-lanes.
11740
11741 Big-Endian Little-Endian
11742
11743GCC 0 1 2 3 3 2 1 0
11744 | x | x | x | x | | x | x | x | x |
11745Architecture 3 2 1 0 3 2 1 0
11746
11747Low Mask: { 2, 3 } { 0, 1 }
11748High Mask: { 0, 1 } { 2, 3 }
11749*/
11750
43e9d192 11751rtx
ef4bddc2 11752aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
43e9d192
IB
11753{
11754 int nunits = GET_MODE_NUNITS (mode);
11755 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
11756 int high_base = nunits / 2;
11757 int low_base = 0;
11758 int base;
43e9d192
IB
11759 rtx t1;
11760 int i;
11761
988fa693
JG
11762 if (BYTES_BIG_ENDIAN)
11763 base = high ? low_base : high_base;
11764 else
11765 base = high ? high_base : low_base;
11766
11767 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
11768 RTVEC_ELT (v, i) = GEN_INT (base + i);
11769
11770 t1 = gen_rtx_PARALLEL (mode, v);
11771 return t1;
11772}
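
To make the diagram above concrete: for V4SImode with HIGH == true this returns (parallel [(const_int 2) (const_int 3)]) on a little-endian target but (parallel [(const_int 0) (const_int 1)]) on a big-endian one, matching the "High Mask" row of the table.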
11773
988fa693
JG
11774/* Check OP for validity as a PARALLEL RTX vector with elements
11775 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
11776 from the perspective of the architecture. See the diagram above
11777 aarch64_simd_vect_par_cnst_half for more details. */
11778
11779bool
ef4bddc2 11780aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
11781 bool high)
11782{
11783 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
11784 HOST_WIDE_INT count_op = XVECLEN (op, 0);
11785 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
11786 int i = 0;
11787
11788 if (!VECTOR_MODE_P (mode))
11789 return false;
11790
11791 if (count_op != count_ideal)
11792 return false;
11793
11794 for (i = 0; i < count_ideal; i++)
11795 {
11796 rtx elt_op = XVECEXP (op, 0, i);
11797 rtx elt_ideal = XVECEXP (ideal, 0, i);
11798
4aa81c2e 11799 if (!CONST_INT_P (elt_op)
988fa693
JG
11800 || INTVAL (elt_ideal) != INTVAL (elt_op))
11801 return false;
11802 }
11803 return true;
11804}
11805
43e9d192
IB
11806/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
11807 HIGH (exclusive). */
11808void
46ed6024
CB
11809aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
11810 const_tree exp)
43e9d192
IB
11811{
11812 HOST_WIDE_INT lane;
4aa81c2e 11813 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
11814 lane = INTVAL (operand);
11815
11816 if (lane < low || lane >= high)
46ed6024
CB
11817 {
11818 if (exp)
cf0c27ef 11819 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 11820 else
cf0c27ef 11821 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 11822 }
43e9d192
IB
11823}
11824
43e9d192
IB
11825/* Return TRUE if OP is a valid vector addressing mode. */
11826bool
11827aarch64_simd_mem_operand_p (rtx op)
11828{
11829 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 11830 || REG_P (XEXP (op, 0)));
43e9d192
IB
11831}
11832
2d8c6dc1
AH
11833/* Emit a register copy from operand to operand, taking care not to
11834 early-clobber source registers in the process.
43e9d192 11835
2d8c6dc1
AH
11836 COUNT is the number of components into which the copy needs to be
11837 decomposed. */
43e9d192 11838void
b8506a8a 11839aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode,
2d8c6dc1 11840 unsigned int count)
43e9d192
IB
11841{
11842 unsigned int i;
2d8c6dc1
AH
11843 int rdest = REGNO (operands[0]);
11844 int rsrc = REGNO (operands[1]);
43e9d192
IB
11845
11846 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
11847 || rdest < rsrc)
11848 for (i = 0; i < count; i++)
11849 emit_move_insn (gen_rtx_REG (mode, rdest + i),
11850 gen_rtx_REG (mode, rsrc + i));
43e9d192 11851 else
2d8c6dc1
AH
11852 for (i = 0; i < count; i++)
11853 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
11854 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
11855}
11856
668046d1 11857/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
6ec0e5b9 11858 one of VSTRUCT modes: OI, CI, or XI. */
668046d1 11859int
b8506a8a 11860aarch64_simd_attr_length_rglist (machine_mode mode)
668046d1
DS
11861{
11862 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
11863}
11864
db0253a4
TB
11865/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
11866 alignment of a vector to 128 bits. */
11867static HOST_WIDE_INT
11868aarch64_simd_vector_alignment (const_tree type)
11869{
9439e9a1 11870 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
11871 return MIN (align, 128);
11872}
11873
11874/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
11875static bool
11876aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
11877{
11878 if (is_packed)
11879 return false;
11880
11881 /* We guarantee alignment for vectors up to 128-bits. */
11882 if (tree_int_cst_compare (TYPE_SIZE (type),
11883 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
11884 return false;
11885
11886 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
11887 return true;
11888}
11889
7df76747
N
11890/* Return true if the vector misalignment factor is supported by the
11891 target. */
11892static bool
11893aarch64_builtin_support_vector_misalignment (machine_mode mode,
11894 const_tree type, int misalignment,
11895 bool is_packed)
11896{
11897 if (TARGET_SIMD && STRICT_ALIGNMENT)
11898 {
11899 /* Return if movmisalign pattern is not supported for this mode. */
11900 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
11901 return false;
11902
11903 if (misalignment == -1)
11904 {
11905 /* Misalignment factor is unknown at compile time but we know
11906 it's word aligned. */
11907 if (aarch64_simd_vector_alignment_reachable (type, is_packed))
11908 {
11909 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
11910
11911 if (element_size != 64)
11912 return true;
11913 }
11914 return false;
11915 }
11916 }
11917 return default_builtin_support_vector_misalignment (mode, type, misalignment,
11918 is_packed);
11919}
11920
4369c11e
TB
11921/* If VALS is a vector constant that can be loaded into a register
11922 using DUP, generate instructions to do so and return an RTX to
11923 assign to the register. Otherwise return NULL_RTX. */
11924static rtx
11925aarch64_simd_dup_constant (rtx vals)
11926{
ef4bddc2
RS
11927 machine_mode mode = GET_MODE (vals);
11928 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 11929 rtx x;
4369c11e 11930
92695fbb 11931 if (!const_vec_duplicate_p (vals, &x))
4369c11e
TB
11932 return NULL_RTX;
11933
11934 /* We can load this constant by using DUP and a constant in a
11935 single ARM register. This will be cheaper than a vector
11936 load. */
92695fbb 11937 x = copy_to_mode_reg (inner_mode, x);
4369c11e
TB
11938 return gen_rtx_VEC_DUPLICATE (mode, x);
11939}
11940
11941
11942/* Generate code to load VALS, which is a PARALLEL containing only
11943 constants (for vec_init) or CONST_VECTOR, efficiently into a
11944 register. Returns an RTX to copy into the register, or NULL_RTX
11945 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 11946static rtx
4369c11e
TB
11947aarch64_simd_make_constant (rtx vals)
11948{
ef4bddc2 11949 machine_mode mode = GET_MODE (vals);
4369c11e
TB
11950 rtx const_dup;
11951 rtx const_vec = NULL_RTX;
11952 int n_elts = GET_MODE_NUNITS (mode);
11953 int n_const = 0;
11954 int i;
11955
11956 if (GET_CODE (vals) == CONST_VECTOR)
11957 const_vec = vals;
11958 else if (GET_CODE (vals) == PARALLEL)
11959 {
11960 /* A CONST_VECTOR must contain only CONST_INTs and
11961 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11962 Only store valid constants in a CONST_VECTOR. */
11963 for (i = 0; i < n_elts; ++i)
11964 {
11965 rtx x = XVECEXP (vals, 0, i);
11966 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11967 n_const++;
11968 }
11969 if (n_const == n_elts)
11970 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
11971 }
11972 else
11973 gcc_unreachable ();
11974
11975 if (const_vec != NULL_RTX
48063b9d 11976 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
11977 /* Load using MOVI/MVNI. */
11978 return const_vec;
11979 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
11980 /* Loaded using DUP. */
11981 return const_dup;
11982 else if (const_vec != NULL_RTX)
11983 /* Load from constant pool. We can not take advantage of single-cycle
11984 LD1 because we need a PC-relative addressing mode. */
11985 return const_vec;
11986 else
11987 /* A PARALLEL containing something not valid inside CONST_VECTOR.
11988 We can not construct an initializer. */
11989 return NULL_RTX;
11990}
11991
35a093b6
JG
11992/* Expand a vector initialisation sequence, such that TARGET is
11993 initialised to contain VALS. */
11994
4369c11e
TB
11995void
11996aarch64_expand_vector_init (rtx target, rtx vals)
11997{
ef4bddc2 11998 machine_mode mode = GET_MODE (target);
146c2e3a 11999 scalar_mode inner_mode = GET_MODE_INNER (mode);
35a093b6 12000 /* The number of vector elements. */
4369c11e 12001 int n_elts = GET_MODE_NUNITS (mode);
35a093b6 12002 /* The number of vector elements which are not constant. */
8b66a2d4
AL
12003 int n_var = 0;
12004 rtx any_const = NULL_RTX;
35a093b6
JG
12005 /* The first element of vals. */
12006 rtx v0 = XVECEXP (vals, 0, 0);
4369c11e 12007 bool all_same = true;
4369c11e 12008
35a093b6 12009 /* Count the number of variable elements to initialise. */
8b66a2d4 12010 for (int i = 0; i < n_elts; ++i)
4369c11e 12011 {
8b66a2d4 12012 rtx x = XVECEXP (vals, 0, i);
35a093b6 12013 if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
8b66a2d4
AL
12014 ++n_var;
12015 else
12016 any_const = x;
4369c11e 12017
35a093b6 12018 all_same &= rtx_equal_p (x, v0);
4369c11e
TB
12019 }
12020
35a093b6
JG
12021 /* No variable elements, hand off to aarch64_simd_make_constant which knows
12022 how best to handle this. */
4369c11e
TB
12023 if (n_var == 0)
12024 {
12025 rtx constant = aarch64_simd_make_constant (vals);
12026 if (constant != NULL_RTX)
12027 {
12028 emit_move_insn (target, constant);
12029 return;
12030 }
12031 }
12032
12033 /* Splat a single non-constant element if we can. */
12034 if (all_same)
12035 {
35a093b6 12036 rtx x = copy_to_mode_reg (inner_mode, v0);
4369c11e
TB
12037 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
12038 return;
12039 }
12040
85c1b6d7
AP
12041 enum insn_code icode = optab_handler (vec_set_optab, mode);
12042 gcc_assert (icode != CODE_FOR_nothing);
12043
12044 /* If there are only variable elements, try to optimize
12045 the insertion using dup for the most common element
12046 followed by insertions. */
12047
12048 /* The algorithm will fill matches[*][0] with the earliest matching element,
12049 and matches[X][1] with the count of duplicate elements (if X is the
12050 earliest element which has duplicates). */
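  /* For example (illustrative): for lanes {x, y, x, x} the loop below leaves
     matches[0][1] == 3 and matches[1][1] == 1, so lane 0 wins; x is DUPed
     into every lane and only y is inserted afterwards (into lane 1).  */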
12051
12052 if (n_var == n_elts && n_elts <= 16)
12053 {
12054 int matches[16][2] = {0};
12055 for (int i = 0; i < n_elts; i++)
12056 {
12057 for (int j = 0; j <= i; j++)
12058 {
12059 if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
12060 {
12061 matches[i][0] = j;
12062 matches[j][1]++;
12063 break;
12064 }
12065 }
12066 }
12067 int maxelement = 0;
12068 int maxv = 0;
12069 for (int i = 0; i < n_elts; i++)
12070 if (matches[i][1] > maxv)
12071 {
12072 maxelement = i;
12073 maxv = matches[i][1];
12074 }
12075
12076 /* Create a duplicate of the most common element. */
12077 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
12078 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
12079
12080 /* Insert the rest. */
12081 for (int i = 0; i < n_elts; i++)
12082 {
12083 rtx x = XVECEXP (vals, 0, i);
12084 if (matches[i][0] == maxelement)
12085 continue;
12086 x = copy_to_mode_reg (inner_mode, x);
12087 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
12088 }
12089 return;
12090 }
12091
35a093b6
JG
12092 /* Initialise a vector which is part-variable. We want to first try
12093 to build those lanes which are constant in the most efficient way we
12094 can. */
12095 if (n_var != n_elts)
4369c11e
TB
12096 {
12097 rtx copy = copy_rtx (vals);
4369c11e 12098
8b66a2d4
AL
12099 /* Load constant part of vector. We really don't care what goes into the
12100 parts we will overwrite, but we're more likely to be able to load the
12101 constant efficiently if it has fewer, larger, repeating parts
12102 (see aarch64_simd_valid_immediate). */
12103 for (int i = 0; i < n_elts; i++)
12104 {
12105 rtx x = XVECEXP (vals, 0, i);
12106 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12107 continue;
12108 rtx subst = any_const;
12109 for (int bit = n_elts / 2; bit > 0; bit /= 2)
12110 {
12111 /* Look in the copied vector, as more elements are const. */
12112 rtx test = XVECEXP (copy, 0, i ^ bit);
12113 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
12114 {
12115 subst = test;
12116 break;
12117 }
12118 }
12119 XVECEXP (copy, 0, i) = subst;
12120 }
4369c11e 12121 aarch64_expand_vector_init (target, copy);
35a093b6 12122 }
4369c11e 12123
35a093b6 12124 /* Insert the variable lanes directly. */
8b66a2d4 12125 for (int i = 0; i < n_elts; i++)
35a093b6
JG
12126 {
12127 rtx x = XVECEXP (vals, 0, i);
12128 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12129 continue;
12130 x = copy_to_mode_reg (inner_mode, x);
12131 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
12132 }
4369c11e
TB
12133}
12134
43e9d192 12135static unsigned HOST_WIDE_INT
ef4bddc2 12136aarch64_shift_truncation_mask (machine_mode mode)
43e9d192
IB
12137{
12138 return
ac59ad4e
KT
12139 (!SHIFT_COUNT_TRUNCATED
12140 || aarch64_vector_mode_supported_p (mode)
43e9d192
IB
12141 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
12142}
12143
43e9d192
IB
12144/* Select a format to encode pointers in exception handling data. */
12145int
12146aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
12147{
12148 int type;
12149 switch (aarch64_cmodel)
12150 {
12151 case AARCH64_CMODEL_TINY:
12152 case AARCH64_CMODEL_TINY_PIC:
12153 case AARCH64_CMODEL_SMALL:
12154 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 12155 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
12156 /* text+got+data < 4GB. 4-byte signed relocs are sufficient
12157 for everything. */
12158 type = DW_EH_PE_sdata4;
12159 break;
12160 default:
12161 /* No assumptions here. 8-byte relocs required. */
12162 type = DW_EH_PE_sdata8;
12163 break;
12164 }
12165 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
12166}
12167
e1c1ecb0
KT
12168/* The last .arch and .tune assembly strings that we printed. */
12169static std::string aarch64_last_printed_arch_string;
12170static std::string aarch64_last_printed_tune_string;
12171
361fb3ee
KT
12172/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
12173 by the function fndecl. */
12174
12175void
12176aarch64_declare_function_name (FILE *stream, const char* name,
12177 tree fndecl)
12178{
12179 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
12180
12181 struct cl_target_option *targ_options;
12182 if (target_parts)
12183 targ_options = TREE_TARGET_OPTION (target_parts);
12184 else
12185 targ_options = TREE_TARGET_OPTION (target_option_current_node);
12186 gcc_assert (targ_options);
12187
12188 const struct processor *this_arch
12189 = aarch64_get_arch (targ_options->x_explicit_arch);
12190
054b4005
JG
12191 unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
12192 std::string extension
04a99ebe
JG
12193 = aarch64_get_extension_string_for_isa_flags (isa_flags,
12194 this_arch->flags);
e1c1ecb0
KT
12195 /* Only update the assembler .arch string if it is distinct from the last
12196 such string we printed. */
12197 std::string to_print = this_arch->name + extension;
12198 if (to_print != aarch64_last_printed_arch_string)
12199 {
12200 asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
12201 aarch64_last_printed_arch_string = to_print;
12202 }
361fb3ee
KT
12203
12204 /* Print the cpu name we're tuning for in the comments; it might be
e1c1ecb0
KT
12205 useful to readers of the generated asm. Do it only when it changes
12206 from function to function and verbose assembly is requested. */
361fb3ee
KT
12207 const struct processor *this_tune
12208 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
12209
e1c1ecb0
KT
12210 if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
12211 {
12212 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
12213 this_tune->name);
12214 aarch64_last_printed_tune_string = this_tune->name;
12215 }
361fb3ee
KT
12216
12217 /* Don't forget the type directive for ELF. */
12218 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
12219 ASM_OUTPUT_LABEL (stream, name);
12220}
12221
e1c1ecb0
KT
12222/* Implements TARGET_ASM_FILE_START. Output the assembly header. */
12223
12224static void
12225aarch64_start_file (void)
12226{
12227 struct cl_target_option *default_options
12228 = TREE_TARGET_OPTION (target_option_default_node);
12229
12230 const struct processor *default_arch
12231 = aarch64_get_arch (default_options->x_explicit_arch);
12232 unsigned long default_isa_flags = default_options->x_aarch64_isa_flags;
12233 std::string extension
04a99ebe
JG
12234 = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
12235 default_arch->flags);
e1c1ecb0
KT
12236
12237 aarch64_last_printed_arch_string = default_arch->name + extension;
12238 aarch64_last_printed_tune_string = "";
12239 asm_fprintf (asm_out_file, "\t.arch %s\n",
12240 aarch64_last_printed_arch_string.c_str ());
12241
12242 default_file_start ();
12243}
12244
0462169c
SN
12245/* Emit load exclusive. */
12246
12247static void
ef4bddc2 12248aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
12249 rtx mem, rtx model_rtx)
12250{
12251 rtx (*gen) (rtx, rtx, rtx);
12252
12253 switch (mode)
12254 {
4e10a5a7
RS
12255 case E_QImode: gen = gen_aarch64_load_exclusiveqi; break;
12256 case E_HImode: gen = gen_aarch64_load_exclusivehi; break;
12257 case E_SImode: gen = gen_aarch64_load_exclusivesi; break;
12258 case E_DImode: gen = gen_aarch64_load_exclusivedi; break;
0462169c
SN
12259 default:
12260 gcc_unreachable ();
12261 }
12262
12263 emit_insn (gen (rval, mem, model_rtx));
12264}
12265
12266/* Emit store exclusive. */
12267
12268static void
ef4bddc2 12269aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
12270 rtx rval, rtx mem, rtx model_rtx)
12271{
12272 rtx (*gen) (rtx, rtx, rtx, rtx);
12273
12274 switch (mode)
12275 {
4e10a5a7
RS
12276 case E_QImode: gen = gen_aarch64_store_exclusiveqi; break;
12277 case E_HImode: gen = gen_aarch64_store_exclusivehi; break;
12278 case E_SImode: gen = gen_aarch64_store_exclusivesi; break;
12279 case E_DImode: gen = gen_aarch64_store_exclusivedi; break;
0462169c
SN
12280 default:
12281 gcc_unreachable ();
12282 }
12283
12284 emit_insn (gen (bval, rval, mem, model_rtx));
12285}
12286
12287/* Mark the previous jump instruction as unlikely. */
12288
12289static void
12290aarch64_emit_unlikely_jump (rtx insn)
12291{
f370536c 12292 rtx_insn *jump = emit_jump_insn (insn);
5fa396ad 12293 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
0462169c
SN
12294}
12295
12296/* Expand a compare and swap pattern. */
12297
12298void
12299aarch64_expand_compare_and_swap (rtx operands[])
12300{
12301 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
ef4bddc2 12302 machine_mode mode, cmp_mode;
b0770c0f
MW
12303 typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
12304 int idx;
12305 gen_cas_fn gen;
12306 const gen_cas_fn split_cas[] =
12307 {
12308 gen_aarch64_compare_and_swapqi,
12309 gen_aarch64_compare_and_swaphi,
12310 gen_aarch64_compare_and_swapsi,
12311 gen_aarch64_compare_and_swapdi
12312 };
12313 const gen_cas_fn atomic_cas[] =
12314 {
12315 gen_aarch64_compare_and_swapqi_lse,
12316 gen_aarch64_compare_and_swaphi_lse,
12317 gen_aarch64_compare_and_swapsi_lse,
12318 gen_aarch64_compare_and_swapdi_lse
12319 };
0462169c
SN
12320
12321 bval = operands[0];
12322 rval = operands[1];
12323 mem = operands[2];
12324 oldval = operands[3];
12325 newval = operands[4];
12326 is_weak = operands[5];
12327 mod_s = operands[6];
12328 mod_f = operands[7];
12329 mode = GET_MODE (mem);
12330 cmp_mode = mode;
12331
12332 /* Normally the succ memory model must be stronger than fail, but in the
12333 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
12334 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
12335
46b35980
AM
12336 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
12337 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
12338 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
12339
12340 switch (mode)
12341 {
4e10a5a7
RS
12342 case E_QImode:
12343 case E_HImode:
0462169c
SN
12344 /* For short modes, we're going to perform the comparison in SImode,
12345 so do the zero-extension now. */
12346 cmp_mode = SImode;
12347 rval = gen_reg_rtx (SImode);
12348 oldval = convert_modes (SImode, mode, oldval, true);
12349 /* Fall through. */
12350
4e10a5a7
RS
12351 case E_SImode:
12352 case E_DImode:
0462169c
SN
12353 /* Force the value into a register if needed. */
12354 if (!aarch64_plus_operand (oldval, mode))
12355 oldval = force_reg (cmp_mode, oldval);
12356 break;
12357
12358 default:
12359 gcc_unreachable ();
12360 }
12361
12362 switch (mode)
12363 {
4e10a5a7
RS
12364 case E_QImode: idx = 0; break;
12365 case E_HImode: idx = 1; break;
12366 case E_SImode: idx = 2; break;
12367 case E_DImode: idx = 3; break;
0462169c
SN
12368 default:
12369 gcc_unreachable ();
12370 }
b0770c0f
MW
12371 if (TARGET_LSE)
12372 gen = atomic_cas[idx];
12373 else
12374 gen = split_cas[idx];
0462169c
SN
12375
12376 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
12377
12378 if (mode == QImode || mode == HImode)
12379 emit_move_insn (operands[1], gen_lowpart (mode, rval));
12380
12381 x = gen_rtx_REG (CCmode, CC_REGNUM);
12382 x = gen_rtx_EQ (SImode, x, const0_rtx);
f7df4a84 12383 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
12384}
12385
641c2f8b
MW
12386/* Test whether the target supports using an atomic load-operate instruction
12387 for operation CODE. Returns FALSE if the operation isn't supported by the
12390 architecture. */
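/* For reference (illustrative; derived from aarch64_gen_atomic_ldop below):
   SET maps to SWP, PLUS to LDADD (MINUS via negation), IOR to LDSET,
   XOR to LDEOR, and AND to LDCLR with an inverted operand.  */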
12391
12392bool
12393aarch64_atomic_ldop_supported_p (enum rtx_code code)
12394{
12395 if (!TARGET_LSE)
12396 return false;
12397
12398 switch (code)
12399 {
12400 case SET:
12401 case AND:
12402 case IOR:
12403 case XOR:
12404 case MINUS:
12405 case PLUS:
12406 return true;
12407 default:
12408 return false;
12409 }
12410}
12411
f70fb3b6
MW
12412/* Emit a barrier appropriate for memory model MODEL at the end of a
12413 sequence implementing an atomic operation. */
12414
12415static void
12416aarch64_emit_post_barrier (enum memmodel model)
12417{
12418 const enum memmodel base_model = memmodel_base (model);
12419
12420 if (is_mm_sync (model)
12421 && (base_model == MEMMODEL_ACQUIRE
12422 || base_model == MEMMODEL_ACQ_REL
12423 || base_model == MEMMODEL_SEQ_CST))
12424 {
12425 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
12426 }
12427}
12428
b0770c0f
MW
12429/* Emit an atomic compare-and-swap operation. RVAL is the destination register
12430 for the data in memory. EXPECTED is the value expected to be in memory.
12431 DESIRED is the value to store to memory. MEM is the memory location. MODEL
12432 is the memory ordering to use. */
12433
12434void
12435aarch64_gen_atomic_cas (rtx rval, rtx mem,
12436 rtx expected, rtx desired,
12437 rtx model)
12438{
12439 rtx (*gen) (rtx, rtx, rtx, rtx);
12440 machine_mode mode;
12441
12442 mode = GET_MODE (mem);
12443
12444 switch (mode)
12445 {
4e10a5a7
RS
12446 case E_QImode: gen = gen_aarch64_atomic_casqi; break;
12447 case E_HImode: gen = gen_aarch64_atomic_cashi; break;
12448 case E_SImode: gen = gen_aarch64_atomic_cassi; break;
12449 case E_DImode: gen = gen_aarch64_atomic_casdi; break;
b0770c0f
MW
12450 default:
12451 gcc_unreachable ();
12452 }
12453
12454 /* Move the expected value into the CAS destination register. */
12455 emit_insn (gen_rtx_SET (rval, expected));
12456
12457 /* Emit the CAS. */
12458 emit_insn (gen (rval, mem, desired, model));
12459
12460 /* Compare the expected value with the value loaded by the CAS, to establish
12461 whether the swap was made. */
12462 aarch64_gen_compare_reg (EQ, rval, expected);
12463}
12464
0462169c
SN
12465/* Split a compare and swap pattern. */
12466
12467void
12468aarch64_split_compare_and_swap (rtx operands[])
12469{
12470 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 12471 machine_mode mode;
0462169c 12472 bool is_weak;
5d8a22a5
DM
12473 rtx_code_label *label1, *label2;
12474 rtx x, cond;
ab876106
MW
12475 enum memmodel model;
12476 rtx model_rtx;
0462169c
SN
12477
12478 rval = operands[0];
12479 mem = operands[1];
12480 oldval = operands[2];
12481 newval = operands[3];
12482 is_weak = (operands[4] != const0_rtx);
ab876106 12483 model_rtx = operands[5];
0462169c
SN
12484 scratch = operands[7];
12485 mode = GET_MODE (mem);
ab876106 12486 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 12487
17f47f86
KT
12488 /* When OLDVAL is zero and we want the strong version we can emit a tighter
12489 loop:
12490 .label1:
12491 LD[A]XR rval, [mem]
12492 CBNZ rval, .label2
12493 ST[L]XR scratch, newval, [mem]
12494 CBNZ scratch, .label1
12495 .label2:
12496 CMP rval, 0. */
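  /* For comparison (illustrative), the generic strong-CAS loop emitted
     below is:
     .label1:
	LD[A]XR	rval, [mem]
	CMP	rval, oldval
	B.NE	.label2
	ST[L]XR	scratch, newval, [mem]
	CBNZ	scratch, .label1
     .label2:  */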
12497 bool strong_zero_p = !is_weak && oldval == const0_rtx;
12498
5d8a22a5 12499 label1 = NULL;
0462169c
SN
12500 if (!is_weak)
12501 {
12502 label1 = gen_label_rtx ();
12503 emit_label (label1);
12504 }
12505 label2 = gen_label_rtx ();
12506
ab876106
MW
12507 /* The initial load can be relaxed for a __sync operation since a final
12508 barrier will be emitted to stop code hoisting. */
12509 if (is_mm_sync (model))
12510 aarch64_emit_load_exclusive (mode, rval, mem,
12511 GEN_INT (MEMMODEL_RELAXED));
12512 else
12513 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c 12514
17f47f86
KT
12515 if (strong_zero_p)
12516 {
12517 x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
12518 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12519 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
12520 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
12521 }
12522 else
12523 {
12524 cond = aarch64_gen_compare_reg (NE, rval, oldval);
12525 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
12526 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12527 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
12528 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
12529 }
0462169c 12530
ab876106 12531 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
12532
12533 if (!is_weak)
12534 {
12535 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
12536 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12537 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 12538 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
12539 }
12540 else
12541 {
12542 cond = gen_rtx_REG (CCmode, CC_REGNUM);
12543 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
f7df4a84 12544 emit_insn (gen_rtx_SET (cond, x));
0462169c
SN
12545 }
12546
12547 emit_label (label2);
17f47f86
KT
12548 /* If we used a CBNZ in the exchange loop, emit an explicit compare with RVAL
12549 to set the condition flags. If this is not used it will be removed by
12550 later passes. */
12551 if (strong_zero_p)
12552 {
12553 cond = gen_rtx_REG (CCmode, CC_REGNUM);
12554 x = gen_rtx_COMPARE (CCmode, rval, const0_rtx);
12555 emit_insn (gen_rtx_SET (cond, x));
12556 }
ab876106
MW
12557 /* Emit any final barrier needed for a __sync operation. */
12558 if (is_mm_sync (model))
12559 aarch64_emit_post_barrier (model);
0462169c
SN
12560}
12561
68729b06
MW
12562/* Emit a BIC instruction: DST = S1 & ~(S2 >> SHIFT). */
12563
12564static void
12565aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
12566{
12567 rtx shift_rtx = GEN_INT (shift);
12568 rtx (*gen) (rtx, rtx, rtx, rtx);
12569
12570 switch (mode)
12571 {
4e10a5a7
RS
12572 case E_SImode: gen = gen_and_one_cmpl_lshrsi3; break;
12573 case E_DImode: gen = gen_and_one_cmpl_lshrdi3; break;
68729b06
MW
12574 default:
12575 gcc_unreachable ();
12576 }
12577
12578 emit_insn (gen (dst, s2, shift_rtx, s1));
12579}
12580
9cd7b720
MW
12581/* Emit an atomic swap. */
12582
12583static void
12584aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
12585 rtx mem, rtx model)
12586{
12587 rtx (*gen) (rtx, rtx, rtx, rtx);
12588
12589 switch (mode)
12590 {
4e10a5a7
RS
12591 case E_QImode: gen = gen_aarch64_atomic_swpqi; break;
12592 case E_HImode: gen = gen_aarch64_atomic_swphi; break;
12593 case E_SImode: gen = gen_aarch64_atomic_swpsi; break;
12594 case E_DImode: gen = gen_aarch64_atomic_swpdi; break;
9cd7b720
MW
12595 default:
12596 gcc_unreachable ();
12597 }
12598
12599 emit_insn (gen (dst, mem, value, model));
12600}
12601
641c2f8b
MW
12602/* Operations supported by aarch64_emit_atomic_load_op. */
12603
12604enum aarch64_atomic_load_op_code
12605{
12606 AARCH64_LDOP_PLUS, /* A + B */
12607 AARCH64_LDOP_XOR, /* A ^ B */
12608 AARCH64_LDOP_OR, /* A | B */
12609 AARCH64_LDOP_BIC /* A & ~B */
12610};
12611
12612/* Emit an atomic load-operate. */
12613
12614static void
12615aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
12616 machine_mode mode, rtx dst, rtx src,
12617 rtx mem, rtx model)
12618{
12619 typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
12620 const aarch64_atomic_load_op_fn plus[] =
12621 {
12622 gen_aarch64_atomic_loadaddqi,
12623 gen_aarch64_atomic_loadaddhi,
12624 gen_aarch64_atomic_loadaddsi,
12625 gen_aarch64_atomic_loadadddi
12626 };
12627 const aarch64_atomic_load_op_fn eor[] =
12628 {
12629 gen_aarch64_atomic_loadeorqi,
12630 gen_aarch64_atomic_loadeorhi,
12631 gen_aarch64_atomic_loadeorsi,
12632 gen_aarch64_atomic_loadeordi
12633 };
12634 const aarch64_atomic_load_op_fn ior[] =
12635 {
12636 gen_aarch64_atomic_loadsetqi,
12637 gen_aarch64_atomic_loadsethi,
12638 gen_aarch64_atomic_loadsetsi,
12639 gen_aarch64_atomic_loadsetdi
12640 };
12641 const aarch64_atomic_load_op_fn bic[] =
12642 {
12643 gen_aarch64_atomic_loadclrqi,
12644 gen_aarch64_atomic_loadclrhi,
12645 gen_aarch64_atomic_loadclrsi,
12646 gen_aarch64_atomic_loadclrdi
12647 };
12648 aarch64_atomic_load_op_fn gen;
12649 int idx = 0;
12650
12651 switch (mode)
12652 {
4e10a5a7
RS
12653 case E_QImode: idx = 0; break;
12654 case E_HImode: idx = 1; break;
12655 case E_SImode: idx = 2; break;
12656 case E_DImode: idx = 3; break;
641c2f8b
MW
12657 default:
12658 gcc_unreachable ();
12659 }
12660
12661 switch (code)
12662 {
12663 case AARCH64_LDOP_PLUS: gen = plus[idx]; break;
12664 case AARCH64_LDOP_XOR: gen = eor[idx]; break;
12665 case AARCH64_LDOP_OR: gen = ior[idx]; break;
12666 case AARCH64_LDOP_BIC: gen = bic[idx]; break;
12667 default:
12668 gcc_unreachable ();
12669 }
12670
12671 emit_insn (gen (dst, mem, src, model));
12672}
12673
12674/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
68729b06
MW
12675 location to store the data read from memory. OUT_RESULT is the location to
12676 store the result of the operation. MEM is the memory location to read and
12677 modify. MODEL_RTX is the memory ordering to use. VALUE is the second
12678 operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
12679 be NULL. */
9cd7b720
MW
12680
12681void
68729b06 12682aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
9cd7b720
MW
12683 rtx mem, rtx value, rtx model_rtx)
12684{
12685 machine_mode mode = GET_MODE (mem);
641c2f8b
MW
12686 machine_mode wmode = (mode == DImode ? DImode : SImode);
12687 const bool short_mode = (mode < SImode);
12688 aarch64_atomic_load_op_code ldop_code;
12689 rtx src;
12690 rtx x;
12691
12692 if (out_data)
12693 out_data = gen_lowpart (mode, out_data);
9cd7b720 12694
68729b06
MW
12695 if (out_result)
12696 out_result = gen_lowpart (mode, out_result);
12697
641c2f8b
MW
12698 /* Make sure the value is in a register, putting it into a destination
12699 register if it needs to be manipulated. */
12700 if (!register_operand (value, mode)
12701 || code == AND || code == MINUS)
12702 {
68729b06 12703 src = out_result ? out_result : out_data;
641c2f8b
MW
12704 emit_move_insn (src, gen_lowpart (mode, value));
12705 }
12706 else
12707 src = value;
12708 gcc_assert (register_operand (src, mode));
9cd7b720 12709
641c2f8b
MW
12710 /* Preprocess the data for the operation as necessary. If the operation is
12711 a SET then emit a swap instruction and finish. */
9cd7b720
MW
12712 switch (code)
12713 {
12714 case SET:
641c2f8b 12715 aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
9cd7b720
MW
12716 return;
12717
641c2f8b
MW
12718 case MINUS:
12719 /* Negate the value and treat it as a PLUS. */
12720 {
12721 rtx neg_src;
12722
12723 /* Resize the value if necessary. */
12724 if (short_mode)
12725 src = gen_lowpart (wmode, src);
12726
12727 neg_src = gen_rtx_NEG (wmode, src);
12728 emit_insn (gen_rtx_SET (src, neg_src));
12729
12730 if (short_mode)
12731 src = gen_lowpart (mode, src);
12732 }
12733 /* Fall-through. */
12734 case PLUS:
12735 ldop_code = AARCH64_LDOP_PLUS;
12736 break;
12737
12738 case IOR:
12739 ldop_code = AARCH64_LDOP_OR;
12740 break;
12741
12742 case XOR:
12743 ldop_code = AARCH64_LDOP_XOR;
12744 break;
12745
12746 case AND:
12747 {
12748 rtx not_src;
12749
12750 /* Resize the value if necessary. */
12751 if (short_mode)
12752 src = gen_lowpart (wmode, src);
12753
12754 not_src = gen_rtx_NOT (wmode, src);
12755 emit_insn (gen_rtx_SET (src, not_src));
12756
12757 if (short_mode)
12758 src = gen_lowpart (mode, src);
12759 }
12760 ldop_code = AARCH64_LDOP_BIC;
12761 break;
12762
9cd7b720
MW
12763 default:
12764 /* The operation can't be done with atomic instructions. */
12765 gcc_unreachable ();
12766 }
641c2f8b
MW
12767
12768 aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
68729b06
MW
12769
12770 /* If necessary, calculate the data in memory after the update by redoing the
12771 operation from values in registers. */
12772 if (!out_result)
12773 return;
12774
12775 if (short_mode)
12776 {
12777 src = gen_lowpart (wmode, src);
12778 out_data = gen_lowpart (wmode, out_data);
12779 out_result = gen_lowpart (wmode, out_result);
12780 }
12781
12782 x = NULL_RTX;
12783
12784 switch (code)
12785 {
12786 case MINUS:
12787 case PLUS:
12788 x = gen_rtx_PLUS (wmode, out_data, src);
12789 break;
12790 case IOR:
12791 x = gen_rtx_IOR (wmode, out_data, src);
12792 break;
12793 case XOR:
12794 x = gen_rtx_XOR (wmode, out_data, src);
12795 break;
12796 case AND:
12797 aarch64_emit_bic (wmode, out_result, out_data, src, 0);
12798 return;
12799 default:
12800 gcc_unreachable ();
12801 }
12802
12803 emit_set_insn (out_result, x);
12804
12805 return;
9cd7b720
MW
12806}
12807
0462169c
SN
12808/* Split an atomic operation. */
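/* For example (illustrative), an atomic fetch-and-add without LSE is split
   into a load/store-exclusive loop along the lines of:
   .label:
	LD[A]XR	old, [mem]
	ADD	new, old, value
	ST[L]XR	cond, new, [mem]
	CBNZ	cond, .label  */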
12809
12810void
12811aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9cd7b720 12812 rtx value, rtx model_rtx, rtx cond)
0462169c 12813{
ef4bddc2
RS
12814 machine_mode mode = GET_MODE (mem);
12815 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
12816 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
12817 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
12818 rtx_code_label *label;
12819 rtx x;
0462169c 12820
9cd7b720 12821 /* Split the atomic operation into a sequence. */
0462169c
SN
12822 label = gen_label_rtx ();
12823 emit_label (label);
12824
12825 if (new_out)
12826 new_out = gen_lowpart (wmode, new_out);
12827 if (old_out)
12828 old_out = gen_lowpart (wmode, old_out);
12829 else
12830 old_out = new_out;
12831 value = simplify_gen_subreg (wmode, value, mode, 0);
12832
f70fb3b6
MW
12833 /* The initial load can be relaxed for a __sync operation since a final
12834 barrier will be emitted to stop code hoisting. */
12835 if (is_sync)
12836 aarch64_emit_load_exclusive (mode, old_out, mem,
12837 GEN_INT (MEMMODEL_RELAXED));
12838 else
12839 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
12840
12841 switch (code)
12842 {
12843 case SET:
12844 new_out = value;
12845 break;
12846
12847 case NOT:
12848 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 12849 emit_insn (gen_rtx_SET (new_out, x));
0462169c 12850 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 12851 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
12852 break;
12853
12854 case MINUS:
12855 if (CONST_INT_P (value))
12856 {
12857 value = GEN_INT (-INTVAL (value));
12858 code = PLUS;
12859 }
12860 /* Fall through. */
12861
12862 default:
12863 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 12864 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
12865 break;
12866 }
12867
12868 aarch64_emit_store_exclusive (mode, cond, mem,
12869 gen_lowpart (mode, new_out), model_rtx);
12870
12871 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
12872 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12873 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 12874 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
12875
12876 /* Emit any final barrier needed for a __sync operation. */
12877 if (is_sync)
12878 aarch64_emit_post_barrier (model);
0462169c
SN
12879}
12880
c2ec330c
AL
12881static void
12882aarch64_init_libfuncs (void)
12883{
12884 /* Half-precision float operations. The compiler handles all operations
12885 with NULL libfuncs by converting to SFmode. */
12886
12887 /* Conversions. */
12888 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
12889 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
12890
12891 /* Arithmetic. */
12892 set_optab_libfunc (add_optab, HFmode, NULL);
12893 set_optab_libfunc (sdiv_optab, HFmode, NULL);
12894 set_optab_libfunc (smul_optab, HFmode, NULL);
12895 set_optab_libfunc (neg_optab, HFmode, NULL);
12896 set_optab_libfunc (sub_optab, HFmode, NULL);
12897
12898 /* Comparisons. */
12899 set_optab_libfunc (eq_optab, HFmode, NULL);
12900 set_optab_libfunc (ne_optab, HFmode, NULL);
12901 set_optab_libfunc (lt_optab, HFmode, NULL);
12902 set_optab_libfunc (le_optab, HFmode, NULL);
12903 set_optab_libfunc (ge_optab, HFmode, NULL);
12904 set_optab_libfunc (gt_optab, HFmode, NULL);
12905 set_optab_libfunc (unord_optab, HFmode, NULL);
12906}
12907
43e9d192 12908/* Target hook for c_mode_for_suffix. */
ef4bddc2 12909static machine_mode
43e9d192
IB
12910aarch64_c_mode_for_suffix (char suffix)
12911{
12912 if (suffix == 'q')
12913 return TFmode;
12914
12915 return VOIDmode;
12916}
12917
3520f7cc
JG
12918/* We can only represent floating point constants which will fit in
12919 "quarter-precision" values. These values are characterised by
12920 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
12921 by:
12922
12923 (-1)^s * (n/16) * 2^r
12924
12925 Where:
12926 's' is the sign bit.
12927 'n' is an integer in the range 16 <= n <= 31.
12928 'r' is an integer in the range -3 <= r <= 4. */
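/* For example (illustrative): 0.5 = (16/16) * 2^-1 and 1.75 = (28/16) * 2^0
   are representable, whereas 0.1 is not; representable magnitudes range
   from (16/16) * 2^-3 = 0.125 up to (31/16) * 2^4 = 31.0.  */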
12929
12930/* Return true iff X can be represented by a quarter-precision
12931 floating point immediate operand. Note that we cannot represent 0.0. */
12932bool
12933aarch64_float_const_representable_p (rtx x)
12934{
12935 /* This represents our current view of how many bits
12936 make up the mantissa. */
12937 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 12938 int exponent;
3520f7cc 12939 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 12940 REAL_VALUE_TYPE r, m;
807e902e 12941 bool fail;
3520f7cc
JG
12942
12943 if (!CONST_DOUBLE_P (x))
12944 return false;
12945
c2ec330c
AL
12946 /* We don't support HFmode constants yet. */
12947 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
94bfa2da
TV
12948 return false;
12949
34a72c33 12950 r = *CONST_DOUBLE_REAL_VALUE (x);
3520f7cc
JG
12951
12952 /* We cannot represent infinities, NaNs or +/-zero. We won't
12953 know if we have +zero until we analyse the mantissa, but we
12954 can reject the other invalid values. */
12955 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
12956 || REAL_VALUE_MINUS_ZERO (r))
12957 return false;
12958
ba96cdfb 12959 /* Extract exponent. */
3520f7cc
JG
12960 r = real_value_abs (&r);
12961 exponent = REAL_EXP (&r);
12962
12963 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12964 highest (sign) bit, with a fixed binary point at bit point_pos.
12965 m1 holds the low part of the mantissa, m2 the high part.
12966 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
12967 bits for the mantissa, this can fail (low bits will be lost). */
12968 real_ldexp (&m, &r, point_pos - exponent);
807e902e 12969 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
12970
12971 /* If the low part of the mantissa has bits set we cannot represent
12972 the value. */
d9074b29 12973 if (w.ulow () != 0)
3520f7cc
JG
12974 return false;
12975 /* We have rejected the lower HOST_WIDE_INT, so update our
12976 understanding of how many bits lie in the mantissa and
12977 look only at the high HOST_WIDE_INT. */
807e902e 12978 mantissa = w.elt (1);
3520f7cc
JG
12979 point_pos -= HOST_BITS_PER_WIDE_INT;
12980
12981 /* We can only represent values with a mantissa of the form 1.xxxx. */
12982 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12983 if ((mantissa & mask) != 0)
12984 return false;
12985
12986 /* Having filtered unrepresentable values, we may now remove all
12987 but the highest 5 bits. */
12988 mantissa >>= point_pos - 5;
12989
12990 /* We cannot represent the value 0.0, so reject it. This is handled
12991 elsewhere. */
12992 if (mantissa == 0)
12993 return false;
12994
12995 /* Then, as bit 4 is always set, we can mask it off, leaving
12996 the mantissa in the range [0, 15]. */
12997 mantissa &= ~(1 << 4);
12998 gcc_assert (mantissa <= 15);
12999
13000 /* GCC internally does not use IEEE754-like encoding (where normalized
13001 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
13002 Our mantissa values are shifted 4 places to the left relative to
13003 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
13004 by 5 places to correct for GCC's representation. */
13005 exponent = 5 - exponent;
13006
13007 return (exponent >= 0 && exponent <= 7);
13008}
13009
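/* Return the assembler template (e.g. a MOVI, MVNI or FMOV with the
   appropriate lane arrangement; illustrative summary, built below from
   the simd_immediate_info) for moving the valid SIMD immediate
   CONST_VECTOR of mode MODE into a vector register; WIDTH is the vector
   width in bits.  */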
13010char*
81c2dfb9 13011aarch64_output_simd_mov_immediate (rtx const_vector,
ef4bddc2 13012 machine_mode mode,
3520f7cc
JG
13013 unsigned width)
13014{
3ea63f60 13015 bool is_valid;
3520f7cc 13016 static char templ[40];
3520f7cc 13017 const char *mnemonic;
e4f0f84d 13018 const char *shift_op;
3520f7cc 13019 unsigned int lane_count = 0;
81c2dfb9 13020 char element_char;
3520f7cc 13021
e4f0f84d 13022 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
13023
13024 /* This will return true to show const_vector is legal for use as the
13025 immediate of an AdvSIMD MOVI (or, implicitly, MVNI) instruction. It will
13026 also update INFO to show how the immediate should be generated. */
81c2dfb9 13027 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
13028 gcc_assert (is_valid);
13029
81c2dfb9 13030 element_char = sizetochar (info.element_width);
48063b9d
IB
13031 lane_count = width / info.element_width;
13032
3520f7cc 13033 mode = GET_MODE_INNER (mode);
0d8e1702 13034 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
3520f7cc 13035 {
48063b9d 13036 gcc_assert (info.shift == 0 && ! info.mvn);
0d8e1702
KT
13037 /* For FP zero, change it to a CONST_INT 0 and use the integer SIMD
13038 move immediate path. */
48063b9d
IB
13039 if (aarch64_float_const_zero_rtx_p (info.value))
13040 info.value = GEN_INT (0);
13041 else
13042 {
83faf7d0 13043 const unsigned int buf_size = 20;
48063b9d 13044 char float_buf[buf_size] = {'\0'};
34a72c33
RS
13045 real_to_decimal_for_mode (float_buf,
13046 CONST_DOUBLE_REAL_VALUE (info.value),
13047 buf_size, buf_size, 1, mode);
48063b9d
IB
13048
13049 if (lane_count == 1)
13050 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
13051 else
13052 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 13053 lane_count, element_char, float_buf);
48063b9d
IB
13054 return templ;
13055 }
3520f7cc 13056 }
3520f7cc 13057
48063b9d 13058 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 13059 shift_op = info.msl ? "msl" : "lsl";
3520f7cc 13060
0d8e1702 13061 gcc_assert (CONST_INT_P (info.value));
3520f7cc 13062 if (lane_count == 1)
48063b9d
IB
13063 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
13064 mnemonic, UINTVAL (info.value));
13065 else if (info.shift)
13066 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
13067 ", %s %d", mnemonic, lane_count, element_char,
13068 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 13069 else
48063b9d 13070 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 13071 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
13072 return templ;
13073}
13074
b7342d25 13075char*
77e994c9 13076aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
b7342d25 13077{
a2170965
TC
13078
13079 /* If a floating point number was passed and we desire to use it in an
13080 integer mode do the conversion to integer. */
13081 if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
13082 {
13083 unsigned HOST_WIDE_INT ival;
13084 if (!aarch64_reinterpret_float_as_int (immediate, &ival))
13085 gcc_unreachable ();
13086 immediate = gen_int_mode (ival, mode);
13087 }
13088
ef4bddc2 13089 machine_mode vmode;
a2170965
TC
13090 /* Use a 64-bit mode for everything except DI/DF mode, where we use
13091 a 128-bit vector mode. */
13092 int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
b7342d25 13093
a2170965 13094 vmode = aarch64_simd_container_mode (mode, width);
b7342d25 13095 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
a2170965 13096 return aarch64_output_simd_mov_immediate (v_op, vmode, width);
b7342d25
IB
13097}
13098
88b08073
JG
13099/* Split operands into moves from op[1] + op[2] into op[0]. */
13100
13101void
13102aarch64_split_combinev16qi (rtx operands[3])
13103{
13104 unsigned int dest = REGNO (operands[0]);
13105 unsigned int src1 = REGNO (operands[1]);
13106 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 13107 machine_mode halfmode = GET_MODE (operands[1]);
462a99aa 13108 unsigned int halfregs = REG_NREGS (operands[1]);
88b08073
JG
13109 rtx destlo, desthi;
13110
13111 gcc_assert (halfmode == V16QImode);
13112
13113 if (src1 == dest && src2 == dest + halfregs)
13114 {
13115 /* No-op move. Can't split to nothing; emit something. */
13116 emit_note (NOTE_INSN_DELETED);
13117 return;
13118 }
13119
13120 /* Preserve register attributes for variable tracking. */
13121 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
13122 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
13123 GET_MODE_SIZE (halfmode));
13124
13125 /* Special case of reversed high/low parts. */
13126 if (reg_overlap_mentioned_p (operands[2], destlo)
13127 && reg_overlap_mentioned_p (operands[1], desthi))
13128 {
13129 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
13130 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
13131 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
13132 }
13133 else if (!reg_overlap_mentioned_p (operands[2], destlo))
13134 {
13135 /* Try to avoid unnecessary moves if part of the result
13136 is in the right place already. */
13137 if (src1 != dest)
13138 emit_move_insn (destlo, operands[1]);
13139 if (src2 != dest + halfregs)
13140 emit_move_insn (desthi, operands[2]);
13141 }
13142 else
13143 {
13144 if (src2 != dest + halfregs)
13145 emit_move_insn (desthi, operands[2]);
13146 if (src1 != dest)
13147 emit_move_insn (destlo, operands[1]);
13148 }
13149}
13150
13151/* vec_perm support. */
13152
13153#define MAX_VECT_LEN 16
13154
13155struct expand_vec_perm_d
13156{
13157 rtx target, op0, op1;
13158 unsigned char perm[MAX_VECT_LEN];
ef4bddc2 13159 machine_mode vmode;
88b08073
JG
13160 unsigned char nelt;
13161 bool one_vector_p;
13162 bool testing_p;
13163};
13164
13165/* Generate a variable permutation. */
13166
13167static void
13168aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
13169{
ef4bddc2 13170 machine_mode vmode = GET_MODE (target);
88b08073
JG
13171 bool one_vector_p = rtx_equal_p (op0, op1);
13172
13173 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
13174 gcc_checking_assert (GET_MODE (op0) == vmode);
13175 gcc_checking_assert (GET_MODE (op1) == vmode);
13176 gcc_checking_assert (GET_MODE (sel) == vmode);
13177 gcc_checking_assert (TARGET_SIMD);
13178
13179 if (one_vector_p)
13180 {
13181 if (vmode == V8QImode)
13182 {
13183 /* Expand the argument to a V16QI mode by duplicating it. */
13184 rtx pair = gen_reg_rtx (V16QImode);
13185 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
13186 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
13187 }
13188 else
13189 {
13190 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
13191 }
13192 }
13193 else
13194 {
13195 rtx pair;
13196
13197 if (vmode == V8QImode)
13198 {
13199 pair = gen_reg_rtx (V16QImode);
13200 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
13201 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
13202 }
13203 else
13204 {
13205 pair = gen_reg_rtx (OImode);
13206 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
13207 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
13208 }
13209 }
13210}
13211
13212void
13213aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
13214{
ef4bddc2 13215 machine_mode vmode = GET_MODE (target);
c9d1a16a 13216 unsigned int nelt = GET_MODE_NUNITS (vmode);
88b08073 13217 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 13218 rtx mask;
88b08073
JG
13219
13220 /* The TBL instruction does not use a modulo index, so we must take care
13221 of that ourselves. */
f7c4e5b8
AL
13222 mask = aarch64_simd_gen_const_vector_dup (vmode,
13223 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
13224 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
13225
f7c4e5b8
AL
13226 /* For big-endian, we also need to reverse the index within the vector
13227 (but not which vector). */
13228 if (BYTES_BIG_ENDIAN)
13229 {
13230 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
13231 if (!one_vector_p)
13232 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
13233 sel = expand_simple_binop (vmode, XOR, sel, mask,
13234 NULL, 0, OPTAB_LIB_WIDEN);
13235 }
88b08073
JG
13236 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
13237}
13238
cc4d934f
JG
13239/* Recognize patterns suitable for the TRN instructions. */
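/* For example (illustrative), for V4SImode inputs {a0,a1,a2,a3} and
   {b0,b1,b2,b3}, TRN1 corresponds to the permutation {0,4,2,6}
   ({a0,b0,a2,b2}) and TRN2 to {1,5,3,7} ({a1,b1,a3,b3}).  */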
13240static bool
13241aarch64_evpc_trn (struct expand_vec_perm_d *d)
13242{
13243 unsigned int i, odd, mask, nelt = d->nelt;
13244 rtx out, in0, in1, x;
13245 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 13246 machine_mode vmode = d->vmode;
cc4d934f
JG
13247
13248 if (GET_MODE_UNIT_SIZE (vmode) > 8)
13249 return false;
13250
13251 /* Note that these are little-endian tests.
13252 We correct for big-endian later. */
13253 if (d->perm[0] == 0)
13254 odd = 0;
13255 else if (d->perm[0] == 1)
13256 odd = 1;
13257 else
13258 return false;
13259 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
13260
13261 for (i = 0; i < nelt; i += 2)
13262 {
13263 if (d->perm[i] != i + odd)
13264 return false;
13265 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
13266 return false;
13267 }
13268
13269 /* Success! */
13270 if (d->testing_p)
13271 return true;
13272
13273 in0 = d->op0;
13274 in1 = d->op1;
13275 if (BYTES_BIG_ENDIAN)
13276 {
13277 x = in0, in0 = in1, in1 = x;
13278 odd = !odd;
13279 }
13280 out = d->target;
13281
13282 if (odd)
13283 {
13284 switch (vmode)
13285 {
4e10a5a7
RS
13286 case E_V16QImode: gen = gen_aarch64_trn2v16qi; break;
13287 case E_V8QImode: gen = gen_aarch64_trn2v8qi; break;
13288 case E_V8HImode: gen = gen_aarch64_trn2v8hi; break;
13289 case E_V4HImode: gen = gen_aarch64_trn2v4hi; break;
13290 case E_V4SImode: gen = gen_aarch64_trn2v4si; break;
13291 case E_V2SImode: gen = gen_aarch64_trn2v2si; break;
13292 case E_V2DImode: gen = gen_aarch64_trn2v2di; break;
13293 case E_V4HFmode: gen = gen_aarch64_trn2v4hf; break;
13294 case E_V8HFmode: gen = gen_aarch64_trn2v8hf; break;
13295 case E_V4SFmode: gen = gen_aarch64_trn2v4sf; break;
13296 case E_V2SFmode: gen = gen_aarch64_trn2v2sf; break;
13297 case E_V2DFmode: gen = gen_aarch64_trn2v2df; break;
cc4d934f
JG
13298 default:
13299 return false;
13300 }
13301 }
13302 else
13303 {
13304 switch (vmode)
13305 {
4e10a5a7
RS
13306 case E_V16QImode: gen = gen_aarch64_trn1v16qi; break;
13307 case E_V8QImode: gen = gen_aarch64_trn1v8qi; break;
13308 case E_V8HImode: gen = gen_aarch64_trn1v8hi; break;
13309 case E_V4HImode: gen = gen_aarch64_trn1v4hi; break;
13310 case E_V4SImode: gen = gen_aarch64_trn1v4si; break;
13311 case E_V2SImode: gen = gen_aarch64_trn1v2si; break;
13312 case E_V2DImode: gen = gen_aarch64_trn1v2di; break;
13313 case E_V4HFmode: gen = gen_aarch64_trn1v4hf; break;
13314 case E_V8HFmode: gen = gen_aarch64_trn1v8hf; break;
13315 case E_V4SFmode: gen = gen_aarch64_trn1v4sf; break;
13316 case E_V2SFmode: gen = gen_aarch64_trn1v2sf; break;
13317 case E_V2DFmode: gen = gen_aarch64_trn1v2df; break;
cc4d934f
JG
13318 default:
13319 return false;
13320 }
13321 }
13322
13323 emit_insn (gen (out, in0, in1));
13324 return true;
13325}
13326
13327/* Recognize patterns suitable for the UZP instructions. */
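/* For example (illustrative), for V4SImode inputs {a0,a1,a2,a3} and
   {b0,b1,b2,b3}, UZP1 corresponds to the permutation {0,2,4,6}
   ({a0,a2,b0,b2}) and UZP2 to {1,3,5,7} ({a1,a3,b1,b3}).  */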
13328static bool
13329aarch64_evpc_uzp (struct expand_vec_perm_d *d)
13330{
13331 unsigned int i, odd, mask, nelt = d->nelt;
13332 rtx out, in0, in1, x;
13333 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 13334 machine_mode vmode = d->vmode;
cc4d934f
JG
13335
13336 if (GET_MODE_UNIT_SIZE (vmode) > 8)
13337 return false;
13338
13339 /* Note that these are little-endian tests.
13340 We correct for big-endian later. */
13341 if (d->perm[0] == 0)
13342 odd = 0;
13343 else if (d->perm[0] == 1)
13344 odd = 1;
13345 else
13346 return false;
13347 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
13348
13349 for (i = 0; i < nelt; i++)
13350 {
13351 unsigned elt = (i * 2 + odd) & mask;
13352 if (d->perm[i] != elt)
13353 return false;
13354 }
13355
13356 /* Success! */
13357 if (d->testing_p)
13358 return true;
13359
13360 in0 = d->op0;
13361 in1 = d->op1;
13362 if (BYTES_BIG_ENDIAN)
13363 {
13364 x = in0, in0 = in1, in1 = x;
13365 odd = !odd;
13366 }
13367 out = d->target;
13368
13369 if (odd)
13370 {
13371 switch (vmode)
13372 {
4e10a5a7
RS
13373 case E_V16QImode: gen = gen_aarch64_uzp2v16qi; break;
13374 case E_V8QImode: gen = gen_aarch64_uzp2v8qi; break;
13375 case E_V8HImode: gen = gen_aarch64_uzp2v8hi; break;
13376 case E_V4HImode: gen = gen_aarch64_uzp2v4hi; break;
13377 case E_V4SImode: gen = gen_aarch64_uzp2v4si; break;
13378 case E_V2SImode: gen = gen_aarch64_uzp2v2si; break;
13379 case E_V2DImode: gen = gen_aarch64_uzp2v2di; break;
13380 case E_V4HFmode: gen = gen_aarch64_uzp2v4hf; break;
13381 case E_V8HFmode: gen = gen_aarch64_uzp2v8hf; break;
13382 case E_V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
13383 case E_V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
13384 case E_V2DFmode: gen = gen_aarch64_uzp2v2df; break;
cc4d934f
JG
13385 default:
13386 return false;
13387 }
13388 }
13389 else
13390 {
13391 switch (vmode)
13392 {
4e10a5a7
RS
13393 case E_V16QImode: gen = gen_aarch64_uzp1v16qi; break;
13394 case E_V8QImode: gen = gen_aarch64_uzp1v8qi; break;
13395 case E_V8HImode: gen = gen_aarch64_uzp1v8hi; break;
13396 case E_V4HImode: gen = gen_aarch64_uzp1v4hi; break;
13397 case E_V4SImode: gen = gen_aarch64_uzp1v4si; break;
13398 case E_V2SImode: gen = gen_aarch64_uzp1v2si; break;
13399 case E_V2DImode: gen = gen_aarch64_uzp1v2di; break;
13400 case E_V4HFmode: gen = gen_aarch64_uzp1v4hf; break;
13401 case E_V8HFmode: gen = gen_aarch64_uzp1v8hf; break;
13402 case E_V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
13403 case E_V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
13404 case E_V2DFmode: gen = gen_aarch64_uzp1v2df; break;
cc4d934f
JG
13405 default:
13406 return false;
13407 }
13408 }
13409
13410 emit_insn (gen (out, in0, in1));
13411 return true;
13412}
13413
13414/* Recognize patterns suitable for the ZIP instructions. */
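/* For example (illustrative), for V4SImode inputs {a0,a1,a2,a3} and
   {b0,b1,b2,b3}, ZIP1 corresponds to the permutation {0,4,1,5}
   ({a0,b0,a1,b1}) and ZIP2 to {2,6,3,7} ({a2,b2,a3,b3}).  */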
13415static bool
13416aarch64_evpc_zip (struct expand_vec_perm_d *d)
13417{
13418 unsigned int i, high, mask, nelt = d->nelt;
13419 rtx out, in0, in1, x;
13420 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 13421 machine_mode vmode = d->vmode;
cc4d934f
JG
13422
13423 if (GET_MODE_UNIT_SIZE (vmode) > 8)
13424 return false;
13425
13426 /* Note that these are little-endian tests.
13427 We correct for big-endian later. */
13428 high = nelt / 2;
13429 if (d->perm[0] == high)
13430 /* Do Nothing. */
13431 ;
13432 else if (d->perm[0] == 0)
13433 high = 0;
13434 else
13435 return false;
13436 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
13437
13438 for (i = 0; i < nelt / 2; i++)
13439 {
13440 unsigned elt = (i + high) & mask;
13441 if (d->perm[i * 2] != elt)
13442 return false;
13443 elt = (elt + nelt) & mask;
13444 if (d->perm[i * 2 + 1] != elt)
13445 return false;
13446 }
13447
13448 /* Success! */
13449 if (d->testing_p)
13450 return true;
13451
13452 in0 = d->op0;
13453 in1 = d->op1;
13454 if (BYTES_BIG_ENDIAN)
13455 {
13456 x = in0, in0 = in1, in1 = x;
13457 high = !high;
13458 }
13459 out = d->target;
13460
13461 if (high)
13462 {
13463 switch (vmode)
13464 {
4e10a5a7
RS
13465 case E_V16QImode: gen = gen_aarch64_zip2v16qi; break;
13466 case E_V8QImode: gen = gen_aarch64_zip2v8qi; break;
13467 case E_V8HImode: gen = gen_aarch64_zip2v8hi; break;
13468 case E_V4HImode: gen = gen_aarch64_zip2v4hi; break;
13469 case E_V4SImode: gen = gen_aarch64_zip2v4si; break;
13470 case E_V2SImode: gen = gen_aarch64_zip2v2si; break;
13471 case E_V2DImode: gen = gen_aarch64_zip2v2di; break;
13472 case E_V4HFmode: gen = gen_aarch64_zip2v4hf; break;
13473 case E_V8HFmode: gen = gen_aarch64_zip2v8hf; break;
13474 case E_V4SFmode: gen = gen_aarch64_zip2v4sf; break;
13475 case E_V2SFmode: gen = gen_aarch64_zip2v2sf; break;
13476 case E_V2DFmode: gen = gen_aarch64_zip2v2df; break;
cc4d934f
JG
13477 default:
13478 return false;
13479 }
13480 }
13481 else
13482 {
13483 switch (vmode)
13484 {
4e10a5a7
RS
13485 case E_V16QImode: gen = gen_aarch64_zip1v16qi; break;
13486 case E_V8QImode: gen = gen_aarch64_zip1v8qi; break;
13487 case E_V8HImode: gen = gen_aarch64_zip1v8hi; break;
13488 case E_V4HImode: gen = gen_aarch64_zip1v4hi; break;
13489 case E_V4SImode: gen = gen_aarch64_zip1v4si; break;
13490 case E_V2SImode: gen = gen_aarch64_zip1v2si; break;
13491 case E_V2DImode: gen = gen_aarch64_zip1v2di; break;
13492 case E_V4HFmode: gen = gen_aarch64_zip1v4hf; break;
13493 case E_V8HFmode: gen = gen_aarch64_zip1v8hf; break;
13494 case E_V4SFmode: gen = gen_aarch64_zip1v4sf; break;
13495 case E_V2SFmode: gen = gen_aarch64_zip1v2sf; break;
13496 case E_V2DFmode: gen = gen_aarch64_zip1v2df; break;
cc4d934f
JG
13497 default:
13498 return false;
13499 }
13500 }
13501
13502 emit_insn (gen (out, in0, in1));
13503 return true;
13504}
13505
ae0533da
AL
13506/* Recognize patterns for the EXT insn. */
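/* For example (illustrative), for V4SImode a permutation of {1,2,3,4}
   selects four consecutive elements starting at element 1 of the
   concatenation op0:op1, which is exactly what EXT with a byte offset of
   one element provides.  */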
13507
13508static bool
13509aarch64_evpc_ext (struct expand_vec_perm_d *d)
13510{
13511 unsigned int i, nelt = d->nelt;
13512 rtx (*gen) (rtx, rtx, rtx, rtx);
13513 rtx offset;
13514
13515 unsigned int location = d->perm[0]; /* Always < nelt. */
13516
13517 /* Check if the extracted indices are increasing by one. */
13518 for (i = 1; i < nelt; i++)
13519 {
13520 unsigned int required = location + i;
13521 if (d->one_vector_p)
13522 {
13523 /* We'll pass the same vector in twice, so allow indices to wrap. */
13524 required &= (nelt - 1);
13525 }
13526 if (d->perm[i] != required)
13527 return false;
13528 }
13529
ae0533da
AL
13530 switch (d->vmode)
13531 {
4e10a5a7
RS
13532 case E_V16QImode: gen = gen_aarch64_extv16qi; break;
13533 case E_V8QImode: gen = gen_aarch64_extv8qi; break;
13534 case E_V4HImode: gen = gen_aarch64_extv4hi; break;
13535 case E_V8HImode: gen = gen_aarch64_extv8hi; break;
13536 case E_V2SImode: gen = gen_aarch64_extv2si; break;
13537 case E_V4SImode: gen = gen_aarch64_extv4si; break;
13538 case E_V4HFmode: gen = gen_aarch64_extv4hf; break;
13539 case E_V8HFmode: gen = gen_aarch64_extv8hf; break;
13540 case E_V2SFmode: gen = gen_aarch64_extv2sf; break;
13541 case E_V4SFmode: gen = gen_aarch64_extv4sf; break;
13542 case E_V2DImode: gen = gen_aarch64_extv2di; break;
13543 case E_V2DFmode: gen = gen_aarch64_extv2df; break;
ae0533da
AL
13544 default:
13545 return false;
13546 }
13547
13548 /* Success! */
13549 if (d->testing_p)
13550 return true;
13551
b31e65bb
AL
13552 /* The case where (location == 0) is a no-op for both big- and little-endian,
13553 and is removed by the mid-end at optimization levels -O1 and higher. */
13554
13555 if (BYTES_BIG_ENDIAN && (location != 0))
ae0533da
AL
13556 {
13557 /* After setup, we want the high elements of the first vector (stored
13558 at the LSB end of the register), and the low elements of the second
13559 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 13560 std::swap (d->op0, d->op1);
ae0533da
AL
13561 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
13562 location = nelt - location;
13563 }
13564
13565 offset = GEN_INT (location);
13566 emit_insn (gen (d->target, d->op0, d->op1, offset));
13567 return true;
13568}
13569
923fcec3
AL
13570/* Recognize patterns for the REV insns. */
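/* For example (illustrative), on V4HImode the permutation {1,0,3,2}
   (diff == 1) reverses the halfwords within each word and maps to REV32,
   while {3,2,1,0} on the same mode (diff == 3) maps to REV64.  */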
13571
13572static bool
13573aarch64_evpc_rev (struct expand_vec_perm_d *d)
13574{
13575 unsigned int i, j, diff, nelt = d->nelt;
13576 rtx (*gen) (rtx, rtx);
13577
13578 if (!d->one_vector_p)
13579 return false;
13580
13581 diff = d->perm[0];
13582 switch (diff)
13583 {
13584 case 7:
13585 switch (d->vmode)
13586 {
4e10a5a7
RS
13587 case E_V16QImode: gen = gen_aarch64_rev64v16qi; break;
13588 case E_V8QImode: gen = gen_aarch64_rev64v8qi; break;
923fcec3
AL
13589 default:
13590 return false;
13591 }
13592 break;
13593 case 3:
13594 switch (d->vmode)
13595 {
4e10a5a7
RS
13596 case E_V16QImode: gen = gen_aarch64_rev32v16qi; break;
13597 case E_V8QImode: gen = gen_aarch64_rev32v8qi; break;
13598 case E_V8HImode: gen = gen_aarch64_rev64v8hi; break;
13599 case E_V4HImode: gen = gen_aarch64_rev64v4hi; break;
923fcec3
AL
13600 default:
13601 return false;
13602 }
13603 break;
13604 case 1:
13605 switch (d->vmode)
13606 {
4e10a5a7
RS
13607 case E_V16QImode: gen = gen_aarch64_rev16v16qi; break;
13608 case E_V8QImode: gen = gen_aarch64_rev16v8qi; break;
13609 case E_V8HImode: gen = gen_aarch64_rev32v8hi; break;
13610 case E_V4HImode: gen = gen_aarch64_rev32v4hi; break;
13611 case E_V4SImode: gen = gen_aarch64_rev64v4si; break;
13612 case E_V2SImode: gen = gen_aarch64_rev64v2si; break;
13613 case E_V4SFmode: gen = gen_aarch64_rev64v4sf; break;
13614 case E_V2SFmode: gen = gen_aarch64_rev64v2sf; break;
13615 case E_V8HFmode: gen = gen_aarch64_rev64v8hf; break;
13616 case E_V4HFmode: gen = gen_aarch64_rev64v4hf; break;
923fcec3
AL
13617 default:
13618 return false;
13619 }
13620 break;
13621 default:
13622 return false;
13623 }
13624
13625 for (i = 0; i < nelt ; i += diff + 1)
13626 for (j = 0; j <= diff; j += 1)
13627 {
13628 /* This is guaranteed to be true as the value of diff
13629 is 7, 3 or 1, and we should have enough elements in the
13630 queue to generate this. Getting a vector mask with a
13631 value of diff other than these values implies that
13632 something is wrong by the time we get here. */
13633 gcc_assert (i + j < nelt);
13634 if (d->perm[i + j] != i + diff - j)
13635 return false;
13636 }
13637
13638 /* Success! */
13639 if (d->testing_p)
13640 return true;
13641
13642 emit_insn (gen (d->target, d->op0));
13643 return true;
13644}
13645
91bd4114
JG
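/* Recognize patterns suitable for the DUP instruction: every permute index
   selects the same element (e.g., illustratively, {2,2,2,2} for V4SImode),
   which becomes a DUP from that lane of the first operand.  */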
13646static bool
13647aarch64_evpc_dup (struct expand_vec_perm_d *d)
13648{
13649 rtx (*gen) (rtx, rtx, rtx);
13650 rtx out = d->target;
13651 rtx in0;
ef4bddc2 13652 machine_mode vmode = d->vmode;
91bd4114
JG
13653 unsigned int i, elt, nelt = d->nelt;
13654 rtx lane;
13655
91bd4114
JG
13656 elt = d->perm[0];
13657 for (i = 1; i < nelt; i++)
13658 {
13659 if (elt != d->perm[i])
13660 return false;
13661 }
13662
13663 /* The generic preparation in aarch64_expand_vec_perm_const_1
13664 swaps the operand order and the permute indices if it finds
13665 d->perm[0] to be in the second operand. Thus, we can always
13666 use d->op0 and need not do any extra arithmetic to get the
13667 correct lane number. */
13668 in0 = d->op0;
f901401e 13669 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114
JG
13670
13671 switch (vmode)
13672 {
4e10a5a7
RS
13673 case E_V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
13674 case E_V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
13675 case E_V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
13676 case E_V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
13677 case E_V4SImode: gen = gen_aarch64_dup_lanev4si; break;
13678 case E_V2SImode: gen = gen_aarch64_dup_lanev2si; break;
13679 case E_V2DImode: gen = gen_aarch64_dup_lanev2di; break;
13680 case E_V8HFmode: gen = gen_aarch64_dup_lanev8hf; break;
13681 case E_V4HFmode: gen = gen_aarch64_dup_lanev4hf; break;
13682 case E_V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
13683 case E_V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
13684 case E_V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
91bd4114
JG
13685 default:
13686 return false;
13687 }
13688
13689 emit_insn (gen (out, in0, lane));
13690 return true;
13691}
13692
88b08073
JG
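/* Fall back to a TBL-based permutation: build the selector as a constant
   vector (adjusting the indices for big-endian as described below) and let
   aarch64_expand_vec_perm_1 emit the TBL sequence.  */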
13693static bool
13694aarch64_evpc_tbl (struct expand_vec_perm_d *d)
13695{
13696 rtx rperm[MAX_VECT_LEN], sel;
ef4bddc2 13697 machine_mode vmode = d->vmode;
88b08073
JG
13698 unsigned int i, nelt = d->nelt;
13699
88b08073
JG
13700 if (d->testing_p)
13701 return true;
13702
13703 /* Generic code will try constant permutation twice: once with the
13704 original mode and again with the elements lowered to QImode.
13705 So wait and don't do the selector expansion ourselves. */
13706 if (vmode != V8QImode && vmode != V16QImode)
13707 return false;
13708
13709 for (i = 0; i < nelt; ++i)
bbcc9c00
TB
13710 {
13711 int nunits = GET_MODE_NUNITS (vmode);
13712
13713 /* If big-endian and two vectors we end up with a weird mixed-endian
13714 mode on NEON. Reverse the index within each word but not the word
13715 itself. */
13716 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
13717 : d->perm[i]);
13718 }
88b08073
JG
13719 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
13720 sel = force_reg (vmode, sel);
13721
13722 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
13723 return true;
13724}
13725
13726static bool
13727aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
13728{
13729 /* The pattern matching functions above are written to look for a small
13730 number to begin the sequence (0, 1, N/2). If we begin with an index
13731 from the second operand, we can swap the operands. */
13732 if (d->perm[0] >= d->nelt)
13733 {
13734 unsigned i, nelt = d->nelt;
88b08073 13735
0696116a 13736 gcc_assert (nelt == (nelt & -nelt));
88b08073 13737 for (i = 0; i < nelt; ++i)
0696116a 13738 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
88b08073 13739
cb5c6c29 13740 std::swap (d->op0, d->op1);
88b08073
JG
13741 }
13742
13743 if (TARGET_SIMD)
cc4d934f 13744 {
923fcec3
AL
13745 if (aarch64_evpc_rev (d))
13746 return true;
13747 else if (aarch64_evpc_ext (d))
ae0533da 13748 return true;
f901401e
AL
13749 else if (aarch64_evpc_dup (d))
13750 return true;
ae0533da 13751 else if (aarch64_evpc_zip (d))
cc4d934f
JG
13752 return true;
13753 else if (aarch64_evpc_uzp (d))
13754 return true;
13755 else if (aarch64_evpc_trn (d))
13756 return true;
13757 return aarch64_evpc_tbl (d);
13758 }
88b08073
JG
13759 return false;
13760}
13761
13762/* Expand a vec_perm_const pattern. */
13763
13764bool
13765aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
13766{
13767 struct expand_vec_perm_d d;
13768 int i, nelt, which;
13769
13770 d.target = target;
13771 d.op0 = op0;
13772 d.op1 = op1;
13773
13774 d.vmode = GET_MODE (target);
13775 gcc_assert (VECTOR_MODE_P (d.vmode));
13776 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
13777 d.testing_p = false;
13778
13779 for (i = which = 0; i < nelt; ++i)
13780 {
13781 rtx e = XVECEXP (sel, 0, i);
13782 int ei = INTVAL (e) & (2 * nelt - 1);
13783 which |= (ei < nelt ? 1 : 2);
13784 d.perm[i] = ei;
13785 }
13786
13787 switch (which)
13788 {
13789 default:
13790 gcc_unreachable ();
13791
13792 case 3:
13793 d.one_vector_p = false;
13794 if (!rtx_equal_p (op0, op1))
13795 break;
13796
13797 /* The elements of PERM do not suggest that only the first operand
13798 is used, but both operands are identical. Allow easier matching
13799 of the permutation by folding the permutation into the single
13800 input vector. */
13801 /* Fall Through. */
13802 case 2:
13803 for (i = 0; i < nelt; ++i)
13804 d.perm[i] &= nelt - 1;
13805 d.op0 = op1;
13806 d.one_vector_p = true;
13807 break;
13808
13809 case 1:
13810 d.op1 = op0;
13811 d.one_vector_p = true;
13812 break;
13813 }
13814
13815 return aarch64_expand_vec_perm_const_1 (&d);
13816}
13817
13818static bool
ef4bddc2 13819aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
88b08073
JG
13820 const unsigned char *sel)
13821{
13822 struct expand_vec_perm_d d;
13823 unsigned int i, nelt, which;
13824 bool ret;
13825
13826 d.vmode = vmode;
13827 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
13828 d.testing_p = true;
13829 memcpy (d.perm, sel, nelt);
13830
13831 /* Calculate whether all elements are in one vector. */
13832 for (i = which = 0; i < nelt; ++i)
13833 {
13834 unsigned char e = d.perm[i];
13835 gcc_assert (e < 2 * nelt);
13836 which |= (e < nelt ? 1 : 2);
13837 }
13838
13839 /* If all elements are from the second vector, reindex as if from the
13840 first vector. */
13841 if (which == 2)
13842 for (i = 0; i < nelt; ++i)
13843 d.perm[i] -= nelt;
13844
13845 /* Check whether the mask can be applied to a single vector. */
13846 d.one_vector_p = (which != 3);
13847
13848 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
13849 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
13850 if (!d.one_vector_p)
13851 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
13852
13853 start_sequence ();
13854 ret = aarch64_expand_vec_perm_const_1 (&d);
13855 end_sequence ();
13856
13857 return ret;
13858}
13859
668046d1 13860rtx
b8506a8a 13861aarch64_reverse_mask (machine_mode mode)
668046d1
DS
13862{
 13863	  /* We have to reverse each vector because we don't have
13864 a permuted load that can reverse-load according to ABI rules. */
13865 rtx mask;
13866 rtvec v = rtvec_alloc (16);
13867 int i, j;
13868 int nunits = GET_MODE_NUNITS (mode);
13869 int usize = GET_MODE_UNIT_SIZE (mode);
13870
13871 gcc_assert (BYTES_BIG_ENDIAN);
13872 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
13873
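  /* For V4SImode, for example, the loop below builds the V16QImode
     selector { 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 }.  */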
13874 for (i = 0; i < nunits; i++)
13875 for (j = 0; j < usize; j++)
13876 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
13877 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
13878 return force_reg (V16QImode, mask);
13879}
13880
99e1629f
RS
13881/* Implement TARGET_MODES_TIEABLE_P. In principle we should always return
 13882	   true.  However, due to issues with register allocation it is preferable
 13883	   to avoid tying integer scalar and FP scalar modes.  Executing integer
13884 operations in general registers is better than treating them as scalar
13885 vector operations. This reduces latency and avoids redundant int<->FP
13886 moves. So tie modes if they are either the same class, or vector modes
13887 with other vector modes, vector structs or any scalar mode. */
97e1ad78 13888
99e1629f 13889static bool
ef4bddc2 13890aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
13891{
13892 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
13893 return true;
13894
13895 /* We specifically want to allow elements of "structure" modes to
13896 be tieable to the structure. This more general condition allows
13897 other rarer situations too. */
61f17a5c
WD
13898 if (aarch64_vector_mode_p (mode1) && aarch64_vector_mode_p (mode2))
13899 return true;
13900
13901 /* Also allow any scalar modes with vectors. */
13902 if (aarch64_vector_mode_supported_p (mode1)
13903 || aarch64_vector_mode_supported_p (mode2))
97e1ad78
JG
13904 return true;
13905
13906 return false;
13907}
13908
e2c75eea
JG
13909/* Return a new RTX holding the result of moving POINTER forward by
13910 AMOUNT bytes. */
13911
13912static rtx
13913aarch64_move_pointer (rtx pointer, int amount)
13914{
13915 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
13916
13917 return adjust_automodify_address (pointer, GET_MODE (pointer),
13918 next, amount);
13919}
13920
13921/* Return a new RTX holding the result of moving POINTER forward by the
13922 size of the mode it points to. */
13923
13924static rtx
13925aarch64_progress_pointer (rtx pointer)
13926{
13927 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
13928
13929 return aarch64_move_pointer (pointer, amount);
13930}
13931
13932/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
13933 MODE bytes. */
13934
13935static void
13936aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 13937 machine_mode mode)
e2c75eea
JG
13938{
13939 rtx reg = gen_reg_rtx (mode);
13940
13941 /* "Cast" the pointers to the correct mode. */
13942 *src = adjust_address (*src, mode, 0);
13943 *dst = adjust_address (*dst, mode, 0);
13944 /* Emit the memcpy. */
13945 emit_move_insn (reg, *src);
13946 emit_move_insn (*dst, reg);
13947 /* Move the pointers forward. */
13948 *src = aarch64_progress_pointer (*src);
13949 *dst = aarch64_progress_pointer (*dst);
13950}
13951
13952/* Expand movmem, as if from a __builtin_memcpy. Return true if
13953 we succeed, otherwise return false. */
13954
13955bool
13956aarch64_expand_movmem (rtx *operands)
13957{
13958 unsigned int n;
13959 rtx dst = operands[0];
13960 rtx src = operands[1];
13961 rtx base;
13962 bool speed_p = !optimize_function_for_size_p (cfun);
13963
13964 /* When optimizing for size, give a better estimate of the length of a
13965 memcpy call, but use the default otherwise. */
13966 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
13967
13968 /* We can't do anything smart if the amount to copy is not constant. */
13969 if (!CONST_INT_P (operands[2]))
13970 return false;
13971
13972 n = UINTVAL (operands[2]);
13973
13974 /* Try to keep the number of instructions low. For cases below 16 bytes we
13975 need to make at most two moves. For cases above 16 bytes it will be one
 13976	     move for each 16-byte chunk, then at most two additional moves.  */
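  /* For example, a 35-byte copy takes 35 / 16 + 2 = 4 moves, within the
     speed budget of 15 / 2 = 7, so it is expanded inline.  */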
13977 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
13978 return false;
13979
13980 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13981 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
13982
13983 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
13984 src = adjust_automodify_address (src, VOIDmode, base, 0);
13985
 13986	  /* Simple cases.  Copy 0-3 bytes as (if applicable) a 2-byte chunk,
 13987	     then a 1-byte chunk.  */
13988 if (n < 4)
13989 {
13990 if (n >= 2)
13991 {
13992 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
13993 n -= 2;
13994 }
13995
13996 if (n == 1)
13997 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
13998
13999 return true;
14000 }
14001
14002 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
14003 4-byte chunk, partially overlapping with the previously copied chunk. */
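  /* For example, a 7-byte copy loads bytes 0-3 and then bytes 3-6.  */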
14004 if (n < 8)
14005 {
14006 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
14007 n -= 4;
14008 if (n > 0)
14009 {
14010 int move = n - 4;
14011
14012 src = aarch64_move_pointer (src, move);
14013 dst = aarch64_move_pointer (dst, move);
14014 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
14015 }
14016 return true;
14017 }
14018
14019 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
14020 them, then (if applicable) an 8-byte chunk. */
14021 while (n >= 8)
14022 {
14023 if (n / 16)
14024 {
14025 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
14026 n -= 16;
14027 }
14028 else
14029 {
14030 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
14031 n -= 8;
14032 }
14033 }
14034
14035 /* Finish the final bytes of the copy. We can always do this in one
14036 instruction. We either copy the exact amount we need, or partially
 14037	     overlap with the previous chunk we copied and copy 8 bytes.  */
14038 if (n == 0)
14039 return true;
14040 else if (n == 1)
14041 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
14042 else if (n == 2)
14043 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
14044 else if (n == 4)
14045 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
14046 else
14047 {
14048 if (n == 3)
14049 {
14050 src = aarch64_move_pointer (src, -1);
14051 dst = aarch64_move_pointer (dst, -1);
14052 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
14053 }
14054 else
14055 {
14056 int move = n - 8;
14057
14058 src = aarch64_move_pointer (src, move);
14059 dst = aarch64_move_pointer (dst, move);
14060 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
14061 }
14062 }
14063
14064 return true;
14065}
14066
141a3ccf
KT
14067/* Split a DImode store of a CONST_INT SRC to MEM DST as two
14068 SImode stores. Handle the case when the constant has identical
14069 bottom and top halves. This is beneficial when the two stores can be
14070 merged into an STP and we avoid synthesising potentially expensive
14071 immediates twice. Return true if such a split is possible. */
14072
14073bool
14074aarch64_split_dimode_const_store (rtx dst, rtx src)
14075{
14076 rtx lo = gen_lowpart (SImode, src);
14077 rtx hi = gen_highpart_mode (SImode, DImode, src);
14078
14079 bool size_p = optimize_function_for_size_p (cfun);
14080
14081 if (!rtx_equal_p (lo, hi))
14082 return false;
14083
14084 unsigned int orig_cost
14085 = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
14086 unsigned int lo_cost
14087 = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
14088
14089 /* We want to transform:
14090 MOV x1, 49370
14091 MOVK x1, 0x140, lsl 16
14092 MOVK x1, 0xc0da, lsl 32
14093 MOVK x1, 0x140, lsl 48
14094 STR x1, [x0]
14095 into:
14096 MOV w1, 49370
14097 MOVK w1, 0x140, lsl 16
14098 STP w1, w1, [x0]
14099 So we want to perform this only when we save two instructions
14100 or more. When optimizing for size, however, accept any code size
14101 savings we can. */
14102 if (size_p && orig_cost <= lo_cost)
14103 return false;
14104
14105 if (!size_p
14106 && (orig_cost <= lo_cost + 1))
14107 return false;
14108
14109 rtx mem_lo = adjust_address (dst, SImode, 0);
14110 if (!aarch64_mem_pair_operand (mem_lo, SImode))
14111 return false;
14112
14113 rtx tmp_reg = gen_reg_rtx (SImode);
14114 aarch64_expand_mov_immediate (tmp_reg, lo);
14115 rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
14116 /* Don't emit an explicit store pair as this may not be always profitable.
14117 Let the sched-fusion logic decide whether to merge them. */
14118 emit_move_insn (mem_lo, tmp_reg);
14119 emit_move_insn (mem_hi, tmp_reg);
14120
14121 return true;
14122}
14123
a3125fc2
CL
14124/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
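/* Libsanitizer computes the shadow address of a byte roughly as
   (addr >> 3) + offset, so returning 1 << 36 here places the AArch64
   shadow region at that offset (a sketch of the usual ASan mapping).  */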
14125
14126static unsigned HOST_WIDE_INT
14127aarch64_asan_shadow_offset (void)
14128{
14129 return (HOST_WIDE_INT_1 << 36);
14130}
14131
d3006da6 14132static bool
445d7826 14133aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
d3006da6
JG
14134 unsigned int align,
14135 enum by_pieces_operation op,
14136 bool speed_p)
14137{
14138 /* STORE_BY_PIECES can be used when copying a constant string, but
14139 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
14140 For now we always fail this and let the move_by_pieces code copy
14141 the string from read-only memory. */
14142 if (op == STORE_BY_PIECES)
14143 return false;
14144
14145 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
14146}
14147
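/* Implement TARGET_GEN_CCMP_FIRST: expand the first comparison of a
   conditional-compare sequence, adding the preparation insns to *PREP_SEQ
   and the compare itself to *GEN_SEQ, and return a comparison of the CC
   register for the following conditional compares to build on.  */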
5f3bc026 14148static rtx
cb4347e8 14149aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
5f3bc026
ZC
14150 int code, tree treeop0, tree treeop1)
14151{
c8012fbc
WD
14152 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
14153 rtx op0, op1;
5f3bc026 14154 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 14155 insn_code icode;
5f3bc026
ZC
14156 struct expand_operand ops[4];
14157
5f3bc026
ZC
14158 start_sequence ();
14159 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
14160
14161 op_mode = GET_MODE (op0);
14162 if (op_mode == VOIDmode)
14163 op_mode = GET_MODE (op1);
14164
14165 switch (op_mode)
14166 {
4e10a5a7
RS
14167 case E_QImode:
14168 case E_HImode:
14169 case E_SImode:
5f3bc026
ZC
14170 cmp_mode = SImode;
14171 icode = CODE_FOR_cmpsi;
14172 break;
14173
4e10a5a7 14174 case E_DImode:
5f3bc026
ZC
14175 cmp_mode = DImode;
14176 icode = CODE_FOR_cmpdi;
14177 break;
14178
4e10a5a7 14179 case E_SFmode:
786e3c06
WD
14180 cmp_mode = SFmode;
14181 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
14182 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
14183 break;
14184
4e10a5a7 14185 case E_DFmode:
786e3c06
WD
14186 cmp_mode = DFmode;
14187 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
14188 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
14189 break;
14190
5f3bc026
ZC
14191 default:
14192 end_sequence ();
14193 return NULL_RTX;
14194 }
14195
c8012fbc
WD
14196 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
14197 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
5f3bc026
ZC
14198 if (!op0 || !op1)
14199 {
14200 end_sequence ();
14201 return NULL_RTX;
14202 }
14203 *prep_seq = get_insns ();
14204 end_sequence ();
14205
c8012fbc
WD
14206 create_fixed_operand (&ops[0], op0);
14207 create_fixed_operand (&ops[1], op1);
5f3bc026
ZC
14208
14209 start_sequence ();
c8012fbc 14210 if (!maybe_expand_insn (icode, 2, ops))
5f3bc026
ZC
14211 {
14212 end_sequence ();
14213 return NULL_RTX;
14214 }
14215 *gen_seq = get_insns ();
14216 end_sequence ();
14217
c8012fbc
WD
14218 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
14219 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
5f3bc026
ZC
14220}
14221
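/* Implement TARGET_GEN_CCMP_NEXT: expand a subsequent comparison of a
   conditional-compare sequence as a conditional compare (CCMP/FCCMP)
   predicated on PREV, combining the two conditions according to BIT_CODE
   (AND or IOR).  */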
14222static rtx
cb4347e8
TS
14223aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
14224 int cmp_code, tree treeop0, tree treeop1, int bit_code)
5f3bc026 14225{
c8012fbc
WD
14226 rtx op0, op1, target;
14227 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
5f3bc026 14228 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 14229 insn_code icode;
5f3bc026 14230 struct expand_operand ops[6];
c8012fbc 14231 int aarch64_cond;
5f3bc026 14232
cb4347e8 14233 push_to_sequence (*prep_seq);
5f3bc026
ZC
14234 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
14235
14236 op_mode = GET_MODE (op0);
14237 if (op_mode == VOIDmode)
14238 op_mode = GET_MODE (op1);
14239
14240 switch (op_mode)
14241 {
4e10a5a7
RS
14242 case E_QImode:
14243 case E_HImode:
14244 case E_SImode:
5f3bc026 14245 cmp_mode = SImode;
c8012fbc 14246 icode = CODE_FOR_ccmpsi;
5f3bc026
ZC
14247 break;
14248
4e10a5a7 14249 case E_DImode:
5f3bc026 14250 cmp_mode = DImode;
c8012fbc 14251 icode = CODE_FOR_ccmpdi;
5f3bc026
ZC
14252 break;
14253
4e10a5a7 14254 case E_SFmode:
786e3c06
WD
14255 cmp_mode = SFmode;
14256 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
14257 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf;
14258 break;
14259
4e10a5a7 14260 case E_DFmode:
786e3c06
WD
14261 cmp_mode = DFmode;
14262 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
14263 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf;
14264 break;
14265
5f3bc026
ZC
14266 default:
14267 end_sequence ();
14268 return NULL_RTX;
14269 }
14270
14271 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
14272 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
14273 if (!op0 || !op1)
14274 {
14275 end_sequence ();
14276 return NULL_RTX;
14277 }
14278 *prep_seq = get_insns ();
14279 end_sequence ();
14280
14281 target = gen_rtx_REG (cc_mode, CC_REGNUM);
c8012fbc 14282 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
5f3bc026 14283
c8012fbc
WD
14284 if (bit_code != AND)
14285 {
14286 prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev),
14287 GET_MODE (XEXP (prev, 0))),
14288 VOIDmode, XEXP (prev, 0), const0_rtx);
14289 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
14290 }
14291
14292 create_fixed_operand (&ops[0], XEXP (prev, 0));
5f3bc026
ZC
14293 create_fixed_operand (&ops[1], target);
14294 create_fixed_operand (&ops[2], op0);
14295 create_fixed_operand (&ops[3], op1);
c8012fbc
WD
14296 create_fixed_operand (&ops[4], prev);
14297 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
5f3bc026 14298
cb4347e8 14299 push_to_sequence (*gen_seq);
5f3bc026
ZC
14300 if (!maybe_expand_insn (icode, 6, ops))
14301 {
14302 end_sequence ();
14303 return NULL_RTX;
14304 }
14305
14306 *gen_seq = get_insns ();
14307 end_sequence ();
14308
c8012fbc 14309 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
5f3bc026
ZC
14310}
14311
14312#undef TARGET_GEN_CCMP_FIRST
14313#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
14314
14315#undef TARGET_GEN_CCMP_NEXT
14316#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
14317
6a569cdd
KT
14318/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
14319 instruction fusion of some sort. */
14320
14321static bool
14322aarch64_macro_fusion_p (void)
14323{
b175b679 14324 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
14325}
14326
14327
14328/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
14329 should be kept together during scheduling. */
14330
14331static bool
14332aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
14333{
14334 rtx set_dest;
14335 rtx prev_set = single_set (prev);
14336 rtx curr_set = single_set (curr);
14337 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
14338 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
14339
14340 if (!aarch64_macro_fusion_p ())
14341 return false;
14342
d7b03373 14343 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
14344 {
14345 /* We are trying to match:
14346 prev (mov) == (set (reg r0) (const_int imm16))
14347 curr (movk) == (set (zero_extract (reg r0)
14348 (const_int 16)
14349 (const_int 16))
14350 (const_int imm16_1)) */
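      /* For example: mov x0, #0x1234 followed by movk x0, #0x5678, lsl 16.  */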
14351
14352 set_dest = SET_DEST (curr_set);
14353
14354 if (GET_CODE (set_dest) == ZERO_EXTRACT
14355 && CONST_INT_P (SET_SRC (curr_set))
14356 && CONST_INT_P (SET_SRC (prev_set))
14357 && CONST_INT_P (XEXP (set_dest, 2))
14358 && INTVAL (XEXP (set_dest, 2)) == 16
14359 && REG_P (XEXP (set_dest, 0))
14360 && REG_P (SET_DEST (prev_set))
14361 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
14362 {
14363 return true;
14364 }
14365 }
14366
d7b03373 14367 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
14368 {
14369
14370 /* We're trying to match:
14371 prev (adrp) == (set (reg r1)
14372 (high (symbol_ref ("SYM"))))
14373 curr (add) == (set (reg r0)
14374 (lo_sum (reg r1)
14375 (symbol_ref ("SYM"))))
14376 Note that r0 need not necessarily be the same as r1, especially
14377 during pre-regalloc scheduling. */
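      /* For example: adrp x1, sym followed by add x0, x1, :lo12:sym.  */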
14378
14379 if (satisfies_constraint_Ush (SET_SRC (prev_set))
14380 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
14381 {
14382 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
14383 && REG_P (XEXP (SET_SRC (curr_set), 0))
14384 && REGNO (XEXP (SET_SRC (curr_set), 0))
14385 == REGNO (SET_DEST (prev_set))
14386 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
14387 XEXP (SET_SRC (curr_set), 1)))
14388 return true;
14389 }
14390 }
14391
d7b03373 14392 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
14393 {
14394
14395 /* We're trying to match:
14396 prev (movk) == (set (zero_extract (reg r0)
14397 (const_int 16)
14398 (const_int 32))
14399 (const_int imm16_1))
14400 curr (movk) == (set (zero_extract (reg r0)
14401 (const_int 16)
14402 (const_int 48))
14403 (const_int imm16_2)) */
14404
14405 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
14406 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
14407 && REG_P (XEXP (SET_DEST (prev_set), 0))
14408 && REG_P (XEXP (SET_DEST (curr_set), 0))
14409 && REGNO (XEXP (SET_DEST (prev_set), 0))
14410 == REGNO (XEXP (SET_DEST (curr_set), 0))
14411 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
14412 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
14413 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
14414 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
14415 && CONST_INT_P (SET_SRC (prev_set))
14416 && CONST_INT_P (SET_SRC (curr_set)))
14417 return true;
14418
14419 }
d7b03373 14420 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
14421 {
14422 /* We're trying to match:
14423 prev (adrp) == (set (reg r0)
14424 (high (symbol_ref ("SYM"))))
14425 curr (ldr) == (set (reg r1)
14426 (mem (lo_sum (reg r0)
14427 (symbol_ref ("SYM")))))
14428 or
14429 curr (ldr) == (set (reg r1)
14430 (zero_extend (mem
14431 (lo_sum (reg r0)
14432 (symbol_ref ("SYM")))))) */
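      /* For example: adrp x0, sym followed by ldr x1, [x0, :lo12:sym].  */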
14433 if (satisfies_constraint_Ush (SET_SRC (prev_set))
14434 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
14435 {
14436 rtx curr_src = SET_SRC (curr_set);
14437
14438 if (GET_CODE (curr_src) == ZERO_EXTEND)
14439 curr_src = XEXP (curr_src, 0);
14440
14441 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
14442 && REG_P (XEXP (XEXP (curr_src, 0), 0))
14443 && REGNO (XEXP (XEXP (curr_src, 0), 0))
14444 == REGNO (SET_DEST (prev_set))
14445 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
14446 XEXP (SET_SRC (prev_set), 0)))
14447 return true;
14448 }
14449 }
cd0cb232 14450
d7b03373 14451 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
00a8574a
WD
14452 && aarch_crypto_can_dual_issue (prev, curr))
14453 return true;
14454
d7b03373 14455 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
3759108f
AP
14456 && any_condjump_p (curr))
14457 {
14458 enum attr_type prev_type = get_attr_type (prev);
14459
509f819a
N
14460 unsigned int condreg1, condreg2;
14461 rtx cc_reg_1;
14462 aarch64_fixed_condition_code_regs (&condreg1, &condreg2);
14463 cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
14464
14465 if (reg_referenced_p (cc_reg_1, PATTERN (curr))
14466 && prev
14467 && modified_in_p (cc_reg_1, prev))
14468 {
 14469	  /* FIXME: this misses some instructions that are considered simple
 14470	     arithmetic instructions for ThunderX.  Simple shifts are missed here.  */
14471 if (prev_type == TYPE_ALUS_SREG
14472 || prev_type == TYPE_ALUS_IMM
14473 || prev_type == TYPE_LOGICS_REG
14474 || prev_type == TYPE_LOGICS_IMM)
14475 return true;
14476 }
3759108f
AP
14477 }
14478
bee7e0fc
AP
14479 if (prev_set
14480 && curr_set
14481 && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
00c7c57f
JB
14482 && any_condjump_p (curr))
14483 {
14484 /* We're trying to match:
14485 prev (alu_insn) == (set (r0) plus ((r0) (r1/imm)))
14486 curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0)
14487 (const_int 0))
14488 (label_ref ("SYM"))
14489 (pc)) */
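      /* For example: add x0, x1, x2 followed by cbz x0, .L1.  */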
14490 if (SET_DEST (curr_set) == (pc_rtx)
14491 && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
14492 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
14493 && REG_P (SET_DEST (prev_set))
14494 && REGNO (SET_DEST (prev_set))
14495 == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
14496 {
14497 /* Fuse ALU operations followed by conditional branch instruction. */
14498 switch (get_attr_type (prev))
14499 {
14500 case TYPE_ALU_IMM:
14501 case TYPE_ALU_SREG:
14502 case TYPE_ADC_REG:
14503 case TYPE_ADC_IMM:
14504 case TYPE_ADCS_REG:
14505 case TYPE_ADCS_IMM:
14506 case TYPE_LOGIC_REG:
14507 case TYPE_LOGIC_IMM:
14508 case TYPE_CSEL:
14509 case TYPE_ADR:
14510 case TYPE_MOV_IMM:
14511 case TYPE_SHIFT_REG:
14512 case TYPE_SHIFT_IMM:
14513 case TYPE_BFM:
14514 case TYPE_RBIT:
14515 case TYPE_REV:
14516 case TYPE_EXTEND:
14517 return true;
14518
14519 default:;
14520 }
14521 }
14522 }
14523
6a569cdd
KT
14524 return false;
14525}
14526
f2879a90
KT
14527/* Return true iff the instruction fusion described by OP is enabled. */
14528
14529bool
14530aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
14531{
14532 return (aarch64_tune_params.fusible_ops & op) != 0;
14533}
14534
350013bc
BC
14535/* If MEM is in the form of [base+offset], extract the two parts
 14536	   of the address and set them in BASE and OFFSET, otherwise return false
 14537	   after clearing BASE and OFFSET.  */
14538
14539bool
14540extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
14541{
14542 rtx addr;
14543
14544 gcc_assert (MEM_P (mem));
14545
14546 addr = XEXP (mem, 0);
14547
14548 if (REG_P (addr))
14549 {
14550 *base = addr;
14551 *offset = const0_rtx;
14552 return true;
14553 }
14554
14555 if (GET_CODE (addr) == PLUS
14556 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
14557 {
14558 *base = XEXP (addr, 0);
14559 *offset = XEXP (addr, 1);
14560 return true;
14561 }
14562
14563 *base = NULL_RTX;
14564 *offset = NULL_RTX;
14565
14566 return false;
14567}
14568
14569/* Types for scheduling fusion. */
14570enum sched_fusion_type
14571{
14572 SCHED_FUSION_NONE = 0,
14573 SCHED_FUSION_LD_SIGN_EXTEND,
14574 SCHED_FUSION_LD_ZERO_EXTEND,
14575 SCHED_FUSION_LD,
14576 SCHED_FUSION_ST,
14577 SCHED_FUSION_NUM
14578};
14579
14580/* If INSN is a load or store with an address in the form of [base+offset],
 14581	   extract the two parts and set them in BASE and OFFSET.  Return the
 14582	   scheduling fusion type of this INSN.  */
14583
14584static enum sched_fusion_type
14585fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
14586{
14587 rtx x, dest, src;
14588 enum sched_fusion_type fusion = SCHED_FUSION_LD;
14589
14590 gcc_assert (INSN_P (insn));
14591 x = PATTERN (insn);
14592 if (GET_CODE (x) != SET)
14593 return SCHED_FUSION_NONE;
14594
14595 src = SET_SRC (x);
14596 dest = SET_DEST (x);
14597
abc52318
KT
14598 machine_mode dest_mode = GET_MODE (dest);
14599
14600 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
350013bc
BC
14601 return SCHED_FUSION_NONE;
14602
14603 if (GET_CODE (src) == SIGN_EXTEND)
14604 {
14605 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
14606 src = XEXP (src, 0);
14607 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
14608 return SCHED_FUSION_NONE;
14609 }
14610 else if (GET_CODE (src) == ZERO_EXTEND)
14611 {
14612 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
14613 src = XEXP (src, 0);
14614 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
14615 return SCHED_FUSION_NONE;
14616 }
14617
14618 if (GET_CODE (src) == MEM && REG_P (dest))
14619 extract_base_offset_in_addr (src, base, offset);
14620 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
14621 {
14622 fusion = SCHED_FUSION_ST;
14623 extract_base_offset_in_addr (dest, base, offset);
14624 }
14625 else
14626 return SCHED_FUSION_NONE;
14627
14628 if (*base == NULL_RTX || *offset == NULL_RTX)
14629 fusion = SCHED_FUSION_NONE;
14630
14631 return fusion;
14632}
14633
14634/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
14635
 14636	   Currently we only support fusing ldr or str instructions, so FUSION_PRI
 14637	   and PRI are only calculated for these instructions.  For other instructions,
 14638	   FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the future, other
 14639	   types of instruction fusion can be added by returning different priorities.
14640
14641 It's important that irrelevant instructions get the largest FUSION_PRI. */
14642
14643static void
14644aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
14645 int *fusion_pri, int *pri)
14646{
14647 int tmp, off_val;
14648 rtx base, offset;
14649 enum sched_fusion_type fusion;
14650
14651 gcc_assert (INSN_P (insn));
14652
14653 tmp = max_pri - 1;
14654 fusion = fusion_load_store (insn, &base, &offset);
14655 if (fusion == SCHED_FUSION_NONE)
14656 {
14657 *pri = tmp;
14658 *fusion_pri = tmp;
14659 return;
14660 }
14661
14662 /* Set FUSION_PRI according to fusion type and base register. */
14663 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
14664
14665 /* Calculate PRI. */
14666 tmp /= 2;
14667
14668 /* INSN with smaller offset goes first. */
14669 off_val = (int)(INTVAL (offset));
14670 if (off_val >= 0)
14671 tmp -= (off_val & 0xfffff);
14672 else
14673 tmp += ((- off_val) & 0xfffff);
14674
14675 *pri = tmp;
14676 return;
14677}
14678
9bca63d4
WD
14679/* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
14680 Adjust priority of sha1h instructions so they are scheduled before
14681 other SHA1 instructions. */
14682
14683static int
14684aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
14685{
14686 rtx x = PATTERN (insn);
14687
14688 if (GET_CODE (x) == SET)
14689 {
14690 x = SET_SRC (x);
14691
14692 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SHA1H)
14693 return priority + 10;
14694 }
14695
14696 return priority;
14697}
14698
350013bc
BC
14699/* Given OPERANDS of consecutive load/store, check if we can merge
14700 them into ldp/stp. LOAD is true if they are load instructions.
14701 MODE is the mode of memory operands. */
14702
14703bool
14704aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
b8506a8a 14705 machine_mode mode)
350013bc
BC
14706{
14707 HOST_WIDE_INT offval_1, offval_2, msize;
14708 enum reg_class rclass_1, rclass_2;
14709 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
14710
14711 if (load)
14712 {
14713 mem_1 = operands[1];
14714 mem_2 = operands[3];
14715 reg_1 = operands[0];
14716 reg_2 = operands[2];
14717 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
14718 if (REGNO (reg_1) == REGNO (reg_2))
14719 return false;
14720 }
14721 else
14722 {
14723 mem_1 = operands[0];
14724 mem_2 = operands[2];
14725 reg_1 = operands[1];
14726 reg_2 = operands[3];
14727 }
14728
bf84ac44
AP
14729 /* The mems cannot be volatile. */
14730 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
14731 return false;
14732
54700e2e
AP
14733 /* If we have SImode and slow unaligned ldp,
 14734	     check that the alignment is at least 8 bytes.  */
14735 if (mode == SImode
14736 && (aarch64_tune_params.extra_tuning_flags
14737 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
14738 && !optimize_size
14739 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
14740 return false;
14741
350013bc
BC
14742 /* Check if the addresses are in the form of [base+offset]. */
14743 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
14744 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
14745 return false;
14746 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
14747 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
14748 return false;
14749
14750 /* Check if the bases are same. */
14751 if (!rtx_equal_p (base_1, base_2))
14752 return false;
14753
14754 offval_1 = INTVAL (offset_1);
14755 offval_2 = INTVAL (offset_2);
14756 msize = GET_MODE_SIZE (mode);
14757 /* Check if the offsets are consecutive. */
14758 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
14759 return false;
14760
14761 /* Check if the addresses are clobbered by load. */
14762 if (load)
14763 {
14764 if (reg_mentioned_p (reg_1, mem_1))
14765 return false;
14766
14767 /* In increasing order, the last load can clobber the address. */
14768 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
14769 return false;
14770 }
14771
14772 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
14773 rclass_1 = FP_REGS;
14774 else
14775 rclass_1 = GENERAL_REGS;
14776
14777 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
14778 rclass_2 = FP_REGS;
14779 else
14780 rclass_2 = GENERAL_REGS;
14781
14782 /* Check if the registers are of same class. */
14783 if (rclass_1 != rclass_2)
14784 return false;
14785
14786 return true;
14787}
14788
14789/* Given OPERANDS of consecutive load/store, check if we can merge
14790 them into ldp/stp by adjusting the offset. LOAD is true if they
14791 are load instructions. MODE is the mode of memory operands.
14792
 14793	   Given the following consecutive stores:
14794
14795 str w1, [xb, 0x100]
14796 str w1, [xb, 0x104]
14797 str w1, [xb, 0x108]
14798 str w1, [xb, 0x10c]
14799
14800 Though the offsets are out of the range supported by stp, we can
14801 still pair them after adjusting the offset, like:
14802
14803 add scratch, xb, 0x100
14804 stp w1, w1, [scratch]
14805 stp w1, w1, [scratch, 0x8]
14806
14807 The peephole patterns detecting this opportunity should guarantee
 14808	   the scratch register is available.  */
14809
14810bool
14811aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
146c2e3a 14812 scalar_mode mode)
350013bc
BC
14813{
14814 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
14815 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
14816 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
14817 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
14818
14819 if (load)
14820 {
14821 reg_1 = operands[0];
14822 mem_1 = operands[1];
14823 reg_2 = operands[2];
14824 mem_2 = operands[3];
14825 reg_3 = operands[4];
14826 mem_3 = operands[5];
14827 reg_4 = operands[6];
14828 mem_4 = operands[7];
14829 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
14830 && REG_P (reg_3) && REG_P (reg_4));
14831 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
14832 return false;
14833 }
14834 else
14835 {
14836 mem_1 = operands[0];
14837 reg_1 = operands[1];
14838 mem_2 = operands[2];
14839 reg_2 = operands[3];
14840 mem_3 = operands[4];
14841 reg_3 = operands[5];
14842 mem_4 = operands[6];
14843 reg_4 = operands[7];
14844 }
 14845	  /* Skip if the memory operand is by itself valid for ldp/stp.  */
14846 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
14847 return false;
14848
bf84ac44
AP
14849 /* The mems cannot be volatile. */
14850 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
14851 || MEM_VOLATILE_P (mem_3) ||MEM_VOLATILE_P (mem_4))
14852 return false;
14853
350013bc
BC
14854 /* Check if the addresses are in the form of [base+offset]. */
14855 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
14856 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
14857 return false;
14858 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
14859 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
14860 return false;
14861 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
14862 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
14863 return false;
14864 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
14865 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
14866 return false;
14867
14868 /* Check if the bases are same. */
14869 if (!rtx_equal_p (base_1, base_2)
14870 || !rtx_equal_p (base_2, base_3)
14871 || !rtx_equal_p (base_3, base_4))
14872 return false;
14873
14874 offval_1 = INTVAL (offset_1);
14875 offval_2 = INTVAL (offset_2);
14876 offval_3 = INTVAL (offset_3);
14877 offval_4 = INTVAL (offset_4);
14878 msize = GET_MODE_SIZE (mode);
14879 /* Check if the offsets are consecutive. */
14880 if ((offval_1 != (offval_2 + msize)
14881 || offval_1 != (offval_3 + msize * 2)
14882 || offval_1 != (offval_4 + msize * 3))
14883 && (offval_4 != (offval_3 + msize)
14884 || offval_4 != (offval_2 + msize * 2)
14885 || offval_4 != (offval_1 + msize * 3)))
14886 return false;
14887
14888 /* Check if the addresses are clobbered by load. */
14889 if (load)
14890 {
14891 if (reg_mentioned_p (reg_1, mem_1)
14892 || reg_mentioned_p (reg_2, mem_2)
14893 || reg_mentioned_p (reg_3, mem_3))
14894 return false;
14895
14896 /* In increasing order, the last load can clobber the address. */
14897 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
14898 return false;
14899 }
14900
54700e2e
AP
14901 /* If we have SImode and slow unaligned ldp,
 14902	     check that the alignment is at least 8 bytes.  */
14903 if (mode == SImode
14904 && (aarch64_tune_params.extra_tuning_flags
14905 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
14906 && !optimize_size
14907 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
14908 return false;
14909
350013bc
BC
14910 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
14911 rclass_1 = FP_REGS;
14912 else
14913 rclass_1 = GENERAL_REGS;
14914
14915 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
14916 rclass_2 = FP_REGS;
14917 else
14918 rclass_2 = GENERAL_REGS;
14919
14920 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
14921 rclass_3 = FP_REGS;
14922 else
14923 rclass_3 = GENERAL_REGS;
14924
14925 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
14926 rclass_4 = FP_REGS;
14927 else
14928 rclass_4 = GENERAL_REGS;
14929
14930 /* Check if the registers are of same class. */
14931 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
14932 return false;
14933
14934 return true;
14935}
14936
14937/* Given OPERANDS of consecutive load/store, this function pairs them
14938 into ldp/stp after adjusting the offset. It depends on the fact
14939 that addresses of load/store instructions are in increasing order.
14940 MODE is the mode of memory operands. CODE is the rtl operator
14941 which should be applied to all memory operands, it's SIGN_EXTEND,
14942 ZERO_EXTEND or UNKNOWN. */
14943
14944bool
14945aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
146c2e3a 14946 scalar_mode mode, RTX_CODE code)
350013bc
BC
14947{
14948 rtx base, offset, t1, t2;
14949 rtx mem_1, mem_2, mem_3, mem_4;
14950 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
14951
14952 if (load)
14953 {
14954 mem_1 = operands[1];
14955 mem_2 = operands[3];
14956 mem_3 = operands[5];
14957 mem_4 = operands[7];
14958 }
14959 else
14960 {
14961 mem_1 = operands[0];
14962 mem_2 = operands[2];
14963 mem_3 = operands[4];
14964 mem_4 = operands[6];
14965 gcc_assert (code == UNKNOWN);
14966 }
14967
14968 extract_base_offset_in_addr (mem_1, &base, &offset);
14969 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
14970
 14971	  /* Adjust the offset so that it can fit in an ldp/stp instruction.  */
14972 msize = GET_MODE_SIZE (mode);
14973 stp_off_limit = msize * 0x40;
14974 off_val = INTVAL (offset);
14975 abs_off = (off_val < 0) ? -off_val : off_val;
14976 new_off = abs_off % stp_off_limit;
14977 adj_off = abs_off - new_off;
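  /* For example, SImode accesses starting at offset 0x104 give
     stp_off_limit == 0x100, adj_off == 0x100 and new_off == 4.  */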
14978
14979 /* Further adjust to make sure all offsets are OK. */
14980 if ((new_off + msize * 2) >= stp_off_limit)
14981 {
14982 adj_off += stp_off_limit;
14983 new_off -= stp_off_limit;
14984 }
14985
14986 /* Make sure the adjustment can be done with ADD/SUB instructions. */
14987 if (adj_off >= 0x1000)
14988 return false;
14989
14990 if (off_val < 0)
14991 {
14992 adj_off = -adj_off;
14993 new_off = -new_off;
14994 }
14995
14996 /* Create new memory references. */
14997 mem_1 = change_address (mem_1, VOIDmode,
14998 plus_constant (DImode, operands[8], new_off));
14999
15000 /* Check if the adjusted address is OK for ldp/stp. */
15001 if (!aarch64_mem_pair_operand (mem_1, mode))
15002 return false;
15003
15004 msize = GET_MODE_SIZE (mode);
15005 mem_2 = change_address (mem_2, VOIDmode,
15006 plus_constant (DImode,
15007 operands[8],
15008 new_off + msize));
15009 mem_3 = change_address (mem_3, VOIDmode,
15010 plus_constant (DImode,
15011 operands[8],
15012 new_off + msize * 2));
15013 mem_4 = change_address (mem_4, VOIDmode,
15014 plus_constant (DImode,
15015 operands[8],
15016 new_off + msize * 3));
15017
15018 if (code == ZERO_EXTEND)
15019 {
15020 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
15021 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
15022 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
15023 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
15024 }
15025 else if (code == SIGN_EXTEND)
15026 {
15027 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
15028 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
15029 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
15030 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
15031 }
15032
15033 if (load)
15034 {
15035 operands[1] = mem_1;
15036 operands[3] = mem_2;
15037 operands[5] = mem_3;
15038 operands[7] = mem_4;
15039 }
15040 else
15041 {
15042 operands[0] = mem_1;
15043 operands[2] = mem_2;
15044 operands[4] = mem_3;
15045 operands[6] = mem_4;
15046 }
15047
15048 /* Emit adjusting instruction. */
f7df4a84 15049 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, adj_off)));
350013bc 15050 /* Emit ldp/stp instructions. */
f7df4a84
RS
15051 t1 = gen_rtx_SET (operands[0], operands[1]);
15052 t2 = gen_rtx_SET (operands[2], operands[3]);
350013bc 15053 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
f7df4a84
RS
15054 t1 = gen_rtx_SET (operands[4], operands[5]);
15055 t2 = gen_rtx_SET (operands[6], operands[7]);
350013bc
BC
15056 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
15057 return true;
15058}
15059
1b1e81f8
JW
15060/* Return 1 if pseudo register should be created and used to hold
15061 GOT address for PIC code. */
15062
15063bool
15064aarch64_use_pseudo_pic_reg (void)
15065{
15066 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
15067}
15068
7b841a12
JW
15069/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
15070
15071static int
15072aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
15073{
15074 switch (XINT (x, 1))
15075 {
15076 case UNSPEC_GOTSMALLPIC:
15077 case UNSPEC_GOTSMALLPIC28K:
15078 case UNSPEC_GOTTINYPIC:
15079 return 0;
15080 default:
15081 break;
15082 }
15083
15084 return default_unspec_may_trap_p (x, flags);
15085}
15086
39252973
KT
15087
15088/* If X is a positive CONST_DOUBLE with a value that is a power of 2
15089 return the log2 of that value. Otherwise return -1. */
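/* For example, CONST_DOUBLE 4.0 yields 2, while 3.0 and -2.0 yield -1.  */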
15090
15091int
15092aarch64_fpconst_pow_of_2 (rtx x)
15093{
15094 const REAL_VALUE_TYPE *r;
15095
15096 if (!CONST_DOUBLE_P (x))
15097 return -1;
15098
15099 r = CONST_DOUBLE_REAL_VALUE (x);
15100
15101 if (REAL_VALUE_NEGATIVE (*r)
15102 || REAL_VALUE_ISNAN (*r)
15103 || REAL_VALUE_ISINF (*r)
15104 || !real_isinteger (r, DFmode))
15105 return -1;
15106
15107 return exact_log2 (real_to_integer (r));
15108}
15109
15110/* If X is a vector of equal CONST_DOUBLE values and that value is
15111 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
15112
15113int
15114aarch64_vec_fpconst_pow_of_2 (rtx x)
15115{
15116 if (GET_CODE (x) != CONST_VECTOR)
15117 return -1;
15118
15119 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
15120 return -1;
15121
15122 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
15123 if (firstval <= 0)
15124 return -1;
15125
15126 for (int i = 1; i < CONST_VECTOR_NUNITS (x); i++)
15127 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
15128 return -1;
15129
15130 return firstval;
15131}
15132
11e554b3
JG
15133/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
15134 to float.
15135
15136 __fp16 always promotes through this hook.
15137 _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
15138 through the generic excess precision logic rather than here. */
15139
c2ec330c
AL
15140static tree
15141aarch64_promoted_type (const_tree t)
15142{
11e554b3
JG
15143 if (SCALAR_FLOAT_TYPE_P (t)
15144 && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
c2ec330c 15145 return float_type_node;
11e554b3 15146
c2ec330c
AL
15147 return NULL_TREE;
15148}
ee62a5a6
RS
15149
15150/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
15151
15152static bool
9acc9cbe 15153aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
ee62a5a6
RS
15154 optimization_type opt_type)
15155{
15156 switch (op)
15157 {
15158 case rsqrt_optab:
9acc9cbe 15159 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
ee62a5a6
RS
15160
15161 default:
15162 return true;
15163 }
15164}
15165
11e554b3
JG
15166/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
15167 if MODE is HFmode, and punt to the generic implementation otherwise. */
15168
15169static bool
7c5bd57a 15170aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
11e554b3
JG
15171{
15172 return (mode == HFmode
15173 ? true
15174 : default_libgcc_floating_mode_supported_p (mode));
15175}
15176
2e5f8203
JG
15177/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
15178 if MODE is HFmode, and punt to the generic implementation otherwise. */
15179
15180static bool
18e2a8b8 15181aarch64_scalar_mode_supported_p (scalar_mode mode)
2e5f8203
JG
15182{
15183 return (mode == HFmode
15184 ? true
15185 : default_scalar_mode_supported_p (mode));
15186}
15187
11e554b3
JG
15188/* Set the value of FLT_EVAL_METHOD.
15189 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
15190
15191 0: evaluate all operations and constants, whose semantic type has at
15192 most the range and precision of type float, to the range and
15193 precision of float; evaluate all other operations and constants to
15194 the range and precision of the semantic type;
15195
15196 N, where _FloatN is a supported interchange floating type
15197 evaluate all operations and constants, whose semantic type has at
15198 most the range and precision of _FloatN type, to the range and
15199 precision of the _FloatN type; evaluate all other operations and
15200 constants to the range and precision of the semantic type;
15201
15202 If we have the ARMv8.2-A extensions then we support _Float16 in native
15203 precision, so we should set this to 16. Otherwise, we support the type,
15204 but want to evaluate expressions in float precision, so set this to
15205 0. */
15206
15207static enum flt_eval_method
15208aarch64_excess_precision (enum excess_precision_type type)
15209{
15210 switch (type)
15211 {
15212 case EXCESS_PRECISION_TYPE_FAST:
15213 case EXCESS_PRECISION_TYPE_STANDARD:
15214 /* We can calculate either in 16-bit range and precision or
15215 32-bit range and precision. Make that decision based on whether
15216 we have native support for the ARMv8.2-A 16-bit floating-point
15217 instructions or not. */
15218 return (TARGET_FP_F16INST
15219 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
15220 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
15221 case EXCESS_PRECISION_TYPE_IMPLICIT:
15222 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
15223 default:
15224 gcc_unreachable ();
15225 }
15226 return FLT_EVAL_METHOD_UNPREDICTABLE;
15227}
15228
b48d6421
KT
15229/* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
15230 scheduled for speculative execution. Reject the long-running division
15231 and square-root instructions. */
15232
15233static bool
15234aarch64_sched_can_speculate_insn (rtx_insn *insn)
15235{
15236 switch (get_attr_type (insn))
15237 {
15238 case TYPE_SDIV:
15239 case TYPE_UDIV:
15240 case TYPE_FDIVS:
15241 case TYPE_FDIVD:
15242 case TYPE_FSQRTS:
15243 case TYPE_FSQRTD:
15244 case TYPE_NEON_FP_SQRT_S:
15245 case TYPE_NEON_FP_SQRT_D:
15246 case TYPE_NEON_FP_SQRT_S_Q:
15247 case TYPE_NEON_FP_SQRT_D_Q:
15248 case TYPE_NEON_FP_DIV_S:
15249 case TYPE_NEON_FP_DIV_D:
15250 case TYPE_NEON_FP_DIV_S_Q:
15251 case TYPE_NEON_FP_DIV_D_Q:
15252 return false;
15253 default:
15254 return true;
15255 }
15256}
15257
51b86113
DM
15258/* Target-specific selftests. */
15259
15260#if CHECKING_P
15261
15262namespace selftest {
15263
15264/* Selftest for the RTL loader.
15265 Verify that the RTL loader copes with a dump from
15266 print_rtx_function. This is essentially just a test that class
15267 function_reader can handle a real dump, but it also verifies
15268 that lookup_reg_by_dump_name correctly handles hard regs.
15269 The presence of hard reg names in the dump means that the test is
15270 target-specific, hence it is in this file. */
15271
15272static void
15273aarch64_test_loading_full_dump ()
15274{
15275 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("aarch64/times-two.rtl"));
15276
15277 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
15278
15279 rtx_insn *insn_1 = get_insn_by_uid (1);
15280 ASSERT_EQ (NOTE, GET_CODE (insn_1));
15281
15282 rtx_insn *insn_15 = get_insn_by_uid (15);
15283 ASSERT_EQ (INSN, GET_CODE (insn_15));
15284 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
15285
15286 /* Verify crtl->return_rtx. */
15287 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
15288 ASSERT_EQ (0, REGNO (crtl->return_rtx));
15289 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
15290}
15291
15292/* Run all target-specific selftests. */
15293
15294static void
15295aarch64_run_selftests (void)
15296{
15297 aarch64_test_loading_full_dump ();
15298}
15299
15300} // namespace selftest
15301
15302#endif /* #if CHECKING_P */
15303
43e9d192
IB
15304#undef TARGET_ADDRESS_COST
15305#define TARGET_ADDRESS_COST aarch64_address_cost
15306
15307/* This hook determines whether unnamed bitfields affect the alignment
15308 of the containing structure. The hook returns true if the structure
15309 should inherit the alignment requirements of an unnamed bitfield's
15310 type. */
15311#undef TARGET_ALIGN_ANON_BITFIELD
15312#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
15313
15314#undef TARGET_ASM_ALIGNED_DI_OP
15315#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
15316
15317#undef TARGET_ASM_ALIGNED_HI_OP
15318#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
15319
15320#undef TARGET_ASM_ALIGNED_SI_OP
15321#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
15322
15323#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
15324#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
15325 hook_bool_const_tree_hwi_hwi_const_tree_true
15326
e1c1ecb0
KT
15327#undef TARGET_ASM_FILE_START
15328#define TARGET_ASM_FILE_START aarch64_start_file
15329
43e9d192
IB
15330#undef TARGET_ASM_OUTPUT_MI_THUNK
15331#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
15332
15333#undef TARGET_ASM_SELECT_RTX_SECTION
15334#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
15335
15336#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
15337#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
15338
15339#undef TARGET_BUILD_BUILTIN_VA_LIST
15340#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
15341
15342#undef TARGET_CALLEE_COPIES
15343#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
15344
15345#undef TARGET_CAN_ELIMINATE
15346#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
15347
1fd8d40c
KT
15348#undef TARGET_CAN_INLINE_P
15349#define TARGET_CAN_INLINE_P aarch64_can_inline_p
15350
43e9d192
IB
15351#undef TARGET_CANNOT_FORCE_CONST_MEM
15352#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
15353
50487d79
EM
15354#undef TARGET_CASE_VALUES_THRESHOLD
15355#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
15356
43e9d192
IB
15357#undef TARGET_CONDITIONAL_REGISTER_USAGE
15358#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
15359
15360/* Only the least significant bit is used for initialization guard
15361 variables. */
15362#undef TARGET_CXX_GUARD_MASK_BIT
15363#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
15364
15365#undef TARGET_C_MODE_FOR_SUFFIX
15366#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
15367
15368#ifdef TARGET_BIG_ENDIAN_DEFAULT
15369#undef TARGET_DEFAULT_TARGET_FLAGS
15370#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
15371#endif
15372
15373#undef TARGET_CLASS_MAX_NREGS
15374#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
15375
119103ca
JG
15376#undef TARGET_BUILTIN_DECL
15377#define TARGET_BUILTIN_DECL aarch64_builtin_decl
15378
a6fc00da
BH
15379#undef TARGET_BUILTIN_RECIPROCAL
15380#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
15381
11e554b3
JG
15382#undef TARGET_C_EXCESS_PRECISION
15383#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
15384
43e9d192
IB
15385#undef TARGET_EXPAND_BUILTIN
15386#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
15387
15388#undef TARGET_EXPAND_BUILTIN_VA_START
15389#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
15390
9697e620
JG
15391#undef TARGET_FOLD_BUILTIN
15392#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
15393
43e9d192
IB
15394#undef TARGET_FUNCTION_ARG
15395#define TARGET_FUNCTION_ARG aarch64_function_arg
15396
15397#undef TARGET_FUNCTION_ARG_ADVANCE
15398#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
15399
15400#undef TARGET_FUNCTION_ARG_BOUNDARY
15401#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
15402
76b0cbf8
RS
15403#undef TARGET_FUNCTION_ARG_PADDING
15404#define TARGET_FUNCTION_ARG_PADDING aarch64_function_arg_padding
15405
43e9d192
IB
15406#undef TARGET_FUNCTION_OK_FOR_SIBCALL
15407#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
15408
15409#undef TARGET_FUNCTION_VALUE
15410#define TARGET_FUNCTION_VALUE aarch64_function_value
15411
15412#undef TARGET_FUNCTION_VALUE_REGNO_P
15413#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
15414
15415#undef TARGET_FRAME_POINTER_REQUIRED
15416#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
15417
fc72cba7
AL
15418#undef TARGET_GIMPLE_FOLD_BUILTIN
15419#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 15420
43e9d192
IB
15421#undef TARGET_GIMPLIFY_VA_ARG_EXPR
15422#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
15423
15424#undef TARGET_INIT_BUILTINS
15425#define TARGET_INIT_BUILTINS aarch64_init_builtins
15426
c64f7d37
WD
15427#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
15428#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
15429 aarch64_ira_change_pseudo_allocno_class
15430
43e9d192
IB
15431#undef TARGET_LEGITIMATE_ADDRESS_P
15432#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
15433
15434#undef TARGET_LEGITIMATE_CONSTANT_P
15435#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
15436
491ec060
WD
15437#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
15438#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
15439 aarch64_legitimize_address_displacement
15440
43e9d192
IB
15441#undef TARGET_LIBGCC_CMP_RETURN_MODE
15442#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
15443
11e554b3
JG
15444#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
15445#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
15446aarch64_libgcc_floating_mode_supported_p
15447
ac2b960f
YZ
15448#undef TARGET_MANGLE_TYPE
15449#define TARGET_MANGLE_TYPE aarch64_mangle_type
15450
43e9d192
IB
15451#undef TARGET_MEMORY_MOVE_COST
15452#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
15453
26e0ff94
WD
15454#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
15455#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
15456
43e9d192
IB
15457#undef TARGET_MUST_PASS_IN_STACK
15458#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
15459
15460/* This target hook should return true if accesses to volatile bitfields
15461 should use the narrowest mode possible. It should return false if these
15462 accesses should use the bitfield container type. */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE aarch64_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE aarch64_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT aarch64_option_print

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE aarch64_promoted_type

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  aarch64_sched_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  aarch64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
  aarch64_get_separate_components

#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
  aarch64_components_for_bb

#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
  aarch64_disqualify_components

#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
  aarch64_emit_prologue_components

#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
  aarch64_emit_epilogue_components

#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
  aarch64_set_handled_components

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  aarch64_builtin_support_vector_misalignment

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
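/* (These bounds broadly match the base+immediate addressing ranges: -256 is
   the most negative signed 9-bit unscaled offset, and 4095 is the largest
   unsigned 12-bit scaled offset for a single-byte access.)  */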

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  aarch64_use_by_pieces_infrastructure_p

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND aarch64_print_operand

#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p

#undef TARGET_OMIT_STRUCT_RETURN_REG
#define TARGET_OMIT_STRUCT_RETURN_REG true

/* The architecture reserves bits 0 and 1 so use bit 2 for descriptors.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4
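/* (The value 4 is 1 << 2, i.e. bit 2, per the comment above.)  */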

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS aarch64_hard_regno_nregs

#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK aarch64_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P aarch64_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  aarch64_hard_regno_call_part_clobbered

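/* Register the AArch64-specific selftests, but only in checking builds.  */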
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
#endif /* #if CHECKING_P */

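/* Build the exported target hook vector from the TARGET_* macros defined
   above; hooks not overridden here keep their defaults from target-def.h.  */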
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"